X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=ids-dump.el;h=c2e07988991fdef85331f6a286e7fa154ca6d676;hb=df06e7bde23caa4a84cc316c763b062840c0a85d;hp=e16ff017461390afab2b9f4a8a5c1e262ec455dc;hpb=755483abc81abd0075987a3095245668bcb78ac5;p=chise%2Fids.git diff --git a/ids-dump.el b/ids-dump.el index e16ff01..c2e0798 100644 --- a/ids-dump.el +++ b/ids-dump.el @@ -1,6 +1,6 @@ ;;; ids-dump.el --- Dump utility of IDS-* files -;; Copyright (C) 2002 MORIOKA Tomohiko +;; Copyright (C) 2002,2003,2004,2005,2009,2011,2019 MORIOKA Tomohiko ;; Author: MORIOKA Tomohiko ;; Keywords: IDS, IDC, Ideographs, UCS, Unicode @@ -26,53 +26,19 @@ (require 'ids) -(defvar ids-dump-file-specs-alist - '((ucs-basic "IDS-UCS-Basic.txt" - ids-dump-insert-ccs-ranges - ucs "U+%04X\t%c\t%s\n" - (#x4E00 . #x9FA5)) - (ucs-ext-a "IDS-UCS-Ext-A.txt" - ids-dump-insert-ccs-ranges - ucs "U+%04X\t%c\t%s\n" - (#x3400 . #x4DB5) #xFA1F #xFA23) - (ucs-compat "IDS-UCS-Compat.txt" - ids-dump-insert-ccs-ranges - ucs "U+%04X\t%c\t%s\n" - (#xF900 . #xFA1E) (#xFA20 . #xFA22) (#xFA24 . #xFA2D)) - (ucs-ext-b-1 "IDS-UCS-Ext-B-1.txt" - ids-dump-insert-ccs-ranges - ucs "U-%08X\t%c\t%s\n" - (#x20000 . #x21FFF)) - (ucs-ext-b-2 "IDS-UCS-Ext-B-2.txt" - ids-dump-insert-ccs-ranges - ucs "U-%08X\t%c\t%s\n" - (#x22000 . #x23FFF)) - (ucs-ext-b-3 "IDS-UCS-Ext-B-3.txt" - ids-dump-insert-ccs-ranges - ucs "U-%08X\t%c\t%s\n" - (#x24000 . #x25FFF)) - (ucs-ext-b-4 "IDS-UCS-Ext-B-4.txt" - ids-dump-insert-ccs-ranges - ucs "U-%08X\t%c\t%s\n" - (#x26000 . #x27FFF)) - (ucs-ext-b-5 "IDS-UCS-Ext-B-5.txt" - ids-dump-insert-ccs-ranges - ucs "U-%08X\t%c\t%s\n" - (#x28000 . #x29FFF)) - (ucs-ext-b-6 "IDS-UCS-Ext-B-6.txt" - ids-dump-insert-ccs-ranges - ucs "U-%08X\t%c\t%s\n" - (#x2A000 . #x2A6D6)) - (ucs-compat-supplement "IDS-UCS-Compat-Supplement.txt" - ids-dump-insert-ccs-ranges - ucs "U-%08X\t%c\t%s\n" - (#x2F800 . #x2FA1D)) - (daikanwa "IDS-Daikanwa.txt" ids-dump-insert-daikanwa) - (cbeta "IDS-CBETA.txt" - ids-dump-insert-ccs-ranges - ideograph-cbeta "CB%05d\t%c\t%s\n" - (1 . 13363)) - )) +(defun ids-dump-format-list (ids-list) + (if ids-list + (let (ucs) + (mapconcat + (lambda (c) + (char-to-string + (if (setq ucs + (unless (encode-char c '=ucs 'defined-only) + (or (get-char-attribute c '=ucs@unicode) + (get-char-attribute c '=ucs@iso)))) + (decode-char '=ucs ucs) + c))) + (ids-format-list ids-list) "")))) (defun ids-dump-insert-line (ccs line-spec code) (let ((chr (decode-char ccs code)) @@ -82,7 +48,7 @@ (insert (format line-spec code (decode-builtin-char ccs code) (if id-list - (ids-format-list id-list) + (ids-dump-format-list id-list) (char-to-string chr))))))) (defun ids-dump-insert-ccs-ranges (ccs line-spec &rest ranges) @@ -100,22 +66,25 @@ (t (error 'wrong-type-argument range))) (setq ranges (cdr ranges))))) -(defun ids-dump-ranges (range filename) - (with-temp-buffer - (let* ((coding-system-for-write 'utf-8) - (spec (assq range ids-dump-file-specs-alist)) - (file (nth 1 spec)) - (func (nth 2 spec)) - (args (nthcdr 3 spec))) - (if (file-directory-p filename) - (setq filename (expand-file-name file filename))) - (insert ";; -*- coding: utf-8 -*-\n") - (apply func args) - (write-region (point-min)(point-max) - filename)))) +(defun ids-dump-insert-94x94-ccs-ranges (ccs line-spec &rest ranges) + (let (range code max-code l) + (while ranges + (setq range (car ranges)) + (cond ((consp range) + (setq code (car range) + max-code (cdr range)) + (while (<= code max-code) + (setq l (logand code 255)) + (if (and (<= #x21 l)(<= l #x7E)) + (ids-dump-insert-line ccs line-spec code)) + (setq code (1+ code)))) + ((integerp range) + (ids-dump-insert-line ccs line-spec range)) + (t (error 'wrong-type-argument range))) + (setq ranges (cdr ranges))))) -(defun ids-dump-insert-daikanwa () - (let ((i 1) +(defun ids-dump-insert-daikanwa (start end) + (let ((i start) mdh-alist chr sal) (map-char-attribute @@ -128,13 +97,14 @@ (cdr (assq (car val) mdh-alist))))) nil) 'morohashi-daikanwa) - (while (<= i 49964) + (while (<= i end) (when (setq chr (decode-char 'ideograph-daikanwa i)) (insert (format "M-%05d \t%c\t%s\n" i (decode-builtin-char 'ideograph-daikanwa i) - (ids-format-list - (get-char-attribute chr 'ideographic-structure))))) + (or (ids-dump-format-list + (get-char-attribute chr 'ideographic-structure)) + "")))) (when (setq sal (assq i mdh-alist)) (setq sal (cdr sal)) (when (setq chr (assq 1 sal)) @@ -142,38 +112,368 @@ (insert (format "M-%05d'\t%c\t%s\n" i chr - (ids-format-list - (get-char-attribute chr 'ideographic-structure))))) + (or (ids-dump-format-list + (get-char-attribute chr 'ideographic-structure)) + "")))) (when (setq chr (assq 2 sal)) (setq chr (cdr chr)) (insert (format "M-%05d\"\t%c\t%s\n" i chr - (ids-format-list + (ids-dump-format-list (get-char-attribute chr 'ideographic-structure))))) ) - (setq i (1+ i))) - (setq sal (sort (cdr (assq 'ho mdh-alist)) - (lambda (a b) - (< (car a)(car b))))) + (setq i (1+ i))))) + +(defun ids-dump-insert-daikanwa-hokan () + (let (chr sal) + (map-char-attribute + (lambda (key val) + (when (and (eq (car val) 'ho) + (null (nthcdr 2 val))) + (setq sal (cons (cons (nth 1 val) key) sal))) + nil) + 'morohashi-daikanwa) + (setq sal (sort sal (lambda (a b) (< (car a)(car b))))) (dolist (cell sal) (setq chr (cdr cell)) (insert (format "MH-%04d \t%c\t%s\n" (car cell) chr - (ids-format-list - (get-char-attribute chr 'ideographic-structure))))))) + (or (ids-dump-format-list + (get-char-attribute chr 'ideographic-structure)) + "")))))) + +(defun ids-dump-insert-jis-x0208-1990 () + (let ((row 16) + cell h l code chr) + (while (<= row 83) + (setq h (+ row 32)) + (setq cell 1) + (while (<= cell 94) + (setq l (+ cell 32)) + (setq chr (make-char 'japanese-jisx0208-1990 h l)) + (insert + (format "J90-%02X%02X\t%c\t%s\n" + h l + (decode-builtin-char 'japanese-jisx0208-1990 + (logior (lsh h 8) l)) + (or (ideographic-structure-to-ids + (get-char-attribute chr 'ideographic-structure)) + ""))) + (setq cell (1+ cell))) + (setq row (1+ row))) + (setq h (+ row 32)) + (setq cell 1) + (while (<= cell 6) + (setq l (+ cell 32)) + (setq chr (make-char 'japanese-jisx0208-1990 h l)) + (insert + (format "J90-%02X%02X\t%c\t%s\n" + h l + (decode-builtin-char 'japanese-jisx0208-1990 + (logior (lsh h 8) l)) + (or (ideographic-structure-to-ids + (get-char-attribute chr 'ideographic-structure)) + ""))) + (setq cell (1+ cell))))) + +(defun ids-dump-insert-big5 (ccs prefix) + (let ((h #x81) + l code chr structure) + (while (<= h #xFE) + (setq l #x40) + (while (<= l #x7E) + (setq chr (make-char ccs h l)) + (setq structure nil) + (when (setq structure + (get-char-attribute chr 'ideographic-structure)) + (insert + (format "%s%02X%02X\t%c\t%s\n" + prefix h l + (decode-builtin-char ccs + (logior (lsh h 8) l)) + (or (ids-format-list + (get-char-attribute chr 'ideographic-structure)) + "")))) + (setq l (1+ l))) + (setq l #xA1) + (while (<= l #xFE) + (setq chr (make-char ccs h l)) + (setq structure nil) + (when (setq structure + (get-char-attribute chr 'ideographic-structure)) + (insert + (format "%s%02X%02X\t%c\t%s\n" + prefix h l + (decode-builtin-char ccs + (logior (lsh h 8) l)) + (or (ids-format-list + (get-char-attribute chr 'ideographic-structure)) + "")))) + (setq l (1+ l))) + (setq h (1+ h))))) + +(defun ids-dump-insert-big5-pua (ccs prefix) + (let ((line-spec (concat prefix "%04X\t%c\t%s\n")) + (h #x81) + l) + (while (<= h #xA0) + (setq l #x40) + (while (<= l #x7E) + (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l)) + (setq l (1+ l))) + (setq l #xA1) + (while (<= l #xFE) + (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l)) + (setq l (1+ l))) + (setq h (1+ h))) + (setq h #xC6) + (setq l #xDE) + (while (<= l #xFE) + (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l)) + (setq l (1+ l))) + (setq h #xC7) + (while (<= h #xC8) + (setq l #x40) + (while (<= l #x7E) + (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l)) + (setq l (1+ l))) + (setq l #xA1) + (while (<= l #xFE) + (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l)) + (setq l (1+ l))) + (setq h (1+ h))) + (setq h #xFA) + (while (<= h #xFE) + (setq l #x40) + (while (<= l #x7E) + (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l)) + (setq l (1+ l))) + (setq l #xA1) + (while (<= l #xFE) + (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l)) + (setq l (1+ l))) + (setq h (1+ h))))) + +(defun ids-dump-range (file path func &rest args) + (with-temp-buffer + (let* ((coding-system-for-write 'utf-8-mcs-er)) + (if (file-directory-p path) + (setq path (expand-file-name file path))) + (insert ";; -*- coding: utf-8-mcs-er -*-\n") + (apply func args) + (write-region (point-min)(point-max) path)))) + +;;;###autoload +(defun ids-dump-ucs-basic (filename) + (interactive "Fdump IDS-UCS-Basic : ") + (ids-dump-range "IDS-UCS-Basic.txt" filename + #'ids-dump-insert-ccs-ranges 'ucs "U+%04X\t%c\t%s\n" + '(#x4E00 . #x9FEA))) + +;;;###autoload +(defun ids-dump-ucs-basic@unicode (filename) + (interactive "Fdump IDS-UCS-Basic : ") + (ids-dump-range "IDS-UCS-Basic_u.txt" filename + #'ids-dump-insert-ccs-ranges '=ucs@unicode + "UU+%04X\t%c\t%s\n" + '(#x4E00 . #x9FA5))) + +;;;###autoload +(defun ids-dump-ucs-ext-a (filename) + (interactive "Fdump IDS-UCS-Ext-A : ") + (ids-dump-range "IDS-UCS-Ext-A.txt" filename + #'ids-dump-insert-ccs-ranges 'ucs "U+%04X\t%c\t%s\n" + '(#x3400 . #x4DB5) #xFA1F #xFA23)) + +;;;###autoload +(defun ids-dump-ucs-compat (filename) + (interactive "Fdump IDS-UCS-Compat : ") + (ids-dump-range "IDS-UCS-Compat.txt" filename + #'ids-dump-insert-ccs-ranges 'ucs "U+%04X\t%c\t%s\n" + '(#xF900 . #xFA1E) '(#xFA20 . #xFA22) '(#xFA24 . #xFA2D))) + +;;;###autoload +(defun ids-dump-ucs-ext-b-1 (filename) + (interactive "Fdump IDS-UCS-Ext-B-1 : ") + (ids-dump-range "IDS-UCS-Ext-B-1.txt" filename + #'ids-dump-insert-ccs-ranges 'ucs "U-%08X\t%c\t%s\n" + '(#x20000 . #x21FFF))) + +;;;###autoload +(defun ids-dump-ucs-ext-b-2 (filename) + (interactive "Fdump IDS-UCS-Ext-B-2 : ") + (ids-dump-range "IDS-UCS-Ext-B-2.txt" filename + #'ids-dump-insert-ccs-ranges 'ucs "U-%08X\t%c\t%s\n" + '(#x22000 . #x23FFF))) + +;;;###autoload +(defun ids-dump-ucs-ext-b-3 (filename) + (interactive "Fdump IDS-UCS-Ext-B-3 : ") + (ids-dump-range "IDS-UCS-Ext-B-3.txt" filename + #'ids-dump-insert-ccs-ranges 'ucs "U-%08X\t%c\t%s\n" + '(#x24000 . #x25FFF))) + +;;;###autoload +(defun ids-dump-ucs-ext-b-4 (filename) + (interactive "Fdump IDS-UCS-Ext-B-4 : ") + (ids-dump-range "IDS-UCS-Ext-B-4.txt" filename + #'ids-dump-insert-ccs-ranges 'ucs "U-%08X\t%c\t%s\n" + '(#x26000 . #x27FFF))) + +;;;###autoload +(defun ids-dump-ucs-ext-b-5 (filename) + (interactive "Fdump IDS-UCS-Ext-B-5 : ") + (ids-dump-range "IDS-UCS-Ext-B-5.txt" filename + #'ids-dump-insert-ccs-ranges 'ucs "U-%08X\t%c\t%s\n" + '(#x28000 . #x29FFF))) + +;;;###autoload +(defun ids-dump-ucs-ext-b-6 (filename) + (interactive "Fdump IDS-UCS-Ext-B-6 : ") + (ids-dump-range "IDS-UCS-Ext-B-6.txt" filename + #'ids-dump-insert-ccs-ranges 'ucs "U-%08X\t%c\t%s\n" + '(#x2A000 . #x2A6D6))) + +;;;###autoload +(defun ids-dump-ucs-compat-supplement (filename) + (interactive "Fdump IDS-UCS-Compat-Supplement : ") + (ids-dump-range "IDS-UCS-Compat-Supplement.txt" filename + #'ids-dump-insert-ccs-ranges 'ucs "U-%08X\t%c\t%s\n" + '(#x2F800 . #x2FA1D))) + +;;;###autoload +(defun ids-dump-cns11643-1 (filename) + (interactive "Fdump IDS-CNS-1 : ") + (ids-dump-range "IDS-CNS-1.txt" filename + #'ids-dump-insert-94x94-ccs-ranges + 'chinese-cns11643-1 "C1-%04X\t%c\t%s\n" + '(#x4421 . #x7D4B))) + +;;;###autoload +(defun ids-dump-cns11643-2 (filename) + (interactive "Fdump IDS-CNS-2 : ") + (ids-dump-range "IDS-CNS-2.txt" filename + #'ids-dump-insert-94x94-ccs-ranges + 'chinese-cns11643-2 "C2-%04X\t%c\t%s\n" + '(#x2121 . #x7244))) + +;;;###autoload +(defun ids-dump-cns11643-3 (filename) + (interactive "Fdump IDS-CNS-3 : ") + (ids-dump-range "IDS-CNS-3.txt" filename + #'ids-dump-insert-94x94-ccs-ranges + 'chinese-cns11643-3 "C3-%04X\t%c\t%s\n" + '(#x2121 . #x6246))) + +;;;###autoload +(defun ids-dump-daikanwa-01 (filename) + (interactive "Fdump IDS-Daikanwa-01 : ") + (ids-dump-range "IDS-Daikanwa-01.txt" filename + #'ids-dump-insert-daikanwa 00001 01449)) + +;;;###autoload +(defun ids-dump-daikanwa-02 (filename) + (interactive "Fdump IDS-Daikanwa-02 : ") + (ids-dump-range "IDS-Daikanwa-02.txt" filename + #'ids-dump-insert-daikanwa 01450 04674)) + +;;;###autoload +(defun ids-dump-daikanwa-03 (filename) + (interactive "Fdump IDS-Daikanwa-03 : ") + (ids-dump-range "IDS-Daikanwa-03.txt" filename + #'ids-dump-insert-daikanwa 04675 07410)) + +;;;###autoload +(defun ids-dump-daikanwa-04 (filename) + (interactive "Fdump IDS-Daikanwa-04 : ") + (ids-dump-range "IDS-Daikanwa-04.txt" filename + #'ids-dump-insert-daikanwa 07411 11529)) + +;;;###autoload +(defun ids-dump-daikanwa-05 (filename) + (interactive "Fdump IDS-Daikanwa-05 : ") + (ids-dump-range "IDS-Daikanwa-05.txt" filename + #'ids-dump-insert-daikanwa 11530 14414)) + +;;;###autoload +(defun ids-dump-daikanwa-06 (filename) + (interactive "Fdump IDS-Daikanwa-06 : ") + (ids-dump-range "IDS-Daikanwa-06.txt" filename + #'ids-dump-insert-daikanwa 14415 17574)) + +;;;###autoload +(defun ids-dump-daikanwa-07 (filename) + (interactive "Fdump IDS-Daikanwa-07 : ") + (ids-dump-range "IDS-Daikanwa-07.txt" filename + #'ids-dump-insert-daikanwa 17575 22677)) + +;;;###autoload +(defun ids-dump-daikanwa-08 (filename) + (interactive "Fdump IDS-Daikanwa-08 : ") + (ids-dump-range "IDS-Daikanwa-08.txt" filename + #'ids-dump-insert-daikanwa 22678 28107)) + +;;;###autoload +(defun ids-dump-daikanwa-09 (filename) + (interactive "Fdump IDS-Daikanwa-09 : ") + (ids-dump-range "IDS-Daikanwa-09.txt" filename + #'ids-dump-insert-daikanwa 28108 32803)) + +;;;###autoload +(defun ids-dump-daikanwa-10 (filename) + (interactive "Fdump IDS-Daikanwa-10 : ") + (ids-dump-range "IDS-Daikanwa-10.txt" filename + #'ids-dump-insert-daikanwa 32804 38699)) + +;;;###autoload +(defun ids-dump-daikanwa-11 (filename) + (interactive "Fdump IDS-Daikanwa-11 : ") + (ids-dump-range "IDS-Daikanwa-11.txt" filename + #'ids-dump-insert-daikanwa 38700 42209)) + +;;;###autoload +(defun ids-dump-daikanwa-12 (filename) + (interactive "Fdump IDS-Daikanwa-12 : ") + (ids-dump-range "IDS-Daikanwa-12.txt" filename + #'ids-dump-insert-daikanwa 42210 48902)) + +;;;###autoload +(defun ids-dump-daikanwa-index (filename) + (interactive "Fdump IDS-Daikanwa-dx : ") + (ids-dump-range "IDS-Daikanwa-dx.txt" filename + #'ids-dump-insert-daikanwa 48903 49964)) + +;;;###autoload +(defun ids-dump-daikanwa-hokan (filename) + (interactive "Fdump IDS-Daikanwa-ho : ") + (ids-dump-range "IDS-Daikanwa-ho.txt" filename + #'ids-dump-insert-daikanwa-hokan)) + +;;;###autoload +(defun ids-dump-cbeta (filename) + (interactive "Fdump IDS-CBETA : ") + (ids-dump-range "IDS-CBETA.txt" filename + #'ids-dump-insert-ccs-ranges + 'ideograph-cbeta "CB%05d\t%c\t%s\n" + '(1 . 13363))) -(dolist (spec ids-dump-file-specs-alist) - (eval `(defun ,(intern (concat "ids-dump-" (symbol-name (car spec)))) - (filename) - (interactive ,(concat "Fdump " - (file-name-sans-extension (nth 1 spec)) - " : ")) - (ids-dump-ranges ',(car spec) filename)))) +;;;###autoload +(defun ids-dump-jis-x0208-1990 (filename) + (interactive "Fdump IDS-JIS-X0208-1990 : ") + (ids-dump-range "IDS-JIS-X0208-1990.txt" filename + #'ids-dump-insert-jis-x0208-1990)) +;;;###autoload +(defun ids-dump-big5-cdp (filename) + (interactive "Fdump IDS-CDP : ") + (ids-dump-range "IDS-CDP.txt" filename + #'ids-dump-insert-big5-pua + '=big5-cdp "CDP-")) + ;;; @ End. ;;;