X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=ids-dump.el;h=a52c8d37e54f03bea6728fa4edae94fc03f52fd5;hb=567d6a11cc0e2451f6759b94b92498d18dadbd53;hp=ceaa578aca47c2680f5a0595a75c6b292e16cc5e;hpb=47c62f48fdd5f0667452cb4bbbc3ff57463740cb;p=chise%2Fids.git diff --git a/ids-dump.el b/ids-dump.el index ceaa578..a52c8d3 100644 --- a/ids-dump.el +++ b/ids-dump.el @@ -1,6 +1,6 @@ ;;; ids-dump.el --- Dump utility of IDS-* files -;; Copyright (C) 2002 MORIOKA Tomohiko +;; Copyright (C) 2002,2003,2004,2005,2009 MORIOKA Tomohiko ;; Author: MORIOKA Tomohiko ;; Keywords: IDS, IDC, Ideographs, UCS, Unicode @@ -26,6 +26,20 @@ (require 'ids) +(defun ids-dump-format-list (ids-list) + (if ids-list + (let (ucs) + (mapconcat + (lambda (c) + (char-to-string + (if (setq ucs + (unless (encode-char c '=ucs 'defined-only) + (or (get-char-attribute c '=ucs@unicode) + (get-char-attribute c '=ucs@iso)))) + (decode-char '=ucs ucs) + c))) + (ids-format-list ids-list) "")))) + (defun ids-dump-insert-line (ccs line-spec code) (let ((chr (decode-char ccs code)) id-list) @@ -34,7 +48,7 @@ (insert (format line-spec code (decode-builtin-char ccs code) (if id-list - (ids-format-list id-list) + (ids-dump-format-list id-list) (char-to-string chr))))))) (defun ids-dump-insert-ccs-ranges (ccs line-spec &rest ranges) @@ -52,6 +66,23 @@ (t (error 'wrong-type-argument range))) (setq ranges (cdr ranges))))) +(defun ids-dump-insert-94x94-ccs-ranges (ccs line-spec &rest ranges) + (let (range code max-code l) + (while ranges + (setq range (car ranges)) + (cond ((consp range) + (setq code (car range) + max-code (cdr range)) + (while (<= code max-code) + (setq l (logand code 255)) + (if (and (<= #x21 l)(<= l #x7E)) + (ids-dump-insert-line ccs line-spec code)) + (setq code (1+ code)))) + ((integerp range) + (ids-dump-insert-line ccs line-spec range)) + (t (error 'wrong-type-argument range))) + (setq ranges (cdr ranges))))) + (defun ids-dump-insert-daikanwa (start end) (let ((i start) mdh-alist @@ -71,7 +102,7 @@ (insert (format "M-%05d \t%c\t%s\n" i (decode-builtin-char 'ideograph-daikanwa i) - (or (ids-format-list + (or (ids-dump-format-list (get-char-attribute chr 'ideographic-structure)) "")))) (when (setq sal (assq i mdh-alist)) @@ -81,7 +112,7 @@ (insert (format "M-%05d'\t%c\t%s\n" i chr - (or (ids-format-list + (or (ids-dump-format-list (get-char-attribute chr 'ideographic-structure)) "")))) (when (setq chr (assq 2 sal)) @@ -89,7 +120,7 @@ (insert (format "M-%05d\"\t%c\t%s\n" i chr - (ids-format-list + (ids-dump-format-list (get-char-attribute chr 'ideographic-structure))))) ) (setq i (1+ i))))) @@ -110,8 +141,9 @@ (format "MH-%04d \t%c\t%s\n" (car cell) chr - (ids-format-list - (get-char-attribute chr 'ideographic-structure))))))) + (or (ids-dump-format-list + (get-char-attribute chr 'ideographic-structure)) + "")))))) (defun ids-dump-insert-jis-x0208-1990 () (let ((row 16) @@ -127,7 +159,7 @@ h l (decode-builtin-char 'japanese-jisx0208-1990 (logior (lsh h 8) l)) - (or (ids-format-list + (or (ideographic-structure-to-ids (get-char-attribute chr 'ideographic-structure)) ""))) (setq cell (1+ cell))) @@ -142,11 +174,89 @@ h l (decode-builtin-char 'japanese-jisx0208-1990 (logior (lsh h 8) l)) - (or (ids-format-list + (or (ideographic-structure-to-ids (get-char-attribute chr 'ideographic-structure)) ""))) (setq cell (1+ cell))))) +(defun ids-dump-insert-big5 (ccs prefix) + (let ((h #x81) + l code chr structure) + (while (<= h #xFE) + (setq l #x40) + (while (<= l #x7E) + (setq chr (make-char ccs h l)) + (setq structure nil) + (when (setq structure + (get-char-attribute chr 'ideographic-structure)) + (insert + (format "%s%02X%02X\t%c\t%s\n" + prefix h l + (decode-builtin-char ccs + (logior (lsh h 8) l)) + (or (ids-format-list + (get-char-attribute chr 'ideographic-structure)) + "")))) + (setq l (1+ l))) + (setq l #xA1) + (while (<= l #xFE) + (setq chr (make-char ccs h l)) + (setq structure nil) + (when (setq structure + (get-char-attribute chr 'ideographic-structure)) + (insert + (format "%s%02X%02X\t%c\t%s\n" + prefix h l + (decode-builtin-char ccs + (logior (lsh h 8) l)) + (or (ids-format-list + (get-char-attribute chr 'ideographic-structure)) + "")))) + (setq l (1+ l))) + (setq h (1+ h))))) + +(defun ids-dump-insert-big5-pua (ccs prefix) + (let ((line-spec (concat prefix "%04X\t%c\t%s\n")) + (h #x81) + l) + (while (<= h #xA0) + (setq l #x40) + (while (<= l #x7E) + (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l)) + (setq l (1+ l))) + (setq l #xA1) + (while (<= l #xFE) + (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l)) + (setq l (1+ l))) + (setq h (1+ h))) + (setq h #xC6) + (setq l #xDE) + (while (<= l #xFE) + (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l)) + (setq l (1+ l))) + (setq h #xC7) + (while (<= h #xC8) + (setq l #x40) + (while (<= l #x7E) + (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l)) + (setq l (1+ l))) + (setq l #xA1) + (while (<= l #xFE) + (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l)) + (setq l (1+ l))) + (setq h (1+ h))) + (setq h #xFA) + (while (<= h #xFE) + (setq l #x40) + (while (<= l #x7E) + (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l)) + (setq l (1+ l))) + (setq l #xA1) + (while (<= l #xFE) + (ids-dump-insert-line ccs line-spec (logior (lsh h 8) l)) + (setq l (1+ l))) + (setq h (1+ h))))) + (defun ids-dump-range (file path func &rest args) (with-temp-buffer (let* ((coding-system-for-write 'utf-8-mcs-er)) @@ -164,6 +274,14 @@ '(#x4E00 . #x9FA5))) ;;;###autoload +(defun ids-dump-ucs-basic@unicode (filename) + (interactive "Fdump IDS-UCS-Basic : ") + (ids-dump-range "IDS-UCS-Basic_u.txt" filename + #'ids-dump-insert-ccs-ranges '=ucs@unicode + "UU+%04X\t%c\t%s\n" + '(#x4E00 . #x9FA5))) + +;;;###autoload (defun ids-dump-ucs-ext-a (filename) (interactive "Fdump IDS-UCS-Ext-A : ") (ids-dump-range "IDS-UCS-Ext-A.txt" filename @@ -227,6 +345,30 @@ '(#x2F800 . #x2FA1D))) ;;;###autoload +(defun ids-dump-cns11643-1 (filename) + (interactive "Fdump IDS-CNS-1 : ") + (ids-dump-range "IDS-CNS-1.txt" filename + #'ids-dump-insert-94x94-ccs-ranges + 'chinese-cns11643-1 "C1-%04X\t%c\t%s\n" + '(#x4421 . #x7D4B))) + +;;;###autoload +(defun ids-dump-cns11643-2 (filename) + (interactive "Fdump IDS-CNS-2 : ") + (ids-dump-range "IDS-CNS-2.txt" filename + #'ids-dump-insert-94x94-ccs-ranges + 'chinese-cns11643-2 "C2-%04X\t%c\t%s\n" + '(#x2121 . #x7244))) + +;;;###autoload +(defun ids-dump-cns11643-3 (filename) + (interactive "Fdump IDS-CNS-3 : ") + (ids-dump-range "IDS-CNS-3.txt" filename + #'ids-dump-insert-94x94-ccs-ranges + 'chinese-cns11643-3 "C3-%04X\t%c\t%s\n" + '(#x2121 . #x6246))) + +;;;###autoload (defun ids-dump-daikanwa-01 (filename) (interactive "Fdump IDS-Daikanwa-01 : ") (ids-dump-range "IDS-Daikanwa-01.txt" filename @@ -324,6 +466,13 @@ (ids-dump-range "IDS-JIS-X0208-1990.txt" filename #'ids-dump-insert-jis-x0208-1990)) +;;;###autoload +(defun ids-dump-big5-cdp (filename) + (interactive "Fdump IDS-CDP : ") + (ids-dump-range "IDS-CDP.txt" filename + #'ids-dump-insert-big5-pua + '=big5-cdp "CDP-")) + ;;; @ End. ;;;