X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Futf-2000%2Fchar-db-util.el;h=60c70c67603538684a08038b23b12458d9f28f0c;hb=1c0dcd37222b5cd57e184af3593bfdb82bd942a8;hp=4501ebc19bcf93bc1886296d66e90461aa9f3ea1;hpb=0ebcaf2b5d0ded3654e56f6e5099d1a64e11493d;p=chise%2Fxemacs-chise.git- diff --git a/lisp/utf-2000/char-db-util.el b/lisp/utf-2000/char-db-util.el index 4501ebc..60c70c6 100644 --- a/lisp/utf-2000/char-db-util.el +++ b/lisp/utf-2000/char-db-util.el @@ -61,6 +61,16 @@ ("So" symbol other) )) +(defconst ideographic-radicals + (let ((v (make-vector 215 nil)) + (i 1)) + (while (< i 215) + (aset v i (int-char (+ #x2EFF i))) + (setq i (1+ i))) + (if (< (charset-iso-final-char (car (split-char (aref v 34)))) ?0) + (aset v 34 (make-char 'chinese-gb2312 #x62 #x3A))) + v)) + (defun char-attribute-name< (ka kb) (cond ((find-charset ka) @@ -71,25 +81,28 @@ (charset-dimension kb)) (cond ((= (charset-chars ka)(charset-chars kb)) (cond - ((>= (charset-final ka) ?@) - (if (>= (charset-final kb) ?@) - (< (charset-final ka) - (charset-final kb)) + ((>= (charset-iso-final-char ka) ?@) + (if (>= (charset-iso-final-char kb) ?@) + (< (charset-iso-final-char ka) + (charset-iso-final-char kb)) t)) - ((>= (charset-final ka) ?0) + ((>= (charset-iso-final-char ka) ?0) (cond - ((>= (charset-final kb) ?@) + ((>= (charset-iso-final-char kb) ?@) nil) - ((>= (charset-final kb) ?0) - (< (charset-final ka) - (charset-final kb))) + ((>= (charset-iso-final-char kb) ?0) + (< (charset-iso-final-char ka) + (charset-iso-final-char kb))) (t))))) ((<= (charset-chars ka)(charset-chars kb))))) (t (< (charset-dimension ka) (charset-dimension kb)) ))) - (t))) + ((symbolp kb) + nil) + (t + t))) ((find-charset kb) t) ((symbolp ka) @@ -102,7 +115,8 @@ (defun insert-char-data (char) (let ((data (char-attribute-alist char)) - cell ret name has-long-ccs-name rest) + cell ret has-long-ccs-name rest + radical strokes) (when data (save-restriction (narrow-to-region (point)(point)) @@ -139,9 +153,9 @@ ) (when (setq cell (assq '->ucs data)) (setq cell (cdr cell)) - (insert (format "(->ucs\t\t. #x%04X) + (insert (format "(->ucs\t\t. #x%04X)\t; %c " - cell)) + cell (decode-char 'ucs cell))) (setq data (del-alist '->ucs data)) ) (when (setq cell (assq 'general-category data)) @@ -176,7 +190,8 @@ cell)) (setq data (del-alist 'mirrored data)) ) - (when (setq cell (assq 'decimal-digit-value data)) + (cond + ((setq cell (assq 'decimal-digit-value data)) (setq cell (cdr cell)) (insert (format "(decimal-digit-value . %S) " @@ -197,13 +212,113 @@ (setq data (del-alist 'numeric-value data)) ) ) + (t + (when (setq cell (assq 'digit-value data)) + (setq cell (cdr cell)) + (insert (format "(digit-value\t. %S) + " + cell)) + (setq data (del-alist 'digit-value data)) + ) + (when (setq cell (assq 'numeric-value data)) + (setq cell (cdr cell)) + (insert (format "(numeric-value\t. %S) + " + cell)) + (setq data (del-alist 'numeric-value data)) + ))) (when (setq cell (assq 'iso-10646-comment data)) + (setq cell (cdr cell)) + (insert (format "(iso-10646-comment\t. %S) + " + cell)) + (setq data (del-alist 'iso-10646-comment data)) + ) + (when (setq cell (assq 'morohashi-daikanwa data)) + (setq cell (cdr cell)) + (insert (format "(morohashi-daikanwa\t%s) + " + (mapconcat (function prin1-to-string) cell " "))) + (setq data (del-alist 'morohashi-daikanwa data)) + ) + (setq radical nil) + (when (setq cell (assq 'ideographic-radical data)) + (setq radical (cdr cell)) + (insert (format "(ideographic-radical . %S)\t; %c + " + radical + (aref ideographic-radicals radical))) + (setq data (del-alist 'ideographic-radical data)) + ) + (when (setq cell (assq 'kangxi-radical data)) + (setq cell (cdr cell)) + (unless (eq cell radical) + (insert (format "(kangxi-radical\t . %S)\t; %c + " + cell + (aref ideographic-radicals cell))) + (setq radical cell)) + (setq data (del-alist 'kangxi-radical data)) + ) + (when (setq cell (assq 'japanese-radical data)) + (setq cell (cdr cell)) + (unless (eq cell radical) + (insert (format "(japanese-radical . %S)\t; %c + " + cell + (aref ideographic-radicals cell))) + (setq radical cell)) + (setq data (del-alist 'japanese-radical data)) + ) + (when (setq cell (assq 'cns-radical data)) + (setq cell (cdr cell)) + (insert (format "(cns-radical\t . %S)\t; %c + " + cell + (aref ideographic-radicals cell))) + (setq data (del-alist 'cns-radical data)) + ) + (setq strokes nil) + (cond + ((setq cell (assq 'ideographic-strokes data)) + (setq strokes (cdr cell)) + (insert (format "(ideographic-strokes . %S) + " + strokes)) + (setq data (del-alist 'ideographic-strokes data)) + (when (setq cell (assq 'kangxi-strokes data)) + (setq cell (cdr cell)) + (unless (eq cell strokes) + (insert (format "(kangxi-strokes\t . %S) + " + cell)) + (setq strokes cell)) + (setq data (del-alist 'kangxi-strokes data)) + ) + (when (setq cell (assq 'japanese-strokes data)) (setq cell (cdr cell)) - (insert (format "(iso-10646-comment\t . %S) + (unless (eq cell strokes) + (insert (format "(japanese-strokes\t . %S) + " + cell)) + (setq strokes cell)) + (setq data (del-alist 'japanese-strokes data)) + ) + (when (setq cell (assq 'total-strokes data)) + (setq cell (cdr cell)) + (insert (format "(total-strokes\t . %S) " cell)) - (setq data (del-alist 'iso-10646-comment data)) + (setq data (del-alist 'total-strokes data)) ) + ) + ((setq cell (assq 'total-strokes data)) + (setq cell (cdr cell)) + (insert (format "(total-strokes\t. %S) + " + cell)) + (setq data (del-alist 'total-strokes data)) + )) (when (setq cell (assq '->decomposition data)) (setq cell (cdr cell)) (insert (format "(->decomposition\t%s) @@ -232,9 +347,9 @@ cell " "))) (setq data (del-alist '->uppercase data)) ) - (when (setq cell (assq '->titlecase data)) + (when (setq cell (assq '->lowercase data)) (setq cell (cdr cell)) - (insert (format "(->titlecase\t%s) + (insert (format "(->lowercase\t%s) " (mapconcat (lambda (code) (cond ((symbolp code) @@ -244,11 +359,11 @@ (t (format "\n %S" code)))) cell " "))) - (setq data (del-alist '->titlecase data)) + (setq data (del-alist '->lowercase data)) ) - (when (setq cell (assq '->lowercase data)) + (when (setq cell (assq '->titlecase data)) (setq cell (cdr cell)) - (insert (format "(->lowercase\t%s) + (insert (format "(->titlecase\t%s) " (mapconcat (lambda (code) (cond ((symbolp code) @@ -258,7 +373,7 @@ (t (format "\n %S" code)))) cell " "))) - (setq data (del-alist '->lowercase data)) + (setq data (del-alist '->titlecase data)) ) (setq data (sort data @@ -279,17 +394,39 @@ (while data (setq cell (car data)) (cond ((setq ret (find-charset (car cell))) - (insert (format (if has-long-ccs-name - "(%-26s %s) + (or (string-match "^mojikyo-pj-" + (symbol-name (charset-name ret))) + (insert + (format + (if has-long-ccs-name + (if (memq ret + (list (find-charset 'ideograph-daikanwa) + (find-charset 'mojikyo))) + "(%-26s . %05d)\t; %c " - "(%-18s %s) + "(%-26s . #x%X)\t; %c " - ) - (charset-name ret) - (mapconcat (lambda (b) - (format "#x%02X" b) - ) - (cdr cell) " ")))) + ) + (if (memq ret + (list (find-charset 'ideograph-daikanwa) + (find-charset 'mojikyo))) + "(%-18s . %05d)\t; %c + " + "(%-18s . #x%X)\t; %c + " + )) + (charset-name ret) + (if (= (charset-iso-graphic-plane ret) 1) + (logior (cdr cell) + (cond ((= (charset-dimension ret) 1) + #x80) + ((= (charset-dimension ret) 2) + #x8080) + ((= (charset-dimension ret) 3) + #x808080) + (t 0))) + (cdr cell)) + (decode-builtin-char ret (cdr cell)))))) ((string-match "^->" (symbol-name (car cell))) (insert (format "(%-18s %s) @@ -304,9 +441,15 @@ (format "\n %S" code)))) (cdr cell) " ")))) ((consp (cdr cell)) - (insert (format "%S + (insert (format "(%-18s %s) " - cell))) + (car cell) + (mapconcat (function prin1-to-string) + (cdr cell) " ")))) + ((eq (car cell) 'jisx0208-1978/4X) + (insert (format "(%-18s . #x%04X) + " + (car cell)(cdr cell)))) (t (insert (format "(%-18s . %S) " @@ -321,24 +464,25 @@ (tabify (point-min)(point-max)) )))) -(defun insert-char-range-data (min max) - (let ((code min) - char - variants) - (while (<= code max) - (setq char (int-char code)) - (insert-char-data char) - (setq variants (char-variants char)) - (while variants - (insert-char-data (car variants)) - (setq variants (cdr variants))) - (setq code (1+ code)) - ))) - -(defun write-char-range-data-to-file (min max file) - (with-temp-buffer - (insert-char-range-data min max) - (write-region (point-min)(point-max) file))) +(defun decode-builtin-char (charset code-point) + (setq charset (get-charset charset)) + (if (and (not (memq (charset-name charset) + '(ideograph-daikanwa mojikyo))) + (or (memq (charset-name charset) + '(ascii latin-viscii-upper + latin-viscii-lower + arabic-iso8859-6 + japanese-jisx0213-1 + japanese-jisx0213-2)) + (= (char-int (charset-iso-final-char charset)) 0))) + (decode-char charset code-point) + (let ((table (charset-mapping-table charset))) + (if table + (prog2 + (set-charset-mapping-table charset nil) + (decode-char charset code-point) + (set-charset-mapping-table charset table)) + (decode-char charset code-point))))) ;;;###autoload (defun char-db-update-comment () @@ -352,9 +496,15 @@ (when (find-charset (car cdef)) (goto-char (match-end 0)) (setq char - (if (or (memq (car cdef) '(ascii latin-viscii-upper - latin-viscii-lower)) - (= (char-int (charset-final (car cdef))) 0)) + (if (and + (not (eq (car cdef) 'ideograph-daikanwa)) + (or (memq (car cdef) '(ascii latin-viscii-upper + latin-viscii-lower + arabic-iso8859-6 + japanese-jisx0213-1 + japanese-jisx0213-2)) + (= (char-int (charset-iso-final-char (car cdef))) + 0))) (apply (function make-char) cdef) (if (setq table (charset-mapping-table (car cdef))) (set-charset-mapping-table (car cdef) nil)) @@ -363,12 +513,40 @@ (if table (set-charset-mapping-table (car cdef) table))))) (when (not (or (< (char-int char) 32) - (and (<= (char-int char) 128) + (and (<= 128 (char-int char)) (< (char-int char) 160)))) (delete-region (point) (point-at-eol)) (insert (format "\t; %c" char))) ))))) +(defun insert-char-data-with-variant (char) + (insert-char-data char) + (let ((variants (or (char-variants char) + (let ((ucs (get-char-attribute char '->ucs))) + (if ucs + (delete char (char-variants (int-char ucs)))))))) + (while variants + (insert-char-data (car variants)) + (setq variants (cdr variants)) + ))) + +(defun insert-char-range-data (min max) + (let ((code min) + char) + (while (<= code max) + (setq char (int-char code)) + (insert-char-data-with-variant char) + (setq code (1+ code)) + ))) + +(defun write-char-range-data-to-file (min max file) + (let ((coding-system-for-write 'utf-8)) + (with-temp-buffer + (insert-char-range-data min max) + (write-region (point-min)(point-max) file)))) + +(defvar what-character-original-window-configuration) + ;;;###autoload (defun what-char-definition (char) (interactive (list (char-after))) @@ -382,7 +560,8 @@ (erase-buffer) (condition-case err (progn - (insert-char-data char) + (insert-char-data-with-variant char) + ;; (char-db-update-comment) (set-buffer-modified-p nil) (view-mode the-buf (lambda (buf) (set-window-configuration