X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Futf-2000%2Fideograph-util.el;h=5c9bcccdb42a32fd5e6da8ffe5356eb1c3984706;hb=d80d6a39d08c01fa110a284bb385a707fb2d0fef;hp=7c1a46212d9ac5831d4dc643cdf1134af88463a1;hpb=5a98616f90a5c4f53c1863f0762673ab5ef57b63;p=chise%2Fxemacs-chise.git- diff --git a/lisp/utf-2000/ideograph-util.el b/lisp/utf-2000/ideograph-util.el index 7c1a462..5c9bccc 100644 --- a/lisp/utf-2000/ideograph-util.el +++ b/lisp/utf-2000/ideograph-util.el @@ -1,26 +1,26 @@ ;;; ideograph-util.el --- Ideographic Character Database utility -;; Copyright (C) 1999,2000 MORIOKA Tomohiko. +;; Copyright (C) 1999,2000,2001,2002,2003 MORIOKA Tomohiko. ;; Author: MORIOKA Tomohiko ;; Keywords: UTF-2000, ISO/IEC 10646, Unicode, UCS-4, MULE. -;; This file is part of UTF-2000. +;; This file is part of XEmacs UTF-2000. -;; UTF-2000 is free software; you can redistribute it and/or modify it -;; under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. +;; XEmacs UTF-2000 is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2, or (at +;; your option) any later version. -;; UTF-2000 is distributed in the hope that it will be useful, but -;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; XEmacs UTF-2000 is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with XEmacs; see the file COPYING. If not, write to the Free -;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -;; 02111-1307, USA. +;; along with XEmacs UTF-2000; see the file COPYING. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. ;;; Code: @@ -29,16 +29,24 @@ (defvar ideograph-radical-chars-vector (make-vector 215 nil)) -(defun char-ideographic-radical (char) - (or (get-char-attribute char 'ideographic-radical) - (let ((radical - (or (get-char-attribute char 'daikanwa-radical) - (get-char-attribute char 'kangxi-radical) - (get-char-attribute char 'japanese-radical) - (get-char-attribute char 'korean-radical)))) - (when radical - (put-char-attribute char 'ideographic-radical radical) - radical)))) +(defun char-ideographic-radical (char &optional radical) + (let (ret) + (or (catch 'tag + (dolist (cell (get-char-attribute char 'ideographic-)) + (if (and (setq ret (plist-get cell :radical)) + (or (eq ret radical) + (null radical))) + (throw 'tag ret)))) + (get-char-attribute char 'ideographic-radical) + (progn + (setq ret + (or (get-char-attribute char 'daikanwa-radical) + (get-char-attribute char 'kangxi-radical) + (get-char-attribute char 'japanese-radical) + (get-char-attribute char 'korean-radical))) + (when ret + (put-char-attribute char 'ideographic-radical ret) + ret))))) (defvar ideograph-radical-strokes-vector ;;0 1 2 3 4 5 6 7 8 9 @@ -51,130 +59,76 @@ 3 4 4 4 3 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 3 4 4 4 4 - 4 4 4 4 4 5 5 5 5 5 + 4 4 4 4 3 5 4 5 5 5 ;; 100 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 5 6 6 6 6 6 6 6 6 6 6 6 6 - 6 6 6 6 6 6 6 6 6 6 + 4 6 6 6 6 6 6 6 6 6 4 6 6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 7 7 - 7 7 4 3 7 7 7 8 8 8 - 8 8 8 8 8 8 9 9 9 9 - 9 9 9 9 9 9 9 10 10 10 + 7 7 4 3 7 7 7 8 7 8 + 3 8 8 8 8 8 9 9 9 9 + 9 9 9 9 8 9 9 10 10 10 10 10 10 10 10 11 11 11 11 11 ;; 200 11 12 12 12 12 13 13 13 13 14 14 15 16 16 17]) -(defun char-ideographic-strokes (char) - (or (get-char-attribute char 'ideographic-strokes) - (let ((strokes - (or (get-char-attribute char 'daikanwa-strokes) - (get-char-attribute char 'kangxi-strokes) - (get-char-attribute char 'japanese-strokes) - (get-char-attribute char 'korean-strokes) - (let ((r (char-ideographic-radical char)) - (ts (get-char-attribute char 'total-strokes))) - (if (and r ts) - (- ts (aref ideograph-radical-strokes-vector r)))) - ))) - (when strokes - (put-char-attribute char 'ideographic-strokes strokes) - strokes)))) +(defun char-ideographic-strokes (char &optional radical) + (let (ret) + (or (catch 'tag + (dolist (cell (get-char-attribute char 'ideographic-)) + (if (and (setq ret (plist-get cell :radical)) + (or (eq ret radical) + (null radical))) + (throw 'tag (plist-get cell :strokes))))) + (get-char-attribute char 'daikanwa-strokes) + (get-char-attribute char 'ideographic-strokes) + (let ((strokes + (or (get-char-attribute char 'kangxi-strokes) + (get-char-attribute char 'japanese-strokes) + (get-char-attribute char 'korean-strokes) + (let ((r (char-ideographic-radical char)) + (ts (get-char-attribute char 'total-strokes))) + (if (and r ts) + (- ts (aref ideograph-radical-strokes-vector r)))) + ))) + (when strokes + (put-char-attribute char 'ideographic-strokes strokes) + strokes))))) ;;;###autoload (defun update-ideograph-radical-table () (interactive) - (let ((i #x3400) - j - char radical - (charsets '(japanese-jisx0208-1978 - japanese-jisx0208 - japanese-jisx0208-1990 - japanese-jisx0212 - japanese-jisx0213-1 - japanese-jisx0213-2 - chinese-cns11643-1 - chinese-cns11643-2 - chinese-cns11643-3 - chinese-cns11643-4 - chinese-cns11643-5 - chinese-cns11643-6 - chinese-cns11643-7 - korean-ksc5601 - chinese-gb2312 - chinese-isoir165 - chinese-big5-1 - chinese-big5-2)) - ret script) - (while (<= i #x9FFF) - (setq char (decode-char 'ucs i)) - (when (and (or (null (setq script (get-char-attribute char 'script))) - (memq 'Ideograph script)) - (setq radical (char-ideographic-radical char))) - (or (get-char-attribute char 'ucs) - (put-char-attribute char 'ucs i)) - (char-ideographic-strokes char) - (if (not (memq char - (setq ret - (aref ideograph-radical-chars-vector radical)))) - (aset ideograph-radical-chars-vector radical - (cons char ret)))) - (setq i (1+ i))) - (setq i #x100000) - (while (<= i #x10FFFF) - (setq char (decode-char 'ucs i)) - (when (and (or (null (setq script (get-char-attribute char 'script))) - (memq 'Ideograph script)) - (setq radical (char-ideographic-radical char))) - (if (not (memq char + (let (ret radical script) + (map-char-attribute + (lambda (char radical) + (when (and radical + (or (null (setq script (get-char-attribute char 'script))) + (memq 'Ideograph script))) + (unless (memq char (setq ret - (aref ideograph-radical-chars-vector radical)))) - (aset ideograph-radical-chars-vector radical - (cons char ret)))) - (setq i (1+ i))) - (setq i 0) - (while (< i 50101) - (setq char (decode-char 'ideograph-daikanwa i)) - (if (and (setq radical (char-ideographic-radical char)) - (not - (memq char - (setq ret - (aref ideograph-radical-chars-vector radical))))) - (aset ideograph-radical-chars-vector radical - (cons char ret))) - (setq i (1+ i))) - (setq i 0) - (while (< i (* 94 60 22)) - (setq char (decode-char 'mojikyo i)) - (if (and (setq radical (char-ideographic-radical char)) - (not - (memq char - (setq ret - (aref ideograph-radical-chars-vector radical))))) - (aset ideograph-radical-chars-vector radical - (cons char ret))) - (setq i (1+ i))) - (while charsets - (setq i 33) - (while (< i 127) - (setq j 33) - (while (< j 127) - (setq char (make-char (car charsets) i j)) - (if (and (or (null (setq script (get-char-attribute char 'script))) - (memq 'Ideograph script)) - (setq radical (char-ideographic-radical char)) - (not (memq char - (setq ret - (aref ideograph-radical-chars-vector - radical))))) - (aset ideograph-radical-chars-vector radical - (cons char ret))) - (setq j (1+ j))) - (setq i (1+ i))) - (setq charsets (cdr charsets))) - )) + (aref ideograph-radical-chars-vector radical))) + (char-ideographic-strokes char) + (aset ideograph-radical-chars-vector radical + (cons char ret)))) + nil) + 'ideographic-radical) + (map-char-attribute + (lambda (char data) + (dolist (cell data) + (setq radical (plist-get cell :radical)) + (when (and radical + (or (null (setq script (get-char-attribute char 'script))) + (memq 'Ideograph script))) + (unless (memq char + (setq ret + (aref ideograph-radical-chars-vector radical))) + (char-ideographic-strokes char) + (aset ideograph-radical-chars-vector radical + (cons char ret)))))) + 'ideographic-))) (defun int-list< (a b) (if (numberp (car a)) @@ -185,140 +139,143 @@ nil) (numberp (car b)))) -(defun ideograph-char< (a b) - (let ((a-m-m (get-char-attribute a 'ideograph-daikanwa)) - (b-m-m (get-char-attribute b 'ideograph-daikanwa)) - a-m-r b-m-r - a-s b-s - a-u b-u m) - (if a-m-m - (setq a-s (char-ideographic-strokes a)) - (setq a-m-r (get-char-attribute a 'morohashi-daikanwa)) - (if a-m-r - (progn - (setq a-m-m (car a-m-r) - a-m-r (cdr a-m-r)) - (if (= (car a-m-r) 0) - (setq a-s (char-ideographic-strokes - (decode-char 'ideograph-daikanwa a-m-m))) - (if (setq m (get-char-attribute a '->mojikyo)) - (setq a-s (char-ideographic-strokes - (decode-char 'mojikyo m))) - (setq a-s (char-ideographic-strokes a))))) - (setq a-s (char-ideographic-strokes a)))) - (if b-m-m - (setq b-s (char-ideographic-strokes b)) - (setq b-m-r (get-char-attribute b 'morohashi-daikanwa)) - (if b-m-r - (progn - (setq b-m-m (car b-m-r) - b-m-r (cdr b-m-r)) - (if (= (car b-m-r) 0) - (setq b-s (char-ideographic-strokes - (decode-char 'ideograph-daikanwa b-m-m))) - (if (setq m (get-char-attribute b '->mojikyo)) - (setq b-s (char-ideographic-strokes - (decode-char 'mojikyo m))) - (setq b-s (char-ideographic-strokes b))))) - (setq b-s (char-ideographic-strokes b)))) - (if a-s - (if b-s - (if (= a-s b-s) - (if a-m-m - (if b-m-m - (int-list< (cons a-m-m a-m-r) - (cons b-m-m b-m-r)) - t) - (if b-m-m - nil - (setq a-u (get-char-attribute a 'ucs) - b-u (get-char-attribute b 'ucs)) - (if a-u - (if b-u - (< a-u b-u) - (setq b-u (get-char-attribute b '->ucs)) - (if b-u - (<= a-u b-u) - t)) - (setq a-u (get-char-attribute a '->ucs)) - (if a-u - (if b-u - (< a-u b-u) - (setq b-u (get-char-attribute b '->ucs)) - (if b-u - (< a-u b-u) - t)) - (if (or b-u (get-char-attribute b '->ucs)) - nil - (< (char-int a)(char-int b))))))) - (< a-s b-s)) - t)))) +(defun morohashi-daikanwa< (a b) + (if (integerp a) + (setq a (list a))) + (if (integerp b) + (setq b (list b))) + (cond ((eq (car a) 'ho) + (if (eq (car b) 'ho) + (int-list< (cdr a)(cdr b)) + nil)) + ((numberp (car a)) + (if (eq (car b) 'ho) + t + (int-list< a b))) + (t + (if (eq (car b) 'ho) + t + (int-list< a b))))) + +;; (defun nil=-int< (a b) +;; (cond ((null a) nil) +;; ((null b) nil) +;; (t (< a b)))) -;; (defun ideograph-char< (a b) -;; (let (ra rb mma mmb msa msb) -;; (cond -;; ((progn -;; (if (setq ra (or (get-char-attribute a 'non-morohashi) -;; (get-char-attribute a 'morohashi-daikanwa))) -;; (setq msa (cdr ra) -;; mma (car ra)) -;; (setq mma (get-char-attribute a 'ideograph-daikanwa)))) -;; (cond -;; ((progn -;; (if (setq rb (or (get-char-attribute b 'non-morohashi) -;; (get-char-attribute b 'morohashi-daikanwa))) -;; (setq msb (cdr rb) -;; mmb (car rb)) -;; (setq mmb (get-char-attribute b 'ideograph-daikanwa)))) -;; (cond -;; ((= mma mmb) -;; (cond ((eq (car msa)(car msb)) -;; (cond ((< (length msa)(length msb))) -;; ((= (length msa)(length msb)) -;; (cond ((integerp (nth 1 msa)) -;; (cond ((integerp (nth 1 msb)) -;; (< (nth 1 msa)(nth 1 msb))) -;; (t nil))) -;; (t -;; (cond ((setq ra (get-char-attribute a 'ucs)) -;; (cond -;; ((setq rb (get-char-attribute b 'ucs)) -;; (< ra rb)) -;; (t)))))))) -;; ) -;; ((null (car msa))) -;; ((null (car msb)) -;; nil) -;; (t (< (car msa)(car msb))))) -;; (t (< mma mmb)))) -;; (t))) -;; ((or (get-char-attribute b 'non-morohashi) -;; (get-char-attribute b 'morohashi-daikanwa) -;; (get-char-attribute b 'ideograph-daikanwa)) -;; nil) -;; ((setq ra (get-char-attribute a 'ucs)) -;; (cond -;; ((setq rb (get-char-attribute b 'ucs)) -;; (< ra rb)))) -;; (t -;; (cond -;; ((setq ra (char-ideographic-strokes a)) -;; (cond ((setq rb (char-ideographic-strokes b)) -;; (cond ((= ra rb) -;; (not (char-ideographic-strokes b))) -;; ((< ra rb)))))) -;; ))))) +;; (defun nil>-int< (a b) +;; (cond ((null a) nil) +;; ((null b) t) +;; (t (< a b)))) + +;;;###autoload +(defun char-representative-of-daikanwa (char) + (if (or (encode-char char 'ideograph-daikanwa 'defined-only) + (encode-char char '=daikanwa-rev2 'defined-only)) + char + (let ((m (get-char-attribute char 'morohashi-daikanwa)) + m-m m-s pat) + (or (when m + (setq m-m (pop m)) + (setq m-s (pop m)) + (if (= m-s 0) + (or (decode-char '=daikanwa-rev2 m-m 'defined-only) + (decode-char 'ideograph-daikanwa m-m)) + (when m + (setq pat (list m-m m-s)) + (map-char-attribute (lambda (c v) + (if (equal pat v) + c)) + 'morohashi-daikanwa)))) + char)))) + +(defun char-attributes-poly< (c1 c2 accessors testers defaulters) + (catch 'tag + (let (a1 a2 accessor tester dm) + (while (and accessors testers) + (setq accessor (car accessors) + tester (car testers) + dm (car defaulters)) + (when (and accessor tester) + (setq a1 (funcall accessor c1) + a2 (funcall accessor c2)) + (cond ((null a1) + (if a2 + (cond ((eq dm '<) + (throw 'tag t)) + ((eq dm '>) + (throw 'tag nil))))) + ((null a2) + (cond ((eq dm '<) + (throw 'tag nil)) + ((eq dm '>) + (throw 'tag t)))) + (t + (cond ((funcall tester a1 a2) + (throw 'tag t)) + ((funcall tester a2 a1) + (throw 'tag nil)))))) + (setq accessors (cdr accessors) + testers (cdr testers) + defaulters (cdr defaulters)))))) + +(defvar ideographic-radical nil) + +(defun char-daikanwa-strokes (char &optional radical) + (unless radical + (setq radical ideographic-radical)) + (let ((drc (char-representative-of-daikanwa char))) + (char-ideographic-strokes + (if (= (char-ideographic-radical drc radical) + (char-ideographic-radical char radical)) + drc + char) + radical))) + +;;;###autoload +(defun char-daikanwa (char) + (or (encode-char char 'ideograph-daikanwa 'defined-only) + (encode-char char '=daikanwa-rev2 'defined-only) + (get-char-attribute char 'morohashi-daikanwa))) + +;;;###autoload +(defun char-ucs (char) + (or (encode-char char '=ucs 'defined-only) + (get-char-attribute char '=>ucs))) + +(defun char-id (char) + (logand (char-int char) #x3FFFFFFF)) + +(defun ideograph-char< (a b &optional radical) + (let ((ideographic-radical (or radical + ideographic-radical))) + (char-attributes-poly< + a b + '(char-daikanwa-strokes char-daikanwa char-ucs char-id) + '(< morohashi-daikanwa< < <) + '(> > > >)))) (defun insert-ideograph-radical-char-data (radical) (let ((chars (sort (copy-list (aref ideograph-radical-chars-vector radical)) - (function ideograph-char<))) - (attributes (sort (char-attribute-list) #'char-attribute-name<)) - (ccs (sort (charset-list) #'char-attribute-name<))) + (lambda (a b) + (ideograph-char< a b radical)))) + attributes ccss) + (dolist (name (char-attribute-list)) + (unless (memq name char-db-ignored-attributes) + (if (find-charset name) + (push name ccss) + (push name attributes)))) + (setq attributes (sort attributes #'char-attribute-name<) + ccss (sort ccss #'char-attribute-name<)) (aset ideograph-radical-chars-vector radical chars) - (while chars - (insert-char-data (car chars) nil attributes ccs) - (setq chars (cdr chars))))) + (dolist (char chars) + (when (or (not (some (lambda (atr) + (get-char-attribute char atr)) + char-db-ignored-attributes)) + (some (lambda (ccs) + (encode-char char ccs 'defined-only)) + ccss)) + (insert-char-data char nil attributes ccss))))) (defun write-ideograph-radical-char-data (radical file) (if (file-directory-p file) @@ -335,11 +292,50 @@ file)))) (with-temp-buffer (insert-ideograph-radical-char-data radical) - (char-db-update-comment) (let ((coding-system-for-write 'utf-8)) (write-region (point-min)(point-max) file) ))) +(defun ideographic-structure= (char1 char2) + (if (char-ref-p char1) + (setq char1 (plist-get char1 :char))) + (if (char-ref-p char2) + (setq char2 (plist-get char2 :char))) + (let ((s1 (if (characterp char1) + (get-char-attribute char1 'ideographic-structure) + (cdr (assq 'ideographic-structure char1)))) + (s2 (if (characterp char2) + (get-char-attribute char2 'ideographic-structure) + (cdr (assq 'ideographic-structure char2)))) + e1 e2) + (if (or (null s1)(null s2)) + (char-spec= char1 char2) + (catch 'tag + (while (and s1 s2) + (setq e1 (car s1) + e2 (car s2)) + (unless (ideographic-structure= e1 e2) + (throw 'tag nil)) + (setq s1 (cdr s1) + s2 (cdr s2))) + (and (null s1)(null s2)))))) + +;;;###autoload +(defun ideographic-structure-find-char (structure) + (let (rest) + (map-char-attribute (lambda (char value) + (setq rest structure) + (catch 'tag + (while (and rest value) + (unless (ideographic-structure= + (car rest)(car value)) + (throw 'tag nil)) + (setq rest (cdr rest) + value (cdr value))) + (unless (or rest value) + char))) + 'ideographic-structure))) + (provide 'ideograph-util) ;;; ideograph-util.el ends here