X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Futf-2000%2Fideograph-util.el;h=5cd7fda25534c22d5c3b801e731a30cc2df5777e;hb=454c8d0bfccba8490af50390eb86cfb8bd42ae72;hp=dea2031dd3274bdba749be96fb78232e22585b78;hpb=45fd1039f9b5aaace22617e9f4f0e51f7392c41a;p=chise%2Fxemacs-chise.git diff --git a/lisp/utf-2000/ideograph-util.el b/lisp/utf-2000/ideograph-util.el index dea2031..5cd7fda 100644 --- a/lisp/utf-2000/ideograph-util.el +++ b/lisp/utf-2000/ideograph-util.el @@ -1,26 +1,26 @@ ;;; ideograph-util.el --- Ideographic Character Database utility -;; Copyright (C) 1999 MORIOKA Tomohiko. +;; Copyright (C) 1999,2000,2001 MORIOKA Tomohiko. -;; Author: MORIOKA Tomohiko +;; Author: MORIOKA Tomohiko ;; Keywords: UTF-2000, ISO/IEC 10646, Unicode, UCS-4, MULE. -;; This file is part of UTF-2000. +;; This file is part of XEmacs UTF-2000. -;; UTF-2000 is free software; you can redistribute it and/or modify it -;; under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. +;; XEmacs UTF-2000 is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2, or (at +;; your option) any later version. -;; UTF-2000 is distributed in the hope that it will be useful, but -;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; XEmacs UTF-2000 is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with XEmacs; see the file COPYING. If not, write to the Free -;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -;; 02111-1307, USA. +;; along with XEmacs UTF-2000; see the file COPYING. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. ;;; Code: @@ -29,7 +29,7 @@ (defvar ideograph-radical-chars-vector (make-vector 215 nil)) -(defun char-ideograph-radical (char) +(defun char-ideographic-radical (char) (or (get-char-attribute char 'ideographic-radical) (let ((radical (or (get-char-attribute char 'daikanwa-radical) @@ -40,13 +40,45 @@ (put-char-attribute char 'ideographic-radical radical) radical)))) -(defun char-ideograph-strokes (char) - (or (get-char-attribute char 'ideographic-strokes) +(defvar ideograph-radical-strokes-vector + ;;0 1 2 3 4 5 6 7 8 9 + [nil 1 1 1 1 1 1 2 2 2 + 2 2 2 2 2 2 2 2 2 2 + 2 2 2 2 2 2 2 2 2 2 + 3 3 3 3 3 3 3 3 3 3 + 3 3 3 3 3 3 3 3 3 3 + 3 3 3 3 3 3 3 3 3 3 + 3 4 4 4 3 4 4 4 4 4 + 4 4 4 4 4 4 4 4 4 4 + 4 4 4 4 4 3 4 4 4 4 + 4 4 4 4 3 5 4 5 5 5 + ;; 100 + 5 5 5 5 5 5 5 5 5 5 + 5 5 5 5 5 5 5 5 6 6 + 6 6 6 6 6 6 6 6 6 6 + 4 6 6 6 6 6 6 6 6 6 + 4 6 6 6 6 6 6 7 7 7 + 7 7 7 7 7 7 7 7 7 7 + 7 7 4 3 7 7 7 8 7 8 + 3 8 8 8 8 8 9 9 9 9 + 9 9 9 9 8 9 9 10 10 10 + 10 10 10 10 10 11 11 11 11 11 + ;; 200 + 11 12 12 12 12 13 13 13 13 14 + 14 15 16 16 17]) + +(defun char-ideographic-strokes (char) + (or (get-char-attribute char 'daikanwa-strokes) + (get-char-attribute char 'ideographic-strokes) (let ((strokes - (or (get-char-attribute char 'daikanwa-strokes) - (get-char-attribute char 'kangxi-strokes) + (or (get-char-attribute char 'kangxi-strokes) (get-char-attribute char 'japanese-strokes) - (get-char-attribute char 'korean-strokes)))) + (get-char-attribute char 'korean-strokes) + (let ((r (char-ideographic-radical char)) + (ts (get-char-attribute char 'total-strokes))) + (if (and r ts) + (- ts (aref ideograph-radical-strokes-vector r)))) + ))) (when strokes (put-char-attribute char 'ideographic-strokes strokes) strokes)))) @@ -54,111 +86,130 @@ ;;;###autoload (defun update-ideograph-radical-table () (interactive) - (let ((i #x3400) - j - char radical - (charsets '(japanese-jisx0208 - japanese-jisx0208-1978 - japanese-jisx0212 - chinese-cns11643-1 - chinese-cns11643-2 - chinese-cns11643-3 - chinese-cns11643-4 - chinese-cns11643-5 - chinese-cns11643-6 - chinese-cns11643-7 - korean-ksc5601 - chinese-gb2312 - chinese-isoir165 - chinese-big5-1 - chinese-big5-2)) - ret) - (while (<= i #x9FFF) - (setq char (int-char i)) - (when (setq radical (char-ideograph-radical char)) - (or (get-char-attribute char 'ucs) - (put-char-attribute char 'ucs i)) - (if (not (memq char + (let (ret script) + (map-char-attribute + (lambda (char radical) + (when (and radical + (or (null (setq script (get-char-attribute char 'script))) + (memq 'Ideograph script))) + (unless (memq char (setq ret - (aref ideograph-radical-chars-vector radical)))) - (aset ideograph-radical-chars-vector radical - (cons char ret)))) - (setq i (1+ i))) - (setq i 0) - (while (< i 256) - (setq j 0) - (while (< j 256) - (setq char (make-char 'ideograph-daikanwa i j)) - (if (and (setq radical (char-ideograph-radical char)) - (not - (memq char - (setq ret - (aref ideograph-radical-chars-vector radical))))) - (aset ideograph-radical-chars-vector radical - (cons char ret))) - (setq j (1+ j))) - (setq i (1+ i))) - (while charsets - (setq i 33) - (while (< i 127) - (setq j 33) - (while (< j 127) - (setq char (make-char (car charsets) i j)) - (if (and (setq radical (char-ideograph-radical char)) - (not (memq char - (setq ret - (aref ideograph-radical-chars-vector - radical))))) - (aset ideograph-radical-chars-vector radical - (cons char ret))) - (setq j (1+ j))) - (setq i (1+ i))) - (setq charsets (cdr charsets))) - )) + (aref ideograph-radical-chars-vector radical))) + (char-ideographic-strokes char) + (aset ideograph-radical-chars-vector radical + (cons char ret)))) + nil) + 'ideographic-radical))) + +(defun int-list< (a b) + (if (numberp (car a)) + (if (numberp (car b)) + (if (= (car a) (car b)) + (int-list< (cdr a)(cdr b)) + (< (car a) (car b))) + nil) + (numberp (car b)))) + +(defun morohashi-daikanwa< (a b) + (if (integerp a) + (setq a (list a))) + (if (integerp b) + (setq b (list b))) + (cond ((eq (car a) 'ho) + (if (eq (car b) 'ho) + (int-list< (cdr a)(cdr b)) + nil)) + ((numberp (car a)) + (if (eq (car b) 'ho) + t + (int-list< a b))) + (t + (if (eq (car b) 'ho) + t + (int-list< a b))))) + +(defun char-representative-of-daikanwa (char) + (if (get-char-attribute char 'ideograph-daikanwa) + char + (let ((m (get-char-attribute char 'morohashi-daikanwa)) + m-m m-s pat) + (or (when m + (setq m-m (pop m)) + (setq m-s (pop m)) + (if (= m-s 0) + (decode-char 'ideograph-daikanwa m-m) + (when m + (setq pat (list m-m m-s)) + (map-char-attribute (lambda (c v) + (if (equal pat v) + c)) + 'morohashi-daikanwa)))) + char)))) (defun ideograph-char< (a b) - (let (ra rb) - (cond - ((setq ra (or (get-char-attribute a 'morohashi-daikanwa) - (get-char-attribute a 'non-morohashi))) - (cond - ((setq rb (or (get-char-attribute b 'morohashi-daikanwa) - (get-char-attribute b 'non-morohashi))) - (cond - ((= (car ra)(car rb)) - (cond ((eq (car (cdr ra))(car (cdr rb))) - (cond ((< (length ra)(length rb))) - ((= (length ra)(length rb)) - (cond ((setq ra (get-char-attribute a 'ucs)) - (cond - ((setq rb (get-char-attribute b 'ucs)) - (< ra rb)) - (t)))))) - ) - ((null (car (cdr ra)))) - ((null (car (cdr rb))) - nil) - (t (< (car (cdr ra))(car (cdr rb)))))) - (t (< (car ra)(car rb))))) - ((setq ra (get-char-attribute a 'ucs)) - (cond - ((setq rb (get-char-attribute b 'ucs)) - (< ra rb)))) - (t - (cond - ((setq ra (char-ideograph-strokes a)) - (cond ((setq rb (char-ideograph-strokes b)) - (cond ((= ra rb) - (not (char-ideograph-strokes b))) - ((< ra rb)))))) - ))))))) + (let (a-m b-m a-s b-s a-u b-u ret) + (setq ret (char-representative-of-daikanwa a)) + (setq a-s (char-ideographic-strokes + (if (= (get-char-attribute ret 'ideographic-radical) + (get-char-attribute a 'ideographic-radical)) + ret + a))) + (setq ret (char-representative-of-daikanwa b)) + (setq b-s (char-ideographic-strokes + (if (= (get-char-attribute ret 'ideographic-radical) + (get-char-attribute b 'ideographic-radical)) + ret + b))) + (if a-s + (if b-s + (if (= a-s b-s) + (if (setq a-m (or (get-char-attribute a 'ideograph-daikanwa) + (get-char-attribute a 'morohashi-daikanwa))) + (if (setq b-m + (or (get-char-attribute b 'ideograph-daikanwa) + (get-char-attribute b 'morohashi-daikanwa))) + (morohashi-daikanwa< a-m b-m) + t) + (if (setq b-m + (or (get-char-attribute b 'ideograph-daikanwa) + (get-char-attribute b 'morohashi-daikanwa))) + nil + (setq a-u (get-char-attribute a 'ucs) + b-u (get-char-attribute b 'ucs)) + (if a-u + (if b-u + (< a-u b-u) + (setq b-u (or (get-char-attribute b '=>ucs) + (get-char-attribute b '->ucs))) + (if b-u + (<= a-u b-u) + t)) + (setq a-u (or (get-char-attribute a '=>ucs) + (get-char-attribute a '->ucs))) + (if a-u + (if b-u + (< a-u b-u) + (setq b-u (or (get-char-attribute b '=>ucs) + (get-char-attribute b '->ucs))) + (if b-u + (< a-u b-u) + t)) + (if (or b-u (or (get-char-attribute b '=>ucs) + (get-char-attribute b '->ucs))) + nil + (< (char-int a)(char-int b))))))) + (< a-s b-s)) + t)))) (defun insert-ideograph-radical-char-data (radical) (let ((chars (sort (copy-list (aref ideograph-radical-chars-vector radical)) - (function ideograph-char<)))) + (function ideograph-char<))) + (attributes (sort (char-attribute-list) #'char-attribute-name<)) + (ccs (sort (charset-list) #'char-attribute-name<))) + (aset ideograph-radical-chars-vector radical chars) (while chars - (insert-char-data (car chars)) + (insert-char-data (car chars) nil attributes ccs) (setq chars (cdr chars))))) (defun write-ideograph-radical-char-data (radical file) @@ -166,13 +217,20 @@ (let ((name (get-char-attribute (int-char (+ #x2EFF radical)) 'name))) (if (string-match "KANGXI RADICAL " name) (setq name (capitalize (substring name (match-end 0))))) + (setq name (mapconcat (lambda (char) + (if (eq char ? ) + "-" + (char-to-string char))) name "")) (setq file (expand-file-name (format "Ideograph-R%03d-%s.el" radical name) file)))) (with-temp-buffer (insert-ideograph-radical-char-data radical) - (write-region (point-min)(point-max) file))) + (char-db-update-comment) + (let ((coding-system-for-write 'utf-8)) + (write-region (point-min)(point-max) file) + ))) (provide 'ideograph-util)