;;; ideograph-util.el --- Ideographic Character Database utility
-;; Copyright (C) 1999 MORIOKA Tomohiko.
+;; Copyright (C) 1999,2000 MORIOKA Tomohiko.
-;; Author: MORIOKA Tomohiko <tomo@m17n.org>
+;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
;; Keywords: UTF-2000, ISO/IEC 10646, Unicode, UCS-4, MULE.
;; This file is part of UTF-2000.
;; NOTE(review): lines starting with "-" / "+" look like unified-diff
;; markers (previous / revised text) -- confirm before evaluating.
;;
;; Top-level form that fills `ideograph-radical-chars-vector': every
;; character for which `char-ideograph-radical' returns a radical is
;; consed onto the slot of that radical, unless `memq' already finds it
;; there.  The end of this form is not visible in this excerpt.
(let ((i #x3400)
j
char radical
;; Coded character sets scanned by the final `while charsets' loop.
- (charsets '(japanese-jisx0208
- japanese-jisx0208-1978
+ (charsets '(japanese-jisx0208-1978
+ japanese-jisx0208
+ japanese-jisx0208-1990
japanese-jisx0212
+ japanese-jisx0213-1
+ japanese-jisx0213-2
chinese-cns11643-1
chinese-cns11643-2
chinese-cns11643-3
chinese-isoir165
chinese-big5-1
chinese-big5-2))
- ret)
+ ret script)
;; Pass 1: code points #x3400 .. #x9FFF.  In the "+" text a character is
;; taken only if its `script' attribute is nil or contains `Ideograph'.
(while (<= i #x9FFF)
- (setq char (int-char i))
- (when (setq radical (char-ideograph-radical char))
+ (setq char (decode-char 'ucs i))
+ (when (and (or (null (setq script (get-char-attribute char 'script)))
+ (memq 'Ideograph script))
+ (setq radical (char-ideograph-radical char)))
;; Record the code point as the `ucs' attribute when none is set yet.
(or (get-char-attribute char 'ucs)
(put-char-attribute char 'ucs i))
;; Return value is discarded here; presumably called for a side
;; effect of `char-ideograph-strokes' -- TODO confirm.
+ (char-ideograph-strokes char)
(if (not (memq char
(setq ret
(aref ideograph-radical-chars-vector radical))))
(aset ideograph-radical-chars-vector radical
(cons char ret))))
(setq i (1+ i)))
;; The "-" text below walked `ideograph-daikanwa' as a 256 x 256 grid via
;; `make-char'; the "+" text first scans #x100000 .. #x10FFFF with the
;; same script test as pass 1, then scans `ideograph-daikanwa' codes
;; 0 .. 50100 via `decode-char'.
- (setq i 0)
- (while (< i 256)
- (setq j 0)
- (while (< j 256)
- (setq char (make-char 'ideograph-daikanwa i j))
- (if (and (setq radical (char-ideograph-radical char))
- (not
- (memq char
- (setq ret
- (aref ideograph-radical-chars-vector radical)))))
+ (setq i #x100000)
+ (while (<= i #x10FFFF)
+ (setq char (decode-char 'ucs i))
+ (when (and (or (null (setq script (get-char-attribute char 'script)))
+ (memq 'Ideograph script))
+ (setq radical (char-ideograph-radical char)))
+ (if (not (memq char
+ (setq ret
+ (aref ideograph-radical-chars-vector radical))))
(aset ideograph-radical-chars-vector radical
- (cons char ret)))
- (setq j (1+ j)))
+ (cons char ret))))
+ (setq i (1+ i)))
+ (setq i 0)
+ (while (< i 50101)
+ (setq char (decode-char 'ideograph-daikanwa i))
+ (if (and (setq radical (char-ideograph-radical char))
+ (not
+ (memq char
+ (setq ret
+ (aref ideograph-radical-chars-vector radical)))))
+ (aset ideograph-radical-chars-vector radical
+ (cons char ret)))
(setq i (1+ i)))
;; Final pass: for each entry of `charsets', build characters with
;; `make-char' from byte values starting at 33 (j runs while < 127).
;; The remainder of this loop is cut off in this excerpt.
(while charsets
(setq i 33)
(setq j 33)
(while (< j 127)
(setq char (make-char (car charsets) i j))
- (if (and (setq radical (char-ideograph-radical char))
+ (if (and (or (null (setq script (get-char-attribute char 'script)))
+ (memq 'Ideograph script))
+ (setq radical (char-ideograph-radical char))
(not (memq char
(setq ret
(aref ideograph-radical-chars-vector
))
;; Ordering predicate on two characters A and B; it is passed to `sort'
;; by `insert-ideograph-radical-char-data'.
;;
;; As written in the "+" text:
;;  - MMA / MMB is the car of the `non-morohashi' (tried first) or
;;    `morohashi-daikanwa' attribute, or else the `ideograph-daikanwa'
;;    attribute; MSA / MSB is the cdr of the former when present.
;;  - If both have such a number, the smaller one sorts first.  On a tie
;;    the order is decided by (car MS*), then by the length of MS*, then
;;    by (nth 1 MS*) when it is an integer, and otherwise by `ucs'.
;;  - A character that has a number sorts before one that has none.
;;  - If neither has one, `ucs' attributes are compared, and failing that
;;    the values of `char-ideograph-strokes'.
;; NOTE(review): "-" / "+" prefixes look like unified-diff markers
;; (previous / revised text) -- confirm before evaluating.
(defun ideograph-char< (a b)
- (let (ra rb)
+ (let (ra rb mma mmb msa msb)
(cond
- ((setq ra (or (get-char-attribute a 'morohashi-daikanwa)
- (get-char-attribute a 'non-morohashi)))
+ ((progn
+ (if (setq ra (or (get-char-attribute a 'non-morohashi)
+ (get-char-attribute a 'morohashi-daikanwa)))
+ (setq msa (cdr ra)
+ mma (car ra))
+ (setq mma (get-char-attribute a 'ideograph-daikanwa))))
(cond
- ((setq rb (or (get-char-attribute b 'morohashi-daikanwa)
- (get-char-attribute b 'non-morohashi)))
+ ((progn
+ (if (setq rb (or (get-char-attribute b 'non-morohashi)
+ (get-char-attribute b 'morohashi-daikanwa)))
+ (setq msb (cdr rb)
+ mmb (car rb))
+ (setq mmb (get-char-attribute b 'ideograph-daikanwa))))
(cond
- ((= (car ra)(car rb))
- (cond ((eq (car (cdr ra))(car (cdr rb)))
- (cond ((< (length ra)(length rb)))
- ((= (length ra)(length rb))
- (cond ((setq ra (get-char-attribute a 'ucs))
- (cond
- ((setq rb (get-char-attribute b 'ucs))
- (< ra rb))
- (t))))))
+ ((= mma mmb)
+ (cond ((eq (car msa)(car msb))
+ (cond ((< (length msa)(length msb)))
+ ((= (length msa)(length msb))
+ (cond ((integerp (nth 1 msa))
+ (cond ((integerp (nth 1 msb))
+ (< (nth 1 msa)(nth 1 msb)))
+ (t nil)))
+ (t
+ (cond ((setq ra (get-char-attribute a 'ucs))
+ (cond
+ ((setq rb (get-char-attribute b 'ucs))
+ (< ra rb))
+ (t))))))))
)
- ((null (car (cdr ra))))
- ((null (car (cdr rb)))
+ ((null (car msa)))
+ ((null (car msb))
nil)
- (t (< (car (cdr ra))(car (cdr rb))))))
- (t (< (car ra)(car rb)))))
- ((setq ra (get-char-attribute a 'ucs))
- (cond
- ((setq rb (get-char-attribute b 'ucs))
- (< ra rb))))
- (t
- (cond
- ((setq ra (char-ideograph-strokes a))
- (cond ((setq rb (char-ideograph-strokes b))
- (cond ((= ra rb)
- (not (char-ideograph-strokes b)))
- ((< ra rb))))))
- )))))))
+ (t (< (car msa)(car msb)))))
+ (t (< mma mmb))))
;; A has a number but B has none: A sorts first.
+ (t)))
;; A has no number but B has one: A does not sort first.
+ ((or (get-char-attribute b 'non-morohashi)
+ (get-char-attribute b 'morohashi-daikanwa)
+ (get-char-attribute b 'ideograph-daikanwa))
+ nil)
;; Neither has a number: compare `ucs' attributes.  The result is nil
;; when A has `ucs' but B does not.
+ ((setq ra (get-char-attribute a 'ucs))
+ (cond
+ ((setq rb (get-char-attribute b 'ucs))
+ (< ra rb))))
;; Last resort: compare the values of `char-ideograph-strokes'.
+ (t
+ (cond
+ ((setq ra (char-ideograph-strokes a))
+ (cond ((setq rb (char-ideograph-strokes b))
;; NOTE(review): RB was just set non-nil from this same call, so the
;; `not' below looks like it always yields nil -- confirm intent.
+ (cond ((= ra rb)
+ (not (char-ideograph-strokes b)))
+ ((< ra rb))))))
+ )))))
;; Call `insert-char-data' for every character recorded under RADICAL
;; in `ideograph-radical-chars-vector', in `ideograph-char<' order.
;; The list is copied with `copy-list' before `sort' is applied.
;; In the "+" text the sorted list is also stored back into the vector
;; slot, and `insert-char-data' additionally receives nil, the result of
;; `char-attribute-list' and the result of `charset-list', both sorted
;; with `char-attribute-name<'.
;; NOTE(review): "-" / "+" prefixes look like unified-diff markers
;; (previous / revised text) -- confirm before evaluating.
(defun insert-ideograph-radical-char-data (radical)
(let ((chars
(sort (copy-list (aref ideograph-radical-chars-vector radical))
- (function ideograph-char<))))
+ (function ideograph-char<)))
+ (attributes (sort (char-attribute-list) #'char-attribute-name<))
+ (ccs (sort (charset-list) #'char-attribute-name<)))
+ (aset ideograph-radical-chars-vector radical chars)
(while chars
- (insert-char-data (car chars))
+ (insert-char-data (car chars) nil attributes ccs)
(setq chars (cdr chars)))))
;; Write the character data of RADICAL to FILE.
;; The visible code derives a file name "Ideograph-R<NNN>-<name>.el"
;; from the `name' attribute of the character at code #x2EFF + RADICAL,
;; expands it relative to FILE, and rebinds FILE to the result.  The data
;; is then generated in a temporary buffer with
;; `insert-ideograph-radical-char-data' and saved with `write-region'.
;; NOTE(review): the line ending in "file))))" closes one more
;; parenthesis than the visible lines open, so an enclosing form (for
;; example a condition guarding the renaming) seems to be missing from
;; this excerpt -- confirm against the full file.
;; NOTE(review): "-" / "+" prefixes look like unified-diff markers
;; (previous / revised text) -- confirm before evaluating.
(defun write-ideograph-radical-char-data (radical file)
(let ((name (get-char-attribute (int-char (+ #x2EFF radical)) 'name)))
;; Drop the "KANGXI RADICAL " prefix and capitalize what follows it.
(if (string-match "KANGXI RADICAL " name)
(setq name (capitalize (substring name (match-end 0)))))
;; Replace every space in the name with "-".
+ (setq name (mapconcat (lambda (char)
+ (if (eq char ? )
+ "-"
+ (char-to-string char))) name ""))
(setq file
(expand-file-name
(format "Ideograph-R%03d-%s.el" radical name)
file))))
(with-temp-buffer
(insert-ideograph-radical-char-data radical)
- (write-region (point-min)(point-max) file)))
+ (char-db-update-comment)
;; The "+" text binds `coding-system-for-write' to utf-8 for the write.
+ (let ((coding-system-for-write 'utf-8))
+ (write-region (point-min)(point-max) file)
+ )))
(provide 'ideograph-util)