X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Futf-2000%2Fchar-db-util.el;h=d2169c90fbe906bc82abc4c2f8215f7a9c032d70;hb=90c42eb4fa13a7f78c7b3a33b62d80e4aff44e60;hp=2073fd175d6dd8bf8775cac721f061ec427b5fd7;hpb=b68e454dd75fa28f4fa85242fe418aac23abcb72;p=chise%2Fxemacs-chise.git diff --git a/lisp/utf-2000/char-db-util.el b/lisp/utf-2000/char-db-util.el index 2073fd1..d2169c9 100644 --- a/lisp/utf-2000/char-db-util.el +++ b/lisp/utf-2000/char-db-util.el @@ -1,6 +1,6 @@ ;;; char-db-util.el --- Character Database utility -;; Copyright (C) 1998,1999,2000,2001,2002 MORIOKA Tomohiko. +;; Copyright (C) 1998,1999,2000,2001,2002,2003 MORIOKA Tomohiko. ;; Author: MORIOKA Tomohiko ;; Keywords: UTF-2000, ISO/IEC 10646, Unicode, UCS-4, MULE. @@ -65,10 +65,8 @@ (let ((v (make-vector 215 nil)) (i 1)) (while (< i 215) - (aset v i (int-char (+ #x2EFF i))) + (aset v i (decode-char 'ucs (+ #x2EFF i))) (setq i (1+ i))) - (unless (charset-iso-final-char (car (split-char (aref v 34)))) - (aset v 34 (make-char 'chinese-gb2312 #x62 #x3A))) v)) (defvar char-db-ignored-attributes nil) @@ -143,29 +141,61 @@ nil))) (defvar char-db-coded-charset-priority-list - (let ((rest default-coded-charset-priority-list) - dest) - (while rest - (when (symbolp (car rest)) - (cond ((memq (car rest) - '(latin-viscii-lower - latin-viscii-upper - ipa - lao - ethiopic - arabic-digit - arabic-1-column - arabic-2-column))) - ((string-match "^mojikyo-" (symbol-name (car rest)))) - ((string-match "^chinese-big5" (symbol-name (car rest)))) - ((string-match "^ideograph-gt-pj-" (symbol-name (car rest))) - (unless (memq 'ideograph-gt dest) - (setq dest (cons 'ideograph-gt dest)))) - (t - (setq dest (cons (car rest) dest))))) - (setq rest (cdr rest))) - (append (sort dest #'char-attribute-name<) - '(chinese-big5-cdp chinese-big5-eten chinese-big5)))) + '(ascii + control-1 + latin-iso8859-1 + latin-iso8859-2 + latin-iso8859-3 + latin-iso8859-4 + latin-iso8859-9 + latin-jisx0201 + cyrillic-iso8859-5 + greek-iso8859-7 + thai-tis620 + =jis-x0208 + japanese-jisx0208 + japanese-jisx0212 + japanese-jisx0208-1978 + chinese-gb2312 + chinese-cns11643-1 + chinese-cns11643-2 + chinese-cns11643-3 + chinese-cns11643-4 + chinese-cns11643-5 + chinese-cns11643-6 + chinese-cns11643-7 + japanese-jisx0208-1990 + =jis-x0213-1-2000 + =jis-x0213-2-2000 + korean-ksc5601 + ;; chinese-gb12345 + chinese-isoir165 + katakana-jisx0201 + hebrew-iso8859-8 + latin-viscii + ethiopic-ucs + =gt + =big5-cdp + =gt-k + ideograph-daikanwa-2 + ideograph-daikanwa + ideograph-cbeta + ideograph-hanziku-1 + ideograph-hanziku-2 + ideograph-hanziku-3 + ideograph-hanziku-4 + ideograph-hanziku-5 + ideograph-hanziku-6 + ideograph-hanziku-7 + ideograph-hanziku-8 + ideograph-hanziku-9 + ideograph-hanziku-10 + ideograph-hanziku-11 + ideograph-hanziku-12 + =cbeta + =jef-china3 + =big5-eten + =big5)) (defun char-db-make-char-spec (char) (let (ret char-spec) @@ -173,13 +203,21 @@ (cond ((and (setq ret (get-char-attribute char 'ucs)) (not (and (<= #xE000 ret)(<= ret #xF8FF)))) (setq char-spec (list (cons 'ucs ret))) - (if (setq ret (get-char-attribute char 'name)) - (setq char-spec (cons (cons 'name ret) char-spec))) + (cond ((setq ret (get-char-attribute char 'name)) + (setq char-spec (cons (cons 'name ret) char-spec)) + ) + ((setq ret (get-char-attribute char 'name*)) + (setq char-spec (cons (cons 'name* ret) char-spec)) + )) ) ((setq ret - (let ((default-coded-charset-priority-list - char-db-coded-charset-priority-list)) - (split-char char))) + (catch 'tag + (let ((rest char-db-coded-charset-priority-list)) + (while rest + (if (setq ret + (get-char-attribute char (car rest))) + (throw 'tag (cons (car rest) ret))) + (setq rest (cdr rest)))))) (setq char-spec (list ret)) (dolist (ccs (delq (car ret) (charset-list))) (if (or (and (charset-iso-final-char ccs) @@ -188,8 +226,12 @@ (setq char-spec (cons (cons ccs ret) char-spec)))) (if (null char-spec) (setq char-spec (split-char char))) - (if (setq ret (get-char-attribute char 'name)) - (setq char-spec (cons (cons 'name ret) char-spec))) + (cond ((setq ret (get-char-attribute char 'name)) + (setq char-spec (cons (cons 'name ret) char-spec)) + ) + ((setq ret (get-char-attribute char 'name*)) + (setq char-spec (cons (cons 'name* ret) char-spec)) + )) )) char-spec) ((consp char) @@ -200,31 +242,6 @@ (setq column (current-column))) (let (char-spec ret al cal key temp-char) (setq char-spec (char-db-make-char-spec char)) - ;; (cond ((characterp char) - ;; (cond ((and (setq ret (get-char-attribute char 'ucs)) - ;; (not (and (<= #xE000 ret)(<= ret #xF8FF)))) - ;; (setq char-spec (list (cons 'ucs ret))) - ;; (if (setq ret (get-char-attribute char 'name)) - ;; (setq char-spec (cons (cons 'name ret) char-spec))) - ;; ) - ;; ((setq ret - ;; (let ((default-coded-charset-priority-list - ;; char-db-coded-charset-priority-list)) - ;; (split-char char))) - ;; (setq char-spec (list ret)) - ;; (dolist (ccs (delq (car ret) (charset-list))) - ;; (if (or (and (charset-iso-final-char ccs) - ;; (setq ret (get-char-attribute char ccs))) - ;; (eq ccs 'ideograph-daikanwa)) - ;; (setq char-spec (cons (cons ccs ret) char-spec)))) - ;; (if (null char-spec) - ;; (setq char-spec (split-char char))) - ;; (if (setq ret (get-char-attribute char 'name)) - ;; (setq char-spec (cons (cons 'name ret) char-spec))) - ;; ))) - ;; ((consp char) - ;; (setq char-spec char) - ;; (setq char nil))) (unless (or (characterp char) ; char (condition-case nil (setq char (find-char char-spec)) @@ -395,8 +412,8 @@ ideograph-gt-pj-10 ideograph-gt-pj-11)) (setq ret (decode-char ccs code-point)) - (setq ret (get-char-attribute ret 'ideograph-gt))) - (decode-builtin-char 'ideograph-gt ret)) + (setq ret (get-char-attribute ret '=gt))) + (decode-builtin-char '=gt ret)) (t (decode-builtin-char ccs code-point)))) (cond ((and (<= 0 (char-int ret)) @@ -460,6 +477,15 @@ value line-breaking)) (setq attributes (delq 'name attributes)) ) + (when (and (memq 'name* attributes) + (setq value (get-char-attribute char 'name*))) + (insert (format + (if (> (+ (current-column) (length value)) 48) + "(name* . %S)%s" + "(name* . %S)%s") + value line-breaking)) + (setq attributes (delq 'name* attributes)) + ) (when (and (memq 'script attributes) (setq value (get-char-attribute char 'script))) (insert (format "(script\t\t%s)%s" @@ -482,20 +508,41 @@ line-breaking)) (setq attributes (delq '=>ucs* attributes)) ) - (when (and (memq '=>ucs-jis attributes) - (setq value (get-char-attribute char '=>ucs-jis))) - (insert (format "(=>ucs-jis\t\t. #x%04X)\t; %c%s" + (when (and (memq '=>ucs-gb attributes) + (setq value (get-char-attribute char '=>ucs-gb))) + (insert (format "(=>ucs-gb\t\t. #x%04X)\t; %c%s" value (decode-char 'ucs value) line-breaking)) - (setq attributes (delq '=>ucs-jis attributes)) + (setq attributes (delq '=>ucs-gb attributes)) ) (when (and (memq '=>ucs-cns attributes) (setq value (get-char-attribute char '=>ucs-cns))) (insert (format "(=>ucs-cns\t\t. #x%04X)\t; %c%s" - value (decode-char 'ucs value) + value (decode-char 'ucs-cns value) line-breaking)) (setq attributes (delq '=>ucs-cns attributes)) ) + (when (and (memq '=>ucs-big5 attributes) + (setq value (get-char-attribute char '=>ucs-big5))) + (insert (format "(=>ucs-big5\t\t. #x%04X)\t; %c%s" + value (decode-char 'ucs-big5 value) + line-breaking)) + (setq attributes (delq '=>ucs-big5 attributes)) + ) + (when (and (memq '=>ucs-jis attributes) + (setq value (get-char-attribute char '=>ucs-jis))) + (insert (format "(=>ucs-jis\t\t. #x%04X)\t; %c%s" + value (decode-char 'ucs-jis value) + line-breaking)) + (setq attributes (delq '=>ucs-jis attributes)) + ) + (when (and (memq '=>ucs-ks attributes) + (setq value (get-char-attribute char '=>ucs-ks))) + (insert (format "(=>ucs-ks\t\t. #x%04X)\t; %c%s" + value (decode-char 'ucs-ks value) + line-breaking)) + (setq attributes (delq '=>ucs-ks attributes)) + ) (when (and (memq '->ucs attributes) (setq value (get-char-attribute char '->ucs))) (insert (format (if char-db-convert-obsolete-format @@ -587,13 +634,6 @@ line-breaking)) (setq attributes (delq 'morohashi-daikanwa attributes)) ) - ;; (when (and (memq 'hanyu-dazidian attributes) - ;; (setq value (get-char-attribute char 'hanyu-dazidian))) - ;; (insert (format "(hanyu-dazidian %s)%s" - ;; (mapconcat #'number-to-string value " ") - ;; line-breaking)) - ;; (setq attributes (delq 'hanyu-dazidian attributes)) - ;; ) (setq radical nil strokes nil) (when (and (memq 'ideographic-radical attributes) @@ -926,8 +966,7 @@ (format (cond ((memq name '(ideograph-daikanwa-2 ideograph-daikanwa - ideograph-gt - ideograph-cbeta)) + =gt =gt-k =cbeta)) (if has-long-ccs-name "(%-26s . %05d)\t; %c%s" "(%-18s . %05d)\t; %c%s")) @@ -935,7 +974,7 @@ (if has-long-ccs-name "(%-26s . %06d)\t; %c%s" "(%-18s . %06d)\t; %c%s")) - ((eq name 'ucs) + ((>= (charset-dimension name) 2) (if has-long-ccs-name "(%-26s . #x%04X)\t; %c%s" "(%-18s . #x%04X)\t; %c%s")) @@ -975,41 +1014,6 @@ (tabify (point-min)(point-max)) )) -;;;###autoload -(defun char-db-update-comment () - (interactive) - (save-excursion - (goto-char (point-min)) - (let (cdef table char) - (while (re-search-forward "^[ \t]*\\(([^.()]+)\\)" nil t) - (goto-char (match-beginning 1)) - (setq cdef (read (current-buffer))) - (when (find-charset (car cdef)) - (goto-char (match-end 0)) - (setq char - (if (and - (not (eq (car cdef) 'ideograph-daikanwa)) - (or (memq (car cdef) '(ascii latin-viscii-upper - latin-viscii-lower - arabic-iso8859-6 - japanese-jisx0213-1 - japanese-jisx0213-2)) - (= (char-int (charset-iso-final-char (car cdef))) - 0))) - (apply (function make-char) cdef) - (if (setq table (charset-mapping-table (car cdef))) - (set-charset-mapping-table (car cdef) nil)) - (prog1 - (apply (function make-char) cdef) - (if table - (set-charset-mapping-table (car cdef) table))))) - (when (not (or (< (char-int char) 32) - (and (<= 128 (char-int char)) - (< (char-int char) 160)))) - (delete-region (point) (point-at-eol)) - (insert (format "\t; %c" char))) - ))))) - (defun insert-char-data-with-variant (char &optional printable no-ucs-variant script excluded-script)