X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Futf-2000%2Fchar-db-util.el;h=9ac1fdf2e935ffc6758348cf3e6a02c7ec120302;hb=bb12f0b722cfbde9444f517fbcaf2d41fbdcba3c;hp=bbedac2506c3874ff112a2656607d5a0b181e6cd;hpb=d4677936ec9d8aa81167ad858aae514a2c6e3cb1;p=chise%2Fxemacs-chise.git- diff --git a/lisp/utf-2000/char-db-util.el b/lisp/utf-2000/char-db-util.el index bbedac2..9ac1fdf 100644 --- a/lisp/utf-2000/char-db-util.el +++ b/lisp/utf-2000/char-db-util.el @@ -1,6 +1,6 @@ ;;; char-db-util.el --- Character Database utility -;; Copyright (C) 1998,1999,2000,2001 MORIOKA Tomohiko. +;; Copyright (C) 1998,1999,2000,2001,2002 MORIOKA Tomohiko. ;; Author: MORIOKA Tomohiko ;; Keywords: UTF-2000, ISO/IEC 10646, Unicode, UCS-4, MULE. @@ -67,10 +67,38 @@ (while (< i 215) (aset v i (int-char (+ #x2EFF i))) (setq i (1+ i))) - (if (< (charset-iso-final-char (car (split-char (aref v 34)))) ?0) - (aset v 34 (make-char 'chinese-gb2312 #x62 #x3A))) + (unless (charset-iso-final-char (car (split-char (aref v 34)))) + (aset v 34 (make-char 'chinese-gb2312 #x62 #x3A))) v)) +;;;###autoload +(defun char-ref-p (obj) + (and (consp obj) + (keywordp (car obj)))) + +;;;###autoload +(defun char-ref= (cr1 cr2) + (cond ((char-ref-p cr1) + (if (char-ref-p cr2) + (char-spec= (plist-get cr1 :char) + (plist-get cr2 :char)) + (char-spec= (plist-get cr1 :char) cr2))) + (t + (char-spec= cr1 + (if (char-ref-p cr2) + (plist-get cr2 :char) + cr2))))) + +;;;###autoload +(defun char-spec= (cs1 cs2) + (if (characterp cs1) + (if (characterp cs2) + (eq cs1 cs2) + (eq cs1 (find-char cs2))) + (if (characterp cs2) + (eq (find-char cs1) cs2) + (eq (find-char cs1) (find-char cs2))))) + (defun char-attribute-name< (ka kb) (cond ((find-charset ka) @@ -80,23 +108,24 @@ ((= (charset-dimension ka) (charset-dimension kb)) (cond ((= (charset-chars ka)(charset-chars kb)) - (cond - ((>= (charset-iso-final-char ka) ?@) - (if (>= (charset-iso-final-char kb) ?@) - (< (charset-iso-final-char ka) - (charset-iso-final-char kb)) - t)) - ((>= (charset-iso-final-char ka) ?0) - (cond - ((>= (charset-iso-final-char kb) ?@) - nil) - ((>= (charset-iso-final-char kb) ?0) - (< (charset-iso-final-char ka) - (charset-iso-final-char kb))) - (t))) - (t (if (>= (charset-iso-final-char kb) ?0) - nil - (> (charset-id ka)(charset-id kb)))))) + (if (charset-iso-final-char ka) + (cond + ((>= (charset-iso-final-char ka) ?@) + (if (and (charset-iso-final-char kb) + (>= (charset-iso-final-char kb) ?@)) + (< (charset-iso-final-char ka) + (charset-iso-final-char kb)) + t)) + (t + (if (charset-iso-final-char kb) + (if (>= (charset-iso-final-char kb) ?@) + nil + (< (charset-iso-final-char ka) + (charset-iso-final-char kb))) + t))) + (if (charset-iso-final-char kb) + nil + (< (charset-id ka)(charset-id kb))))) ((<= (charset-chars ka)(charset-chars kb))))) (t (< (charset-dimension ka) @@ -116,6 +145,31 @@ ((symbolp kb) nil))) +(defvar char-db-coded-charset-priority-list + (let ((rest default-coded-charset-priority-list) + dest) + (while rest + (when (symbolp (car rest)) + (cond ((memq (car rest) + '(latin-viscii-lower + latin-viscii-upper + ipa + lao + ethiopic + arabic-digit + arabic-1-column + arabic-2-column))) + ((string-match "^mojikyo-" (symbol-name (car rest)))) + ((string-match "^chinese-big5" (symbol-name (car rest)))) + ((string-match "^ideograph-gt-pj-" (symbol-name (car rest))) + (unless (memq 'ideograph-gt dest) + (setq dest (cons 'ideograph-gt dest)))) + (t + (setq dest (cons (car rest) dest))))) + (setq rest (cdr rest))) + (append (sort dest #'char-attribute-name<) + '(chinese-big5-cdp chinese-big5-eten chinese-big5)))) + (defun char-db-insert-char-spec (char &optional readable column) (unless column (setq column (current-column))) @@ -127,13 +181,18 @@ (if (setq ret (get-char-attribute char 'name)) (setq char-spec (cons (cons 'name ret) char-spec))) ) - ((setq ret (split-char char)) + ((setq ret + (let ((default-coded-charset-priority-list + char-db-coded-charset-priority-list)) + (split-char char))) (setq char-spec (list ret)) (dolist (ccs (delq (car ret) (charset-list))) - (if (or (and (>= (charset-iso-final-char ccs) ?0) + (if (or (and (charset-iso-final-char ccs) (setq ret (get-char-attribute char ccs))) (eq ccs 'ideograph-daikanwa)) (setq char-spec (cons (cons ccs ret) char-spec)))) + (if (null char-spec) + (setq char-spec (split-char char))) (if (setq ret (get-char-attribute char 'name)) (setq char-spec (cons (cons 'name ret) char-spec))) ))) @@ -154,10 +213,14 @@ cal nil) (while char-spec (setq key (car (car char-spec))) - (if (find-charset key) - (setq cal (cons key cal)) - (setq al (cons key al))) + (unless (memq key char-db-ignored-attributes) + (if (find-charset key) + (setq cal (cons key cal)) + (setq al (cons key al)))) (setq char-spec (cdr char-spec))) + (unless (or cal + (memq 'ideographic-structure al)) + (push 'ideographic-structure al)) (insert-char-attributes char readable (or al 'none) cal) @@ -299,20 +362,42 @@ (defvar char-db-convert-obsolete-format t) +(defvar char-db-ignored-attributes nil) + (defun insert-char-attributes (char &optional readable attributes ccs-attributes column) - (setq attributes - (sort (if attributes - (if (consp attributes) - (copy-sequence attributes)) - (char-attribute-list)) - #'char-attribute-name<)) - (setq ccs-attributes - (sort (if ccs-attributes - (copy-sequence ccs-attributes) - (charset-list)) - #'char-attribute-name<)) + (let (atr-d ccs-d) + (setq attributes + (sort (if attributes + (if (consp attributes) + (progn + (dolist (name attributes) + (unless (memq name char-db-ignored-attributes) + (push name atr-d))) + atr-d)) + (dolist (name (char-attribute-list)) + (unless (memq name char-db-ignored-attributes) + (if (find-charset name) + (push name ccs-d) + (push name atr-d)))) + atr-d) + #'char-attribute-name<)) + (setq ccs-attributes + (sort (if ccs-attributes + (progn + (setq ccs-d nil) + (dolist (name ccs-attributes) + (unless (memq name char-db-ignored-attributes) + (push name ccs-d))) + ccs-d) + (or ccs-d + (progn + (dolist (name (charset-list)) + (unless (memq name char-db-ignored-attributes) + (push name ccs-d))) + ccs-d))) + #'char-attribute-name<))) (unless column (setq column (current-column))) (let (name value has-long-ccs-name rest @@ -346,6 +431,20 @@ line-breaking)) (setq attributes (delq '=>ucs attributes)) ) + (when (and (memq '=>ucs* attributes) + (setq value (get-char-attribute char '=>ucs*))) + (insert (format "(=>ucs*\t\t. #x%04X)\t; %c%s" + value (decode-char 'ucs value) + line-breaking)) + (setq attributes (delq '=>ucs* attributes)) + ) + (when (and (memq '=>ucs-jis attributes) + (setq value (get-char-attribute char '=>ucs-jis))) + (insert (format "(=>ucs-jis\t\t. #x%04X)\t; %c%s" + value (decode-char 'ucs value) + line-breaking)) + (setq attributes (delq '=>ucs-jis attributes)) + ) (when (and (memq '->ucs attributes) (setq value (get-char-attribute char '->ucs))) (insert (format (if char-db-convert-obsolete-format @@ -437,6 +536,13 @@ line-breaking)) (setq attributes (delq 'morohashi-daikanwa attributes)) ) + ;; (when (and (memq 'hanyu-dazidian attributes) + ;; (setq value (get-char-attribute char 'hanyu-dazidian))) + ;; (insert (format "(hanyu-dazidian %s)%s" + ;; (mapconcat #'number-to-string value " ") + ;; line-breaking)) + ;; (setq attributes (delq 'hanyu-dazidian attributes)) + ;; ) (setq radical nil strokes nil) (when (and (memq 'ideographic-radical attributes) @@ -579,6 +685,24 @@ line-breaking)) (setq attributes (delq '->mojikyo attributes)) ) + (when (and (memq 'hanyu-dazidian-vol attributes) + (setq value (get-char-attribute char 'hanyu-dazidian-vol))) + (insert (format "(hanyu-dazidian-vol . %d)%s" + value line-breaking)) + (setq attributes (delq 'hanyu-dazidian-vol attributes)) + ) + (when (and (memq 'hanyu-dazidian-page attributes) + (setq value (get-char-attribute char 'hanyu-dazidian-page))) + (insert (format "(hanyu-dazidian-page . %d)%s" + value line-breaking)) + (setq attributes (delq 'hanyu-dazidian-page attributes)) + ) + (when (and (memq 'hanyu-dazidian-char attributes) + (setq value (get-char-attribute char 'hanyu-dazidian-char))) + (insert (format "(hanyu-dazidian-char . %d)%s" + value line-breaking)) + (setq attributes (delq 'hanyu-dazidian-char attributes)) + ) (setq rest ccs-attributes) (while (and rest (progn @@ -601,6 +725,7 @@ ((memq name '(->lowercase ->uppercase ->titlecase ->fullwidth <-fullwidth + ->identical ->vulgar-ideograph <-vulgar-ideograph ->ancient-ideograph <-ancient-ideograph ->original-ideograph <-original-ideograph @@ -727,8 +852,10 @@ (setq value (get-char-attribute char name))) (insert (format - (cond ((memq name '(ideograph-daikanwa ideograph-gt - ideograph-cbeta)) + (cond ((memq name '(ideograph-daikanwa-2 + ideograph-daikanwa + ideograph-gt + ideograph-cbeta)) (if has-long-ccs-name "(%-26s . %05d)\t; %c%s" "(%-18s . %05d)\t; %c%s")) @@ -868,6 +995,15 @@ (condition-case err (progn (insert-char-data-with-variant char 'printable) + (unless (char-attribute-alist char) + (insert (format ";; = %c\n" + (let* ((rest (split-char char)) + (ccs (pop rest)) + (code (pop rest))) + (while rest + (setq code (logior (lsh code 8) + (pop rest)))) + (decode-char ccs code))))) ;; (char-db-update-comment) (set-buffer-modified-p nil) (view-mode the-buf (lambda (buf)