X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Futf-2000%2Fchar-db-util.el;h=38b3001fb0970a013c0341d2087861e20b27cd51;hb=218083ff5dbe96d50f6cd39c371b5ddf61d16415;hp=d001d8a1843ccc91d6222102083cea7b9021fc2b;hpb=8bbf5dfe09080f3360130e3da05d42404545c46f;p=chise%2Fxemacs-chise.git- diff --git a/lisp/utf-2000/char-db-util.el b/lisp/utf-2000/char-db-util.el index d001d8a..38b3001 100644 --- a/lisp/utf-2000/char-db-util.el +++ b/lisp/utf-2000/char-db-util.el @@ -1,6 +1,6 @@ ;;; char-db-util.el --- Character Database utility -;; Copyright (C) 1998,1999,2000,2001,2002 MORIOKA Tomohiko. +;; Copyright (C) 1998,1999,2000,2001,2002,2003 MORIOKA Tomohiko. ;; Author: MORIOKA Tomohiko ;; Keywords: UTF-2000, ISO/IEC 10646, Unicode, UCS-4, MULE. @@ -65,37 +65,14 @@ (let ((v (make-vector 215 nil)) (i 1)) (while (< i 215) - (aset v i (int-char (+ #x2EFF i))) + (aset v i (decode-char 'ucs (+ #x2EFF i))) (setq i (1+ i))) - (unless (charset-iso-final-char (car (split-char (aref v 34)))) - (aset v 34 (make-char 'chinese-gb2312 #x62 #x3A))) + ;; (unless (charset-iso-final-char (car (split-char (aref v 34)))) + ;; (aset v 34 (make-char 'chinese-gb2312 #x62 #x3A))) v)) (defvar char-db-ignored-attributes nil) -;;;###autoload -(defun char-ref= (cr1 cr2) - (cond ((char-ref-p cr1) - (if (char-ref-p cr2) - (char-spec= (plist-get cr1 :char) - (plist-get cr2 :char)) - (char-spec= (plist-get cr1 :char) cr2))) - (t - (char-spec= cr1 - (if (char-ref-p cr2) - (plist-get cr2 :char) - cr2))))) - -;;;###autoload -(defun char-spec= (cs1 cs2) - (if (characterp cs1) - (if (characterp cs2) - (eq cs1 cs2) - (eq cs1 (find-char cs2))) - (if (characterp cs2) - (eq (find-char cs1) cs2) - (eq (find-char cs1) (find-char cs2))))) - (defun char-attribute-name< (ka kb) (cond ((find-charset ka) @@ -156,6 +133,8 @@ arabic-digit arabic-1-column arabic-2-column))) + ((string-match "^ideograph-cbeta" (symbol-name (car rest)))) + ((string-match "^china3-jef" (symbol-name (car rest)))) ((string-match "^chinese-big5" (symbol-name (car rest)))) ((string-match "^ideograph-gt-pj-" (symbol-name (car rest))) (unless (memq 'ideograph-gt dest) @@ -164,23 +143,30 @@ (setq dest (cons (car rest) dest))))) (setq rest (cdr rest))) (append (sort dest #'char-attribute-name<) - '(chinese-big5-cdp chinese-big5-eten chinese-big5)))) + '(chinese-big5-cdp ideograph-cbeta china3-jef + chinese-big5-eten chinese-big5)))) -(defun char-db-insert-char-spec (char &optional readable column) - (unless column - (setq column (current-column))) - (let (char-spec ret al cal key temp-char) +(defun char-db-make-char-spec (char) + (let (ret char-spec) (cond ((characterp char) (cond ((and (setq ret (get-char-attribute char 'ucs)) (not (and (<= #xE000 ret)(<= ret #xF8FF)))) (setq char-spec (list (cons 'ucs ret))) - (if (setq ret (get-char-attribute char 'name)) - (setq char-spec (cons (cons 'name ret) char-spec))) + (cond ((setq ret (get-char-attribute char 'name)) + (setq char-spec (cons (cons 'name ret) char-spec)) + ) + ((setq ret (get-char-attribute char 'name*)) + (setq char-spec (cons (cons 'name* ret) char-spec)) + )) ) ((setq ret - (let ((default-coded-charset-priority-list - char-db-coded-charset-priority-list)) - (split-char char))) + (catch 'tag + (let ((rest char-db-coded-charset-priority-list)) + (while rest + (if (setq ret + (get-char-attribute char (car rest))) + (throw 'tag (cons (car rest) ret))) + (setq rest (cdr rest)))))) (setq char-spec (list ret)) (dolist (ccs (delq (car ret) (charset-list))) (if (or (and (charset-iso-final-char ccs) @@ -189,13 +175,23 @@ (setq char-spec (cons (cons ccs ret) char-spec)))) (if (null char-spec) (setq char-spec (split-char char))) - (if (setq ret (get-char-attribute char 'name)) - (setq char-spec (cons (cons 'name ret) char-spec))) - ))) + (cond ((setq ret (get-char-attribute char 'name)) + (setq char-spec (cons (cons 'name ret) char-spec)) + ) + ((setq ret (get-char-attribute char 'name*)) + (setq char-spec (cons (cons 'name* ret) char-spec)) + )) + )) + char-spec) ((consp char) - (setq char-spec char) - (setq char nil))) - (unless (or char + char)))) + +(defun char-db-insert-char-spec (char &optional readable column) + (unless column + (setq column (current-column))) + (let (char-spec ret al cal key temp-char) + (setq char-spec (char-db-make-char-spec char)) + (unless (or (characterp char) ; char (condition-case nil (setq char (find-char char-spec)) (error nil))) @@ -211,9 +207,20 @@ (setq key (car (car char-spec))) (unless (memq key char-db-ignored-attributes) (if (find-charset key) - (setq cal (cons key cal)) + (if (get-char-attribute char key) + (setq cal (cons key cal))) (setq al (cons key al)))) (setq char-spec (cdr char-spec))) + (unless cal + (setq char-spec (char-db-make-char-spec char)) + (while char-spec + (setq key (car (car char-spec))) + (unless (memq key char-db-ignored-attributes) + (if (find-charset key) + (setq cal (cons key cal)) + (setq al (cons key al)))) + (setq char-spec (cdr char-spec))) + ) (unless (or cal (memq 'ideographic-structure al)) (push 'ideographic-structure al)) @@ -419,6 +426,15 @@ value line-breaking)) (setq attributes (delq 'name attributes)) ) + (when (and (memq 'name* attributes) + (setq value (get-char-attribute char 'name*))) + (insert (format + (if (> (+ (current-column) (length value)) 48) + "(name* . %S)%s" + "(name* . %S)%s") + value line-breaking)) + (setq attributes (delq 'name* attributes)) + ) (when (and (memq 'script attributes) (setq value (get-char-attribute char 'script))) (insert (format "(script\t\t%s)%s" @@ -441,13 +457,34 @@ line-breaking)) (setq attributes (delq '=>ucs* attributes)) ) + (when (and (memq '=>ucs-gb attributes) + (setq value (get-char-attribute char '=>ucs-gb))) + (insert (format "(=>ucs-gb\t\t. #x%04X)\t; %c%s" + value (decode-char 'ucs value) + line-breaking)) + (setq attributes (delq '=>ucs-gb attributes)) + ) + (when (and (memq '=>ucs-cns attributes) + (setq value (get-char-attribute char '=>ucs-cns))) + (insert (format "(=>ucs-cns\t\t. #x%04X)\t; %c%s" + value (decode-char 'ucs-cns value) + line-breaking)) + (setq attributes (delq '=>ucs-cns attributes)) + ) (when (and (memq '=>ucs-jis attributes) (setq value (get-char-attribute char '=>ucs-jis))) (insert (format "(=>ucs-jis\t\t. #x%04X)\t; %c%s" - value (decode-char 'ucs value) + value (decode-char 'ucs-jis value) line-breaking)) (setq attributes (delq '=>ucs-jis attributes)) ) + (when (and (memq '=>ucs-ks attributes) + (setq value (get-char-attribute char '=>ucs-ks))) + (insert (format "(=>ucs-ks\t\t. #x%04X)\t; %c%s" + value (decode-char 'ucs-ks value) + line-breaking)) + (setq attributes (delq '=>ucs-ks attributes)) + ) (when (and (memq '->ucs attributes) (setq value (get-char-attribute char '->ucs))) (insert (format (if char-db-convert-obsolete-format @@ -731,10 +768,13 @@ value line-breaking)) (setq attributes (delq 'hanyu-dazidian-char attributes)) ) - (when (and (not readable) - (memq '->ucs-variants attributes)) - (setq attributes (delq '->ucs-variants attributes)) - ) + (unless readable + (when (memq '->ucs-variants attributes) + (setq attributes (delq '->ucs-variants attributes)) + ) + (when (memq 'composition attributes) + (setq attributes (delq 'composition attributes)) + )) (setq rest ccs-attributes) (while (and rest (progn @@ -757,22 +797,6 @@ ((or (eq name 'ideographic-structure) (eq name 'ideographic-) (string-match "^\\(->\\|<-\\)" (symbol-name name))) - ;; (memq name '(->lowercase - ;; ->uppercase ->titlecase - ;; ->fullwidth <-fullwidth - ;; ->identical - ;; ->vulgar-ideograph <-vulgar-ideograph - ;; ->ancient-ideograph <-ancient-ideograph - ;; ->original-ideograph <-original-ideograph - ;; ->simplified-ideograph <-simplified-ideograph - ;; ->wrong-ideograph <-wrong-ideograph - ;; ->same-ideograph - ;; ->ideographic-variants - ;; ->synonyms - ;; ->radical <-radical - ;; ->bopomofo <-bopomofo - ;; ->ideographic <-ideographic - ;; ideographic-structure)) (insert (format "(%-18s%s " name line-breaking)) (setq lbs (concat "\n" (make-string (current-column) ?\ )) separator nil) @@ -892,6 +916,7 @@ (cond ((memq name '(ideograph-daikanwa-2 ideograph-daikanwa ideograph-gt + ideograph-gt-k ideograph-cbeta)) (if has-long-ccs-name "(%-26s . %05d)\t; %c%s" @@ -900,7 +925,7 @@ (if has-long-ccs-name "(%-26s . %06d)\t; %c%s" "(%-18s . %06d)\t; %c%s")) - ((eq name 'ucs) + ((>= (charset-dimension name) 2) ; (eq name 'ucs) (if has-long-ccs-name "(%-26s . #x%04X)\t; %c%s" "(%-18s . #x%04X)\t; %c%s")) @@ -940,41 +965,6 @@ (tabify (point-min)(point-max)) )) -;;;###autoload -(defun char-db-update-comment () - (interactive) - (save-excursion - (goto-char (point-min)) - (let (cdef table char) - (while (re-search-forward "^[ \t]*\\(([^.()]+)\\)" nil t) - (goto-char (match-beginning 1)) - (setq cdef (read (current-buffer))) - (when (find-charset (car cdef)) - (goto-char (match-end 0)) - (setq char - (if (and - (not (eq (car cdef) 'ideograph-daikanwa)) - (or (memq (car cdef) '(ascii latin-viscii-upper - latin-viscii-lower - arabic-iso8859-6 - japanese-jisx0213-1 - japanese-jisx0213-2)) - (= (char-int (charset-iso-final-char (car cdef))) - 0))) - (apply (function make-char) cdef) - (if (setq table (charset-mapping-table (car cdef))) - (set-charset-mapping-table (car cdef) nil)) - (prog1 - (apply (function make-char) cdef) - (if table - (set-charset-mapping-table (car cdef) table))))) - (when (not (or (< (char-int char) 32) - (and (<= 128 (char-int char)) - (< (char-int char) 160)))) - (delete-region (point) (point-at-eol)) - (insert (format "\t; %c" char))) - ))))) - (defun insert-char-data-with-variant (char &optional printable no-ucs-variant script excluded-script)