X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Futf-2000%2Fchar-db-util.el;h=aacb226f1a7fc6f32cddff9804077fdc3d6f6cdd;hb=6f8a91966f78f32c2150f434d378f7ac4d858bc0;hp=753497741eab8da7096e320cfaa3d8f133ac2107;hpb=adb580accfef1ef8eb6a41c52e6f7afada6e8f5b;p=chise%2Fxemacs-chise.git diff --git a/lisp/utf-2000/char-db-util.el b/lisp/utf-2000/char-db-util.el index 7534977..aacb226 100644 --- a/lisp/utf-2000/char-db-util.el +++ b/lisp/utf-2000/char-db-util.el @@ -65,7 +65,7 @@ (let ((v (make-vector 215 nil)) (i 1)) (while (< i 215) - (aset v i (decode-char 'ucs (+ #x2EFF i))) + (aset v i (decode-char '=ucs (+ #x2EFF i))) (setq i (1+ i))) v)) @@ -141,39 +141,68 @@ nil))) (defvar char-db-coded-charset-priority-list - (let ((rest default-coded-charset-priority-list) - dest) - (while rest - (when (symbolp (car rest)) - (cond ((memq (car rest) - '(latin-viscii-lower - latin-viscii-upper - ipa - lao - ethiopic - arabic-digit - arabic-1-column - arabic-2-column))) - ((string-match "^mojikyo-" (symbol-name (car rest)))) - ((string-match "^ideograph-cbeta" (symbol-name (car rest)))) - ((string-match "^china3-jef" (symbol-name (car rest)))) - ((string-match "^chinese-big5" (symbol-name (car rest)))) - ((string-match "^ideograph-gt-pj-" (symbol-name (car rest))) - (unless (memq 'ideograph-gt dest) - (setq dest (cons 'ideograph-gt dest)))) - (t - (setq dest (cons (car rest) dest))))) - (setq rest (cdr rest))) - (append (sort dest #'char-attribute-name<) - '(chinese-big5-cdp ideograph-cbeta china3-jef - chinese-big5-eten chinese-big5)))) + '(ascii + control-1 + latin-iso8859-1 + latin-iso8859-2 + latin-iso8859-3 + latin-iso8859-4 + latin-iso8859-9 + latin-jisx0201 + cyrillic-iso8859-5 + greek-iso8859-7 + thai-tis620 + =jis-x0208 + japanese-jisx0208 + japanese-jisx0212 + japanese-jisx0208-1978 + chinese-gb2312 + chinese-cns11643-1 + chinese-cns11643-2 + chinese-cns11643-3 + chinese-cns11643-4 + chinese-cns11643-5 + chinese-cns11643-6 + chinese-cns11643-7 + =jis-x0213-1-2000 + =jis-x0213-2-2000 + korean-ksc5601 + chinese-isoir165 + katakana-jisx0201 + hebrew-iso8859-8 + =jis-x0208-1990 + chinese-gb12345 + latin-viscii + ethiopic-ucs + =gt + =big5-cdp + =gt-k + ideograph-daikanwa-2 + ideograph-daikanwa + =cbeta + ideograph-hanziku-1 + ideograph-hanziku-2 + ideograph-hanziku-3 + ideograph-hanziku-4 + ideograph-hanziku-5 + ideograph-hanziku-6 + ideograph-hanziku-7 + ideograph-hanziku-8 + ideograph-hanziku-9 + ideograph-hanziku-10 + ideograph-hanziku-11 + ideograph-hanziku-12 + =cbeta + =jef-china3 + =big5-eten + =big5)) (defun char-db-make-char-spec (char) (let (ret char-spec) (cond ((characterp char) - (cond ((and (setq ret (get-char-attribute char 'ucs)) + (cond ((and (setq ret (encode-char char '=ucs 'defined-only)) (not (and (<= #xE000 ret)(<= ret #xF8FF)))) - (setq char-spec (list (cons 'ucs ret))) + (setq char-spec (list (cons '=ucs ret))) (cond ((setq ret (get-char-attribute char 'name)) (setq char-spec (cons (cons 'name ret) char-spec)) ) @@ -191,9 +220,13 @@ (setq rest (cdr rest)))))) (setq char-spec (list ret)) (dolist (ccs (delq (car ret) (charset-list))) - (if (or (and (charset-iso-final-char ccs) - (setq ret (get-char-attribute char ccs))) - (eq ccs 'ideograph-daikanwa)) + (if (and (or (charset-iso-final-char ccs) + (memq ccs + '(ideograph-daikanwa + =daikanwa-rev2 + ;; =gt-k + ))) + (setq ret (get-char-attribute char ccs))) (setq char-spec (cons (cons ccs ret) char-spec)))) (if (null char-spec) (setq char-spec (split-char char))) @@ -345,7 +378,7 @@ (insert separator) (insert ":char\t") (cond ((numberp value) - (setq value (decode-char 'ucs value))) + (setq value (decode-char '=ucs value))) ;; ((consp value) ;; (setq value (or (find-char value) ;; value))) @@ -371,25 +404,25 @@ (setq ret (cond ((eq ccs 'arabic-iso8859-6) (decode-char ccs code-point)) - ((and (memq ccs '(ideograph-gt-pj-1 - ideograph-gt-pj-2 - ideograph-gt-pj-3 - ideograph-gt-pj-4 - ideograph-gt-pj-5 - ideograph-gt-pj-6 - ideograph-gt-pj-7 - ideograph-gt-pj-8 - ideograph-gt-pj-9 - ideograph-gt-pj-10 - ideograph-gt-pj-11)) + ((and (memq ccs '(=gt-pj-1 + =gt-pj-2 + =gt-pj-3 + =gt-pj-4 + =gt-pj-5 + =gt-pj-6 + =gt-pj-7 + =gt-pj-8 + =gt-pj-9 + =gt-pj-10 + =gt-pj-11)) (setq ret (decode-char ccs code-point)) - (setq ret (get-char-attribute ret 'ideograph-gt))) - (decode-builtin-char 'ideograph-gt ret)) + (setq ret (get-char-attribute ret '=gt))) + (decode-builtin-char '=gt ret)) (t (decode-builtin-char ccs code-point)))) (cond ((and (<= 0 (char-int ret)) (<= (char-int ret) #x1F)) - (decode-char 'ucs (+ #x2400 (char-int ret)))) + (decode-char '=ucs (+ #x2400 (char-int ret)))) ((= (char-int ret) #x7F) ?\u2421) (t ret)))) @@ -465,54 +498,89 @@ line-breaking)) (setq attributes (delq 'script attributes)) ) - (when (and (memq '=>ucs attributes) - (setq value (get-char-attribute char '=>ucs))) - (insert (format "(=>ucs\t\t. #x%04X)\t; %c%s" - value (decode-char 'ucs value) - line-breaking)) - (setq attributes (delq '=>ucs attributes)) - ) - (when (and (memq '=>ucs* attributes) - (setq value (get-char-attribute char '=>ucs*))) - (insert (format "(=>ucs*\t\t. #x%04X)\t; %c%s" - value (decode-char 'ucs value) - line-breaking)) - (setq attributes (delq '=>ucs* attributes)) - ) - (when (and (memq '=>ucs-gb attributes) - (setq value (get-char-attribute char '=>ucs-gb))) - (insert (format "(=>ucs-gb\t\t. #x%04X)\t; %c%s" - value (decode-char 'ucs value) - line-breaking)) - (setq attributes (delq '=>ucs-gb attributes)) - ) - (when (and (memq '=>ucs-cns attributes) - (setq value (get-char-attribute char '=>ucs-cns))) - (insert (format "(=>ucs-cns\t\t. #x%04X)\t; %c%s" - value (decode-char 'ucs-cns value) - line-breaking)) - (setq attributes (delq '=>ucs-cns attributes)) - ) - (when (and (memq '=>ucs-jis attributes) - (setq value (get-char-attribute char '=>ucs-jis))) - (insert (format "(=>ucs-jis\t\t. #x%04X)\t; %c%s" - value (decode-char 'ucs-jis value) - line-breaking)) - (setq attributes (delq '=>ucs-jis attributes)) - ) - (when (and (memq '=>ucs-ks attributes) - (setq value (get-char-attribute char '=>ucs-ks))) - (insert (format "(=>ucs-ks\t\t. #x%04X)\t; %c%s" - value (decode-char 'ucs-ks value) - line-breaking)) - (setq attributes (delq '=>ucs-ks attributes)) - ) + (dolist (name '(=>ucs =>ucs*)) + (when (and (memq name attributes) + (setq value (get-char-attribute char name))) + (insert (format "(%-18s . #x%04X)\t; %c%s" + name value (decode-char '=ucs value) + line-breaking)) + (setq attributes (delq name attributes)))) + ;; (when (and (memq '=>ucs* attributes) + ;; (setq value (get-char-attribute char '=>ucs*))) + ;; (insert (format "(=>ucs*\t\t. #x%04X)\t; %c%s" + ;; value (decode-char '=ucs value) + ;; line-breaking)) + ;; (setq attributes (delq '=>ucs* attributes)) + ;; ) + (dolist (name '(=>ucs@gb =>ucs@cns =>ucs@jis =>ucs@ks =>ucs@big5)) + (when (and (memq name attributes) + (setq value (get-char-attribute char name))) + (insert (format "(%-18s . #x%04X)\t; %c%s" + name value + (decode-char (intern + (concat "=" + (substring + (symbol-name name) 2))) + value) + line-breaking)) + (setq attributes (delq name attributes)) + )) + (dolist (name '(=>ucs-gb =>ucs-cns =>ucs-jis =>ucs-ks =>ucs-big5)) + (when (and (memq name attributes) + (setq value (get-char-attribute char name))) + (insert (format "(%-18s . #x%04X)\t; %c%s" + (intern + (concat "=>ucs@" + (substring (symbol-name name) 6))) + value + (decode-char (intern + (concat "=ucs@" + (substring + (symbol-name name) 6))) + value) + line-breaking)) + (setq attributes (delq name attributes)))) + ;; (when (and (memq '=>ucs-gb attributes) + ;; (setq value (get-char-attribute char '=>ucs-gb))) + ;; (insert (format "(=>ucs@gb\t\t. #x%04X)\t; %c%s" + ;; value (decode-char '=ucs@gb value) + ;; line-breaking)) + ;; (setq attributes (delq '=>ucs-gb attributes)) + ;; ) + ;; (when (and (memq '=>ucs-cns attributes) + ;; (setq value (get-char-attribute char '=>ucs-cns))) + ;; (insert (format "(=>ucs@cns\t\t. #x%04X)\t; %c%s" + ;; value (decode-char '=ucs@cns value) + ;; line-breaking)) + ;; (setq attributes (delq '=>ucs-cns attributes)) + ;; ) + ;; (when (and (memq '=>ucs-big5 attributes) + ;; (setq value (get-char-attribute char '=>ucs-big5))) + ;; (insert (format "(=>ucs-big5\t\t. #x%04X)\t; %c%s" + ;; value (decode-char 'ucs-big5 value) + ;; line-breaking)) + ;; (setq attributes (delq '=>ucs-big5 attributes)) + ;; ) + ;; (when (and (memq '=>ucs-jis attributes) + ;; (setq value (get-char-attribute char '=>ucs-jis))) + ;; (insert (format "(=>ucs@jis\t\t. #x%04X)\t; %c%s" + ;; value (decode-char '=ucs@jis value) + ;; line-breaking)) + ;; (setq attributes (delq '=>ucs-jis attributes)) + ;; ) + ;; (when (and (memq '=>ucs-ks attributes) + ;; (setq value (get-char-attribute char '=>ucs-ks))) + ;; (insert (format "(=>ucs-ks\t\t. #x%04X)\t; %c%s" + ;; value (decode-char 'ucs-ks value) + ;; line-breaking)) + ;; (setq attributes (delq '=>ucs-ks attributes)) + ;; ) (when (and (memq '->ucs attributes) (setq value (get-char-attribute char '->ucs))) (insert (format (if char-db-convert-obsolete-format "(=>ucs\t\t. #x%04X)\t; %c%s" "(->ucs\t\t. #x%04X)\t; %c%s") - value (decode-char 'ucs value) + value (decode-char '=ucs value) line-breaking)) (setq attributes (delq '->ucs attributes)) ) @@ -696,7 +764,7 @@ (while (consp value) (setq cell (car value)) (if (integerp cell) - (setq cell (decode-char 'ucs cell))) + (setq cell (decode-char '=ucs cell))) (cond ((characterp cell) (if separator (insert lbs)) @@ -805,7 +873,12 @@ (while attributes (setq name (car attributes)) (if (setq value (get-char-attribute char name)) - (cond ((eq name 'jisx0208-1978/4X) + (cond ((string-match "^=>ucs@" (symbol-name name)) + (insert (format "(%-18s . #x%04X)\t; %c%s" + name value (decode-char '=ucs value) + line-breaking)) + ) + ((eq name 'jisx0208-1978/4X) (insert (format "(%-18s . #x%04X)%s" name value line-breaking))) @@ -818,7 +891,7 @@ (while (consp value) (setq cell (car value)) (if (integerp cell) - (setq cell (decode-char 'ucs cell))) + (setq cell (decode-char '=ucs cell))) (cond ((characterp cell) (if separator (insert lbs)) @@ -928,11 +1001,10 @@ (setq value (get-char-attribute char name))) (insert (format - (cond ((memq name '(ideograph-daikanwa-2 - ideograph-daikanwa - ideograph-gt - ideograph-gt-k - ideograph-cbeta)) + (cond ((memq name '(ideograph-daikanwa + =daikanwa-rev1 + =daikanwa-rev2 + =gt =gt-k =cbeta)) (if has-long-ccs-name "(%-26s . %05d)\t; %c%s" "(%-18s . %05d)\t; %c%s")) @@ -976,8 +1048,15 @@ (goto-char (point-min)) (while (re-search-forward "[ \t]+$" nil t) (replace-match "")) + ;; from tabify. + (goto-char (point-min)) + (while (re-search-forward "[ \t][ \t][ \t]*" nil t) + (let ((column (current-column)) + (indent-tabs-mode t)) + (delete-region (match-beginning 0) (point)) + (indent-to column))) (goto-char (point-max)) - (tabify (point-min)(point-max)) + ;; (tabify (point-min)(point-max)) )) (defun insert-char-data-with-variant (char &optional printable @@ -998,7 +1077,7 @@ (or (null excluded-script) (null (setq vs (get-char-attribute variant 'script))) (not (memq excluded-script vs)))) - (or (and no-ucs-variant (get-char-attribute variant 'ucs)) + (or (and no-ucs-variant (get-char-attribute variant '=ucs)) (insert-char-data variant printable))) (setq variants (cdr variants)) ))) @@ -1007,12 +1086,11 @@ (let ((code min) char) (while (<= code max) - (setq char (decode-char 'ucs code)) - (if (get-char-attribute char 'ucs) + (setq char (decode-char '=ucs code)) + (if (encode-char char '=ucs 'defined-only) (insert-char-data-with-variant char nil 'no-ucs-variant script excluded-script)) - (setq code (1+ code)) - ))) + (setq code (1+ code))))) (defun write-char-range-data-to-file (min max file &optional script excluded-script)