X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Futf-2000%2Fchar-db-util.el;h=aacb226f1a7fc6f32cddff9804077fdc3d6f6cdd;hb=6f8a91966f78f32c2150f434d378f7ac4d858bc0;hp=78bc6547565e77a654ec7e065a3dd95a4c3724cd;hpb=026445a5fcb93ce75d78c2d7af98f14ae3359842;p=chise%2Fxemacs-chise.git diff --git a/lisp/utf-2000/char-db-util.el b/lisp/utf-2000/char-db-util.el index 78bc654..aacb226 100644 --- a/lisp/utf-2000/char-db-util.el +++ b/lisp/utf-2000/char-db-util.el @@ -1,6 +1,6 @@ ;;; char-db-util.el --- Character Database utility -;; Copyright (C) 1998,1999,2000,2001 MORIOKA Tomohiko. +;; Copyright (C) 1998,1999,2000,2001,2002,2003 MORIOKA Tomohiko. ;; Author: MORIOKA Tomohiko ;; Keywords: UTF-2000, ISO/IEC 10646, Unicode, UCS-4, MULE. @@ -65,12 +65,35 @@ (let ((v (make-vector 215 nil)) (i 1)) (while (< i 215) - (aset v i (int-char (+ #x2EFF i))) + (aset v i (decode-char '=ucs (+ #x2EFF i))) (setq i (1+ i))) - (if (< (charset-iso-final-char (car (split-char (aref v 34)))) ?0) - (aset v 34 (make-char 'chinese-gb2312 #x62 #x3A))) v)) +(defvar char-db-ignored-attributes nil) + +;;;###autoload +(defun char-ref= (cr1 cr2) + (cond ((char-ref-p cr1) + (if (char-ref-p cr2) + (char-spec= (plist-get cr1 :char) + (plist-get cr2 :char)) + (char-spec= (plist-get cr1 :char) cr2))) + (t + (char-spec= cr1 + (if (char-ref-p cr2) + (plist-get cr2 :char) + cr2))))) + +;;;###autoload +(defun char-spec= (cs1 cs2) + (if (characterp cs1) + (if (characterp cs2) + (eq cs1 cs2) + (eq cs1 (find-char cs2))) + (if (characterp cs2) + (eq (find-char cs1) cs2) + (eq (find-char cs1) (find-char cs2))))) + (defun char-attribute-name< (ka kb) (cond ((find-charset ka) @@ -80,23 +103,24 @@ ((= (charset-dimension ka) (charset-dimension kb)) (cond ((= (charset-chars ka)(charset-chars kb)) - (cond - ((>= (charset-iso-final-char ka) ?@) - (if (>= (charset-iso-final-char kb) ?@) - (< (charset-iso-final-char ka) - (charset-iso-final-char kb)) - t)) - ((>= (charset-iso-final-char ka) ?0) - (cond - ((>= (charset-iso-final-char kb) ?@) - nil) - ((>= (charset-iso-final-char kb) ?0) - (< (charset-iso-final-char ka) - (charset-iso-final-char kb))) - (t))) - (t (if (>= (charset-iso-final-char kb) ?0) - nil - (> (charset-id ka)(charset-id kb)))))) + (if (charset-iso-final-char ka) + (cond + ((>= (charset-iso-final-char ka) ?@) + (if (and (charset-iso-final-char kb) + (>= (charset-iso-final-char kb) ?@)) + (< (charset-iso-final-char ka) + (charset-iso-final-char kb)) + t)) + (t + (if (charset-iso-final-char kb) + (if (>= (charset-iso-final-char kb) ?@) + nil + (< (charset-iso-final-char ka) + (charset-iso-final-char kb))) + t))) + (if (charset-iso-final-char kb) + nil + (< (charset-id ka)(charset-id kb))))) ((<= (charset-chars ka)(charset-chars kb))))) (t (< (charset-dimension ka) @@ -116,45 +140,155 @@ ((symbolp kb) nil))) -(defun char-db-insert-char-spec (char &optional readable column) - (unless column - (setq column (current-column))) - (let (char-spec ret al cal key) +(defvar char-db-coded-charset-priority-list + '(ascii + control-1 + latin-iso8859-1 + latin-iso8859-2 + latin-iso8859-3 + latin-iso8859-4 + latin-iso8859-9 + latin-jisx0201 + cyrillic-iso8859-5 + greek-iso8859-7 + thai-tis620 + =jis-x0208 + japanese-jisx0208 + japanese-jisx0212 + japanese-jisx0208-1978 + chinese-gb2312 + chinese-cns11643-1 + chinese-cns11643-2 + chinese-cns11643-3 + chinese-cns11643-4 + chinese-cns11643-5 + chinese-cns11643-6 + chinese-cns11643-7 + =jis-x0213-1-2000 + =jis-x0213-2-2000 + korean-ksc5601 + chinese-isoir165 + katakana-jisx0201 + hebrew-iso8859-8 + =jis-x0208-1990 + chinese-gb12345 + latin-viscii + ethiopic-ucs + =gt + =big5-cdp + =gt-k + ideograph-daikanwa-2 + ideograph-daikanwa + =cbeta + ideograph-hanziku-1 + ideograph-hanziku-2 + ideograph-hanziku-3 + ideograph-hanziku-4 + ideograph-hanziku-5 + ideograph-hanziku-6 + ideograph-hanziku-7 + ideograph-hanziku-8 + ideograph-hanziku-9 + ideograph-hanziku-10 + ideograph-hanziku-11 + ideograph-hanziku-12 + =cbeta + =jef-china3 + =big5-eten + =big5)) + +(defun char-db-make-char-spec (char) + (let (ret char-spec) (cond ((characterp char) - (cond ((setq ret (get-char-attribute char 'ucs)) - (setq char-spec (list (cons 'ucs ret))) - (if (setq ret (get-char-attribute char 'name)) - (setq char-spec (cons (cons 'name ret) char-spec))) + (cond ((and (setq ret (encode-char char '=ucs 'defined-only)) + (not (and (<= #xE000 ret)(<= ret #xF8FF)))) + (setq char-spec (list (cons '=ucs ret))) + (cond ((setq ret (get-char-attribute char 'name)) + (setq char-spec (cons (cons 'name ret) char-spec)) + ) + ((setq ret (get-char-attribute char 'name*)) + (setq char-spec (cons (cons 'name* ret) char-spec)) + )) ) - ((setq ret (split-char char)) + ((setq ret + (catch 'tag + (let ((rest char-db-coded-charset-priority-list)) + (while rest + (if (setq ret + (get-char-attribute char (car rest))) + (throw 'tag (cons (car rest) ret))) + (setq rest (cdr rest)))))) (setq char-spec (list ret)) (dolist (ccs (delq (car ret) (charset-list))) - (if (and (>= (charset-iso-final-char ccs) ?0) + (if (and (or (charset-iso-final-char ccs) + (memq ccs + '(ideograph-daikanwa + =daikanwa-rev2 + ;; =gt-k + ))) (setq ret (get-char-attribute char ccs))) (setq char-spec (cons (cons ccs ret) char-spec)))) - (if (setq ret (get-char-attribute char 'name)) - (setq char-spec (cons (cons 'name ret) char-spec))) - ))) + (if (null char-spec) + (setq char-spec (split-char char))) + (cond ((setq ret (get-char-attribute char 'name)) + (setq char-spec (cons (cons 'name ret) char-spec)) + ) + ((setq ret (get-char-attribute char 'name*)) + (setq char-spec (cons (cons 'name* ret) char-spec)) + )) + )) + char-spec) ((consp char) - (setq char-spec char) - (setq char nil))) - (if (or char - (setq char (condition-case nil - (define-char char-spec) - (error nil)))) - (progn - (setq al nil - cal nil) - (while char-spec - (setq key (car (car char-spec))) - (if (find-charset key) - (setq cal (cons key cal)) - (setq al (cons key al))) - (setq char-spec (cdr char-spec))) - (insert-char-attributes char - readable - (or al 'none) cal)) - (insert (prin1-to-string char-spec))))) + char)))) + +(defun char-db-insert-char-spec (char &optional readable column) + (unless column + (setq column (current-column))) + (let (char-spec ret al cal key temp-char) + (setq char-spec (char-db-make-char-spec char)) + (unless (or (characterp char) ; char + (condition-case nil + (setq char (find-char char-spec)) + (error nil))) + ;; define temporary character + ;; Current implementation is dirty. + (setq temp-char (define-char (cons '(ideograph-daikanwa . 0) + char-spec))) + (remove-char-attribute temp-char 'ideograph-daikanwa) + (setq char temp-char)) + (setq al nil + cal nil) + (while char-spec + (setq key (car (car char-spec))) + (unless (memq key char-db-ignored-attributes) + (if (find-charset key) + (if (get-char-attribute char key) + (setq cal (cons key cal))) + (setq al (cons key al)))) + (setq char-spec (cdr char-spec))) + (unless cal + (setq char-spec (char-db-make-char-spec char)) + (while char-spec + (setq key (car (car char-spec))) + (unless (memq key char-db-ignored-attributes) + (if (find-charset key) + (setq cal (cons key cal)) + (setq al (cons key al)))) + (setq char-spec (cdr char-spec))) + ) + (unless (or cal + (memq 'ideographic-structure al)) + (push 'ideographic-structure al)) + (insert-char-attributes char + readable + (or al 'none) cal) + (when temp-char + ;; undefine temporary character + ;; Current implementation is dirty. + (setq char-spec (char-attribute-alist temp-char)) + (while char-spec + (remove-char-attribute temp-char (car (car char-spec))) + (setq char-spec (cdr char-spec)))))) (defun char-db-insert-alist (alist &optional readable column) (unless column @@ -171,7 +305,7 @@ (cond ((eq name 'char) (insert "(char . ") (if (setq ret (condition-case nil - (define-char value) + (find-char value) (error nil))) (progn (setq al nil @@ -196,7 +330,7 @@ (if (and (consp cell) (consp (car cell)) (setq ret (condition-case nil - (define-char cell) + (find-char cell) (error nil))) ) (progn @@ -234,43 +368,61 @@ (setq column (current-column))) (let ((line-breaking (concat "\n" (make-string (1+ column) ?\ ))) + (separator "") name value) (insert "(") (while plist (setq name (pop plist)) (setq value (pop plist)) (cond ((eq name :char) + (insert separator) (insert ":char\t") + (cond ((numberp value) + (setq value (decode-char '=ucs value))) + ;; ((consp value) + ;; (setq value (or (find-char value) + ;; value))) + ) (char-db-insert-char-spec value readable) - (insert line-breaking)) - (t - (insert (format "%s\t%S%s" + (insert line-breaking) + (setq separator "")) + ((eq name :radical) + (insert (format "%s%s\t%d ; %c%s" + separator name value - line-breaking)))) + (aref ideographic-radicals value) + line-breaking)) + (setq separator "")) + (t + (insert (format "%s%s\t%S" separator name value)) + (setq separator line-breaking))) )) (insert ")")) (defun char-db-decode-isolated-char (ccs code-point) (let (ret) (setq ret - (if (and (memq ccs '(ideograph-gt-pj-1 - ideograph-gt-pj-2 - ideograph-gt-pj-3 - ideograph-gt-pj-4 - ideograph-gt-pj-5 - ideograph-gt-pj-6 - ideograph-gt-pj-7 - ideograph-gt-pj-8 - ideograph-gt-pj-9 - ideograph-gt-pj-10 - ideograph-gt-pj-11)) - (setq ret (decode-char ccs code-point)) - (setq ret (get-char-attribute ret 'ideograph-gt))) - (decode-builtin-char 'ideograph-gt ret) - (decode-builtin-char ccs code-point))) + (cond ((eq ccs 'arabic-iso8859-6) + (decode-char ccs code-point)) + ((and (memq ccs '(=gt-pj-1 + =gt-pj-2 + =gt-pj-3 + =gt-pj-4 + =gt-pj-5 + =gt-pj-6 + =gt-pj-7 + =gt-pj-8 + =gt-pj-9 + =gt-pj-10 + =gt-pj-11)) + (setq ret (decode-char ccs code-point)) + (setq ret (get-char-attribute ret '=gt))) + (decode-builtin-char '=gt ret)) + (t + (decode-builtin-char ccs code-point)))) (cond ((and (<= 0 (char-int ret)) (<= (char-int ret) #x1F)) - (decode-char 'ucs (+ #x2400 (char-int ret)))) + (decode-char '=ucs (+ #x2400 (char-int ret)))) ((= (char-int ret) #x7F) ?\u2421) (t ret)))) @@ -280,17 +432,37 @@ (defun insert-char-attributes (char &optional readable attributes ccs-attributes column) - (setq attributes - (sort (if attributes - (if (consp attributes) - (copy-sequence attributes)) - (char-attribute-list)) - #'char-attribute-name<)) - (setq ccs-attributes - (sort (if ccs-attributes - (copy-sequence ccs-attributes) - (charset-list)) - #'char-attribute-name<)) + (let (atr-d ccs-d) + (setq attributes + (sort (if attributes + (if (consp attributes) + (progn + (dolist (name attributes) + (unless (memq name char-db-ignored-attributes) + (push name atr-d))) + atr-d)) + (dolist (name (char-attribute-list)) + (unless (memq name char-db-ignored-attributes) + (if (find-charset name) + (push name ccs-d) + (push name atr-d)))) + atr-d) + #'char-attribute-name<)) + (setq ccs-attributes + (sort (if ccs-attributes + (progn + (setq ccs-d nil) + (dolist (name ccs-attributes) + (unless (memq name char-db-ignored-attributes) + (push name ccs-d))) + ccs-d) + (or ccs-d + (progn + (dolist (name (charset-list)) + (unless (memq name char-db-ignored-attributes) + (push name ccs-d))) + ccs-d))) + #'char-attribute-name<))) (unless column (setq column (current-column))) (let (name value has-long-ccs-name rest @@ -303,12 +475,21 @@ (when (and (memq 'name attributes) (setq value (get-char-attribute char 'name))) (insert (format - (if (> (length value) 47) + (if (> (+ (current-column) (length value)) 48) "(name . %S)%s" "(name . %S)%s") value line-breaking)) (setq attributes (delq 'name attributes)) ) + (when (and (memq 'name* attributes) + (setq value (get-char-attribute char 'name*))) + (insert (format + (if (> (+ (current-column) (length value)) 48) + "(name* . %S)%s" + "(name* . %S)%s") + value line-breaking)) + (setq attributes (delq 'name* attributes)) + ) (when (and (memq 'script attributes) (setq value (get-char-attribute char 'script))) (insert (format "(script\t\t%s)%s" @@ -317,19 +498,89 @@ line-breaking)) (setq attributes (delq 'script attributes)) ) - (when (and (memq '=>ucs attributes) - (setq value (get-char-attribute char '=>ucs))) - (insert (format "(=>ucs\t\t. #x%04X)\t; %c%s" - value (decode-char 'ucs value) - line-breaking)) - (setq attributes (delq '=>ucs attributes)) - ) + (dolist (name '(=>ucs =>ucs*)) + (when (and (memq name attributes) + (setq value (get-char-attribute char name))) + (insert (format "(%-18s . #x%04X)\t; %c%s" + name value (decode-char '=ucs value) + line-breaking)) + (setq attributes (delq name attributes)))) + ;; (when (and (memq '=>ucs* attributes) + ;; (setq value (get-char-attribute char '=>ucs*))) + ;; (insert (format "(=>ucs*\t\t. #x%04X)\t; %c%s" + ;; value (decode-char '=ucs value) + ;; line-breaking)) + ;; (setq attributes (delq '=>ucs* attributes)) + ;; ) + (dolist (name '(=>ucs@gb =>ucs@cns =>ucs@jis =>ucs@ks =>ucs@big5)) + (when (and (memq name attributes) + (setq value (get-char-attribute char name))) + (insert (format "(%-18s . #x%04X)\t; %c%s" + name value + (decode-char (intern + (concat "=" + (substring + (symbol-name name) 2))) + value) + line-breaking)) + (setq attributes (delq name attributes)) + )) + (dolist (name '(=>ucs-gb =>ucs-cns =>ucs-jis =>ucs-ks =>ucs-big5)) + (when (and (memq name attributes) + (setq value (get-char-attribute char name))) + (insert (format "(%-18s . #x%04X)\t; %c%s" + (intern + (concat "=>ucs@" + (substring (symbol-name name) 6))) + value + (decode-char (intern + (concat "=ucs@" + (substring + (symbol-name name) 6))) + value) + line-breaking)) + (setq attributes (delq name attributes)))) + ;; (when (and (memq '=>ucs-gb attributes) + ;; (setq value (get-char-attribute char '=>ucs-gb))) + ;; (insert (format "(=>ucs@gb\t\t. #x%04X)\t; %c%s" + ;; value (decode-char '=ucs@gb value) + ;; line-breaking)) + ;; (setq attributes (delq '=>ucs-gb attributes)) + ;; ) + ;; (when (and (memq '=>ucs-cns attributes) + ;; (setq value (get-char-attribute char '=>ucs-cns))) + ;; (insert (format "(=>ucs@cns\t\t. #x%04X)\t; %c%s" + ;; value (decode-char '=ucs@cns value) + ;; line-breaking)) + ;; (setq attributes (delq '=>ucs-cns attributes)) + ;; ) + ;; (when (and (memq '=>ucs-big5 attributes) + ;; (setq value (get-char-attribute char '=>ucs-big5))) + ;; (insert (format "(=>ucs-big5\t\t. #x%04X)\t; %c%s" + ;; value (decode-char 'ucs-big5 value) + ;; line-breaking)) + ;; (setq attributes (delq '=>ucs-big5 attributes)) + ;; ) + ;; (when (and (memq '=>ucs-jis attributes) + ;; (setq value (get-char-attribute char '=>ucs-jis))) + ;; (insert (format "(=>ucs@jis\t\t. #x%04X)\t; %c%s" + ;; value (decode-char '=ucs@jis value) + ;; line-breaking)) + ;; (setq attributes (delq '=>ucs-jis attributes)) + ;; ) + ;; (when (and (memq '=>ucs-ks attributes) + ;; (setq value (get-char-attribute char '=>ucs-ks))) + ;; (insert (format "(=>ucs-ks\t\t. #x%04X)\t; %c%s" + ;; value (decode-char 'ucs-ks value) + ;; line-breaking)) + ;; (setq attributes (delq '=>ucs-ks attributes)) + ;; ) (when (and (memq '->ucs attributes) (setq value (get-char-attribute char '->ucs))) (insert (format (if char-db-convert-obsolete-format "(=>ucs\t\t. #x%04X)\t; %c%s" "(->ucs\t\t. #x%04X)\t; %c%s") - value (decode-char 'ucs value) + value (decode-char '=ucs value) line-breaking)) (setq attributes (delq '->ucs attributes)) ) @@ -505,6 +756,38 @@ (setq radical value))) (setq attributes (delq 'shinjigen-1-radical attributes)) ) + (when (and (memq 'ideographic- attributes) + (setq value (get-char-attribute char 'ideographic-))) + (insert "(ideographic- ") + (setq lbs (concat "\n" (make-string (current-column) ?\ )) + separator nil) + (while (consp value) + (setq cell (car value)) + (if (integerp cell) + (setq cell (decode-char '=ucs cell))) + (cond ((characterp cell) + (if separator + (insert lbs)) + (if readable + (insert (format "%S" cell)) + (char-db-insert-char-spec cell readable)) + (setq separator lbs)) + ((consp cell) + (if separator + (insert lbs)) + (if (consp (car cell)) + (char-db-insert-char-spec cell readable) + (char-db-insert-char-reference cell readable)) + (setq separator lbs)) + (t + (if separator + (insert separator)) + (insert (prin1-to-string cell)) + (setq separator " "))) + (setq value (cdr value))) + (insert ")") + (insert line-breaking) + (setq attributes (delq 'ideographic- attributes))) (when (and (memq 'total-strokes attributes) (setq value (get-char-attribute char 'total-strokes))) (insert (format "(total-strokes . %S)%s" @@ -557,6 +840,24 @@ line-breaking)) (setq attributes (delq '->mojikyo attributes)) ) + (when (and (memq 'hanyu-dazidian-vol attributes) + (setq value (get-char-attribute char 'hanyu-dazidian-vol))) + (insert (format "(hanyu-dazidian-vol . %d)%s" + value line-breaking)) + (setq attributes (delq 'hanyu-dazidian-vol attributes)) + ) + (when (and (memq 'hanyu-dazidian-page attributes) + (setq value (get-char-attribute char 'hanyu-dazidian-page))) + (insert (format "(hanyu-dazidian-page . %d)%s" + value line-breaking)) + (setq attributes (delq 'hanyu-dazidian-page attributes)) + ) + (when (and (memq 'hanyu-dazidian-char attributes) + (setq value (get-char-attribute char 'hanyu-dazidian-char))) + (insert (format "(hanyu-dazidian-char . %d)%s" + value line-breaking)) + (setq attributes (delq 'hanyu-dazidian-char attributes)) + ) (setq rest ccs-attributes) (while (and rest (progn @@ -572,29 +873,31 @@ (while attributes (setq name (car attributes)) (if (setq value (get-char-attribute char name)) - (cond ((eq name 'jisx0208-1978/4X) + (cond ((string-match "^=>ucs@" (symbol-name name)) + (insert (format "(%-18s . #x%04X)\t; %c%s" + name value (decode-char '=ucs value) + line-breaking)) + ) + ((eq name 'jisx0208-1978/4X) (insert (format "(%-18s . #x%04X)%s" name value line-breaking))) - ((memq name '(->lowercase - ->uppercase ->titlecase - ->fullwidth <-fullwidth - ->vulgar-ideograph <-vulgar-ideograph - ->ancient-ideograph <-ancient-ideograph - ->simplified-ideograph <-simplified-ideograph - ->same-ideograph - ->bopomofo)) + ((or (eq name 'ideographic-structure) + (eq name 'ideographic-) + (string-match "^\\(->\\|<-\\)" (symbol-name name))) (insert (format "(%-18s%s " name line-breaking)) (setq lbs (concat "\n" (make-string (current-column) ?\ )) separator nil) (while (consp value) (setq cell (car value)) (if (integerp cell) - (setq cell (decode-char 'ucs cell))) + (setq cell (decode-char '=ucs cell))) (cond ((characterp cell) (if separator (insert lbs)) - (char-db-insert-char-spec cell readable) + (if readable + (insert (format "%S" cell)) + (char-db-insert-char-spec cell readable)) (setq separator lbs)) ((consp cell) (if separator @@ -638,20 +941,20 @@ (setq value (cdr value))) (insert ")") (insert line-breaking)) - ((string-match "^->" (symbol-name name)) - (insert - (format "(%-18s %s)%s" - name - (mapconcat (lambda (code) - (cond ((symbolp code) - (symbol-name code)) - ((integerp code) - (format "#x%04X" code)) - (t - (format "%s%S" - line-breaking code)))) - value " ") - line-breaking))) + ;; ((string-match "^->" (symbol-name name)) + ;; (insert + ;; (format "(%-18s %s)%s" + ;; name + ;; (mapconcat (lambda (code) + ;; (cond ((symbolp code) + ;; (symbol-name code)) + ;; ((integerp code) + ;; (format "#x%04X" code)) + ;; (t + ;; (format "%s%S" + ;; line-breaking code)))) + ;; value " ") + ;; line-breaking))) ((consp value) (insert (format "(%-18s " name)) (setq lbs (concat "\n" (make-string (current-column) ?\ )) @@ -661,7 +964,7 @@ (if (and (consp cell) (consp (car cell)) (setq ret (condition-case nil - (define-char cell) + (find-char cell) (error nil)))) (progn (setq rest cell @@ -698,7 +1001,10 @@ (setq value (get-char-attribute char name))) (insert (format - (cond ((memq name '(ideograph-daikanwa ideograph-gt)) + (cond ((memq name '(ideograph-daikanwa + =daikanwa-rev1 + =daikanwa-rev2 + =gt =gt-k =cbeta)) (if has-long-ccs-name "(%-26s . %05d)\t; %c%s" "(%-18s . %05d)\t; %c%s")) @@ -706,7 +1012,7 @@ (if has-long-ccs-name "(%-26s . %06d)\t; %c%s" "(%-18s . %06d)\t; %c%s")) - ((eq name 'ucs) + ((>= (charset-dimension name) 2) (if has-long-ccs-name "(%-26s . #x%04X)\t; %c%s" "(%-18s . #x%04X)\t; %c%s")) @@ -742,45 +1048,17 @@ (goto-char (point-min)) (while (re-search-forward "[ \t]+$" nil t) (replace-match "")) + ;; from tabify. + (goto-char (point-min)) + (while (re-search-forward "[ \t][ \t][ \t]*" nil t) + (let ((column (current-column)) + (indent-tabs-mode t)) + (delete-region (match-beginning 0) (point)) + (indent-to column))) (goto-char (point-max)) - (tabify (point-min)(point-max)) + ;; (tabify (point-min)(point-max)) )) -;;;###autoload -(defun char-db-update-comment () - (interactive) - (save-excursion - (goto-char (point-min)) - (let (cdef table char) - (while (re-search-forward "^[ \t]*\\(([^.()]+)\\)" nil t) - (goto-char (match-beginning 1)) - (setq cdef (read (current-buffer))) - (when (find-charset (car cdef)) - (goto-char (match-end 0)) - (setq char - (if (and - (not (eq (car cdef) 'ideograph-daikanwa)) - (or (memq (car cdef) '(ascii latin-viscii-upper - latin-viscii-lower - arabic-iso8859-6 - japanese-jisx0213-1 - japanese-jisx0213-2)) - (= (char-int (charset-iso-final-char (car cdef))) - 0))) - (apply (function make-char) cdef) - (if (setq table (charset-mapping-table (car cdef))) - (set-charset-mapping-table (car cdef) nil)) - (prog1 - (apply (function make-char) cdef) - (if table - (set-charset-mapping-table (car cdef) table))))) - (when (not (or (< (char-int char) 32) - (and (<= 128 (char-int char)) - (< (char-int char) 160)))) - (delete-region (point) (point-at-eol)) - (insert (format "\t; %c" char))) - ))))) - (defun insert-char-data-with-variant (char &optional printable no-ucs-variant script excluded-script) @@ -790,6 +1068,7 @@ (if ucs (delete char (char-variants (int-char ucs))))))) variant vs) + (setq variants (sort variants #'<)) (while variants (setq variant (car variants)) (if (and (or (null script) @@ -798,7 +1077,7 @@ (or (null excluded-script) (null (setq vs (get-char-attribute variant 'script))) (not (memq excluded-script vs)))) - (or (and no-ucs-variant (get-char-attribute variant 'ucs)) + (or (and no-ucs-variant (get-char-attribute variant '=ucs)) (insert-char-data variant printable))) (setq variants (cdr variants)) ))) @@ -807,12 +1086,11 @@ (let ((code min) char) (while (<= code max) - (setq char (decode-char 'ucs code)) - (if (get-char-attribute char 'ucs) + (setq char (decode-char '=ucs code)) + (if (encode-char char '=ucs 'defined-only) (insert-char-data-with-variant char nil 'no-ucs-variant script excluded-script)) - (setq code (1+ code)) - ))) + (setq code (1+ code))))) (defun write-char-range-data-to-file (min max file &optional script excluded-script) @@ -837,6 +1115,15 @@ (condition-case err (progn (insert-char-data-with-variant char 'printable) + (unless (char-attribute-alist char) + (insert (format ";; = %c\n" + (let* ((rest (split-char char)) + (ccs (pop rest)) + (code (pop rest))) + (while rest + (setq code (logior (lsh code 8) + (pop rest)))) + (decode-char ccs code))))) ;; (char-db-update-comment) (set-buffer-modified-p nil) (view-mode the-buf (lambda (buf)