X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;ds=sidebyside;f=lisp%2Futf-2000%2Fchar-db-util.el;h=b0d07b7a1d4c0d9b75f92bc1ccbd39ab5f0b3a15;hb=b5ebdf2c6a17025483ceff54d9d893e4ffe88a5d;hp=e7393e51a0abd1d1bffad824522e48512a58ed16;hpb=6fd920e7e59cb470495d05e098b4189602b4ac65;p=chise%2Fxemacs-chise.git diff --git a/lisp/utf-2000/char-db-util.el b/lisp/utf-2000/char-db-util.el index e7393e5..b0d07b7 100644 --- a/lisp/utf-2000/char-db-util.el +++ b/lisp/utf-2000/char-db-util.el @@ -5,22 +5,22 @@ ;; Author: MORIOKA Tomohiko ;; Keywords: UTF-2000, ISO/IEC 10646, Unicode, UCS-4, MULE. -;; This file is part of UTF-2000. +;; This file is part of XEmacs UTF-2000. -;; UTF-2000 is free software; you can redistribute it and/or modify it -;; under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 2, or (at your option) -;; any later version. +;; XEmacs UTF-2000 is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2, or (at +;; your option) any later version. -;; UTF-2000 is distributed in the hope that it will be useful, but -;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; XEmacs UTF-2000 is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with XEmacs; see the file COPYING. If not, write to the Free -;; Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -;; 02111-1307, USA. +;; along with XEmacs UTF-2000; see the file COPYING. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. ;;; Code: @@ -93,7 +93,10 @@ ((>= (charset-iso-final-char kb) ?0) (< (charset-iso-final-char ka) (charset-iso-final-char kb))) - (t))))) + (t))) + (t (if (>= (charset-iso-final-char kb) ?0) + nil + (> (charset-id ka)(charset-id kb)))))) ((<= (charset-chars ka)(charset-chars kb))))) (t (< (charset-dimension ka) @@ -113,6 +116,59 @@ ((symbolp kb) nil))) +(defun char-db-insert-char-spec (char &optional readable column) + (unless column + (setq column (current-column))) + (let (char-spec ret al cal key temp-char) + (cond ((characterp char) + (cond ((and (setq ret (get-char-attribute char 'ucs)) + (not (and (<= #xE000 ret)(<= ret #xF8FF)))) + (setq char-spec (list (cons 'ucs ret))) + (if (setq ret (get-char-attribute char 'name)) + (setq char-spec (cons (cons 'name ret) char-spec))) + ) + ((setq ret (split-char char)) + (setq char-spec (list ret)) + (dolist (ccs (delq (car ret) (charset-list))) + (if (or (and (>= (charset-iso-final-char ccs) ?0) + (setq ret (get-char-attribute char ccs))) + (eq ccs 'ideograph-daikanwa)) + (setq char-spec (cons (cons ccs ret) char-spec)))) + (if (setq ret (get-char-attribute char 'name)) + (setq char-spec (cons (cons 'name ret) char-spec))) + ))) + ((consp char) + (setq char-spec char) + (setq char nil))) + (unless (or char + (condition-case nil + (setq char (find-char char-spec)) + (error nil))) + ;; define temporary character + ;; Current implementation is dirty. + (setq temp-char (define-char (cons '(ideograph-daikanwa . 0) + char-spec))) + (remove-char-attribute temp-char 'ideograph-daikanwa) + (setq char temp-char)) + (setq al nil + cal nil) + (while char-spec + (setq key (car (car char-spec))) + (if (find-charset key) + (setq cal (cons key cal)) + (setq al (cons key al))) + (setq char-spec (cdr char-spec))) + (insert-char-attributes char + readable + (or al 'none) cal) + (when temp-char + ;; undefine temporary character + ;; Current implementation is dirty. + (setq char-spec (char-attribute-alist temp-char)) + (while char-spec + (remove-char-attribute temp-char (car (car char-spec))) + (setq char-spec (cdr char-spec)))))) + (defun char-db-insert-alist (alist &optional readable column) (unless column (setq column (current-column))) @@ -128,7 +184,7 @@ (cond ((eq name 'char) (insert "(char . ") (if (setq ret (condition-case nil - (define-char value) + (find-char value) (error nil))) (progn (setq al nil @@ -153,7 +209,7 @@ (if (and (consp cell) (consp (car cell)) (setq ret (condition-case nil - (define-char cell) + (find-char cell) (error nil))) ) (progn @@ -186,18 +242,77 @@ (setq alist (cdr alist)))) (insert ")")) +(defun char-db-insert-char-reference (plist &optional readable column) + (unless column + (setq column (current-column))) + (let ((line-breaking + (concat "\n" (make-string (1+ column) ?\ ))) + name value) + (insert "(") + (while plist + (setq name (pop plist)) + (setq value (pop plist)) + (cond ((eq name :char) + (insert ":char\t") + (cond ((numberp value) + (setq value (decode-char 'ucs value))) + ;; ((consp value) + ;; (setq value (or (find-char value) + ;; value))) + ) + (char-db-insert-char-spec value readable) + (insert line-breaking)) + (t + (insert (format "%s\t%S%s" + name value + line-breaking)))) + )) + (insert ")")) + +(defun char-db-decode-isolated-char (ccs code-point) + (let (ret) + (setq ret + (cond ((eq ccs 'arabic-iso8859-6) + (decode-char ccs code-point)) + ((and (memq ccs '(ideograph-gt-pj-1 + ideograph-gt-pj-2 + ideograph-gt-pj-3 + ideograph-gt-pj-4 + ideograph-gt-pj-5 + ideograph-gt-pj-6 + ideograph-gt-pj-7 + ideograph-gt-pj-8 + ideograph-gt-pj-9 + ideograph-gt-pj-10 + ideograph-gt-pj-11)) + (setq ret (decode-char ccs code-point)) + (setq ret (get-char-attribute ret 'ideograph-gt))) + (decode-builtin-char 'ideograph-gt ret)) + (t + (decode-builtin-char ccs code-point)))) + (cond ((and (<= 0 (char-int ret)) + (<= (char-int ret) #x1F)) + (decode-char 'ucs (+ #x2400 (char-int ret)))) + ((= (char-int ret) #x7F) + ?\u2421) + (t ret)))) + +(defvar char-db-convert-obsolete-format t) + (defun insert-char-attributes (char &optional readable attributes ccs-attributes column) (setq attributes - (if attributes - (if (consp attributes) - (copy-sequence attributes)) - (sort (char-attribute-list) #'char-attribute-name<))) + (sort (if attributes + (if (consp attributes) + (copy-sequence attributes)) + (char-attribute-list)) + #'char-attribute-name<)) (setq ccs-attributes - (if ccs-attributes - (copy-sequence ccs-attributes) - (sort (charset-list) #'char-attribute-name<))) + (sort (if ccs-attributes + (copy-sequence ccs-attributes) + (charset-list)) + #'char-attribute-name<)) (unless column (setq column (current-column))) (let (name value has-long-ccs-name rest @@ -210,9 +325,9 @@ (when (and (memq 'name attributes) (setq value (get-char-attribute char 'name))) (insert (format - (if (> (length value) 47) + (if (> (+ (current-column) (length value)) 48) "(name . %S)%s" - "(name\t\t. %S)%s") + "(name . %S)%s") value line-breaking)) (setq attributes (delq 'name attributes)) ) @@ -224,9 +339,18 @@ line-breaking)) (setq attributes (delq 'script attributes)) ) + (when (and (memq '=>ucs attributes) + (setq value (get-char-attribute char '=>ucs))) + (insert (format "(=>ucs\t\t. #x%04X)\t; %c%s" + value (decode-char 'ucs value) + line-breaking)) + (setq attributes (delq '=>ucs attributes)) + ) (when (and (memq '->ucs attributes) (setq value (get-char-attribute char '->ucs))) - (insert (format "(->ucs\t\t. #x%04X)\t; %c%s" + (insert (format (if char-db-convert-obsolete-format + "(=>ucs\t\t. #x%04X)\t; %c%s" + "(->ucs\t\t. #x%04X)\t; %c%s") value (decode-char 'ucs value) line-breaking)) (setq attributes (delq '->ucs attributes)) @@ -419,7 +543,8 @@ ((integerp code) (format "#x%04X" code)) (t - (format "%s%S" line-breaking code)))) + (format "%s %S" + line-breaking code)))) value " ") line-breaking)) (setq attributes (delq '->ideograph attributes)) @@ -444,48 +569,9 @@ line-breaking)) (setq attributes (delq '->decomposition attributes)) ) - (when (and (memq '->uppercase attributes) - (setq value (get-char-attribute char '->uppercase))) - (insert (format "(->uppercase\t%s)%s" - (mapconcat (lambda (code) - (cond ((symbolp code) - (symbol-name code)) - ((integerp code) - (format "#x%04X" code)) - (t - (format "%s%S" line-breaking code)))) - value " ") - line-breaking)) - (setq attributes (delq '->uppercase attributes)) - ) - (when (and (memq '->lowercase attributes) - (setq value (get-char-attribute char '->lowercase))) - (insert (format "(->lowercase\t%s)%s" - (mapconcat (lambda (code) - (cond ((symbolp code) - (symbol-name code)) - ((integerp code) - (format "#x%04X" code)) - (t - (format "%s%S" line-breaking code)))) - value " ") - line-breaking)) - (setq attributes (delq '->lowercase attributes)) - ) - (when (and (memq '->titlecase attributes) - (setq value (get-char-attribute char '->titlecase))) - (insert (format "(->titlecase\t%s)%s" - (mapconcat (lambda (code) - (cond ((symbolp code) - (symbol-name code)) - ((integerp code) - (format "#x%04X" code)) - (t - (format "%s%S" line-breaking code)))) - value " ") - line-breaking)) - (setq attributes (delq '->titlecase attributes)) - ) + (if (equal (get-char-attribute char '->titlecase) + (get-char-attribute char '->uppercase)) + (setq attributes (delq '->titlecase attributes))) (when (and (memq '->mojikyo attributes) (setq value (get-char-attribute char '->mojikyo))) (insert (format "(->mojikyo\t\t. %06d)\t; %c%s" @@ -512,25 +598,56 @@ (insert (format "(%-18s . #x%04X)%s" name value line-breaking))) - ((string-match "^->" (symbol-name name)) - (insert - (format "(%-18s %s)%s" - name - (mapconcat (lambda (code) - (cond ((symbolp code) - (symbol-name code)) - ((integerp code) - (format "#x%04X" code)) - (t - (format "%s%S" - line-breaking code)))) - value " ") - line-breaking))) + ((memq name '(->lowercase + ->uppercase ->titlecase + ->fullwidth <-fullwidth + ->vulgar-ideograph <-vulgar-ideograph + ->ancient-ideograph <-ancient-ideograph + ->original-ideograph <-original-ideograph + ->simplified-ideograph <-simplified-ideograph + ->wrong-ideograph <-wrong-ideograph + ->same-ideograph + ->ideographic-variants + ->synonyms + ->radical <-radical + ->bopomofo <-bopomofo + ->ideographic <-ideographic + ideographic-structure)) + (insert (format "(%-18s%s " name line-breaking)) + (setq lbs (concat "\n" (make-string (current-column) ?\ )) + separator nil) + (while (consp value) + (setq cell (car value)) + (if (integerp cell) + (setq cell (decode-char 'ucs cell))) + (cond ((characterp cell) + (if separator + (insert lbs)) + (char-db-insert-char-spec cell readable) + (setq separator lbs)) + ((consp cell) + (if separator + (insert lbs)) + (if (consp (car cell)) + (char-db-insert-char-spec cell readable) + (char-db-insert-char-reference cell readable)) + (setq separator lbs)) + (t + (if separator + (insert separator)) + (insert (prin1-to-string cell)) + (setq separator " "))) + (setq value (cdr value))) + (insert ")") + (insert line-breaking)) ((memq name '(ideograph= original-ideograph-of ancient-ideograph-of vulgar-ideograph-of - simplified-ideograph-of)) + wrong-ideograph-of + simplified-ideograph-of + ideographic-variants + ideographic-different-form-of)) (insert (format "(%-18s%s " name line-breaking)) (setq lbs (concat "\n" (make-string (current-column) ?\ )) separator nil) @@ -550,6 +667,20 @@ (setq value (cdr value))) (insert ")") (insert line-breaking)) + ((string-match "^->" (symbol-name name)) + (insert + (format "(%-18s %s)%s" + name + (mapconcat (lambda (code) + (cond ((symbolp code) + (symbol-name code)) + ((integerp code) + (format "#x%04X" code)) + (t + (format "%s%S" + line-breaking code)))) + value " ") + line-breaking))) ((consp value) (insert (format "(%-18s " name)) (setq lbs (concat "\n" (make-string (current-column) ?\ )) @@ -559,7 +690,7 @@ (if (and (consp cell) (consp (car cell)) (setq ret (condition-case nil - (define-char cell) + (find-char cell) (error nil)))) (progn (setq rest cell @@ -592,28 +723,26 @@ (setq attributes (cdr attributes))) (while ccs-attributes (setq name (car ccs-attributes)) - (if (setq value (get-char-attribute char name)) + (if (and (eq name (charset-name name)) + (setq value (get-char-attribute char name))) (insert (format - (if has-long-ccs-name - (cond ((eq name 'ideograph-daikanwa) + (cond ((memq name '(ideograph-daikanwa ideograph-gt)) + (if has-long-ccs-name "(%-26s . %05d)\t; %c%s" - ) - ((eq name 'mojikyo) + "(%-18s . %05d)\t; %c%s")) + ((eq name 'mojikyo) + (if has-long-ccs-name "(%-26s . %06d)\t; %c%s" - ) - (t - "(%-26s . #x%X)\t; %c%s" - )) - (cond ((eq name 'ideograph-daikanwa) - "(%-18s . %05d)\t; %c%s" - ) - ((eq name 'mojikyo) - "(%-18s . %06d)\t; %c%s" - ) - (t - "(%-18s . #x%X)\t; %c%s" - ))) + "(%-18s . %06d)\t; %c%s")) + ((eq name 'ucs) + (if has-long-ccs-name + "(%-26s . #x%04X)\t; %c%s" + "(%-18s . #x%04X)\t; %c%s")) + (t + (if has-long-ccs-name + "(%-26s . #x%02X)\t; %c%s" + "(%-18s . #x%02X)\t; %c%s"))) name (if (= (charset-iso-graphic-plane name) 1) (logior value @@ -625,7 +754,7 @@ #x808080) (t 0))) value) - (decode-builtin-char name value) + (char-db-decode-isolated-char name value) line-breaking))) (setq ccs-attributes (cdr ccs-attributes))) (insert ")"))) @@ -681,38 +810,45 @@ (insert (format "\t; %c" char))) ))))) -(defun insert-char-data-with-variant (char &optional script printable - no-ucs-variant) +(defun insert-char-data-with-variant (char &optional printable + no-ucs-variant + script excluded-script) (insert-char-data char printable) (let ((variants (or (char-variants char) (let ((ucs (get-char-attribute char '->ucs))) (if ucs (delete char (char-variants (int-char ucs))))))) variant vs) + (setq variants (sort variants #'<)) (while variants (setq variant (car variants)) - (if (or (null script) - (null (setq vs (get-char-attribute variant 'script))) - (memq script vs)) + (if (and (or (null script) + (null (setq vs (get-char-attribute variant 'script))) + (memq script vs)) + (or (null excluded-script) + (null (setq vs (get-char-attribute variant 'script))) + (not (memq excluded-script vs)))) (or (and no-ucs-variant (get-char-attribute variant 'ucs)) (insert-char-data variant printable))) (setq variants (cdr variants)) ))) -(defun insert-char-range-data (min max &optional script) +(defun insert-char-range-data (min max &optional script excluded-script) (let ((code min) char) (while (<= code max) (setq char (decode-char 'ucs code)) (if (get-char-attribute char 'ucs) - (insert-char-data-with-variant char script nil 'no-ucs-variant)) + (insert-char-data-with-variant char nil 'no-ucs-variant + script excluded-script)) (setq code (1+ code)) ))) -(defun write-char-range-data-to-file (min max file &optional script) +(defun write-char-range-data-to-file (min max file + &optional script excluded-script) (let ((coding-system-for-write 'utf-8)) (with-temp-buffer - (insert-char-range-data min max script) + (insert-char-range-data min max script excluded-script) (write-region (point-min)(point-max) file)))) (defvar what-character-original-window-configuration) @@ -730,7 +866,7 @@ (erase-buffer) (condition-case err (progn - (insert-char-data-with-variant char nil 'printable) + (insert-char-data-with-variant char 'printable) ;; (char-db-update-comment) (set-buffer-modified-p nil) (view-mode the-buf (lambda (buf)