X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Futf-2000%2Fchar-db-util.el;h=43d3d92f97a25a5ff70c511b2bea01dce7b69f0c;hb=757b65c82e193224098b26bd06a6efa5410c618a;hp=1cf7476573ecbf42d4d8100269efc0c71d99e502;hpb=23a058dbaf577391ba3b8798834826ffd424dc98;p=chise%2Fxemacs-chise.git.1 diff --git a/lisp/utf-2000/char-db-util.el b/lisp/utf-2000/char-db-util.el index 1cf7476..43d3d92 100644 --- a/lisp/utf-2000/char-db-util.el +++ b/lisp/utf-2000/char-db-util.el @@ -1,24 +1,24 @@ ;;; char-db-util.el --- Character Database utility -;; Copyright (C) 1998,1999,2000,2001,2002,2003 MORIOKA Tomohiko. +;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004 MORIOKA Tomohiko. ;; Author: MORIOKA Tomohiko -;; Keywords: UTF-2000, ISO/IEC 10646, Unicode, UCS-4, MULE. +;; Keywords: CHISE, Character Database, ISO/IEC 10646, Unicode, UCS-4, MULE. -;; This file is part of XEmacs UTF-2000. +;; This file is part of XEmacs CHISE. -;; XEmacs UTF-2000 is free software; you can redistribute it and/or +;; XEmacs CHISE is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation; either version 2, or (at ;; your option) any later version. -;; XEmacs UTF-2000 is distributed in the hope that it will be useful, +;; XEmacs CHISE is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with XEmacs UTF-2000; see the file COPYING. If not, write to +;; along with XEmacs CHISE; see the file COPYING. If not, write to ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, ;; Boston, MA 02111-1307, USA. @@ -69,6 +69,10 @@ (setq i (1+ i))) v)) +(defvar char-db-feature-domains + '(ucs daikanwa cns gt jis jis/alt jis/a jis/b + jis-x0213 misc unknown)) + (defvar char-db-ignored-attributes nil) (defun char-attribute-name< (ka kb) @@ -76,33 +80,20 @@ ((find-charset ka) (cond ((find-charset kb) - (cond - ((= (charset-dimension ka) - (charset-dimension kb)) - (cond ((= (charset-chars ka)(charset-chars kb)) - (if (charset-iso-final-char ka) - (cond - ((>= (charset-iso-final-char ka) ?@) - (if (and (charset-iso-final-char kb) - (>= (charset-iso-final-char kb) ?@)) - (< (charset-iso-final-char ka) - (charset-iso-final-char kb)) - t)) - (t - (if (charset-iso-final-char kb) - (if (>= (charset-iso-final-char kb) ?@) - nil - (< (charset-iso-final-char ka) - (charset-iso-final-char kb))) - t))) - (if (charset-iso-final-char kb) - nil - (< (charset-id ka)(charset-id kb))))) - ((<= (charset-chars ka)(charset-chars kb))))) - (t - (< (charset-dimension ka) - (charset-dimension kb)) - ))) + (if (<= (charset-id ka) 0) + (if (<= (charset-id kb) 0) + (cond + ((= (charset-dimension ka) + (charset-dimension kb)) + (> (charset-id ka)(charset-id kb))) + (t + (> (charset-dimension ka) + (charset-dimension kb)) + )) + t) + (if (<= (charset-id kb) 0) + nil + (< (charset-id ka)(charset-id kb))))) ((symbolp kb) nil) (t @@ -151,6 +142,7 @@ chinese-gb12345 latin-viscii ethiopic-ucs + =big5-cdp =gt ideograph-daikanwa-2 ideograph-daikanwa @@ -169,7 +161,6 @@ ideograph-hanziku-12 =big5 =big5-eten - =big5-cdp =gt-k =jef-china3)) @@ -221,10 +212,11 @@ ((consp char) char)))) -(defun char-db-insert-char-spec (char &optional readable column) +(defun char-db-insert-char-spec (char &optional readable column + required-features) (unless column (setq column (current-column))) - (let (char-spec ret al cal key temp-char) + (let (char-spec al cal key temp-char) (setq char-spec (char-db-make-char-spec char)) (unless (or (characterp char) ; char (condition-case nil @@ -259,6 +251,11 @@ (unless (or cal (memq 'ideographic-structure al)) (push 'ideographic-structure al)) + (dolist (feature required-features) + (if (find-charset feature) + (if (encode-char char feature 'defined-only) + (setq cal (adjoin feature cal))) + (setq al (adjoin feature al)))) (insert-char-attributes char readable (or al 'none) cal) @@ -451,7 +448,8 @@ (concat "\n" (make-string (1+ column) ?\ ))) lbs cell separator ret key al cal - dest-ccss) + dest-ccss + sources required-features) (insert "(") (when (and (memq 'name attributes) (setq value (get-char-attribute char 'name))) @@ -607,8 +605,17 @@ ) (setq radical nil strokes nil) + (when (and (memq 'ideographic-radical attributes) + (setq value (get-char-attribute char 'ideographic-radical))) + (setq radical value) + (insert (format "(ideographic-radical . %S)\t; %c%s" + radical + (aref ideographic-radicals radical) + line-breaking)) + (setq attributes (delq 'ideographic-radical attributes)) + ) (let (key) - (dolist (domain '(ucs daikanwa cns)) + (dolist (domain char-db-feature-domains) (setq key (intern (format "%s@%s" 'ideographic-radical domain))) (when (and (memq key attributes) (setq value (get-char-attribute char key))) @@ -630,27 +637,29 @@ line-breaking)) (setq attributes (delq key attributes)) ) - (setq key (intern (format "%s@%s*sources" - 'ideographic-radical domain))) + (setq key (intern (format "%s@%s" 'total-strokes domain))) (when (and (memq key attributes) (setq value (get-char-attribute char key))) - (insert (format "(%s%s" key line-breaking)) - (dolist (cell value) - (insert (format " %s" cell))) - (insert ")") - (insert line-breaking) + (insert (format "(%s . %S)%s" + key + value + line-breaking)) (setq attributes (delq key attributes)) ) + (dolist (feature '(ideographic-radical + ideographic-strokes + total-strokes)) + (setq key (intern (format "%s@%s*sources" feature domain))) + (when (and (memq key attributes) + (setq value (get-char-attribute char key))) + (insert (format "(%s%s" key line-breaking)) + (dolist (cell value) + (insert (format " %s" cell))) + (insert ")") + (insert line-breaking) + (setq attributes (delq key attributes)) + )) )) - (when (and (memq 'ideographic-radical attributes) - (setq value (get-char-attribute char 'ideographic-radical))) - (setq radical value) - (insert (format "(ideographic-radical . %S)\t; %c%s" - radical - (aref ideographic-radicals radical) - line-breaking)) - (setq attributes (delq 'ideographic-radical attributes)) - ) (when (and (memq 'ideographic-strokes attributes) (setq value (get-char-attribute char 'ideographic-strokes))) (setq strokes value) @@ -833,8 +842,8 @@ (setq attributes (delq 'hanyu-dazidian-char attributes)) ) (unless readable - (when (memq '->ucs-variants attributes) - (setq attributes (delq '->ucs-variants attributes)) + (when (memq '->ucs-unified attributes) + (setq attributes (delq '->ucs-unified attributes)) ) (when (memq 'composition attributes) (setq attributes (delq 'composition attributes)) @@ -874,11 +883,37 @@ (if (integerp cell) (setq cell (decode-char '=ucs cell))) (cond ((characterp cell) + (setq sources + (get-char-attribute + char + (intern (format "%s*sources" name)))) + (setq required-features nil) + (dolist (source sources) + (setq required-features + (cons + (if (find-charset + (setq ret (intern + (format "=%s" source)))) + ret + source) + required-features))) + (when (string-match "@JP" (symbol-name name)) + (setq required-features + (union required-features + '(=jis-x0208 + =jis-x0208-1990 + =jis-x0213-1-2000 + =jis-x0213-2-2000 + =jis-x0212 + =jis-x0208-1983 + =jis-x0208-1978)))) (if separator (insert lbs)) (if readable (insert (format "%S" cell)) - (char-db-insert-char-spec cell readable)) + (char-db-insert-char-spec cell readable + nil + required-features)) (setq separator lbs)) ((consp cell) (if separator @@ -900,9 +935,10 @@ ancient-ideograph-of vulgar-ideograph-of wrong-ideograph-of - simplified-ideograph-of + ;; simplified-ideograph-of ideographic-variants - ideographic-different-form-of)) + ;; ideographic-different-form-of + )) (insert (format "(%-18s%s " name line-breaking)) (setq lbs (concat "\n" (make-string (current-column) ?\ )) separator nil) @@ -922,21 +958,7 @@ (setq value (cdr value))) (insert ")") (insert line-breaking)) - ;; ((string-match "^->" (symbol-name name)) - ;; (insert - ;; (format "(%-18s %s)%s" - ;; name - ;; (mapconcat (lambda (code) - ;; (cond ((symbolp code) - ;; (symbol-name code)) - ;; ((integerp code) - ;; (format "#x%04X" code)) - ;; (t - ;; (format "%s%S" - ;; line-breaking code)))) - ;; value " ") - ;; line-breaking))) - ((consp value) + ((consp value) (insert (format "(%-18s " name)) (setq lbs (concat "\n" (make-string (current-column) ?\ )) separator nil) @@ -1042,7 +1064,7 @@ )) (defun insert-char-data-with-variant (char &optional printable - no-ucs-variant + no-ucs-unified script excluded-script) (insert-char-data char printable) (let ((variants (or (char-variants char) @@ -1059,7 +1081,7 @@ (or (null excluded-script) (null (setq vs (get-char-attribute variant 'script))) (not (memq excluded-script vs)))) - (or (and no-ucs-variant (get-char-attribute variant '=ucs)) + (or (and no-ucs-unified (get-char-attribute variant '=ucs)) (insert-char-data variant printable))) (setq variants (cdr variants)) ))) @@ -1070,14 +1092,15 @@ (while (<= code max) (setq char (decode-char '=ucs code)) (if (encode-char char '=ucs 'defined-only) - (insert-char-data-with-variant char nil 'no-ucs-variant + (insert-char-data-with-variant char nil 'no-ucs-unified script excluded-script)) (setq code (1+ code))))) (defun write-char-range-data-to-file (min max file &optional script excluded-script) - (let ((coding-system-for-write 'utf-8)) + (let ((coding-system-for-write 'utf-8-mcs)) (with-temp-buffer + (insert ";; -*- coding: utf-8-mcs -*-\n") (insert-char-range-data min max script excluded-script) (write-region (point-min)(point-max) file))))