;;; char-db-util.el --- Character Database utility
-;; Copyright (C) 1998, 1999 MORIOKA Tomohiko.
+;; Copyright (C) 1998,1999,2000 MORIOKA Tomohiko.
-;; Author: MORIOKA Tomohiko <tomo@m17n.org>
+;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
;; Keywords: UTF-2000, ISO/IEC 10646, Unicode, UCS-4, MULE.
;; This file is part of UTF-2000.
("So" symbol other)
))
+(defconst ideographic-radicals
+ (let ((v (make-vector 215 nil))
+ (i 1))
+ (while (< i 215)
+ (aset v i (int-char (+ #x2EFF i)))
+ (setq i (1+ i)))
+ (if (< (charset-iso-final-char (car (split-char (aref v 34)))) ?0)
+ (aset v 34 (make-char 'chinese-gb2312 #x62 #x3A)))
+ v))
+
(defun char-attribute-name< (ka kb)
(cond
((find-charset ka)
(charset-dimension kb))
(cond ((= (charset-chars ka)(charset-chars kb))
(cond
- ((>= (charset-final ka) ?@)
- (if (>= (charset-final kb) ?@)
- (< (charset-final ka)
- (charset-final kb))
+ ((>= (charset-iso-final-char ka) ?@)
+ (if (>= (charset-iso-final-char kb) ?@)
+ (< (charset-iso-final-char ka)
+ (charset-iso-final-char kb))
t))
- ((>= (charset-final ka) ?0)
+ ((>= (charset-iso-final-char ka) ?0)
(cond
- ((>= (charset-final kb) ?@)
+ ((>= (charset-iso-final-char kb) ?@)
nil)
- ((>= (charset-final kb) ?0)
- (< (charset-final ka)
- (charset-final kb)))
+ ((>= (charset-iso-final-char kb) ?0)
+ (< (charset-iso-final-char ka)
+ (charset-iso-final-char kb)))
(t)))))
((<= (charset-chars ka)(charset-chars kb)))))
(t
(< (charset-dimension ka)
(charset-dimension kb))
)))
- (t)))
+ ((symbolp kb)
+ nil)
+ (t
+ t)))
((find-charset kb)
t)
((symbolp ka)
((symbolp kb)
nil)))
-(defun insert-char-data (char)
- (let ((data (char-attribute-alist char))
- cell ret name has-long-ccs-name rest)
- (when data
- (save-restriction
- (narrow-to-region (point)(point))
- (insert "(define-char
+(defun insert-char-data (char &optional readable
+ attributes ccs-attributes)
+ (setq attributes
+ (if attributes
+ (copy-sequence attributes)
+ (sort (char-attribute-list) #'char-attribute-name<)))
+ (setq ccs-attributes
+ (if ccs-attributes
+ (copy-sequence ccs-attributes)
+ (sort (charset-list) #'char-attribute-name<)))
+ (let (name value has-long-ccs-name rest
+ radical strokes)
+ (save-restriction
+ (narrow-to-region (point)(point))
+ (insert "(define-char
'(")
- (when (setq cell (assq 'name data))
- (setq cell (cdr cell))
- (insert (format
- (if (> (length cell) 47)
- "(name . %S)
+ (when (setq value (get-char-attribute char 'name))
+ (insert (format
+ (if (> (length value) 47)
+ "(name . %S)
"
- "(name\t\t. %S)
+ "(name\t\t. %S)
")
- cell))
- (setq data (del-alist 'name data))
- )
- (when (setq cell (assq 'name data))
- (setq cell (cdr cell))
- (insert (format
- (if (> (length cell) 47)
- "(name . %S)
+ value))
+ (setq attributes (delq 'name attributes))
+ )
+ (when (setq value (get-char-attribute char 'script))
+ (insert (format "(script\t\t%s)
"
- "(name\t\t. %S)
- ")
- cell))
- (setq data (del-alist 'name data))
- )
- (when (setq cell (assq 'ucs data))
- (setq cell (cdr cell))
- (insert (format "(ucs\t\t. #x%04X)
+ (mapconcat (function prin1-to-string)
+ value " ")))
+ (setq attributes (delq 'script attributes))
+ )
+ (when (setq value (get-char-attribute char '->ucs))
+ (insert (format "(->ucs\t\t. #x%04X)\t; %c
"
- cell))
- (setq data (del-alist 'ucs data))
- )
- (when (setq cell (assq '->ucs data))
- (setq cell (cdr cell))
- (insert (format "(->ucs\t\t. #x%04X)
+ value (decode-char 'ucs value)))
+ (setq attributes (delq '->ucs attributes))
+ )
+ (when (setq value (get-char-attribute char 'general-category))
+ (insert (format
+ "(general-category\t%s) ; %s
"
- cell))
- (setq data (del-alist '->ucs data))
- )
- (when (setq cell (assq 'general-category data))
- (setq ret (cdr cell))
- (insert (format
- "(general-category\t%s) ; %s
- "
- (mapconcat (lambda (cell)
- (format "%S" cell))
- ret " ")
- (cond ((rassoc (cdr cell)
- unidata-normative-category-alist)
- "Normative Category")
- ((rassoc (cdr cell)
- unidata-informative-category-alist)
- "Informative Category")
- (t
- "Unknown Category"))))
- (setq data (del-alist 'general-category data))
- )
- (when (setq cell (assq 'bidi-category data))
- (setq cell (cdr cell))
- (insert (format "(bidi-category\t. %S)
+ (mapconcat (lambda (cell)
+ (format "%S" cell))
+ value " ")
+ (cond ((rassoc value unidata-normative-category-alist)
+ "Normative Category")
+ ((rassoc value unidata-informative-category-alist)
+ "Informative Category")
+ (t
+ "Unknown Category"))))
+ (setq attributes (delq 'general-category attributes))
+ )
+ (when (setq value (get-char-attribute char 'bidi-category))
+ (insert (format "(bidi-category\t. %S)
"
- cell))
- (setq data (del-alist 'bidi-category data))
- )
- (when (setq cell (assq 'mirrored data))
- (setq cell (cdr cell))
- (insert (format "(mirrored\t\t. %S)
+ value))
+ (setq attributes (delq 'bidi-category attributes))
+ )
+ (unless (eq (setq value (get-char-attribute char 'mirrored 'empty))
+ 'empty)
+ (insert (format "(mirrored\t\t. %S)
"
- cell))
- (setq data (del-alist 'mirrored data))
- )
- (when (setq cell (assq 'decimal-digit-value data))
- (setq cell (cdr cell))
- (insert (format "(decimal-digit-value . %S)
- "
- cell))
- (setq data (del-alist 'decimal-digit-value data))
- (when (setq cell (assq 'digit-value data))
- (setq cell (cdr cell))
- (insert (format "(digit-value\t . %S)
- "
- cell))
- (setq data (del-alist 'digit-value data))
- )
- (when (setq cell (assq 'numeric-value data))
- (setq cell (cdr cell))
- (insert (format "(numeric-value\t . %S)
- "
- cell))
- (setq data (del-alist 'numeric-value data))
- )
- )
- (when (setq cell (assq 'iso-10646-comment data))
- (setq cell (cdr cell))
- (insert (format "(iso-10646-comment\t . %S)
- "
- cell))
- (setq data (del-alist 'iso-10646-comment data))
- )
- (when (setq cell (assq '->decomposition data))
- (setq cell (cdr cell))
- (insert (format "(->decomposition\t%s)
- "
- (mapconcat (lambda (code)
- (cond ((symbolp code)
- (symbol-name code))
- ((integerp code)
- (format "#x%04X" code))
- (t
- (format "\n %S" code))))
- cell " ")))
- (setq data (del-alist '->decomposition data))
- )
- (when (setq cell (assq '->uppercase data))
- (setq cell (cdr cell))
- (insert (format "(->uppercase\t%s)
- "
- (mapconcat (lambda (code)
- (cond ((symbolp code)
- (symbol-name code))
- ((integerp code)
- (format "#x%04X" code))
- (t
- (format "\n %S" code))))
- cell " ")))
- (setq data (del-alist '->uppercase data))
+ value))
+ (setq attributes (delq 'mirrored attributes))
+ )
+ (cond
+ ((setq value (get-char-attribute char 'decimal-digit-value))
+ (insert (format "(decimal-digit-value . %S)
+ "
+ value))
+ (setq attributes (delq 'decimal-digit-value attributes))
+ (when (setq value (get-char-attribute char 'digit-value))
+ (insert (format "(digit-value\t . %S)
+ "
+ value))
+ (setq attributes (delq 'digit-value attributes))
)
- (when (setq cell (assq '->titlecase data))
- (setq cell (cdr cell))
- (insert (format "(->titlecase\t%s)
- "
- (mapconcat (lambda (code)
- (cond ((symbolp code)
- (symbol-name code))
- ((integerp code)
- (format "#x%04X" code))
- (t
- (format "\n %S" code))))
- cell " ")))
- (setq data (del-alist '->titlecase data))
+ (when (setq value (get-char-attribute char 'numeric-value))
+ (insert (format "(numeric-value\t . %S)
+ "
+ value))
+ (setq attributes (delq 'numeric-value attributes))
)
- (when (setq cell (assq '->lowercase data))
- (setq cell (cdr cell))
- (insert (format "(->lowercase\t%s)
- "
- (mapconcat (lambda (code)
- (cond ((symbolp code)
- (symbol-name code))
- ((integerp code)
- (format "#x%04X" code))
- (t
- (format "\n %S" code))))
- cell " ")))
- (setq data (del-alist '->lowercase data))
+ )
+ (t
+ (when (setq value (get-char-attribute char 'digit-value))
+ (insert (format "(digit-value\t. %S)
+ "
+ value))
+ (setq attributes (delq 'digit-value attributes))
)
- (setq data
- (sort data
- (lambda (a b)
- (char-attribute-name< (car a)(car b)))))
- (setq rest data)
- (while (and rest
+ (when (setq value (get-char-attribute char 'numeric-value))
+ (insert (format "(numeric-value\t. %S)
+ "
+ value))
+ (setq attributes (delq 'numeric-value attributes))
+ )))
+ (when (setq value (get-char-attribute char 'iso-10646-comment))
+ (insert (format "(iso-10646-comment\t. %S)
+ "
+ value))
+ (setq attributes (delq 'iso-10646-comment attributes))
+ )
+ (when (setq value (get-char-attribute char 'morohashi-daikanwa))
+ (insert (format "(morohashi-daikanwa\t%s)
+ "
+ (mapconcat (function prin1-to-string) value " ")))
+ (setq attributes (delq 'morohashi-daikanwa attributes))
+ )
+ (setq radical nil
+ strokes nil)
+ (when (setq value (get-char-attribute char 'ideographic-radical))
+ (setq radical value)
+ (insert (format "(ideographic-radical . %S)\t; %c
+ "
+ radical
+ (aref ideographic-radicals radical)))
+ (setq attributes (delq 'ideographic-radical attributes))
+ )
+ (when (setq value (get-char-attribute char 'ideographic-strokes))
+ (setq strokes value)
+ (insert (format "(ideographic-strokes . %S)
+ "
+ strokes))
+ (setq attributes (delq 'ideographic-strokes attributes))
+ )
+ (when (setq value (get-char-attribute char 'kangxi-radical))
+ (unless (eq value radical)
+ (insert (format "(kangxi-radical\t . %S)\t; %c
+ "
+ value
+ (aref ideographic-radicals value)))
+ (or radical
+ (setq radical value)))
+ (setq attributes (delq 'kangxi-radical attributes))
+ )
+ (when (setq value (get-char-attribute char 'kangxi-strokes))
+ (unless (eq value strokes)
+ (insert (format "(kangxi-strokes\t . %S)
+ "
+ value))
+ (or strokes
+ (setq strokes value)))
+ (setq attributes (delq 'kangxi-strokes attributes))
+ )
+ (when (setq value (get-char-attribute char 'japanese-radical))
+ (unless (eq value radical)
+ (insert (format "(japanese-radical\t . %S)\t; %c
+ "
+ value
+ (aref ideographic-radicals value)))
+ (or radical
+ (setq radical value)))
+ (setq attributes (delq 'japanese-radical attributes))
+ )
+ (when (setq value (get-char-attribute char 'japanese-strokes))
+ (unless (eq value strokes)
+ (insert (format "(japanese-strokes\t . %S)
+ "
+ value))
+ (or strokes
+ (setq strokes value)))
+ (setq attributes (delq 'japanese-strokes attributes))
+ )
+ (when (setq value (get-char-attribute char 'cns-radical))
+ (insert (format "(cns-radical\t . %S)\t; %c
+ "
+ value
+ (aref ideographic-radicals value)))
+ (setq attributes (delq 'cns-radical attributes))
+ )
+ (when (setq value (get-char-attribute char 'cns-strokes))
+ (unless (eq value strokes)
+ (insert (format "(cns-strokes\t . %S)
+ "
+ value))
+ (or strokes
+ (setq strokes value)))
+ (setq attributes (delq 'cns-strokes attributes))
+ )
+ (when (setq value (get-char-attribute char 'shinjigen-1-radical))
+ (unless (eq value radical)
+ (insert (format "(shinjigen-1-radical . %S)\t; %c
+ "
+ value
+ (aref ideographic-radicals value)))
+ (or radical
+ (setq radical value)))
+ (setq attributes (delq 'shinjigen-1-radical attributes))
+ )
+ (when (setq value (get-char-attribute char 'total-strokes))
+ (insert (format "(total-strokes\t . %S)
+ "
+ value))
+ (setq attributes (delq 'total-strokes attributes))
+ )
+ (when (setq value (get-char-attribute char '->ideograph))
+ (insert (format "(->ideograph\t%s)
+ "
+ (mapconcat (lambda (code)
+ (cond ((symbolp code)
+ (symbol-name code))
+ ((integerp code)
+ (format "#x%04X" code))
+ (t
+ (format "\n %S" code))))
+ value " ")))
+ (setq attributes (delq '->ideograph attributes))
+ )
+ (when (setq value (get-char-attribute char '->decomposition))
+ (insert (format "(->decomposition\t%s)
+ "
+ (mapconcat (lambda (code)
+ (cond ((symbolp code)
+ (symbol-name code))
+ ((characterp code)
+ (if readable
+ (format "%S" code)
+ (format "#x%04X"
+ (char-int code))
+ ))
+ ((integerp code)
+ (format "#x%04X" code))
+ (t
+ (format "\n %S" code))))
+ value " ")))
+ (setq attributes (delq '->decomposition attributes))
+ )
+ (when (setq value (get-char-attribute char '->uppercase))
+ (insert (format "(->uppercase\t%s)
+ "
+ (mapconcat (lambda (code)
+ (cond ((symbolp code)
+ (symbol-name code))
+ ((integerp code)
+ (format "#x%04X" code))
+ (t
+ (format "\n %S" code))))
+ value " ")))
+ (setq attributes (delq '->uppercase attributes))
+ )
+ (when (setq value (get-char-attribute char '->lowercase))
+ (insert (format "(->lowercase\t%s)
+ "
+ (mapconcat (lambda (code)
+ (cond ((symbolp code)
+ (symbol-name code))
+ ((integerp code)
+ (format "#x%04X" code))
+ (t
+ (format "\n %S" code))))
+ value " ")))
+ (setq attributes (delq '->lowercase attributes))
+ )
+ (when (setq value (get-char-attribute char '->titlecase))
+ (insert (format "(->titlecase\t%s)
+ "
+ (mapconcat (lambda (code)
+ (cond ((symbolp code)
+ (symbol-name code))
+ ((integerp code)
+ (format "#x%04X" code))
+ (t
+ (format "\n %S" code))))
+ value " ")))
+ (setq attributes (delq '->titlecase attributes))
+ )
+ (when (setq value (get-char-attribute char '->mojikyo))
+ (insert (format "(->mojikyo\t\t. %06d)\t; %c
+ "
+ value (decode-char 'mojikyo value)))
+ (setq attributes (delq '->mojikyo attributes))
+ )
+ (setq rest ccs-attributes)
+ (while (and rest
(progn
- (setq cell (car rest))
- (if (setq ret (find-charset (car cell)))
- (if (>= (length (symbol-name (charset-name ret))) 19)
+ (setq value (get-char-attribute char (car rest)))
+ (if value
+ (if (>= (length (symbol-name (car rest))) 19)
(progn
(setq has-long-ccs-name t)
nil)
t)
t)))
(setq rest (cdr rest)))
- (while data
- (setq cell (car data))
- (cond ((setq ret (find-charset (car cell)))
- (insert (format (if has-long-ccs-name
- "(%-26s %s)
- "
- "(%-18s %s)
- "
- )
- (charset-name ret)
- (mapconcat (lambda (b)
- (format "#x%02X" b)
- )
- (cdr cell) " "))))
- ((string-match "^->" (symbol-name (car cell)))
- (insert
- (format "(%-18s %s)
- "
- (car cell)
- (mapconcat (lambda (code)
- (cond ((symbolp code)
- (symbol-name code))
- ((integerp code)
- (format "#x%04X" code))
- (t
- (format "\n %S" code))))
- (cdr cell) " "))))
- ((consp (cdr cell))
- (insert (format "%S
- "
- cell)))
- (t
- (insert (format "(%-18s . %S)
- "
- (car cell)(cdr cell)))
- ))
- (setq data (cdr data)))
- (insert "))\n")
- (goto-char (point-min))
- (while (re-search-forward "[ \t]+$" nil t)
- (replace-match ""))
- (goto-char (point-max))
- (tabify (point-min)(point-max))
- ))))
-
-(defun insert-char-range-data (min max)
- (let ((code min)
- char
- variants)
- (while (<= code max)
- (setq char (int-char code))
- (insert-char-data char)
- (setq variants (char-variants char))
- (while variants
- (insert-char-data (car variants))
- (setq variants (cdr variants)))
- (setq code (1+ code))
+ (while attributes
+ (setq name (car attributes))
+ (if (setq value (get-char-attribute char name))
+ (cond ((string-match "^->" (symbol-name name))
+ (insert
+ (format "(%-18s %s)
+ "
+ name
+ (mapconcat (lambda (code)
+ (cond ((symbolp code)
+ (symbol-name code))
+ ((integerp code)
+ (format "#x%04X" code))
+ (t
+ (format "\n %S" code))))
+ value " "))))
+ ((consp value)
+ (insert (format "(%-18s %s)
+ "
+ name
+ (mapconcat (function prin1-to-string)
+ value " "))))
+ ((eq name 'jisx0208-1978/4X)
+ (insert (format "(%-18s . #x%04X)
+ "
+ name value)))
+ (t
+ (insert (format "(%-18s . %S)
+ "
+ name value)))
+ ))
+ (setq attributes (cdr attributes)))
+ (while ccs-attributes
+ (setq name (car ccs-attributes))
+ (if (setq value (get-char-attribute char name))
+ (insert
+ (format
+ (if has-long-ccs-name
+ (cond ((eq name 'ideograph-daikanwa)
+ "(%-26s . %05d)\t; %c
+ "
+ )
+ ((eq name 'mojikyo)
+ "(%-26s . %06d)\t; %c
+ "
+ )
+ (t
+ "(%-26s . #x%X)\t; %c
+ "
+ ))
+ (cond ((eq name 'ideograph-daikanwa)
+ "(%-18s . %05d)\t; %c
+ "
+ )
+ ((eq name 'mojikyo)
+ "(%-18s . %06d)\t; %c
+ "
+ )
+ (t
+ "(%-18s . #x%X)\t; %c
+ "
+ )))
+ name
+ (if (= (charset-iso-graphic-plane name) 1)
+ (logior value
+ (cond ((= (charset-dimension name) 1)
+ #x80)
+ ((= (charset-dimension name) 2)
+ #x8080)
+ ((= (charset-dimension name) 3)
+ #x808080)
+ (t 0)))
+ value)
+ (decode-builtin-char name value))))
+ (setq ccs-attributes (cdr ccs-attributes)))
+ (insert "))\n")
+ (goto-char (point-min))
+ (while (re-search-forward "[ \t]+$" nil t)
+ (replace-match ""))
+ (goto-char (point-max))
+ (tabify (point-min)(point-max))
)))
-(defun write-char-range-data-to-file (min max file)
- (with-temp-buffer
- (insert-char-range-data min max)
- (write-region (point-min)(point-max) file)))
-
;;;###autoload
(defun char-db-update-comment ()
(interactive)
(when (find-charset (car cdef))
(goto-char (match-end 0))
(setq char
- (if (or (memq (car cdef) '(ascii latin-viscii-upper
- latin-viscii-lower))
- (= (char-int (charset-final (car cdef))) 0))
+ (if (and
+ (not (eq (car cdef) 'ideograph-daikanwa))
+ (or (memq (car cdef) '(ascii latin-viscii-upper
+ latin-viscii-lower
+ arabic-iso8859-6
+ japanese-jisx0213-1
+ japanese-jisx0213-2))
+ (= (char-int (charset-iso-final-char (car cdef)))
+ 0)))
(apply (function make-char) cdef)
(if (setq table (charset-mapping-table (car cdef)))
(set-charset-mapping-table (car cdef) nil))
(if table
(set-charset-mapping-table (car cdef) table)))))
(when (not (or (< (char-int char) 32)
- (and (<= (char-int char) 128)
+ (and (<= 128 (char-int char))
(< (char-int char) 160))))
(delete-region (point) (point-at-eol))
(insert (format "\t; %c" char)))
)))))
+(defun insert-char-data-with-variant (char &optional script printable
+ no-ucs-variant)
+ (insert-char-data char printable)
+ (let ((variants (or (char-variants char)
+ (let ((ucs (get-char-attribute char '->ucs)))
+ (if ucs
+ (delete char (char-variants (int-char ucs)))))))
+ variant vs)
+ (while variants
+ (setq variant (car variants))
+ (if (or (null script)
+ (null (setq vs (get-char-attribute variant 'script)))
+ (memq script vs))
+ (or (and no-ucs-variant (get-char-attribute variant 'ucs))
+ (insert-char-data variant printable)))
+ (setq variants (cdr variants))
+ )))
+
+(defun insert-char-range-data (min max &optional script)
+ (let ((code min)
+ char)
+ (while (<= code max)
+ (setq char (decode-char 'ucs code))
+ (if (get-char-attribute char 'ucs)
+ (insert-char-data-with-variant char script nil 'no-ucs-variant))
+ (setq code (1+ code))
+ )))
+
+(defun write-char-range-data-to-file (min max file &optional script)
+ (let ((coding-system-for-write 'utf-8))
+ (with-temp-buffer
+ (insert-char-range-data min max script)
+ (write-region (point-min)(point-max) file))))
+
+(defvar what-character-original-window-configuration)
+
;;;###autoload
(defun what-char-definition (char)
(interactive (list (char-after)))
(erase-buffer)
(condition-case err
(progn
- (insert-char-data char)
+ (insert-char-data-with-variant char nil 'printable)
+ ;; (char-db-update-comment)
(set-buffer-modified-p nil)
(view-mode the-buf (lambda (buf)
(set-window-configuration