1 ;;; iddef.el --- Parser and utility for IDDef format files.
3 ;; Copyright (C) 2001,2002 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: IDDef, IDS, IDC, Ideographs, UCS, Unicode
8 ;; This file is a part of Tomoyo-Tools.
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 ;; IDDef is a tab-separated format to describe some
28 ;; character-attributes of each ideograph, mainly for Ideographic
;; Scan BUFFER from its beginning for IDDef records and store an
;; `ideographic-structure' attribute on the characters they describe.
;; NOTE(review): the embedded line numbers in this listing skip values
;; (e.g. 39 -> 44, 45 -> 47, 48 -> 50), so some forms are not visible
;; here; the comments below describe only the forms that are shown.
37 (defun iddef-read-buffer (buffer)
38 (with-current-buffer buffer
39 (goto-char (point-min))
;; Each match is a line beginning with "U+<hex digits>", followed by a
;; tab, a decimal field, a tab, one non-empty field that is not captured,
;; a tab, and a further non-empty field captured as group 3.
44 (while (re-search-forward
45 "^U\\+\\([0-9A-F]+\\)\t\\([0-9]+\\)\t[^\t]+\t\\([^\t\n]+\\)"
;; Group 1 is read as a base-16 integer, group 2 with the default radix.
47 (setq ucs (string-to-int (match-string 1) 16)
48 radical (string-to-int (match-string 2))
;; `seq' is assigned on a line not shown here -- presumably from match
;; group 3; TODO confirm.
50 (setq ret (ids-parse-string seq))
;; The parse result is only searched for its `ideographic-structure'
;; entry when it is a cons.
51 (when (and (consp ret)
53 (setq struct (cdr (assq 'ideographic-structure ret)))))
;; Character corresponding to the code point read from group 1.
54 (setq char (decode-char 'ucs ucs))
;; The following section only runs for characters that have no
;; `ideograph-daikanwa' attribute but do have a `morohashi-daikanwa'
;; attribute of length 3 or more.
55 (unless (get-char-attribute char 'ideograph-daikanwa)
56 (when (and (setq morohashi
57 (get-char-attribute char 'morohashi-daikanwa))
58 (>= (length morohashi) 3))
;; When the second element is 0, the first element alone is decoded in
;; `ideograph-daikanwa'; otherwise `morohashi' is rebuilt as a list and
;; characters whose `morohashi-daikanwa' value is `equal' to it are
;; looked up via `map-char-attribute'.
;; NOTE(review): the variable receiving the result (apparently `m-chr')
;; is bound on a line not shown -- confirm.
60 (if (= (nth 1 morohashi) 0)
61 (decode-char 'ideograph-daikanwa
62 (setq morohashi (car morohashi)))
63 (setq morohashi (list (car morohashi)
65 (map-char-attribute (lambda (char val)
66 (if (equal morohashi val)
68 'morohashi-daikanwa)))
;; Only when `m-chr' has no `ucs' attribute is the structure, converted
;; by `ideographic-structure-convert-to-daikanwa', used for it.
70 (unless (get-char-attribute m-chr 'ucs)
73 'ideographic-structure
74 (ideographic-structure-convert-to-daikanwa struct))))))
;; Record the parsed structure on the UCS character itself.
75 (put-char-attribute char 'ideographic-structure struct)
;; The `->same-ideograph' and `->identical' attributes of the character
;; are read next; NOTE(review): the enclosing form (apparently a loop
;; binding `ref') is not visible here -- confirm.
77 (get-char-attribute char '->same-ideograph)
78 (get-char-attribute char '->identical)))
;; `ref' is used directly when it is a character; another visible branch
;; resolves it through its `:char' property with `find-char'.
80 (cond ((characterp ref) ref)
82 (find-char (plist-get ref :char)))
;; The structure is stored on `ret' only when `ret' has no `ucs'
;; attribute.
85 (unless (get-char-attribute ret 'ucs)
86 (put-char-attribute ret 'ideographic-structure struct))))
;; Interactive command: read a file name with the prompt "IDDef file : ",
;; insert the contents of FILE with `coding-system-for-read' bound to
;; `utf-8', and pass the current buffer to `iddef-read-buffer'.
;; NOTE(review): the form that sets up the buffer receiving the file
;; contents (embedded line 92) is not visible in this listing --
;; presumably a temporary buffer; TODO confirm.
90 (defun iddef-read-file (file)
91 (interactive "fIDDef file : ")
;; The coding system binding covers only the insertion of the file.
93 (let ((coding-system-for-read 'utf-8))
94 (insert-file-contents file))
95 (iddef-read-buffer (current-buffer))))
;; Scan BUFFER from its beginning for IDDef records that carry a
;; "<plane>-<4 hex digits>" field and update character attributes
;; (`ucs', `ucs-cdp', `ideographic-radical') from them.
;; NOTE(review): the embedded line numbers in this listing skip values
;; (e.g. 106 -> 110, 112 -> 114, 124 -> 127), so some forms are not
;; visible here; the comments below describe only the forms that are shown.
98 (defun iddef-check-mapping-buffer (buffer)
99 (with-current-buffer buffer
100 (goto-char (point-min))
101 (let (ucs radical plane code ccs chr ret)
;; Each match is a line beginning with "U+<hex digits>", a tab, a decimal
;; field, then three tab-separated fields that may be empty, followed by
;; one character in [0-9A-C], a hyphen and exactly four hex digits.
102 (while (re-search-forward "^U\\+\\([0-9A-F]+\\)\t\\([0-9]+\\)\t[^\t]*\t[^\t]*\t[^\t]*\t\\([0-9A-C]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\)" nil t)
;; Groups 1, 3 and 4 are read as base-16 integers; group 2 uses the
;; default radix.
103 (setq ucs (string-to-int (match-string 1) 16)
104 radical (string-to-int (match-string 2))
105 plane (string-to-int (match-string 3) 16)
106 code (string-to-int (match-string 4) 16))
;; Here `code' is decoded in `chinese-big5'; the test below checks whether
;; that character has a `ucs' attribute in the inclusive range
;; #xE000..#xF848.
;; NOTE(review): the condition selecting this path and the branch bodies
;; are not visible in this listing -- confirm.
110 (setq chr (decode-char 'chinese-big5 code))
111 (if (and (setq ret (get-char-attribute chr 'ucs))
112 (<= #xE000 ret)(<= ret #xF848))
;; A symbol named "ideograph-hanziku-<plane>" is built from `plane';
;; presumably it is assigned to `ccs' on a line not shown -- TODO confirm.
114 (intern (format "ideograph-hanziku-%d" plane))))
;; Decode `code' in the coded character set held in `ccs'.
116 (setq chr (decode-char ccs code))
;; `ret' is the first non-nil value among the character's `ucs', `=>ucs'
;; and `->ucs' attributes.
117 (if (setq ret (or (get-char-attribute chr 'ucs)
118 (get-char-attribute chr '=>ucs)
119 (get-char-attribute chr '->ucs)))
121 (put-char-attribute chr 'ucs-cdp ucs))
;; When the character's attribute for `ccs' is `eq' to `code', its `ucs'
;; attribute is set; otherwise a character is defined with `define-char'
;; from a specification that starts with the `ucs' entry.
122 (if (eq (get-char-attribute chr ccs) code)
123 (put-char-attribute chr 'ucs ucs)
124 (setq chr (define-char (list (cons 'ucs ucs)
;; An existing `ideographic-radical' attribute is left untouched.
127 (unless (get-char-attribute chr 'ideographic-radical)
128 (put-char-attribute chr 'ideographic-radical radical))
;; Interactive command: read a file name with the prompt "IDDef file : ",
;; insert the contents of FILE with `coding-system-for-read' bound to
;; `utf-8', and pass the current buffer to `iddef-check-mapping-buffer'.
;; NOTE(review): the form that sets up the buffer receiving the file
;; contents (embedded line 134) is not visible in this listing --
;; presumably a temporary buffer; TODO confirm.
132 (defun iddef-check-mapping-file (file)
133 (interactive "fIDDef file : ")
;; The coding system binding covers only the insertion of the file.
135 (let ((coding-system-for-read 'utf-8))
136 (insert-file-contents file))
137 (iddef-check-mapping-buffer (current-buffer))))
145 ;;; iddef.el ends here