1 ;;; ids.el --- Parser and utility for Ideographic Description Sequence.
3 ;; Copyright (C) 2001 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: IDS, IDC, Ideographs, UCS, Unicode
8 ;; This file is a part of Tomoyo-Tools.
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 ;; Ideographic Description Sequence (IDS) is defined in ISO/IEC
28 ;; 10646-1:2000 Annex F.
32 (defun ids-parse-terminal (string)
33 (if (>= (length string) 1)
34 (let* ((chr (aref string 0))
35 (ucs (get-char-attribute chr 'ucs))
37 (unless (and ucs (<= #x2FF0 ucs)(<= ucs #x2FFF))
38 (if (and ucs (<= #xE000 ucs)(<= ucs #xF8FF)
39 (setq big5 (get-char-attribute chr 'chinese-big5)))
40 (setq chr (decode-char 'chinese-big5-cdp big5)))
42 (substring string 1))))))
44 (defun ids-parse-op-2 (string)
45 (if (>= (length string) 1)
46 (let* ((chr (aref string 0))
47 (ucs (get-char-attribute chr 'ucs)))
48 (if (or (eq ucs #x2FF0)
50 (and (<= #x2FF4 ucs)(<= ucs #x2FFB)))
52 (substring string 1))))))
54 (defun ids-parse-op-3 (string)
55 (if (>= (length string) 1)
56 (let ((chr (aref string 0)))
57 (if (memq chr '(?\u2FF2 ?\u2FF3))
59 (substring string 1))))))
61 (defun ids-parse-component (string)
62 (let ((ret (ids-parse-element string))
65 (if (and (listp (car ret))
66 (setq rret (ideographic-structure-find-char
67 (cdr (assq 'ideographic-structure (car ret))))))
71 (defun ids-parse-element (string)
72 (let (ret op arg1 arg2 arg3)
73 (cond ((ids-parse-terminal string))
74 ((setq ret (ids-parse-op-2 string))
76 (when (setq ret (ids-parse-component (cdr ret)))
78 (when (setq ret (ids-parse-component (cdr ret)))
80 (cons (list (list 'ideographic-structure op arg1 arg2))
82 ((setq ret (ids-parse-op-3 string))
84 (when (setq ret (ids-parse-component (cdr ret)))
86 (when (setq ret (ids-parse-component (cdr ret)))
88 (when (setq ret (ids-parse-component (cdr ret)))
90 (cons (list (list 'ideographic-structure op arg1 arg2 arg3))
94 (defun ids-parse-string (string)
95 (let ((ret (ids-parse-element string)))
96 (if (= (length (cdr ret)) 0)
103 (defun ids-read-buffer (buffer)
104 (with-current-buffer buffer
105 (goto-char (point-min))
110 (while (re-search-forward
111 "^U\\+\\([0-9A-F]+\\)\t\\([0-9]+\\)\t[^\t]+\t\\([^\t\n]+\\)"
113 (setq ucs (string-to-int (match-string 1) 16)
114 radical (string-to-int (match-string 2))
115 seq (match-string 3))
116 (setq ret (ids-parse-string seq))
117 (when (and (consp ret)
119 (setq struct (cdr (assq 'ideographic-structure ret)))))
120 (setq char (decode-char 'ucs ucs))
121 (unless (get-char-attribute char 'ideograph-daikanwa)
122 (when (and (setq morohashi
123 (get-char-attribute char 'morohashi-daikanwa))
124 (>= (length morohashi) 3))
126 (if (= (nth 1 morohashi) 0)
127 (decode-char 'ideograph-daikanwa
128 (setq morohashi (car morohashi)))
129 (setq morohashi (list (car morohashi)
131 (map-char-attribute (lambda (char val)
132 (if (equal morohashi val)
134 'morohashi-daikanwa)))
137 'ideographic-structure
138 (ideographic-structure-convert-to-daikanwa struct))))
139 (put-char-attribute char 'ideographic-structure struct)
141 (get-char-attribute char '->same-ideograph)
142 (get-char-attribute char '->identical)))
144 (cond ((characterp ref) ref)
146 (find-char (plist-get ref :char)))
149 (put-char-attribute ret 'ideographic-structure struct)))
152 ;; (ids-read-buffer "IDDef1.txt")