1 ;;; cbeta.el --- Parser for CBETA Ideographs representation.
3 ;; Copyright (C) 2001,2002,2006,2007 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: CBETA, IDS, Ideographs, UCS, Unicode
8 ;; This file is a part of the CHISE-IDS package.
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (defvar cbeta-replacement-char-alist
28 (list '(?\u2502 . ?\u4E28)
30 (cons ?\u2524 (decode-char '=gt-k 00153))
31 (cons ?\u3026 (decode-char 'japanese-jisx0208 #x5035))
40 (defun cbeta-parse-element (string simplify robust strict-component)
41 (let ((chr (aref string 0))
44 (if (> (length string) 1)
45 (let* ((ret (cbeta-parse-1 (substring string 1) simplify
46 robust strict-component))
50 (eq (aref str 0) ?\)))
52 (if (> (length str) 1)
53 (substring str 1)))))))
57 (cons (if (setq ret (assq chr cbeta-replacement-char-alist))
60 (if (> (length string) 1)
61 (substring string 1)))))))
63 (defun cbeta-parse-component (string simplify robust strict-component)
64 (let ((ret (cbeta-parse-1 string simplify robust strict-component))
69 (setq rret (ideographic-structure-find-char
70 (cdr (assq 'ideographic-structure (car ret))))))
74 (defun cbeta-parse-horizontal (l-chr string simplify
75 robust strict-component)
76 (let ((ret (cbeta-parse-component
77 string simplify robust strict-component))
82 (setq rc (ideographic-structure-find-char
83 (cdr (assq 'ideographic-structure l-chr)))))
86 (list 'ideographic-structure
87 ;; '(:cdp-combinator 1 :char #x2FF0)
92 (defun cbeta-parse-vertical (u-chr string simplify
93 robust strict-component)
94 (let ((ret (cbeta-parse-component
95 string simplify robust strict-component))
100 (setq rc (ideographic-structure-find-char
101 (cdr (assq 'ideographic-structure u-chr)))))
104 (list 'ideographic-structure
105 ;; '(:cdp-combinator 2 :char #x2FF1)
110 (defun cbeta-parse-other (u-chr string simplify
111 robust strict-component)
112 (let ((ret (cbeta-parse-component
113 string simplify robust strict-component))
118 (setq rc (ideographic-structure-find-char
119 (cdr (assq 'ideographic-structure u-chr)))))
122 (list 'ideographic-structure
123 (cond ((memq u-chr '(?\u56D7))
125 ((memq u-chr '(?\u51E0))
127 ((memq u-chr '(?\u51F5))
129 ((memq u-chr '(?\u531A))
131 ((memq u-chr '(?\u5382 ?\u5C38))
138 (defun cbeta-substitute-char (s-chr old-chr new-chr)
140 (if (characterp s-chr)
141 (get-char-attribute s-chr 'ideographic-structure)
142 (cdr (assq 'ideographic-structure s-chr))))
146 (setq component (car structure)
147 structure (cdr structure))
148 (cond ((equal component old-chr)
149 (setq ret (nconc (nreverse dest)
150 (cons new-chr structure)))
153 (list (cons 'ideographic-structure ret))
155 ((setq ret (cbeta-substitute-char component old-chr new-chr))
156 (setq ret (nconc (nreverse dest)
157 (cons ret structure)))
160 (list (cons 'ideographic-structure ret))
163 (setq dest (cons component dest))))))))
165 (defun cbeta-delete-char (s-chr d-chr &optional strict-component)
166 (let ((dcl (if strict-component
168 (char-component-variants d-chr)))
170 (if (characterp s-chr)
171 (char-feature s-chr 'ideographic-structure)
172 (cdr (assq 'ideographic-structure s-chr))))
176 (setq component (car structure)
177 structure (cdr structure))
178 (cond ((memq component dcl) ; (equal component d-chr)
179 (setq ret (nconc (nreverse dest) structure))
182 (list (cons 'ideographic-structure ret))
184 ((setq ret (cbeta-delete-char component d-chr strict-component))
185 (setq ret (nconc (nreverse dest)
186 (cons ret structure)))
189 (list (cons 'ideographic-structure ret))
192 (setq dest (cons component dest))))))))
194 (defun cbeta-parse-substitution (s-chr string simplify
195 robust strict-component)
196 (let ((ret (cbeta-parse-1 string simplify robust strict-component))
199 (setq old-chr (car ret)
204 (setq str (substring str 1))
205 (setq ret (cbeta-parse-1 str simplify robust strict-component))
207 (setq new-chr (car ret)
209 (when (setq ret (cbeta-substitute-char s-chr old-chr new-chr))
212 (defun cbeta-parse-elimination (s-chr string simplify
213 robust strict-component)
214 (let ((ret (cbeta-parse-1 string simplify robust strict-component))
217 (setq old-chr (car ret)
219 (cond ((setq ret (cbeta-delete-char
220 s-chr old-chr strict-component))
223 (cons s-chr str))))))
225 (defun cbeta-parse-1 (string simplify &optional robust strict-component)
226 (let ((ret (cbeta-parse-element string simplify robust strict-component))
233 (setq op (aref str 0))
235 (setq str (substring str 1)))
237 (cbeta-parse-horizontal
238 c1 str simplify robust strict-component))
240 (cbeta-parse-vertical
241 c1 str simplify robust strict-component))
244 c1 str simplify robust strict-component))
246 (or (cbeta-parse-substitution
247 c1 str simplify robust strict-component)
248 (cbeta-parse-elimination
249 c1 str simplify robust strict-component)))))
258 ;;; cbeta.el ends here