X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=cbeta.el;h=4ce562fade934e6b5775b1ba472ddc32531d5951;hb=200eaab487c887e8cfe45e78eed2d7572ca5c837;hp=7abea03310e95d62b88108aada89c0a4368c2a58;hpb=f095e5d08dc4e5af4e9382e5e83434465c72bd5a;p=chise%2Fids.git diff --git a/cbeta.el b/cbeta.el index 7abea03..4ce562f 100644 --- a/cbeta.el +++ b/cbeta.el @@ -1,11 +1,11 @@ ;;; cbeta.el --- Parser for CBETA Ideographs representation. -;; Copyright (C) 2001,2002 MORIOKA Tomohiko +;; Copyright (C) 2001,2002,2006,2007 MORIOKA Tomohiko ;; Author: MORIOKA Tomohiko ;; Keywords: CBETA, IDS, Ideographs, UCS, Unicode -;; This file is a part of Tomoyo-Tools. +;; This file is a part of the CHISE-IDS package. ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as @@ -24,8 +24,6 @@ ;;; Code: -;; (require 'mojikyo) - (defvar cbeta-replacement-char-alist (list '(?\u2502 . ?\u4E28) '(?\u251C . ?\u2E8A) @@ -39,12 +37,13 @@ '(?\u3120 . ?\u5E7A) )) -(defun cbeta-parse-element (string simplify) +(defun cbeta-parse-element (string simplify robust strict-component) (let ((chr (aref string 0)) ret) (cond ((eq chr ?\() (if (> (length string) 1) - (let* ((ret (cbeta-parse-1 (substring string 1) simplify)) + (let* ((ret (cbeta-parse-1 (substring string 1) simplify + robust strict-component)) (str (cdr ret))) (if (and str (>= (length str) 1) @@ -61,8 +60,8 @@ (if (> (length string) 1) (substring string 1))))))) -(defun cbeta-parse-component (string simplify) - (let ((ret (cbeta-parse-1 string simplify)) +(defun cbeta-parse-component (string simplify robust strict-component) + (let ((ret (cbeta-parse-1 string simplify robust strict-component)) rret) (when ret (if (and simplify @@ -72,8 +71,10 @@ (cons rret (cdr ret)) ret)))) -(defun cbeta-parse-horizontal (l-chr string simplify) - (let ((ret (cbeta-parse-component string simplify)) +(defun cbeta-parse-horizontal (l-chr string simplify + robust strict-component) + (let ((ret (cbeta-parse-component + string simplify robust strict-component)) rc) (when ret (if (and simplify @@ -88,8 +89,10 @@ l-chr (car ret))) (cdr ret))))) -(defun cbeta-parse-vertical (u-chr string simplify) - (let ((ret (cbeta-parse-component string simplify)) +(defun cbeta-parse-vertical (u-chr string simplify + robust strict-component) + (let ((ret (cbeta-parse-component + string simplify robust strict-component)) rc) (when ret (if (and simplify @@ -104,8 +107,10 @@ u-chr (car ret))) (cdr ret))))) -(defun cbeta-parse-other (u-chr string simplify) - (let ((ret (cbeta-parse-component string simplify)) +(defun cbeta-parse-other (u-chr string simplify + robust strict-component) + (let ((ret (cbeta-parse-component + string simplify robust strict-component)) rc) (when ret (if (and simplify @@ -157,23 +162,26 @@ (t (setq dest (cons component dest)))))))) -(defun cbeta-delete-char (s-chr d-chr) - (let ((structure +(defun cbeta-delete-char (s-chr d-chr &optional strict-component) + (let ((dcl (if strict-component + (list d-chr) + (char-component-variants d-chr))) + (structure (if (characterp s-chr) - (get-char-attribute s-chr 'ideographic-structure) + (char-feature s-chr 'ideographic-structure) (cdr (assq 'ideographic-structure s-chr)))) component dest ret) (catch 'tag (while structure (setq component (car structure) structure (cdr structure)) - (cond ((equal component d-chr) + (cond ((memq component dcl) ; (equal component d-chr) (setq ret (nconc (nreverse dest) structure)) (throw 'tag (if (cdr (cdr ret)) (list (cons 'ideographic-structure ret)) (car (cdr ret))))) - ((setq ret (cbeta-delete-char component d-chr)) + ((setq ret (cbeta-delete-char component d-chr strict-component)) (setq ret (nconc (nreverse dest) (cons ret structure))) (throw 'tag @@ -183,8 +191,9 @@ (t (setq dest (cons component dest)))))))) -(defun cbeta-parse-substitution (s-chr string simplify) - (let ((ret (cbeta-parse-1 string simplify)) +(defun cbeta-parse-substitution (s-chr string simplify + robust strict-component) + (let ((ret (cbeta-parse-1 string simplify robust strict-component)) old-chr new-chr str) (when ret (setq old-chr (car ret) @@ -193,24 +202,28 @@ (eq (aref str 0) ?+) (>= (length str) 2)) (setq str (substring str 1)) - (setq ret (cbeta-parse-1 str simplify)) + (setq ret (cbeta-parse-1 str simplify robust strict-component)) (when ret (setq new-chr (car ret) str (cdr ret)) (when (setq ret (cbeta-substitute-char s-chr old-chr new-chr)) (cons ret str))))))) -(defun cbeta-parse-elimination (s-chr string simplify) - (let ((ret (cbeta-parse-1 string simplify)) +(defun cbeta-parse-elimination (s-chr string simplify + robust strict-component) + (let ((ret (cbeta-parse-1 string simplify robust strict-component)) old-chr str) (when ret (setq old-chr (car ret) str (cdr ret)) - (when (setq ret (cbeta-delete-char s-chr old-chr)) - (cons ret str))))) + (cond ((setq ret (cbeta-delete-char + s-chr old-chr strict-component)) + (cons ret str)) + (robust + (cons s-chr str)))))) -(defun cbeta-parse-1 (string simplify) - (let ((ret (cbeta-parse-element string simplify)) +(defun cbeta-parse-1 (string simplify &optional robust strict-component) + (let ((ret (cbeta-parse-element string simplify robust strict-component)) c1 str op) (when ret @@ -221,14 +234,19 @@ (> (length str) 1) (setq str (substring str 1))) (cond ((eq op ?*) - (cbeta-parse-horizontal c1 str simplify)) + (cbeta-parse-horizontal + c1 str simplify robust strict-component)) ((eq op ?/) - (cbeta-parse-vertical c1 str simplify)) + (cbeta-parse-vertical + c1 str simplify robust strict-component)) ((eq op ?@) - (cbeta-parse-other c1 str simplify)) + (cbeta-parse-other + c1 str simplify robust strict-component)) ((eq op ?-) - (or (cbeta-parse-substitution c1 str simplify) - (cbeta-parse-elimination c1 str simplify))))) + (or (cbeta-parse-substitution + c1 str simplify robust strict-component) + (cbeta-parse-elimination + c1 str simplify robust strict-component))))) ret))))