X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=ids.el;h=8214e834e8ab8e9ccc434e6daec2b1d7c76ded08;hb=d96133367870463ee41670620d1a56c106a038cc;hp=c33c7da9562c0db7f346c694e90f7bcd38f3f966;hpb=3588e2642b0cad74b5f75c69a742fbf180014bdc;p=chise%2Fids.git diff --git a/ids.el b/ids.el index c33c7da..8214e83 100644 --- a/ids.el +++ b/ids.el @@ -1,6 +1,6 @@ ;;; ids.el --- Parser and utility for Ideographic Description Sequence. -;; Copyright (C) 2001 MORIOKA Tomohiko +;; Copyright (C) 2001,2002,2003,2005 MORIOKA Tomohiko ;; Author: MORIOKA Tomohiko ;; Keywords: IDS, IDC, Ideographs, UCS, Unicode @@ -29,22 +29,30 @@ ;;; Code: +(require 'ideograph-util) + +(defun ideographic-structure-find-char (structure) + (dolist (product (char-feature (nth 1 structure) 'ideographic-products)) + (if (equal structure + (char-feature product 'ideographic-structure)) + (return product)))) + (defun ids-parse-terminal (string) (if (>= (length string) 1) (let* ((chr (aref string 0)) - (ucs (get-char-attribute chr 'ucs)) + (ucs (encode-char chr '=ucs 'defined-only)) big5) (unless (and ucs (<= #x2FF0 ucs)(<= ucs #x2FFF)) (if (and ucs (<= #xE000 ucs)(<= ucs #xF8FF) - (setq big5 (get-char-attribute chr 'chinese-big5))) - (setq chr (decode-char 'chinese-big5-cdp big5))) + (setq big5 (encode-char chr 'chinese-big5))) + (setq chr (decode-char '=big5-cdp big5))) (cons chr (substring string 1)))))) (defun ids-parse-op-2 (string) (if (>= (length string) 1) (let* ((chr (aref string 0)) - (ucs (get-char-attribute chr 'ucs))) + (ucs (encode-char chr '=ucs 'defined-only))) (if (or (eq ucs #x2FF0) (eq ucs #x2FF1) (and (<= #x2FF4 ucs)(<= ucs #x2FFB))) @@ -58,98 +66,69 @@ (cons chr (substring string 1)))))) -(defun ids-parse-component (string) - (let ((ret (ids-parse-element string)) +(defun ids-parse-component (string simplify) + (let ((ret (ids-parse-element string simplify)) rret) (when ret - (if (and (listp (car ret)) + (if (and simplify + (listp (car ret)) (setq rret (ideographic-structure-find-char (cdr (assq 'ideographic-structure (car ret)))))) (cons rret (cdr ret)) ret)))) -(defun ids-parse-element (string) +(defun ids-parse-element (string simplify) (let (ret op arg1 arg2 arg3) (cond ((ids-parse-terminal string)) ((setq ret (ids-parse-op-2 string)) (setq op (car ret)) - (when (setq ret (ids-parse-component (cdr ret))) + (when (setq ret (ids-parse-component (cdr ret) simplify)) (setq arg1 (car ret)) - (when (setq ret (ids-parse-component (cdr ret))) + (when (setq ret (ids-parse-component (cdr ret) simplify)) (setq arg2 (car ret)) (cons (list (list 'ideographic-structure op arg1 arg2)) (cdr ret))))) ((setq ret (ids-parse-op-3 string)) (setq op (car ret)) - (when (setq ret (ids-parse-component (cdr ret))) + (when (setq ret (ids-parse-component (cdr ret) simplify)) (setq arg1 (car ret)) - (when (setq ret (ids-parse-component (cdr ret))) + (when (setq ret (ids-parse-component (cdr ret) simplify)) (setq arg2 (car ret)) - (when (setq ret (ids-parse-component (cdr ret))) + (when (setq ret (ids-parse-component (cdr ret) simplify)) (setq arg3 (car ret)) (cons (list (list 'ideographic-structure op arg1 arg2 arg3)) (cdr ret))))))))) ;;;###autoload -(defun ids-parse-string (string) - (let ((ret (ids-parse-element string))) +(defun ids-parse-string (ids-string &optional simplify) + "Parse IDS-STRING and return the result." + (let ((ret (ids-parse-element ids-string simplify))) (if (= (length (cdr ret)) 0) (car ret)))) - -(require 'ids-util) - -;;;###autoload -(defun ids-read-buffer (buffer) - (with-current-buffer buffer - (goto-char (point-min)) - (let (ucs - radical seq ret - char struct - morohashi m-chr) - (while (re-search-forward - "^U\\+\\([0-9A-F]+\\)\t\\([0-9]+\\)\t[^\t]+\t\\([^\t\n]+\\)" - nil t) - (setq ucs (string-to-int (match-string 1) 16) - radical (string-to-int (match-string 2)) - seq (match-string 3)) - (setq ret (ids-parse-string seq)) - (when (and (consp ret) - (consp - (setq struct (cdr (assq 'ideographic-structure ret))))) - (setq char (decode-char 'ucs ucs)) - (unless (get-char-attribute char 'ideograph-daikanwa) - (when (and (setq morohashi - (get-char-attribute char 'morohashi-daikanwa)) - (>= (length morohashi) 3)) - (setq m-chr - (if (= (nth 1 morohashi) 0) - (decode-char 'ideograph-daikanwa - (setq morohashi (car morohashi))) - (setq morohashi (list (car morohashi) - (nth 1 morohashi))) - (map-char-attribute (lambda (char val) - (if (equal morohashi val) - char)) - 'morohashi-daikanwa))) - (put-char-attribute - m-chr - 'ideographic-structure - (ideographic-structure-convert-to-daikanwa struct)))) - (put-char-attribute char 'ideographic-structure struct) - (dolist (ref (union - (get-char-attribute char '->same-ideograph) - (get-char-attribute char '->identical))) - (if (setq ret - (cond ((characterp ref) ref) - ((char-ref-p ref) - (find-char (plist-get ref :char))) - (t - (find-char ref)))) - (put-char-attribute ret 'ideographic-structure struct))) - ))))) - -;; (ids-read-buffer "IDDef1.txt") +;; (defun ids-format-unit (ids-char) +;; (let (ret) +;; (cond ((characterp ids-char) +;; (char-to-string ids-char)) +;; ((integerp ids-char) +;; (char-to-string (decode-char 'ucs ids-char))) +;; ((setq ret (find-char ids-char)) +;; (char-to-string ret)) +;; ((setq ret (assq 'ideographic-structure ids-char)) +;; (ids-format-list (cdr ret)))))) + +;; ;;;###autoload +;; (defun ids-format-list (ids-list) +;; "Format ideographic-structure IDS-LIST as an IDS-string." +;; (mapconcat (lambda (cell) +;; (ids-format-unit +;; (if (char-ref-p cell) +;; (plist-get cell :char) +;; cell))) +;; ids-list "")) + +(define-obsolete-function-alias + 'ids-format-list 'ideographic-structure-to-ids) ;;; @ End. ;;;