From: tomo Date: Fri, 14 Dec 2001 05:55:14 +0000 (+0000) Subject: Add idc.el. X-Git-Tag: chise-base-0_23~130 X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=dd2f7abb43fa875ee0c26be3a4351877944da036;p=chise%2Ftomoyo-tools.git Add idc.el. --- diff --git a/TOMOYO-ELS b/TOMOYO-ELS index 2fc2e21..5704075 100644 --- a/TOMOYO-ELS +++ b/TOMOYO-ELS @@ -5,7 +5,7 @@ ;;; Code: (setq tomoyo-modules-to-compile - '(csv idc-util)) + '(csv idc idc-util)) (setq tomoyo-modules-not-to-compile nil) diff --git a/idc.el b/idc.el new file mode 100644 index 0000000..aca75e0 --- /dev/null +++ b/idc.el @@ -0,0 +1,119 @@ +(defun idc-parse-terminal (string) + (if (>= (length string) 1) + (let* ((chr (aref string 0)) + (ucs (get-char-attribute chr 'ucs)) + big5) + (unless (and ucs (<= #x2FF0 ucs)(<= ucs #x2FFF)) + (if (and ucs (<= #xE000 ucs)(<= ucs #xF8FF) + (setq big5 (get-char-attribute chr 'chinese-big5))) + (setq chr (decode-char 'chinese-big5-cdp big5))) + (cons chr + (substring string 1)))))) + +(defun idc-parse-op-2 (string) + (if (>= (length string) 1) + (let* ((chr (aref string 0)) + (ucs (get-char-attribute chr 'ucs))) + (if (or (eq ucs #x2FF0) + (eq ucs #x2FF1) + (and (<= #x2FF4 ucs)(<= ucs #x2FFB))) + (cons chr + (substring string 1)))))) + +(defun idc-parse-op-3 (string) + (if (>= (length string) 1) + (let ((chr (aref string 0))) + (if (memq chr '(?\u2FF2 ?\u2FF3)) + (cons chr + (substring string 1)))))) + +(defun idc-parse-component (string) + (let ((ret (idc-parse-element string)) + rret) + (when ret + (if (and (listp (car ret)) + (setq rret (ideographic-structure-find-char + (cdr (assq 'ideographic-structure (car ret)))))) + (cons rret (cdr ret)) + ret)))) + +(defun idc-parse-element (string) + (let (ret op arg1 arg2 arg3) + (cond ((idc-parse-terminal string)) + ((setq ret (idc-parse-op-2 string)) + (setq op (car ret)) + (when (setq ret (idc-parse-component (cdr ret))) + (setq arg1 (car ret)) + (when (setq ret (idc-parse-component (cdr ret))) + (setq arg2 (car ret)) + (cons (list (list 'ideographic-structure op arg1 arg2)) + (cdr ret))))) + ((setq ret (idc-parse-op-3 string)) + (setq op (car ret)) + (when (setq ret (idc-parse-component (cdr ret))) + (setq arg1 (car ret)) + (when (setq ret (idc-parse-component (cdr ret))) + (setq arg2 (car ret)) + (when (setq ret (idc-parse-component (cdr ret))) + (setq arg3 (car ret)) + (cons (list (list 'ideographic-structure op arg1 arg2 arg3)) + (cdr ret))))))))) + +(defun idc-parse-string (string) + (let ((ret (idc-parse-element string))) + (if (= (length (cdr ret)) 0) + (car ret)))) + + +(require 'idc-util) + +(defun idc-read-buffer (buffer) + (with-current-buffer buffer + (goto-char (point-min)) + (let (ucs + radical seq ret + char struct + morohashi m-chr) + (while (re-search-forward + "^U\\+\\([0-9A-F]+\\)\t\\([0-9]+\\)\t[^\t]+\t\\([^\t\n]+\\)" + nil t) + (setq ucs (string-to-int (match-string 1) 16) + radical (string-to-int (match-string 2)) + seq (match-string 3)) + (setq ret (idc-parse-string seq)) + (when (and (consp ret) + (consp + (setq struct (cdr (assq 'ideographic-structure ret))))) + (setq char (decode-char 'ucs ucs)) + (unless (get-char-attribute char 'ideograph-daikanwa) + (when (and (setq morohashi + (get-char-attribute char 'morohashi-daikanwa)) + (>= (length morohashi) 3)) + (setq m-chr + (if (= (nth 1 morohashi) 0) + (decode-char 'ideograph-daikanwa + (setq morohashi (car morohashi))) + (setq morohashi (list (car morohashi) + (nth 1 morohashi))) + (map-char-attribute (lambda (char val) + (if (equal morohashi val) + char)) + 'morohashi-daikanwa))) + (put-char-attribute + m-chr + 'ideographic-structure + (ideographic-structure-convert-to-daikanwa struct)))) + (put-char-attribute char 'ideographic-structure struct) + (dolist (ref (union + (get-char-attribute char '->same-ideograph) + (get-char-attribute char '->identical))) + (if (setq ret + (cond ((characterp ref) ref) + ((char-ref-p ref) + (find-char (plist-get ref :char))) + (t + (find-char ref)))) + (put-char-attribute ret 'ideographic-structure struct))) + ))))) + +;; (idc-read-buffer "IDDef1.txt") \ No newline at end of file