From: tomo Date: Fri, 14 Dec 2001 18:04:36 +0000 (+0000) Subject: Rename `idc' to `ids'. X-Git-Tag: ids-0_0~24 X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3588e2642b0cad74b5f75c69a742fbf180014bdc;p=chise%2Fids.git Rename `idc' to `ids'. --- 3588e2642b0cad74b5f75c69a742fbf180014bdc diff --git a/ids-util.el b/ids-util.el new file mode 100644 index 0000000..8dfb3c5 --- /dev/null +++ b/ids-util.el @@ -0,0 +1,185 @@ +;;; ids-util.el --- Utilities about ideographic-structure property + +;; Copyright (C) 2001 MORIOKA Tomohiko + +;; Author: MORIOKA Tomohiko +;; Keywords: ideographic-structure, UTF-2000, database + +;; This file is a part of Tomoyo Utilities. + +;; This program is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2, or (at +;; your option) any later version. + +;; This program is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program; see the file COPYING. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;;; Code: + +;;;###autoload +(defun ideographic-structure-convert-to-ucs (structure) + (let (dest cell ucs ret) + (while structure + (setq cell (car structure)) + (setq dest + (cons + (cond ((characterp cell) + (if (or (get-char-attribute cell 'ucs) + (null + (setq ucs + (or (get-char-attribute cell '=>ucs) + (get-char-attribute cell '->ucs))))) + cell + (decode-char 'ucs ucs))) + ((and (consp cell) + (symbolp (car cell))) + cell) + ((setq ret (find-char cell)) + (if (or (get-char-attribute ret 'ucs) + (null + (setq ucs + (or (get-char-attribute ret '=>ucs) + (get-char-attribute ret '->ucs))))) + cell + (decode-char 'ucs ucs))) + ((setq ret (assq 'ideographic-structure cell)) + (put-alist 'ideographic-structure + (ideographic-structure-convert-to-ucs + (cdr ret)) + (copy-alist cell))) + (t cell)) + dest)) + (setq structure (cdr structure))) + (nreverse dest))) + +(defvar morohashi-char-replace-alist + (list + (cons (decode-char 'chinese-big5-cdp #x8B42) + (decode-char 'chinese-big5-cdp #x8B42)) + (cons (decode-char 'chinese-big5-cdp #x8AFC) + (decode-char 'chinese-big5-cdp #x8AFC)) + (cons (decode-char 'ucs #x2EBE) + (decode-char 'ucs #x2EBF)) + (cons (decode-char 'ucs #x5922) + (decode-char 'ideograph-daikanwa 05802)) + (cons (decode-char 'ucs #x656C) + (decode-char 'ideograph-daikanwa 13303)) + (cons (decode-char 'ucs #x8449) + (decode-char 'ideograph-daikanwa 31387)) + (cons (decode-char 'ucs #x2EA4) + (decode-char 'ucs #x722B)) + (cons (decode-char 'ucs #x5151) + (decode-char 'ideograph-daikanwa 01356)) + (cons (decode-char 'ucs #x544A) + (decode-char 'ideograph-daikanwa 03381)) + (cons (decode-char 'ucs #x5F66) + (decode-char 'ideograph-daikanwa 09980)) + (cons (decode-char 'ucs #x8005) + (decode-char 'ideograph-daikanwa 28853)) + (cons (decode-char 'ucs #x82E5) + (decode-char 'ideograph-daikanwa 30796)) + (cons (decode-char 'ucs #x82F1) + (decode-char 'ideograph-daikanwa 30808)) + (cons (decode-char 'ucs #x9063) + (decode-char 'ideograph-daikanwa 39052)) + (cons (decode-char 'ucs #x4EA0) + (decode-char 'chinese-big5-cdp #x8B42)) + (cons (decode-char 'ucs #x5154) + (decode-char 'ideograph-daikanwa 01368)) + (cons (decode-char 'ucs #x53CA) + (decode-char 'ideograph-daikanwa 03118)) + (cons (decode-char 'ucs #x5468) + (decode-char 'ideograph-daikanwa 03441)) + (cons (decode-char 'ucs #x5C1A) + (decode-char 'ucs #x5C19)) + (cons (decode-char 'ucs #x5D29) + (decode-char 'ideograph-daikanwa 08212)) + (cons (decode-char 'ucs #x670B) + (decode-char 'ideograph-daikanwa 14340)) + (cons (decode-char 'ucs #x7FBD) + (decode-char 'ideograph-daikanwa 28614)) + (cons (decode-char 'ucs #x8096) + (decode-char 'ideograph-daikanwa 29263)) + (cons (decode-char 'ucs #x8981) + (decode-char 'ideograph-daikanwa 34768)) + (cons (decode-char 'ucs #x8AF8) + (decode-char 'ideograph-daikanwa 35743)) + (cons (decode-char 'ucs #x9023) + (decode-char 'ideograph-daikanwa 38902)) + (cons (decode-char 'ucs #x9752) + (decode-char 'ucs #x9751)) + )) + +;;;###autoload +(defun ideographic-structure-convert-to-daikanwa (structure) + (let (dest cell morohashi ret) + (while structure + (setq cell (car structure)) + (setq dest + (cons + (cond ((characterp cell) + (cond ((setq ret + (assq cell morohashi-char-replace-alist)) + (cdr ret)) + ((get-char-attribute cell 'ideograph-daikanwa) + cell) + ((setq morohashi + (get-char-attribute + cell 'morohashi-daikanwa)) + (cond ((null (cdr (cdr morohashi))) + cell) + ((= (nth 1 morohashi) 0) + (decode-char 'ideograph-daikanwa + (car morohashi))) + (t + (setq morohashi (list (car morohashi) + (nth 1 morohashi))) + (or (map-char-attribute + (lambda (char val) + (if (equal morohashi val) + char)) + 'morohashi-daikanwa) + cell)))) + (t + cell))) + ((and (consp cell) + (symbolp (car cell))) + cell) + ((setq ret (find-char cell)) + (if (or (get-char-attribute ret 'ideograph-daikanwa) + (null + (setq morohashi + (get-char-attribute + ret 'morohashi-daikanwa))) + (null (cdr (cdr morohashi)))) + cell + (if (= (nth 1 morohashi) 0) + (decode-char 'ideograph-daikanwa (car morohashi)) + cell))) + ((setq ret (assq 'ideographic-structure cell)) + (put-alist 'ideographic-structure + (ideographic-structure-convert-to-daikanwa + (cdr ret)) + (copy-alist cell))) + (t cell)) + dest)) + (setq structure (cdr structure))) + (nreverse dest))) + + +;;; @ End. +;;; + +(provide 'ids-util) + +;;; ids-util.el ends here diff --git a/ids.el b/ids.el new file mode 100644 index 0000000..c33c7da --- /dev/null +++ b/ids.el @@ -0,0 +1,159 @@ +;;; ids.el --- Parser and utility for Ideographic Description Sequence. + +;; Copyright (C) 2001 MORIOKA Tomohiko + +;; Author: MORIOKA Tomohiko +;; Keywords: IDS, IDC, Ideographs, UCS, Unicode + +;; This file is a part of Tomoyo-Tools. + +;; This program is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2, or (at +;; your option) any later version. + +;; This program is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program; see the file COPYING. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; Ideographic Description Sequence (IDS) is defined in ISO/IEC +;; 10646-1:2000 Annex F. + +;;; Code: + +(defun ids-parse-terminal (string) + (if (>= (length string) 1) + (let* ((chr (aref string 0)) + (ucs (get-char-attribute chr 'ucs)) + big5) + (unless (and ucs (<= #x2FF0 ucs)(<= ucs #x2FFF)) + (if (and ucs (<= #xE000 ucs)(<= ucs #xF8FF) + (setq big5 (get-char-attribute chr 'chinese-big5))) + (setq chr (decode-char 'chinese-big5-cdp big5))) + (cons chr + (substring string 1)))))) + +(defun ids-parse-op-2 (string) + (if (>= (length string) 1) + (let* ((chr (aref string 0)) + (ucs (get-char-attribute chr 'ucs))) + (if (or (eq ucs #x2FF0) + (eq ucs #x2FF1) + (and (<= #x2FF4 ucs)(<= ucs #x2FFB))) + (cons chr + (substring string 1)))))) + +(defun ids-parse-op-3 (string) + (if (>= (length string) 1) + (let ((chr (aref string 0))) + (if (memq chr '(?\u2FF2 ?\u2FF3)) + (cons chr + (substring string 1)))))) + +(defun ids-parse-component (string) + (let ((ret (ids-parse-element string)) + rret) + (when ret + (if (and (listp (car ret)) + (setq rret (ideographic-structure-find-char + (cdr (assq 'ideographic-structure (car ret)))))) + (cons rret (cdr ret)) + ret)))) + +(defun ids-parse-element (string) + (let (ret op arg1 arg2 arg3) + (cond ((ids-parse-terminal string)) + ((setq ret (ids-parse-op-2 string)) + (setq op (car ret)) + (when (setq ret (ids-parse-component (cdr ret))) + (setq arg1 (car ret)) + (when (setq ret (ids-parse-component (cdr ret))) + (setq arg2 (car ret)) + (cons (list (list 'ideographic-structure op arg1 arg2)) + (cdr ret))))) + ((setq ret (ids-parse-op-3 string)) + (setq op (car ret)) + (when (setq ret (ids-parse-component (cdr ret))) + (setq arg1 (car ret)) + (when (setq ret (ids-parse-component (cdr ret))) + (setq arg2 (car ret)) + (when (setq ret (ids-parse-component (cdr ret))) + (setq arg3 (car ret)) + (cons (list (list 'ideographic-structure op arg1 arg2 arg3)) + (cdr ret))))))))) + +;;;###autoload +(defun ids-parse-string (string) + (let ((ret (ids-parse-element string))) + (if (= (length (cdr ret)) 0) + (car ret)))) + + +(require 'ids-util) + +;;;###autoload +(defun ids-read-buffer (buffer) + (with-current-buffer buffer + (goto-char (point-min)) + (let (ucs + radical seq ret + char struct + morohashi m-chr) + (while (re-search-forward + "^U\\+\\([0-9A-F]+\\)\t\\([0-9]+\\)\t[^\t]+\t\\([^\t\n]+\\)" + nil t) + (setq ucs (string-to-int (match-string 1) 16) + radical (string-to-int (match-string 2)) + seq (match-string 3)) + (setq ret (ids-parse-string seq)) + (when (and (consp ret) + (consp + (setq struct (cdr (assq 'ideographic-structure ret))))) + (setq char (decode-char 'ucs ucs)) + (unless (get-char-attribute char 'ideograph-daikanwa) + (when (and (setq morohashi + (get-char-attribute char 'morohashi-daikanwa)) + (>= (length morohashi) 3)) + (setq m-chr + (if (= (nth 1 morohashi) 0) + (decode-char 'ideograph-daikanwa + (setq morohashi (car morohashi))) + (setq morohashi (list (car morohashi) + (nth 1 morohashi))) + (map-char-attribute (lambda (char val) + (if (equal morohashi val) + char)) + 'morohashi-daikanwa))) + (put-char-attribute + m-chr + 'ideographic-structure + (ideographic-structure-convert-to-daikanwa struct)))) + (put-char-attribute char 'ideographic-structure struct) + (dolist (ref (union + (get-char-attribute char '->same-ideograph) + (get-char-attribute char '->identical))) + (if (setq ret + (cond ((characterp ref) ref) + ((char-ref-p ref) + (find-char (plist-get ref :char))) + (t + (find-char ref)))) + (put-char-attribute ret 'ideographic-structure struct))) + ))))) + +;; (ids-read-buffer "IDDef1.txt") + +;;; @ End. +;;; + +(provide 'ids) + +;;; ids.el ends here