From 68ab47bdfd866fa8ab2e3dfd3a9feb8453a9de11 Mon Sep 17 00:00:00 2001 From: MORIOKA Tomohiko Date: Wed, 4 Sep 2013 22:52:03 +0900 Subject: [PATCH] Assign genre `entry@zh-classical' instead of `morpheme-entry@zh-classical' and `sentence-entry@zh-classical'. (concord-kanbun-add-sentence-entry): Use genre `entry@zh-classical' instead of `sentence-entry@zh-classical'. (concord-kanbun-add-morpheme-entry): Use genre `entry@zh-classical' instead of `morpheme-entry@zh-classical'. (concord-kanbun-add-morpheme): Use feature `->entry@morpheme' instead of `->entry' to store entry object; if canonical-form is not equal to entry, add feature `->entry@morpheme/canonical' to store canonical-form. (concord-kanbun-read-sentence): Add word-classes of morphemes into morphemes in feature `=name'; use feature `->entry@sentence' instead of `->entry' to store entry object for sentence. (concord-kanbun-read-buffer): Display source-names and sentence-numbers. (concord-kanbun-batch-read-file): New function. --- concord-kanbun-dic.el | 51 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 13 deletions(-) diff --git a/concord-kanbun-dic.el b/concord-kanbun-dic.el index 1cfcd70..ea9416a 100644 --- a/concord-kanbun-dic.el +++ b/concord-kanbun-dic.el @@ -1,8 +1,7 @@ -(concord-assign-genre 'morpheme-entry@zh-classical "/usr/local/var/kanbun/db") +(concord-assign-genre 'entry@zh-classical "/usr/local/var/kanbun/db") (concord-assign-genre 'word-class@zh-classical "/usr/local/var/kanbun/db") (concord-assign-genre 'morpheme@zh-classical "/usr/local/var/kanbun/db") (concord-assign-genre 'sentence@zh-classical "/usr/local/var/kanbun/db") -(concord-assign-genre 'sentence-entry@zh-classical "/usr/local/var/kanbun/db") (mount-char-attribute-table '*instance@morpheme-entry/zh-classical) (defun concord-kanbun-encode-name-as-id (name) @@ -37,12 +36,10 @@ (defun concord-kanbun-add-sentence-entry (entry) (let (entry-id me-cobj) - (unless (setq me-cobj (concord-decode-object - 
'=name entry - 'sentence-entry@zh-classical)) + (unless (setq me-cobj (concord-decode-object '=name entry + 'entry@zh-classical)) (setq entry-id (intern (concord-kanbun-encode-name-as-id entry))) - (setq me-cobj (concord-make-object - 'sentence-entry@zh-classical entry-id)) + (setq me-cobj (concord-make-object 'entry@zh-classical entry-id)) (concord-object-put me-cobj '=name entry)) me-cobj)) @@ -52,10 +49,9 @@ entry-id me-cobj chr ret ucs) (unless (setq me-cobj (concord-decode-object '=name entry - 'morpheme-entry@zh-classical)) + 'entry@zh-classical)) (setq entry-id (intern (concord-kanbun-encode-name-as-id entry))) - (setq me-cobj (concord-make-object - 'morpheme-entry@zh-classical entry-id)) + (setq me-cobj (concord-make-object 'entry@zh-classical entry-id)) (concord-object-put me-cobj '=name entry) (if (= len 1) (concord-object-put me-cobj 'character (list (aref entry 0)))) @@ -109,6 +105,7 @@ canonical-form ja-form ja-kana ja-conj-type) (let* (entry-cobj + canonical-entry-cobj wc-cobj wc-name mm-name mj-name mj-id mj-cobj) @@ -126,7 +123,12 @@ (setq mj-cobj (concord-make-object 'morpheme@zh-classical mj-id)) (concord-object-put mj-cobj '=name mj-name) (when (setq entry-cobj (concord-kanbun-add-morpheme-entry entry)) - (concord-object-put mj-cobj '->entry (list entry-cobj))) + (concord-object-put mj-cobj '->entry@morpheme (list entry-cobj))) + (unless (string= entry canonical-form) + (when (setq canonical-entry-cobj + (concord-kanbun-add-morpheme-entry canonical-form)) + (concord-object-put mj-cobj '->entry@morpheme/canonical + (list canonical-entry-cobj)))) (when (setq wc-cobj (concord-kanbun-add-word-class word-superclass word-class word-subclass1 word-subclass2)) @@ -155,6 +157,7 @@ end send ret sentence dest + sentence-name sentence-id-name sentence-id sentence-cobj sentence-entry-cobj) (prog1 @@ -167,6 +170,14 @@ (setq ret (concord-kanbun-parse-corpus-line (buffer-substring (point-at-bol)(point-at-eol)))) (setq sentence (concat sentence (car ret))) + 
(setq sentence-name + (concat sentence-name + (if sentence-name + " ") + (format "%s[%s,%s,%s]" + (car ret) + (nth 2 ret) + (nth 3 ret)(nth 4 ret)))) (setq dest (cons (apply #'concord-kanbun-add-morpheme ret) dest))) @@ -185,7 +196,11 @@ 'sentence@zh-classical sentence-id)) (concord-object-put sentence-cobj '=name (format "%s(%s)" - sentence sentence-id-name))) + sentence-name sentence-id-name)) + ;; (concord-object-put + ;; sentence-cobj '=name (format "%s(%s)" + ;; sentence sentence-id-name)) + ) (concord-object-put sentence-cobj 'source/file-name source-name) (concord-object-put @@ -195,7 +210,7 @@ (when (setq sentence-entry-cobj (concord-kanbun-add-sentence-entry sentence)) (concord-object-put - sentence-cobj '->entry (list sentence-entry-cobj))) + sentence-cobj '->entry@sentence (list sentence-entry-cobj))) sentence-cobj)) (if send (goto-char send))))) @@ -208,4 +223,14 @@ (setq source-name (file-name-nondirectory buffer-file-name))) (let ((i 1)) (while (concord-kanbun-read-sentence i source-name) + (message "%s: sentence #%d is stored." source-name i) (setq i (1+ i)))))) + +(defun concord-kanbun-batch-read-file () + (set-terminal-coding-system 'utf-8-jp-er) + (let ((file (pop command-line-args-left)) + (coding-system-for-read 'utf-8-jp-er) + (file-name-coding-system 'utf-8-jp-er)) + (with-temp-buffer + (insert-file-contents file) + (concord-kanbun-read-buffer (file-name-nondirectory file))))) -- 1.7.10.4