Assign genre `entry@zh-classical' instead of
author MORIOKA Tomohiko <tomo.git@chise.org>
Wed, 4 Sep 2013 13:52:03 +0000 (22:52 +0900)
committer MORIOKA Tomohiko <tomo.git@chise.org>
Wed, 4 Sep 2013 13:52:03 +0000 (22:52 +0900)
`morpheme-entry@zh-classical' and `sentence-entry@zh-classical'.
(concord-kanbun-add-sentence-entry): Use genre `entry@zh-classical'
instead of `sentence-entry@zh-classical'.
(concord-kanbun-add-morpheme-entry): Use genre `entry@zh-classical'
instead of `morpheme-entry@zh-classical'.
(concord-kanbun-add-morpheme): Use feature `->entry@morpheme' instead
of `->entry' to store entry object; if canonical-form is not equal to
entry, add feature `->entry@morpheme/canonical' to store
canonical-form.
(concord-kanbun-read-sentence): Add word-classes of morphemes into
morphemes in feature `=name'; use feature `->entry@sentence' instead
of `->entry' to store entry object for sentence.
(concord-kanbun-read-buffer): Display source-names and sentence-numbers.
(concord-kanbun-batch-read-file): New function.

concord-kanbun-dic.el

index 1cfcd70..ea9416a 100644 (file)
@@ -1,8 +1,7 @@
-(concord-assign-genre 'morpheme-entry@zh-classical "/usr/local/var/kanbun/db")
+(concord-assign-genre 'entry@zh-classical "/usr/local/var/kanbun/db")
 (concord-assign-genre 'word-class@zh-classical "/usr/local/var/kanbun/db")
 (concord-assign-genre 'morpheme@zh-classical "/usr/local/var/kanbun/db")
 (concord-assign-genre 'sentence@zh-classical "/usr/local/var/kanbun/db")
-(concord-assign-genre 'sentence-entry@zh-classical "/usr/local/var/kanbun/db")
 (mount-char-attribute-table '*instance@morpheme-entry/zh-classical)
 
 (defun concord-kanbun-encode-name-as-id (name)
 
 (defun concord-kanbun-add-sentence-entry (entry)
   (let (entry-id me-cobj)
-    (unless (setq me-cobj (concord-decode-object
-                          '=name entry
-                          'sentence-entry@zh-classical))
+    (unless (setq me-cobj (concord-decode-object '=name entry
+                                                'entry@zh-classical))
       (setq entry-id (intern (concord-kanbun-encode-name-as-id entry)))
-      (setq me-cobj (concord-make-object
-                    'sentence-entry@zh-classical entry-id))
+      (setq me-cobj (concord-make-object 'entry@zh-classical entry-id))
       (concord-object-put me-cobj '=name entry))
     me-cobj))
 
        entry-id me-cobj chr ret ucs)
     (unless (setq me-cobj (concord-decode-object
                           '=name entry
-                          'morpheme-entry@zh-classical))
+                          'entry@zh-classical))
       (setq entry-id (intern (concord-kanbun-encode-name-as-id entry)))
-      (setq me-cobj (concord-make-object
-                    'morpheme-entry@zh-classical entry-id))
+      (setq me-cobj (concord-make-object 'entry@zh-classical entry-id))
       (concord-object-put me-cobj '=name entry)
       (if (= len 1)
          (concord-object-put me-cobj 'character (list (aref entry 0))))
                                          canonical-form
                                          ja-form ja-kana ja-conj-type)
   (let* (entry-cobj
+        canonical-entry-cobj
         wc-cobj wc-name
         mm-name
         mj-name mj-id mj-cobj)
        (setq mj-cobj (concord-make-object 'morpheme@zh-classical mj-id))
        (concord-object-put mj-cobj '=name mj-name)
        (when (setq entry-cobj (concord-kanbun-add-morpheme-entry entry))
-         (concord-object-put mj-cobj '->entry (list entry-cobj)))
+         (concord-object-put mj-cobj '->entry@morpheme (list entry-cobj)))
+       (unless (string= entry canonical-form)
+         (when (setq canonical-entry-cobj
+                     (concord-kanbun-add-morpheme-entry canonical-form))
+           (concord-object-put mj-cobj '->entry@morpheme/canonical
+                               (list canonical-entry-cobj))))
        (when (setq wc-cobj (concord-kanbun-add-word-class
                             word-superclass word-class
                             word-subclass1 word-subclass2))
        end send
        ret
        sentence dest
+       sentence-name
        sentence-id-name sentence-id sentence-cobj
        sentence-entry-cobj)
     (prog1
              (setq ret (concord-kanbun-parse-corpus-line
                         (buffer-substring (point-at-bol)(point-at-eol))))
              (setq sentence (concat sentence (car ret)))
+             (setq sentence-name
+                   (concat sentence-name
+                           (if sentence-name
+                               " ")
+                           (format "%s[%s,%s,%s]"
+                                   (car ret)
+                                   (nth 2 ret)
+                                   (nth 3 ret)(nth 4 ret))))
              (setq dest
                    (cons (apply #'concord-kanbun-add-morpheme ret)
                          dest)))
                     'sentence@zh-classical sentence-id))
              (concord-object-put
               sentence-cobj '=name (format "%s(%s)"
-                                           sentence sentence-id-name)))
+                                           sentence-name sentence-id-name))
+              ;; (concord-object-put
+              ;;  sentence-cobj '=name (format "%s(%s)"
+              ;;                               sentence sentence-id-name))
+             )
            (concord-object-put
             sentence-cobj 'source/file-name source-name)
            (concord-object-put
            (when (setq sentence-entry-cobj
                        (concord-kanbun-add-sentence-entry sentence))
              (concord-object-put
-              sentence-cobj '->entry (list sentence-entry-cobj)))
+              sentence-cobj '->entry@sentence (list sentence-entry-cobj)))
            sentence-cobj))
       (if send
          (goto-char send)))))
       (setq source-name (file-name-nondirectory buffer-file-name)))
     (let ((i 1))
       (while (concord-kanbun-read-sentence i source-name)
+       (message (format "%s: sentence #%d is stored." source-name i))
        (setq i (1+ i))))))
+
+(defun concord-kanbun-batch-read-file ()
+  (set-terminal-coding-system 'utf-8-jp-er)
+  (let ((file (pop command-line-args-left))
+       (coding-system-for-read 'utf-8-jp-er)
+       (file-name-coding-system 'utf-8-jp-er))
+    (with-temp-buffer
+      (insert-file-contents file)
+      (concord-kanbun-read-buffer (file-name-nondirectory file)))))