(concord-kanbun-add-morpheme): Add new argument `comment'.
author MORIOKA Tomohiko <tomo.git@chise.org>
Sun, 15 Sep 2013 10:21:40 +0000 (19:21 +0900)
committer MORIOKA Tomohiko <tomo.git@chise.org>
Sun, 15 Sep 2013 10:21:40 +0000 (19:21 +0900)
(concord-kanbun-parse-corpus-line): Support comment.
(concord-kanbun-read-sentence): Likewise.

concord-kanbun-dic.el

index 1758998..6131b0f 100644 (file)
                                          word-subclass1 word-subclass2
                                          word-subclass3
                                          canonical-form
-                                         ja-form ja-kana ja-conj-type)
+                                         ja-form ja-kana ja-conj-type
+                                         comment)
   (let* (entry-cobj
         canonical-entry-cobj
         wc-cobj wc-name
         mm-name
-        mj-name mj-id mj-cobj)
+        mj-name
+        mjc-name mjc-id mjc-cobj)
     (when entry
       (setq wc-name
            (if (or (null word-subclass3)
                            entry canonical-form wc-name))
       (setq mj-name (format "%s (%s (%s),%s)"
                            mm-name ja-form ja-kana ja-conj-type))
-      (unless (setq mj-cobj (concord-decode-object
-                            '=name mj-name 'morpheme@zh-classical))
-       (setq mj-id (intern (concord-kanbun-encode-name-as-id mj-name)))
-       (setq mj-cobj (concord-make-object 'morpheme@zh-classical mj-id))
-       (concord-object-put mj-cobj '=name mj-name)
+      (setq mjc-name
+           (if comment
+               (format "%s\t; %s" mj-name comment)
+             mj-name))
+      (unless (setq mjc-cobj (concord-decode-object
+                            '=name mjc-name 'morpheme@zh-classical))
+       (setq mjc-id (intern (concord-kanbun-encode-name-as-id mjc-name)))
+       (setq mjc-cobj (concord-make-object 'morpheme@zh-classical mjc-id))
+       (concord-object-put mjc-cobj '=name mjc-name)
        (when (setq entry-cobj (concord-kanbun-add-morpheme-entry entry))
-         (concord-object-put mj-cobj '->entry@morpheme (list entry-cobj)))
+         (concord-object-put mjc-cobj '->entry@morpheme (list entry-cobj)))
        (unless (string= entry canonical-form)
          (when (setq canonical-entry-cobj
                      (concord-kanbun-add-morpheme-entry canonical-form))
-           (concord-object-put mj-cobj '->entry@morpheme/canonical
+           (concord-object-put mjc-cobj '->entry@morpheme/canonical
                                (list canonical-entry-cobj))))
        (when (setq wc-cobj (concord-kanbun-add-word-class
                             word-superclass word-class
                             word-subclass1 word-subclass2
                             word-subclass3))
-         (concord-object-put mj-cobj '->word-class (list wc-cobj)))
-       (concord-object-put mj-cobj 'ja-form ja-form)
-       (concord-object-put mj-cobj 'ja-kana ja-kana)
-       (concord-object-put mj-cobj 'ja-conjugation-type ja-conj-type))
-      mj-cobj)))
+         (concord-object-put mjc-cobj '->word-class (list wc-cobj)))
+       (concord-object-put mjc-cobj 'ja-form ja-form)
+       (concord-object-put mjc-cobj 'ja-kana ja-kana)
+       (concord-object-put mjc-cobj 'ja-conjugation-type ja-conj-type))
+      mjc-cobj)))
 
 (defun concord-kanbun-parse-corpus-line (string)
-  (let* ((ret (split-string string "\t"))
-        (ret2 (split-string (nth 1 ret) ",")))
-    (list (car ret)
-         (car ret2)(nth 1 ret2)(nth 2 ret2)(nth 3 ret2)(nth 4 ret2)
-         (nth 6 ret2)
-         (nth 7 ret2)(nth 8 ret2)(nth 9 ret2))))
+  (let* ((ret (split-string string "\t*[;；]\\s *"))
+        entry features comment)
+    (if (and (setq comment (nth 1 ret))
+            (string-match "[ \t]+$" comment))
+       (setq comment (substring comment 0 (match-beginning 0))))
+    (setq ret (split-string (car ret) "\t"))
+    (setq entry (car ret)
+         features (split-string (nth 1 ret) ","))
+    (list entry
+         (car features)(nth 1 features)
+         (nth 2 features)(nth 3 features)(nth 4 features)
+         (nth 6 features)
+         (nth 7 features)(nth 8 features)(nth 9 features)
+         comment)))
 
 (defun concord-kanbun-add-corpus-line (string)
   (apply #'concord-kanbun-add-morpheme
                                    (nth 3 ret)(nth 4 ret))))
              (setq dest
                    (cons (apply #'concord-kanbun-add-morpheme ret)
-                         dest)))
+                         dest))
+             (goto-char (point-at-eol)))
             ;; (setq sentence-id
             ;;       (intern
             ;;        (concord-kanbun-encode-name-as-id