From: MORIOKA Tomohiko Date: Sun, 15 Sep 2013 10:21:40 +0000 (+0900) Subject: (concord-kanbun-add-morpheme): Add new argument comment. X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1bf6026f886f64a0b1c2a51333da69beb41302d3;p=chise%2Fconcord-kanbun.git (concord-kanbun-add-morpheme): Add new argument comment. (concord-kanbun-parse-corpus-line): Support comment. (concord-kanbun-read-sentence): Likewise. --- diff --git a/concord-kanbun-dic.el b/concord-kanbun-dic.el index 1758998..6131b0f 100644 --- a/concord-kanbun-dic.el +++ b/concord-kanbun-dic.el @@ -109,12 +109,14 @@ word-subclass1 word-subclass2 word-subclass3 canonical-form - ja-form ja-kana ja-conj-type) + ja-form ja-kana ja-conj-type + comment) (let* (entry-cobj canonical-entry-cobj wc-cobj wc-name mm-name - mj-name mj-id mj-cobj) + mj-name + mjc-name mjc-id mjc-cobj) (when entry (setq wc-name (if (or (null word-subclass3) @@ -130,35 +132,47 @@ entry canonical-form wc-name)) (setq mj-name (format "%s (%s (%s),%s)" mm-name ja-form ja-kana ja-conj-type)) - (unless (setq mj-cobj (concord-decode-object - '=name mj-name 'morpheme@zh-classical)) - (setq mj-id (intern (concord-kanbun-encode-name-as-id mj-name))) - (setq mj-cobj (concord-make-object 'morpheme@zh-classical mj-id)) - (concord-object-put mj-cobj '=name mj-name) + (setq mjc-name + (if comment + (format "%s\t; %s" mj-name comment) + mj-name)) + (unless (setq mjc-cobj (concord-decode-object + '=name mjc-name 'morpheme@zh-classical)) + (setq mjc-id (intern (concord-kanbun-encode-name-as-id mjc-name))) + (setq mjc-cobj (concord-make-object 'morpheme@zh-classical mjc-id)) + (concord-object-put mjc-cobj '=name mjc-name) (when (setq entry-cobj (concord-kanbun-add-morpheme-entry entry)) - (concord-object-put mj-cobj '->entry@morpheme (list entry-cobj))) + (concord-object-put mjc-cobj '->entry@morpheme (list entry-cobj))) (unless (string= entry canonical-form) (when (setq canonical-entry-cobj (concord-kanbun-add-morpheme-entry canonical-form)) - (concord-object-put mj-cobj '->entry@morpheme/canonical + (concord-object-put mjc-cobj '->entry@morpheme/canonical (list canonical-entry-cobj)))) (when (setq wc-cobj (concord-kanbun-add-word-class word-superclass word-class word-subclass1 word-subclass2 word-subclass3)) - (concord-object-put mj-cobj '->word-class (list wc-cobj))) - (concord-object-put mj-cobj 'ja-form ja-form) - (concord-object-put mj-cobj 'ja-kana ja-kana) - (concord-object-put mj-cobj 'ja-conjugation-type ja-conj-type)) - mj-cobj))) + (concord-object-put mjc-cobj '->word-class (list wc-cobj))) + (concord-object-put mjc-cobj 'ja-form ja-form) + (concord-object-put mjc-cobj 'ja-kana ja-kana) + (concord-object-put mjc-cobj 'ja-conjugation-type ja-conj-type)) + mjc-cobj))) (defun concord-kanbun-parse-corpus-line (string) - (let* ((ret (split-string string "\t")) - (ret2 (split-string (nth 1 ret) ","))) - (list (car ret) - (car ret2)(nth 1 ret2)(nth 2 ret2)(nth 3 ret2)(nth 4 ret2) - (nth 6 ret2) - (nth 7 ret2)(nth 8 ret2)(nth 9 ret2)))) + (let* ((ret (split-string string "\t*[;;]\\s *")) + entry features comment) + (if (and (setq comment (nth 1 ret)) + (string-match "[ \t]+$" comment)) + (setq comment (substring comment 0 (match-beginning 0)))) + (setq ret (split-string (car ret) "\t")) + (setq entry (car ret) + features (split-string (nth 1 ret) ",")) + (list entry + (car features)(nth 1 features) + (nth 2 features)(nth 3 features)(nth 4 features) + (nth 6 features) + (nth 7 features)(nth 8 features)(nth 9 features) + comment))) (defun concord-kanbun-add-corpus-line (string) (apply #'concord-kanbun-add-morpheme @@ -194,7 +208,8 @@ (nth 3 ret)(nth 4 ret)))) (setq dest (cons (apply #'concord-kanbun-add-morpheme ret) - dest))) + dest)) + (goto-char (point-at-eol))) ;; (setq sentence-id ;; (intern ;; (concord-kanbun-encode-name-as-id