word-subclass1 word-subclass2
word-subclass3
canonical-form
- ja-form ja-kana ja-conj-type)
+ ja-form ja-kana ja-conj-type
+ comment)
(let* (entry-cobj
canonical-entry-cobj
wc-cobj wc-name
mm-name
- mj-name mj-id mj-cobj)
+ mj-name
+ mjc-name mjc-id mjc-cobj)
(when entry
(setq wc-name
(if (or (null word-subclass3)
entry canonical-form wc-name))
(setq mj-name (format "%s (%s (%s),%s)"
mm-name ja-form ja-kana ja-conj-type))
- (unless (setq mj-cobj (concord-decode-object
- '=name mj-name 'morpheme@zh-classical))
- (setq mj-id (intern (concord-kanbun-encode-name-as-id mj-name)))
- (setq mj-cobj (concord-make-object 'morpheme@zh-classical mj-id))
- (concord-object-put mj-cobj '=name mj-name)
+ (setq mjc-name
+ (if comment
+ (format "%s\t; %s" mj-name comment)
+ mj-name))
+ (unless (setq mjc-cobj (concord-decode-object
+ '=name mjc-name 'morpheme@zh-classical))
+ (setq mjc-id (intern (concord-kanbun-encode-name-as-id mjc-name)))
+ (setq mjc-cobj (concord-make-object 'morpheme@zh-classical mjc-id))
+ (concord-object-put mjc-cobj '=name mjc-name)
(when (setq entry-cobj (concord-kanbun-add-morpheme-entry entry))
- (concord-object-put mj-cobj '->entry@morpheme (list entry-cobj)))
+ (concord-object-put mjc-cobj '->entry@morpheme (list entry-cobj)))
(unless (string= entry canonical-form)
(when (setq canonical-entry-cobj
(concord-kanbun-add-morpheme-entry canonical-form))
- (concord-object-put mj-cobj '->entry@morpheme/canonical
+ (concord-object-put mjc-cobj '->entry@morpheme/canonical
(list canonical-entry-cobj))))
(when (setq wc-cobj (concord-kanbun-add-word-class
word-superclass word-class
word-subclass1 word-subclass2
word-subclass3))
- (concord-object-put mj-cobj '->word-class (list wc-cobj)))
- (concord-object-put mj-cobj 'ja-form ja-form)
- (concord-object-put mj-cobj 'ja-kana ja-kana)
- (concord-object-put mj-cobj 'ja-conjugation-type ja-conj-type))
- mj-cobj)))
+ (concord-object-put mjc-cobj '->word-class (list wc-cobj)))
+ (concord-object-put mjc-cobj 'ja-form ja-form)
+ (concord-object-put mjc-cobj 'ja-kana ja-kana)
+ (concord-object-put mjc-cobj 'ja-conjugation-type ja-conj-type))
+ mjc-cobj)))
;; Parse one corpus line STRING into a flat list of fields.
;;
;; In this patch the lines marked `-' are the earlier body and the
;; lines marked `+' are its replacement.  Both return the first
;; tab-separated field followed by items 0-4 and 6-9 of the second
;; tab-separated field split on commas (item 5 is not included).
;; The `+' version additionally appends COMMENT as the last element:
;;   (ENTRY F0 F1 F2 F3 F4 F6 F7 F8 F9 COMMENT)
;; COMMENT is nil when STRING contains no comment delimiter.
;;
;; NOTE(review): the returned list is presumably applied to
;; `concord-kanbun-add-morpheme' -- confirm against the caller.
;; NOTE(review): if STRING has no tab, (nth 1 ret) is nil and is
;; passed to `split-string' as is -- confirm inputs always have a tab.
(defun concord-kanbun-parse-corpus-line (string)
- (let* ((ret (split-string string "\t"))
- (ret2 (split-string (nth 1 ret) ",")))
- (list (car ret)
- (car ret2)(nth 1 ret2)(nth 2 ret2)(nth 3 ret2)(nth 4 ret2)
- (nth 6 ret2)
- (nth 7 ret2)(nth 8 ret2)(nth 9 ret2))))
;; Split off the comment part.  The separator is: any tabs, then one
;; character from the bracket set, then any whitespace-syntax chars.
;; Only the piece right after the first separator is kept as the
;; comment; later pieces of the split are not used.
;; NOTE(review): the escape sequence inside the bracket looks like an
;; ISO-2022-JP encoded full-width semicolon -- confirm the coding
;; system this file is read with.
+ (let* ((ret (split-string string "\t*[;\e$B!(\e(B]\\s *"))
+ entry features comment)
+ (if (and (setq comment (nth 1 ret))
+ (string-match "[ \t]+$" comment))
+ (setq comment (substring comment 0 (match-beginning 0))))
;; From here on RET is rebound to the tab-separated fields of the
;; text that preceded the comment (trailing spaces/tabs of the
;; comment were removed above).
+ (setq ret (split-string (car ret) "\t"))
+ (setq entry (car ret)
+ features (split-string (nth 1 ret) ","))
;; Build the result; feature index 5 is skipped, as in the `-' body.
+ (list entry
+ (car features)(nth 1 features)
+ (nth 2 features)(nth 3 features)(nth 4 features)
+ (nth 6 features)
+ (nth 7 features)(nth 8 features)(nth 9 features)
+ comment)))
(defun concord-kanbun-add-corpus-line (string)
(apply #'concord-kanbun-add-morpheme
(nth 3 ret)(nth 4 ret))))
(setq dest
(cons (apply #'concord-kanbun-add-morpheme ret)
- dest)))
+ dest))
+ (goto-char (point-at-eol)))
;; (setq sentence-id
;; (intern
;; (concord-kanbun-encode-name-as-id