(defun concord-kanbun-corpus-insert-morpheme (morpheme)
  "Insert at point one corpus line describing MORPHEME.

All values are read from MORPHEME with `concord-object-get'.  The
line has the shape

  ENTRY<TAB>C0,C1,C2,C3,C4,*,CANONICAL,JA-FORM,JA-KANA,JA-CONJ-TYPE

followed by an optional comment and a newline.

ENTRY is the `=name' of the first object found in the
`->entry@morpheme' feature, or in `->entry@morpheme/misc' when the
former is nil.  C0..C4 are the first five comma-separated pieces of
the `=name' of the first `->word-class' object.  CANONICAL is the
`=name' of the first object in `->entry@morpheme/canonical' (or in
`->entry@morpheme/canonical/misc'); when neither exists it falls back
to ENTRY.  A missing ENTRY, C0..C4 or CANONICAL is written as \"*\".
The `ja-form', `ja-kana' and `ja-conjugation-type' values are handed
to `format' as they are, so a nil value is printed as \"nil\".

When the `=name' of MORPHEME itself, split with the separator regexp
used below, yields a second piece, that piece is appended to the line
after a tab and a semicolon."
  ;; Fetch every raw feature first, in a fixed order, then derive the
  ;; printable values from them.
  (let ((entry-objs (or (concord-object-get morpheme '->entry@morpheme)
                        (concord-object-get morpheme '->entry@morpheme/misc)))
        (class-objs (concord-object-get morpheme '->word-class))
        (canonical-objs
         (or (concord-object-get morpheme '->entry@morpheme/canonical)
             (concord-object-get morpheme '->entry@morpheme/canonical/misc)))
        (reading-form (concord-object-get morpheme 'ja-form))
        (reading-kana (concord-object-get morpheme 'ja-kana))
        (conj-type (concord-object-get morpheme 'ja-conjugation-type))
        (own-name (concord-object-get morpheme '=name)))
    (let* ((entry-name (and entry-objs
                            (concord-object-get (car entry-objs) '=name)))
           ;; List of the comma-separated parts of the word-class name.
           (class-fields (and class-objs
                              (split-string
                               (concord-object-get (car class-objs) '=name)
                               ",")))
           ;; No canonical entry: reuse the entry name (which may be nil).
           (canonical-name (if canonical-objs
                               (concord-object-get (car canonical-objs) '=name)
                             entry-name))
           ;; Only the second piece of the split name is kept as comment.
           (note (and own-name
                      (nth 1 (split-string own-name "\t;\\s "))))
           (suffix (if note
                       (format "\t; %s" note)
                     "")))
      (insert
       (format "%s\t%s,%s,%s,%s,%s,*,%s,%s,%s,%s%s\n"
               (or entry-name "*")
               (or (nth 0 class-fields) "*")
               (or (nth 1 class-fields) "*")
               (or (nth 2 class-fields) "*")
               (or (nth 3 class-fields) "*")
               (or (nth 4 class-fields) "*")
               (or canonical-name "*")
               reading-form
               reading-kana
               conj-type
               suffix)))))

(defun concord-kanbun-dump-file (source dest-dir)
  "Write the morphemes of the sentences numbered under SOURCE to a file.

Sentence objects are requested from `concord-decode-object' by the
`=id' symbols SOURCE/1, SOURCE/2, ... (with `sentence@zh-classical'
as third argument); the walk stops at the first number for which nil
is returned.  For every sentence whose `->morphemes' feature is
non-nil, each morpheme is written with
`concord-kanbun-corpus-insert-morpheme' and the group is closed by an
\"EOS\" line.

The text is assembled in a temporary buffer and written with
`write-region', while `coding-system-for-write' is bound to
`utf-8-jp-er', to the file named SOURCE inside DEST-DIR."
  (with-temp-buffer
    (let ((coding-system-for-write 'utf-8-jp-er)
          (seq 1)
          sentence)
      (while (setq sentence
                   (concord-decode-object
                    '=id (intern (format "%s/%s" source seq))
                    'sentence@zh-classical))
        (let ((units (concord-object-get sentence '->morphemes)))
          ;; Sentences without morphemes produce no output at all.
          (when units
            (dolist (unit units)
              (concord-kanbun-corpus-insert-morpheme unit))
            (insert "EOS\n")))
        (setq seq (1+ seq)))
      (write-region (point-min) (point-max)
                    (expand-file-name source dest-dir)))))

(defun concord-kanbun-dump-html-file (source dest-dir)
  "Write the morphemes of the sentences under SOURCE to an HTML-named file.

Sentences are enumerated exactly as in `concord-kanbun-dump-file':
`=id' symbols SOURCE/1, SOURCE/2, ... are passed to
`concord-decode-object' until it returns nil.  A header containing
SOURCE is inserted first.  For every sentence with a non-nil
`->morphemes' feature a leading separator is inserted, then each
morpheme line produced by `concord-kanbun-corpus-insert-morpheme' is
surrounded by an inserted string before and after it, and the group
is closed by an \"EOS\" line plus trailing separators.

The output file name is SOURCE with a trailing \".mc\" or
\".mc.utf-8\" removed and \".utf-8.html\" appended, placed in
DEST-DIR.  It is written with `coding-system-for-write' bound to
`utf-8-jp-er'."
  ;; NOTE(review): the whitespace-only literals below, and the `format'
  ;; call whose control string has no directive, look as if they may
  ;; once have carried markup.  They are reproduced verbatim here --
  ;; confirm against the upstream file before relying on the output.
  (with-temp-buffer
    (let ((coding-system-for-write 'utf-8-jp-er)
          (seq 1)
          sentence)
      ;; Header.
      (insert " "
              (format " %s \n" source)
              "\n")
      (while (setq sentence
                   (concord-decode-object
                    '=id (intern (format "%s/%s" source seq))
                    'sentence@zh-classical))
        (let ((units (concord-object-get sentence '->morphemes)))
          (when units
            ;; Per-sentence opening; SEQ is passed to `format' but the
            ;; control string does not consume it.
            (insert (format "\n" seq)
                    "\n\n")
            (dolist (unit units)
              (concord-kanbun-corpus-insert-morpheme unit)
              ;; Step back onto the line just written and wrap it:
              ;; one string in front of it, one at its end.
              (forward-line -1)
              (insert "\n")
              (end-of-line)
              (insert "\n")
              (forward-line))
            ;; Per-sentence closing.
            (insert "EOS\n"
                    "\n\n"
                    "\n\n")))
        (setq seq (1+ seq)))
      ;; Strip a trailing ".mc" / ".mc.utf-8" to obtain the base name.
      (let ((stem (if (string-match "\\.mc\\(\\.utf-8\\)?$" source)
                      (substring source 0 (match-beginning 0))
                    source)))
        ;; Footer.
        (insert " ")
        (write-region (point-min) (point-max)
                      (expand-file-name (concat stem ".utf-8.html")
                                        dest-dir))))))