From: MORIOKA Tomohiko Date: Fri, 31 Oct 2014 12:55:29 +0000 (+0900) Subject: (concord-kanbun-corpus-insert-morpheme): Support non-mkwcs morphemes. X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=c37509b3aa8c7e64616d170038fbc760ca3898a7;p=chise%2Fconcord-kanbun.git (concord-kanbun-corpus-insert-morpheme): Support non-mkwcs morphemes. (concord-kanbun-dump-html-file): New function. --- diff --git a/concord-kanbun-dump.el b/concord-kanbun-dump.el index 2a808ef..d79ed5c 100644 --- a/concord-kanbun-dump.el +++ b/concord-kanbun-dump.el @@ -1,8 +1,11 @@ (defun concord-kanbun-corpus-insert-morpheme (morpheme) - (let ((entry (concord-object-get morpheme '->entry@morpheme)) + (let ((entry (or (concord-object-get morpheme '->entry@morpheme) + (concord-object-get morpheme '->entry@morpheme/misc))) (word-class (concord-object-get morpheme '->word-class)) - (canonical-form (concord-object-get - morpheme '->entry@morpheme/canonical)) + (canonical-form (or (concord-object-get + morpheme '->entry@morpheme/canonical) + (concord-object-get + morpheme '->entry@morpheme/canonical/misc))) (ja-form (concord-object-get morpheme 'ja-form)) (ja-kana (concord-object-get morpheme 'ja-kana)) (ja-conj-type (concord-object-get morpheme 'ja-conjugation-type)) @@ -51,3 +54,48 @@ (setq i (1+ i))) (write-region (point-min)(point-max) (expand-file-name source dest-dir))))) + +(defun concord-kanbun-dump-html-file (source dest-dir) + (with-temp-buffer + (let ((coding-system-for-write 'utf-8-jp-er) + (i 1) + s-obj morphemes + source-base) + (insert " + +") + (insert (format " +%s +\n" + source)) + (insert "\n") + (while (setq s-obj (concord-decode-object + '=id (intern (format "%s/%s" source i)) + 'sentence@zh-classical)) + (when (setq morphemes (concord-object-get s-obj '->morphemes)) + (insert (format "\n" i)) + (insert "
\n") + (dolist (morpheme morphemes) + (concord-kanbun-corpus-insert-morpheme morpheme) + (forward-line -1) + (insert "
") + (end-of-line) + (insert "
") + (forward-line) + ) + (insert "EOS\n") + (insert "
\n") + (insert "
\n") + ) + (setq i (1+ i))) + (setq source-base + (if (string-match "\\.mc\\(\\.utf-8\\)?$" source) + (substring source 0 (match-beginning 0)) + source)) + (insert " + +") + (write-region (point-min)(point-max) + (expand-file-name (concat source-base ".utf-8.html") + dest-dir)))))