(defun concord-kanbun-corpus-insert-morpheme (morpheme)
- (let ((entry (concord-object-get morpheme '->entry@morpheme))
+ (let ((entry (or (concord-object-get morpheme '->entry@morpheme)
+ (concord-object-get morpheme '->entry@morpheme/misc)))
(word-class (concord-object-get morpheme '->word-class))
- (canonical-form (concord-object-get
- morpheme '->entry@morpheme/canonical))
+ (canonical-form (or (concord-object-get
+ morpheme '->entry@morpheme/canonical)
+ (concord-object-get
+ morpheme '->entry@morpheme/canonical/misc)))
(ja-form (concord-object-get morpheme 'ja-form))
(ja-kana (concord-object-get morpheme 'ja-kana))
(ja-conj-type (concord-object-get morpheme 'ja-conjugation-type))
(setq i (1+ i)))
(write-region (point-min)(point-max)
(expand-file-name source dest-dir)))))
+
+(defun concord-kanbun-dump-html-file (source dest-dir) ; Write the sentences stored under SOURCE as an HTML page in DEST-DIR.
+ (with-temp-buffer ; the page is assembled in a temporary buffer
+ (let ((coding-system-for-write 'utf-8-jp-er) ; bound for the write-region call at the end
+ (i 1) ; sentence index; ids are tried from SOURCE/1 upward
+ s-obj morphemes
+ source-base)
+ (insert "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
+ \"http://www.w3.org/TR/html4/loose.dtd\">
+<html lang=\"ja\">
+") ; doctype and opening <html> tag
+ (insert (format "<head>
+<title>%s</title>
+</head>\n"
+ source)) ; SOURCE is used verbatim as the page title
+ (insert "<body>\n")
+ (while (setq s-obj (concord-decode-object ; loop stops at the first index that yields no object
+ '=id (intern (format "%s/%s" source i))
+ 'sentence@zh-classical))
+ (when (setq morphemes (concord-object-get s-obj '->morphemes)) ; nothing is emitted for a sentence without ->morphemes
+ (insert (format "<a name=\"%d\"></a>\n" i)) ; named anchor carrying the sentence index
+ (insert "<div class=\"sentence\">\n")
+ (dolist (morpheme morphemes)
+ (concord-kanbun-corpus-insert-morpheme morpheme) ; NOTE(review): presumably inserts one newline-terminated line and leaves point after it -- confirm
+ (forward-line -1) ; go to the start of the previous line
+ (insert "<div class=\"morpheme\">") ; opening tag goes in at that position
+ (end-of-line)
+ (insert "</div>") ; closing tag goes at the end of the same line
+ (forward-line) ; continue from the following line
+ )
+ (insert "EOS\n") ; literal EOS marker line after the morphemes
+ (insert "</div>\n")
+ (insert "<br />\n")
+ )
+ (setq i (1+ i))) ; advance the index whether or not anything was emitted
+ (setq source-base
+ (if (string-match "\\.mc\\(\\.utf-8\\)?$" source) ; trailing ".mc" or ".mc.utf-8" is removed from SOURCE
+ (substring source 0 (match-beginning 0))
+ source)) ; otherwise SOURCE is kept unchanged
+ (insert "</body>
+</html>
+")
+ (write-region (point-min)(point-max)
+ (expand-file-name (concat source-base ".utf-8.html") ; output file name is <source-base>.utf-8.html
+ dest-dir)))))