(concord-kanbun-corpus-insert-morpheme): Support non-mkwcs morphemes.
author MORIOKA Tomohiko <tomo.git@chise.org>
Fri, 31 Oct 2014 12:55:29 +0000 (21:55 +0900)
committer MORIOKA Tomohiko <tomo.git@chise.org>
Fri, 31 Oct 2014 12:55:29 +0000 (21:55 +0900)
(concord-kanbun-dump-html-file): New function.

concord-kanbun-dump.el

index 2a808ef..d79ed5c 100644 (file)
@@ -1,8 +1,11 @@
 (defun concord-kanbun-corpus-insert-morpheme (morpheme)
-  (let ((entry (concord-object-get morpheme '->entry@morpheme))
+  (let ((entry (or (concord-object-get morpheme '->entry@morpheme)
+                  (concord-object-get morpheme '->entry@morpheme/misc)))
        (word-class (concord-object-get morpheme '->word-class))
-       (canonical-form (concord-object-get
-                        morpheme '->entry@morpheme/canonical))
+       (canonical-form (or (concord-object-get
+                            morpheme '->entry@morpheme/canonical)
+                           (concord-object-get
+                            morpheme '->entry@morpheme/canonical/misc)))
        (ja-form (concord-object-get morpheme 'ja-form))
        (ja-kana (concord-object-get morpheme 'ja-kana))
        (ja-conj-type (concord-object-get morpheme 'ja-conjugation-type))
        (setq i (1+ i)))
       (write-region (point-min)(point-max)
                    (expand-file-name source dest-dir)))))
+
+(defun concord-kanbun-dump-html-file (source dest-dir) ; Dump the sentences stored under ids "SOURCE/1", "SOURCE/2", ... as an HTML file in DEST-DIR.
+  (with-temp-buffer ; The whole page is assembled in a temporary buffer, then written out at the end.
+    (let ((coding-system-for-write 'utf-8-jp-er) ; Coding system in effect for the final `write-region'.
+         (i 1) ; Sentence counter; also used as the numeric part of the sentence id and anchor name.
+         s-obj morphemes ; Current sentence object and its morpheme list.
+         source-base) ; SOURCE with a trailing ".mc" / ".mc.utf-8" removed (computed below).
+      (insert "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
+            \"http://www.w3.org/TR/html4/loose.dtd\">
+<html lang=\"ja\">
+") ; End of the fixed document preamble.
+      (insert (format "<head>
+<title>%s</title>
+</head>\n" ; SOURCE is interpolated into the title as-is (no HTML escaping is done here).
+                     source))
+      (insert "<body>\n")
+      (while (setq s-obj (concord-decode-object ; Loop until the first id for which no object is returned.
+                         '=id (intern (format "%s/%s" source i)) ; Id symbol has the form SOURCE/I.
+                         'sentence@zh-classical))
+       (when (setq morphemes (concord-object-get s-obj '->morphemes)) ; Sentences without morphemes emit nothing.
+         (insert (format "<a name=\"%d\"></a>\n" i)) ; Named anchor carrying the sentence number.
+         (insert "<div class=\"sentence\">\n")
+         (dolist (morpheme morphemes)
+           (concord-kanbun-corpus-insert-morpheme morpheme) ; NOTE(review): presumably inserts one newline-terminated line and leaves point after it -- confirm.
+           (forward-line -1) ; Move back to the start of the previous line.
+           (insert "<div class=\"morpheme\">") ; Opening tag goes at the beginning of that line.
+           (end-of-line)
+           (insert "</div>") ; Closing tag goes at the end of the same line.
+           (forward-line) ; Step to the following line before handling the next morpheme.
+           )
+         (insert "EOS\n") ; Literal "EOS" line emitted after the morphemes of each sentence.
+         (insert "</div>\n")
+         (insert "<br />\n")
+         )
+       (setq i (1+ i))) ; Advance to the next sentence id whether or not anything was emitted.
+      (setq source-base
+           (if (string-match "\\.mc\\(\\.utf-8\\)?$" source) ; Matches a trailing ".mc" or ".mc.utf-8".
+               (substring source 0 (match-beginning 0)) ; Keep only the part before that suffix.
+             source)) ; No such suffix: use SOURCE unchanged.
+      (insert "</body>
+</html>
+") ; Close the document.
+      (write-region (point-min)(point-max) ; Write the entire buffer contents.
+                   (expand-file-name (concat source-base ".utf-8.html") ; Output file name is SOURCE-BASE + ".utf-8.html".
+                                     dest-dir))))) ; The file name is resolved relative to DEST-DIR.