New file.
author MORIOKA Tomohiko <tomo.git@chise.org>
Wed, 11 Sep 2013 15:42:04 +0000 (00:42 +0900)
committer MORIOKA Tomohiko <tomo.git@chise.org>
Wed, 11 Sep 2013 15:42:04 +0000 (00:42 +0900)
concord-kanbun-dump.el [new file with mode: 0644]

diff --git a/concord-kanbun-dump.el b/concord-kanbun-dump.el
new file mode 100644 (file)
index 0000000..12642b8
--- /dev/null
@@ -0,0 +1,44 @@
+(defun concord-kanbun-corpus-insert-morpheme (morpheme)
+  "Insert one tab/comma-separated line describing MORPHEME at point.
+Any field whose value is missing is written as \"*\"."
+  (let ((entry (concord-object-get morpheme '->entry@morpheme))
+       (word-class (concord-object-get morpheme '->word-class))
+       (canonical-form (concord-object-get
+                        morpheme '->entry@morpheme/canonical))
+       (ja-form (concord-object-get morpheme 'ja-form))
+       (ja-kana (concord-object-get morpheme 'ja-kana))
+       (ja-conj-type (concord-object-get morpheme 'ja-conjugation-type)))
+    (when entry
+      (setq entry (concord-object-get (car entry) '=name)))
+    ;; `split-string' signals an error on nil, so only split a real name.
+    (when word-class
+      (setq word-class (concord-object-get (car word-class) '=name)
+           word-class (and word-class (split-string word-class ","))))
+    (setq canonical-form
+         (if canonical-form
+             (concord-object-get (car canonical-form) '=name)
+           entry))
+    (insert
+     (format "%s\t%s,%s,%s,%s,*,*,%s,%s,%s,%s\n"
+            (or entry "*")
+            (or (car word-class) "*") (or (nth 1 word-class) "*")
+            (or (nth 2 word-class) "*") (or (nth 3 word-class) "*")
+            (or canonical-form "*")
+            ;; Without the fallback a missing value would print as "nil".
+            (or ja-form "*") (or ja-kana "*") (or ja-conj-type "*")))))
+
+(defun concord-kanbun-dump-file (source dest-dir)
+  "Dump sentences SOURCE/1, SOURCE/2, ... to the file SOURCE in DEST-DIR.
+Stop at the first id with no object; write \"EOS\" after each non-empty one."
+  (with-temp-buffer
+    (let ((coding-system-for-write 'utf-8-jp-er) (index 0) sentence tokens)
+      (while (setq index (1+ index)
+                  sentence (concord-decode-object
+                            '=id (intern (format "%s/%s" source index))
+                            'sentence@zh-classical))
+       (when (setq tokens (concord-object-get sentence '->morphemes))
+         (dolist (token tokens)
+           (concord-kanbun-corpus-insert-morpheme token))
+         (insert "EOS\n")))
+      (write-region (point-min) (point-max)
+                   (expand-file-name source dest-dir)))))