1 (defun concord-kanbun-corpus-insert-morpheme (morpheme)
2 (let ((entry (or (concord-object-get morpheme '->entry@morpheme)
3 (concord-object-get morpheme '->entry@morpheme/misc)))
4 (word-class (concord-object-get morpheme '->word-class))
5 (canonical-form (or (concord-object-get
6 morpheme '->entry@morpheme/canonical)
8 morpheme '->entry@morpheme/canonical/misc)))
9 (ja-form (concord-object-get morpheme 'ja-form))
10 (ja-kana (concord-object-get morpheme 'ja-kana))
11 (ja-conj-type (concord-object-get morpheme 'ja-conjugation-type))
12 (name (concord-object-get morpheme '=name))
15 (setq entry (concord-object-get (car entry) '=name)))
19 (concord-object-get (car word-class) '=name)
23 (concord-object-get (car canonical-form) '=name)
27 (nth 1 (split-string name "\t;\\s "))))
29 (format "%s\t%s,%s,%s,%s,%s,*,%s,%s,%s,%s%s\n"
31 (or (car word-class) "*")
32 (or (nth 1 word-class) "*")
33 (or (nth 2 word-class) "*")
34 (or (nth 3 word-class) "*")
35 (or (nth 4 word-class) "*")
36 (or canonical-form "*")
37 ja-form ja-kana ja-conj-type
39 (format "\t; %s" comment)
42 (defun concord-kanbun-dump-file (source dest-dir)
44 (let ((coding-system-for-write 'utf-8-jp-er)
47 (while (setq s-obj (concord-decode-object
48 '=id (intern (format "%s/%s" source i))
49 'sentence@zh-classical))
50 (when (setq morphemes (concord-object-get s-obj '->morphemes))
51 (dolist (morpheme morphemes)
52 (concord-kanbun-corpus-insert-morpheme morpheme))
55 (write-region (point-min)(point-max)
56 (expand-file-name source dest-dir)))))
58 (defun concord-kanbun-dump-html-file (source dest-dir)
60 (let ((coding-system-for-write 'utf-8-jp-er)
64 (insert "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\"
65 \"http://www.w3.org/TR/html4/loose.dtd\">
68 (insert (format "<head>
73 (while (setq s-obj (concord-decode-object
74 '=id (intern (format "%s/%s" source i))
75 'sentence@zh-classical))
76 (when (setq morphemes (concord-object-get s-obj '->morphemes))
77 (insert (format "<a name=\"%d\"></a>\n" i))
78 (insert "<div class=\"sentence\">\n")
79 (dolist (morpheme morphemes)
80 (concord-kanbun-corpus-insert-morpheme morpheme)
82 (insert "<div class=\"morpheme\">")
93 (if (string-match "\\.mc\\(\\.utf-8\\)?$" source)
94 (substring source 0 (match-beginning 0))
99 (write-region (point-min)(point-max)
100 (expand-file-name (concat source-base ".utf-8.html")