1 (concord-assign-genre 'morpheme-entry@zh-classical "/usr/local/var/kanbun/db")
2 (concord-assign-genre 'word-class@zh-classical "/usr/local/var/kanbun/db")
3 (concord-assign-genre 'morpheme@zh-classical "/usr/local/var/kanbun/db")
4 (concord-assign-genre 'sentence@zh-classical "/usr/local/var/kanbun/db")
5 (concord-assign-genre 'sentence-entry@zh-classical "/usr/local/var/kanbun/db")
6 (mount-char-attribute-table '*instance@morpheme-entry/zh-classical)
8 (defun concord-kanbun-encode-name-as-id (name)
10 (mapconcat (lambda (c)
15 ((or (and (<= ?A c)(<= c ?Z))
16 (and (<= ?a c)(< c ?u))
17 (and (< ?u c)(< c ?z))
18 (and (<= ?0 c)(<= c ?9)))
21 ((setq ucs (or (encode-char c '=ucs@JP)
25 ((setq ret (encode-char c '=ruimoku-v6))
28 ((setq ret (encode-char c '=jef-china3))
32 (error "Unknown character %c" c)
33 ;; (format "m%08X" (char-id c))
38 (defun concord-kanbun-add-sentence-entry (entry)
39 (let (entry-id me-cobj)
40 (unless (setq me-cobj (concord-decode-object
42 'sentence-entry@zh-classical))
43 (setq entry-id (intern (concord-kanbun-encode-name-as-id entry)))
44 (setq me-cobj (concord-make-object
45 'sentence-entry@zh-classical entry-id))
46 (concord-object-put me-cobj '=name entry))
49 (defun concord-kanbun-add-morpheme-entry (entry)
50 (let ((len (length entry))
52 entry-id me-cobj chr ret ucs)
53 (unless (setq me-cobj (concord-decode-object
55 'morpheme-entry@zh-classical))
56 (setq entry-id (intern (concord-kanbun-encode-name-as-id entry)))
57 (setq me-cobj (concord-make-object
58 'morpheme-entry@zh-classical entry-id))
59 (concord-object-put me-cobj '=name entry)
61 (concord-object-put me-cobj 'character (list (aref entry 0))))
63 (setq chr (aref entry i))
64 (if (setq ucs (or (encode-char chr '=ucs@JP)
66 (setq chr (decode-char '=ucs ucs)))
67 (setq ret (get-char-attribute
68 chr '*instance@morpheme-entry/zh-classical))
69 (unless (member me-cobj ret)
70 (put-char-attribute chr '*instance@morpheme-entry/zh-classical
73 (save-char-attribute-table '*instance@morpheme-entry/zh-classical))
76 ;; (save-char-attribute-table '*instance@morpheme-entry/zh-classical)
78 (defun concord-kanbun-add-word-class (word-superclass
80 word-subclass1 word-subclass2)
81 (let (wc-name wc-cobj wc-name-id)
82 (when (and word-superclass
83 (not (string= word-superclass "*")))
84 (setq wc-name word-superclass)
86 (not (string= word-class "*")))
88 (format "%s,%s" wc-name word-class))
89 (when (and word-subclass1
90 (not (string= word-subclass1 "*")))
92 (format "%s,%s" wc-name word-subclass1))
93 (when (and word-subclass2
94 (not (string= word-subclass2 "*")))
96 (format "%s,%s" wc-name word-subclass2))))))
97 (unless (setq wc-cobj (concord-decode-object
99 'word-class@zh-classical))
100 (setq wc-name-id (intern (concord-kanbun-encode-name-as-id wc-name)))
101 (setq wc-cobj (concord-make-object
102 'word-class@zh-classical wc-name-id))
103 (concord-object-put wc-cobj '=name wc-name))
107 (defun concord-kanbun-add-morpheme (entry word-superclass word-class
108 word-subclass1 word-subclass2
110 ja-form ja-kana ja-conj-type)
114 mj-name mj-id mj-cobj)
116 (setq wc-name (format "%s,%s,%s,%s"
117 word-superclass word-class
118 word-subclass1 word-subclass2))
119 (setq mm-name (format "%s (%s) [%s]"
120 entry canonical-form wc-name))
121 (setq mj-name (format "%s (%s (%s),%s)"
122 mm-name ja-form ja-kana ja-conj-type))
123 (unless (setq mj-cobj (concord-decode-object
124 '=name mj-name 'morpheme@zh-classical))
125 (setq mj-id (intern (concord-kanbun-encode-name-as-id mj-name)))
126 (setq mj-cobj (concord-make-object 'morpheme@zh-classical mj-id))
127 (concord-object-put mj-cobj '=name mj-name)
128 (when (setq entry-cobj (concord-kanbun-add-morpheme-entry entry))
129 (concord-object-put mj-cobj '->entry (list entry-cobj)))
130 (when (setq wc-cobj (concord-kanbun-add-word-class
131 word-superclass word-class
132 word-subclass1 word-subclass2))
133 (concord-object-put mj-cobj '->word-class (list wc-cobj)))
134 (concord-object-put mj-cobj 'ja-form ja-form)
135 (concord-object-put mj-cobj 'ja-kana ja-kana)
136 (concord-object-put mj-cobj 'ja-conjugation-type ja-conj-type))
139 (defun concord-kanbun-parse-corpus-line (string)
140 (let* ((ret (split-string string "\t"))
141 (ret2 (split-string (nth 1 ret) ",")))
143 (car ret2)(nth 1 ret2)(nth 2 ret2)(nth 3 ret2)
145 (nth 7 ret2)(nth 8 ret2)(nth 9 ret2))))
147 (defun concord-kanbun-add-corpus-line (string)
148 (apply #'concord-kanbun-add-morpheme
149 (concord-kanbun-parse-corpus-line string)))
151 (defun concord-kanbun-read-sentence (sentence-number &optional source-name)
153 (setq source-name (file-name-nondirectory buffer-file-name)))
158 sentence-id-name sentence-id sentence-cobj
162 (when (search-forward "\nEOS\n" nil t)
163 (setq end (match-beginning 0)
166 (while (search-forward "\t" end t)
167 (setq ret (concord-kanbun-parse-corpus-line
168 (buffer-substring (point-at-bol)(point-at-eol))))
169 (setq sentence (concat sentence (car ret)))
171 (cons (apply #'concord-kanbun-add-morpheme ret)
175 ;; (concord-kanbun-encode-name-as-id
176 ;; (format "%s/%d" source-name sentence-number))))
177 (setq sentence-id-name
178 (format "%s/%d" source-name sentence-number))
179 (setq sentence-id (intern sentence-id-name))
180 (unless (setq sentence-cobj
181 (concord-decode-object
182 '=id sentence-id 'sentence@zh-classical))
185 'sentence@zh-classical sentence-id))
187 sentence-cobj '=name (format "%s(%s)"
188 sentence sentence-id-name)))
190 sentence-cobj 'source/file-name source-name)
192 sentence-cobj 'source/sentence-number sentence-number)
194 sentence-cobj '->morphemes (nreverse dest))
195 (when (setq sentence-entry-cobj
196 (concord-kanbun-add-sentence-entry sentence))
198 sentence-cobj '->entry (list sentence-entry-cobj)))
203 (defun concord-kanbun-read-buffer (&optional source-name)
206 (goto-char (point-min))
208 (setq source-name (file-name-nondirectory buffer-file-name)))
210 (while (concord-kanbun-read-sentence i source-name)