1 (concord-assign-genre 'entry@zh-classical "/usr/local/var/kanbun/db")
2 (concord-assign-genre 'word-class@zh-classical "/usr/local/var/kanbun/db")
3 (concord-assign-genre 'morpheme@zh-classical "/usr/local/var/kanbun/db")
4 (concord-assign-genre 'sentence@zh-classical "/usr/local/var/kanbun/db")
5 (mount-char-attribute-table '*instance@morpheme-entry/zh-classical)
7 (defun concord-kanbun-encode-name-as-id (name)
14 ((or (and (<= ?A c)(<= c ?Z))
15 (and (<= ?a c)(< c ?u))
16 (and (< ?u c)(< c ?z))
17 (and (<= ?0 c)(<= c ?9)))
20 ((setq ucs (or (encode-char c '=ucs@JP)
24 ((setq ret (encode-char c '=ruimoku-v6))
27 ((setq ret (encode-char c '=jef-china3))
31 (error "Unknown character %c" c)
32 ;; (format "m%08X" (char-id c))
37 (defun concord-kanbun-add-sentence-entry (entry)
38 (let (entry-id me-cobj)
39 (unless (setq me-cobj (concord-decode-object '=name entry
41 (setq entry-id (intern (concord-kanbun-encode-name-as-id entry)))
42 (setq me-cobj (concord-make-object 'entry@zh-classical entry-id))
43 (concord-object-put me-cobj '=name entry))
46 (defun concord-kanbun-add-morpheme-entry (entry)
47 (let ((len (length entry))
49 entry-id me-cobj chr ret ucs)
50 (unless (setq me-cobj (concord-decode-object
53 (setq entry-id (intern (concord-kanbun-encode-name-as-id entry)))
54 (setq me-cobj (concord-make-object 'entry@zh-classical entry-id))
55 (concord-object-put me-cobj '=name entry)
57 (concord-object-put me-cobj 'character (list (aref entry 0))))
59 (setq chr (aref entry i))
60 (if (setq ucs (or (encode-char chr '=ucs@JP)
62 (setq chr (decode-char '=ucs ucs)))
63 (setq ret (get-char-attribute
64 chr '*instance@morpheme-entry/zh-classical))
65 (unless (member me-cobj ret)
66 (put-char-attribute chr '*instance@morpheme-entry/zh-classical
69 (save-char-attribute-table '*instance@morpheme-entry/zh-classical))
72 ;; (save-char-attribute-table '*instance@morpheme-entry/zh-classical)
74 (defun concord-kanbun-add-word-class (word-superclass
76 word-subclass1 word-subclass2
78 (let (wc-name wc-cobj wc-name-id)
79 (when (and word-superclass
80 (not (string= word-superclass "*")))
81 (setq wc-name word-superclass)
83 (not (string= word-class "*")))
85 (format "%s,%s" wc-name word-class))
86 (when (and word-subclass1
87 (not (string= word-subclass1 "*")))
89 (format "%s,%s" wc-name word-subclass1))
90 (when (and word-subclass2
91 (not (string= word-subclass2 "*")))
93 (format "%s,%s" wc-name word-subclass2))
94 (when (and word-subclass3
95 (not (string= word-subclass3 "*")))
97 (format "%s,%s" wc-name word-subclass3)))))))
98 (unless (setq wc-cobj (concord-decode-object
100 'word-class@zh-classical))
101 (setq wc-name-id (intern (concord-kanbun-encode-name-as-id wc-name)))
102 (setq wc-cobj (concord-make-object
103 'word-class@zh-classical wc-name-id))
104 (concord-object-put wc-cobj '=name wc-name))
108 (defun concord-kanbun-add-morpheme (entry word-superclass word-class
109 word-subclass1 word-subclass2
112 ja-form ja-kana ja-conj-type)
117 mj-name mj-id mj-cobj)
120 (if (or (null word-subclass3)
121 (string-equal word-subclass3 "*"))
122 (format "%s,%s,%s,%s"
123 word-superclass word-class
124 word-subclass1 word-subclass2)
125 (format "%s,%s,%s,%s,%s"
126 word-superclass word-class
127 word-subclass1 word-subclass2
129 (setq mm-name (format "%s (%s) [%s]"
130 entry canonical-form wc-name))
131 (setq mj-name (format "%s (%s (%s),%s)"
132 mm-name ja-form ja-kana ja-conj-type))
133 (unless (setq mj-cobj (concord-decode-object
134 '=name mj-name 'morpheme@zh-classical))
135 (setq mj-id (intern (concord-kanbun-encode-name-as-id mj-name)))
136 (setq mj-cobj (concord-make-object 'morpheme@zh-classical mj-id))
137 (concord-object-put mj-cobj '=name mj-name)
138 (when (setq entry-cobj (concord-kanbun-add-morpheme-entry entry))
139 (concord-object-put mj-cobj '->entry@morpheme (list entry-cobj)))
140 (unless (string= entry canonical-form)
141 (when (setq canonical-entry-cobj
142 (concord-kanbun-add-morpheme-entry canonical-form))
143 (concord-object-put mj-cobj '->entry@morpheme/canonical
144 (list canonical-entry-cobj))))
145 (when (setq wc-cobj (concord-kanbun-add-word-class
146 word-superclass word-class
147 word-subclass1 word-subclass2
149 (concord-object-put mj-cobj '->word-class (list wc-cobj)))
150 (concord-object-put mj-cobj 'ja-form ja-form)
151 (concord-object-put mj-cobj 'ja-kana ja-kana)
152 (concord-object-put mj-cobj 'ja-conjugation-type ja-conj-type))
155 (defun concord-kanbun-parse-corpus-line (string)
156 (let* ((ret (split-string string "\t"))
157 (ret2 (split-string (nth 1 ret) ",")))
159 (car ret2)(nth 1 ret2)(nth 2 ret2)(nth 3 ret2)(nth 4 ret2)
161 (nth 7 ret2)(nth 8 ret2)(nth 9 ret2))))
163 (defun concord-kanbun-add-corpus-line (string)
164 (apply #'concord-kanbun-add-morpheme
165 (concord-kanbun-parse-corpus-line string)))
167 (defun concord-kanbun-read-sentence (sentence-number &optional source-name)
169 (setq source-name (file-name-nondirectory buffer-file-name)))
175 sentence-id-name sentence-id sentence-cobj
179 (when (search-forward "\nEOS\n" nil t)
180 (setq end (match-beginning 0)
183 (while (search-forward "\t" end t)
184 (setq ret (concord-kanbun-parse-corpus-line
185 (buffer-substring (point-at-bol)(point-at-eol))))
186 (setq sentence (concat sentence (car ret)))
188 (concat sentence-name
191 (format "%s[%s,%s,%s]"
194 (nth 3 ret)(nth 4 ret))))
196 (cons (apply #'concord-kanbun-add-morpheme ret)
200 ;; (concord-kanbun-encode-name-as-id
201 ;; (format "%s/%d" source-name sentence-number))))
202 (setq sentence-id-name
203 (format "%s/%d" source-name sentence-number))
204 (setq sentence-id (intern sentence-id-name))
205 (unless (setq sentence-cobj
206 (concord-decode-object
207 '=id sentence-id 'sentence@zh-classical))
210 'sentence@zh-classical sentence-id))
212 sentence-cobj '=name (format "%s(%s)"
213 sentence-name sentence-id-name))
214 ;; (concord-object-put
215 ;; sentence-cobj '=name (format "%s(%s)"
216 ;; sentence sentence-id-name))
219 sentence-cobj 'source/file-name source-name)
221 sentence-cobj 'source/sentence-number sentence-number)
223 sentence-cobj '->morphemes (nreverse dest))
224 (when (setq sentence-entry-cobj
225 (concord-kanbun-add-sentence-entry sentence))
227 sentence-cobj '->entry@sentence (list sentence-entry-cobj)))
232 (defun concord-kanbun-read-buffer (&optional source-name)
235 (goto-char (point-min))
237 (setq source-name (file-name-nondirectory buffer-file-name)))
239 (while (concord-kanbun-read-sentence i source-name)
240 (message (format "%s: sentence #%d is stored." source-name i))
243 (defun concord-kanbun-batch-read-file ()
244 (set-terminal-coding-system 'utf-8-jp-er)
245 (let ((file (pop command-line-args-left))
246 (coding-system-for-read 'utf-8-jp-er)
247 (file-name-coding-system 'utf-8-jp-er))
249 (insert-file-contents file)
250 (concord-kanbun-read-buffer (file-name-nondirectory file)))))