--- /dev/null
+(concord-assign-genre 'morpheme-entry@zh-classical "/usr/local/var/kanbun/db")
+(concord-assign-genre 'word-class@zh-classical "/usr/local/var/kanbun/db")
+(concord-assign-genre 'morpheme@zh-classical "/usr/local/var/kanbun/db")
+(concord-assign-genre 'sentence@zh-classical "/usr/local/var/kanbun/db")
+(concord-assign-genre 'sentence-entry@zh-classical "/usr/local/var/kanbun/db")
+(mount-char-attribute-table '*instance@morpheme-entry/zh-classical)
+
+(defun concord-kanbun-encode-name-as-id (name)
+ (let (ucs ret)
+ (mapconcat (lambda (c)
+ (cond
+ ((eq c ?\ )
+ "_"
+ )
+ ((or (and (<= ?A c)(<= c ?Z))
+ (and (<= ?a c)(< c ?u))
+ (and (< ?u c)(< c ?z))
+ (and (<= ?0 c)(<= c ?9)))
+ (char-to-string c)
+ )
+ ((setq ucs (or (encode-char c '=ucs@JP)
+ (char-ucs c)))
+ (format "u%04X" ucs)
+ )
+ ((setq ret (encode-char c '=ruimoku-v6))
+ (format "r%04X" ret)
+ )
+ ((setq ret (encode-char c '=jef-china3))
+ (format "j%04X" ret)
+ )
+ (t
+ (error "Unknown character %c" c)
+ ;; (format "m%08X" (char-id c))
+ )))
+ name
+ "")))
+
+(defun concord-kanbun-add-sentence-entry (entry)
+ (let (entry-id me-cobj)
+ (unless (setq me-cobj (concord-decode-object
+ '=name entry
+ 'sentence-entry@zh-classical))
+ (setq entry-id (intern (concord-kanbun-encode-name-as-id entry)))
+ (setq me-cobj (concord-make-object
+ 'sentence-entry@zh-classical entry-id))
+ (concord-object-put me-cobj '=name entry))
+ me-cobj))
+
+(defun concord-kanbun-add-morpheme-entry (entry)
+ (let ((len (length entry))
+ (i 0)
+ entry-id me-cobj chr ret ucs)
+ (unless (setq me-cobj (concord-decode-object
+ '=name entry
+ 'morpheme-entry@zh-classical))
+ (setq entry-id (intern (concord-kanbun-encode-name-as-id entry)))
+ (setq me-cobj (concord-make-object
+ 'morpheme-entry@zh-classical entry-id))
+ (concord-object-put me-cobj '=name entry)
+ (if (= len 1)
+ (concord-object-put me-cobj 'character (list (aref entry 0))))
+ (while (< i len)
+ (setq chr (aref entry i))
+ (if (setq ucs (or (encode-char chr '=ucs@JP)
+ (char-ucs chr)))
+ (setq chr (decode-char '=ucs ucs)))
+ (setq ret (get-char-attribute
+ chr '*instance@morpheme-entry/zh-classical))
+ (unless (member me-cobj ret)
+ (put-char-attribute chr '*instance@morpheme-entry/zh-classical
+ (cons me-cobj ret)))
+ (setq i (1+ i)))
+ (save-char-attribute-table '*instance@morpheme-entry/zh-classical))
+ me-cobj))
+
+;; (save-char-attribute-table '*instance@morpheme-entry/zh-classical)
+
+(defun concord-kanbun-add-word-class (word-superclass
+ word-class
+ word-subclass1 word-subclass2)
+ (let (wc-name wc-cobj wc-name-id)
+ (when (and word-superclass
+ (not (string= word-superclass "*")))
+ (setq wc-name word-superclass)
+ (when (and word-class
+ (not (string= word-class "*")))
+ (setq wc-name
+ (format "%s,%s" wc-name word-class))
+ (when (and word-subclass1
+ (not (string= word-subclass1 "*")))
+ (setq wc-name
+ (format "%s,%s" wc-name word-subclass1))
+ (when (and word-subclass2
+ (not (string= word-subclass2 "*")))
+ (setq wc-name
+ (format "%s,%s" wc-name word-subclass2))))))
+ (unless (setq wc-cobj (concord-decode-object
+ '=name wc-name
+ 'word-class@zh-classical))
+ (setq wc-name-id (intern (concord-kanbun-encode-name-as-id wc-name)))
+ (setq wc-cobj (concord-make-object
+ 'word-class@zh-classical wc-name-id))
+ (concord-object-put wc-cobj '=name wc-name))
+ wc-cobj))
+
+
+(defun concord-kanbun-add-morpheme (entry word-superclass word-class
+ word-subclass1 word-subclass2
+ canonical-form
+ ja-form ja-kana ja-conj-type)
+ (let* (entry-cobj
+ wc-cobj wc-name
+ mm-name
+ mj-name mj-id mj-cobj)
+ (when entry
+ (setq wc-name (format "%s,%s,%s,%s"
+ word-superclass word-class
+ word-subclass1 word-subclass2))
+ (setq mm-name (format "%s (%s) [%s]"
+ entry canonical-form wc-name))
+ (setq mj-name (format "%s (%s (%s),%s)"
+ mm-name ja-form ja-kana ja-conj-type))
+ (unless (setq mj-cobj (concord-decode-object
+ '=name mj-name 'morpheme@zh-classical))
+ (setq mj-id (intern (concord-kanbun-encode-name-as-id mj-name)))
+ (setq mj-cobj (concord-make-object 'morpheme@zh-classical mj-id))
+ (concord-object-put mj-cobj '=name mj-name)
+ (when (setq entry-cobj (concord-kanbun-add-morpheme-entry entry))
+ (concord-object-put mj-cobj '->entry (list entry-cobj)))
+ (when (setq wc-cobj (concord-kanbun-add-word-class
+ word-superclass word-class
+ word-subclass1 word-subclass2))
+ (concord-object-put mj-cobj '->word-class (list wc-cobj)))
+ (concord-object-put mj-cobj 'ja-form ja-form)
+ (concord-object-put mj-cobj 'ja-kana ja-kana)
+ (concord-object-put mj-cobj 'ja-conjugation-type ja-conj-type))
+ mj-cobj)))
+
+(defun concord-kanbun-parse-corpus-line (string)
+ (let* ((ret (split-string string "\t"))
+ (ret2 (split-string (nth 1 ret) ",")))
+ (list (car ret)
+ (car ret2)(nth 1 ret2)(nth 2 ret2)(nth 3 ret2)
+ (nth 6 ret2)
+ (nth 7 ret2)(nth 8 ret2)(nth 9 ret2))))
+
+(defun concord-kanbun-add-corpus-line (string)
+ (apply #'concord-kanbun-add-morpheme
+ (concord-kanbun-parse-corpus-line string)))
+
+(defun concord-kanbun-read-sentence (sentence-number &optional source-name)
+ (unless source-name
+ (setq source-name (file-name-nondirectory buffer-file-name)))
+ (let ((beg (point))
+ end send
+ ret
+ sentence dest
+ sentence-id-name sentence-id sentence-cobj
+ sentence-entry-cobj)
+ (prog1
+ (save-excursion
+ (when (search-forward "\nEOS\n" nil t)
+ (setq end (match-beginning 0)
+ send (match-end 0))
+ (goto-char beg)
+ (while (search-forward "\t" end t)
+ (setq ret (concord-kanbun-parse-corpus-line
+ (buffer-substring (point-at-bol)(point-at-eol))))
+ (setq sentence (concat sentence (car ret)))
+ (setq dest
+ (cons (apply #'concord-kanbun-add-morpheme ret)
+ dest)))
+ ;; (setq sentence-id
+ ;; (intern
+ ;; (concord-kanbun-encode-name-as-id
+ ;; (format "%s/%d" source-name sentence-number))))
+ (setq sentence-id-name
+ (format "%s/%d" source-name sentence-number))
+ (setq sentence-id (intern sentence-id-name))
+ (unless (setq sentence-cobj
+ (concord-decode-object
+ '=id sentence-id 'sentence@zh-classical))
+ (setq sentence-cobj
+ (concord-make-object
+ 'sentence@zh-classical sentence-id))
+ (concord-object-put
+ sentence-cobj '=name (format "%s(%s)"
+ sentence sentence-id-name)))
+ (concord-object-put
+ sentence-cobj 'source/file-name source-name)
+ (concord-object-put
+ sentence-cobj 'source/sentence-number sentence-number)
+ (concord-object-put
+ sentence-cobj '->morphemes (nreverse dest))
+ (when (setq sentence-entry-cobj
+ (concord-kanbun-add-sentence-entry sentence))
+ (concord-object-put
+ sentence-cobj '->entry (list sentence-entry-cobj)))
+ sentence-cobj))
+ (if send
+ (goto-char send)))))
+
+(defun concord-kanbun-read-buffer (&optional source-name)
+ (interactive)
+ (save-excursion
+ (goto-char (point-min))
+ (unless source-name
+ (setq source-name (file-name-nondirectory buffer-file-name)))
+ (let ((i 1))
+ (while (concord-kanbun-read-sentence i source-name)
+ (setq i (1+ i))))))
+++ /dev/null
-(concord-assign-genre 'morpheme-entry@zh-classical "/usr/local/var/kanbun/db")
-(concord-assign-genre 'word-class@zh-classical "/usr/local/var/kanbun/db")
-(concord-assign-genre 'morpheme@zh-classical "/usr/local/var/kanbun/db")
-(concord-assign-genre 'sentence@zh-classical "/usr/local/var/kanbun/db")
-(concord-assign-genre 'sentence-entry@zh-classical "/usr/local/var/kanbun/db")
-(mount-char-attribute-table '*instance@morpheme-entry/zh-classical)
-
-(defun concord-kanbun-encode-name-as-id (name)
- (let (ucs ret)
- (mapconcat (lambda (c)
- (cond
- ((eq c ?\ )
- "_"
- )
- ((or (and (<= ?A c)(<= c ?Z))
- (and (<= ?a c)(< c ?u))
- (and (< ?u c)(< c ?z))
- (and (<= ?0 c)(<= c ?9)))
- (char-to-string c)
- )
- ((setq ucs (or (encode-char c '=ucs@JP)
- (char-ucs c)))
- (format "u%04X" ucs)
- )
- ((setq ret (encode-char c '=ruimoku-v6))
- (format "r%04X" ret)
- )
- ((setq ret (encode-char c '=jef-china3))
- (format "j%04X" ret)
- )
- (t
- (error "Unknown character %c" c)
- ;; (format "m%08X" (char-id c))
- )))
- name
- "")))
-
-(defun concord-kanbun-add-sentence-entry (entry)
- (let (entry-id me-cobj)
- (unless (setq me-cobj (concord-decode-object
- '=name entry
- 'sentence-entry@zh-classical))
- (setq entry-id (intern (concord-kanbun-encode-name-as-id entry)))
- (setq me-cobj (concord-make-object
- 'sentence-entry@zh-classical entry-id))
- (concord-object-put me-cobj '=name entry))
- me-cobj))
-
-(defun concord-kanbun-add-morpheme-entry (entry)
- (let ((len (length entry))
- (i 0)
- entry-id me-cobj chr ret ucs)
- (unless (setq me-cobj (concord-decode-object
- '=name entry
- 'morpheme-entry@zh-classical))
- (setq entry-id (intern (concord-kanbun-encode-name-as-id entry)))
- (setq me-cobj (concord-make-object
- 'morpheme-entry@zh-classical entry-id))
- (concord-object-put me-cobj '=name entry)
- (if (= len 1)
- (concord-object-put me-cobj 'character (list (aref entry 0))))
- (while (< i len)
- (setq chr (aref entry i))
- (if (setq ucs (or (encode-char chr '=ucs@JP)
- (char-ucs chr)))
- (setq chr (decode-char '=ucs ucs)))
- (setq ret (get-char-attribute
- chr '*instance@morpheme-entry/zh-classical))
- (unless (member me-cobj ret)
- (put-char-attribute chr '*instance@morpheme-entry/zh-classical
- (cons me-cobj ret)))
- (setq i (1+ i)))
- (save-char-attribute-table '*instance@morpheme-entry/zh-classical))
- me-cobj))
-
-;; (save-char-attribute-table '*instance@morpheme-entry/zh-classical)
-
-(defun concord-kanbun-add-word-class (word-superclass
- word-class
- word-subclass1 word-subclass2)
- (let (wc-name wc-cobj wc-name-id)
- (when (and word-superclass
- (not (string= word-superclass "*")))
- (setq wc-name word-superclass)
- (when (and word-class
- (not (string= word-class "*")))
- (setq wc-name
- (format "%s,%s" wc-name word-class))
- (when (and word-subclass1
- (not (string= word-subclass1 "*")))
- (setq wc-name
- (format "%s,%s" wc-name word-subclass1))
- (when (and word-subclass2
- (not (string= word-subclass2 "*")))
- (setq wc-name
- (format "%s,%s" wc-name word-subclass2))))))
- (unless (setq wc-cobj (concord-decode-object
- '=name wc-name
- 'word-class@zh-classical))
- (setq wc-name-id (intern (concord-kanbun-encode-name-as-id wc-name)))
- (setq wc-cobj (concord-make-object
- 'word-class@zh-classical wc-name-id))
- (concord-object-put wc-cobj '=name wc-name))
- wc-cobj))
-
-
-(defun concord-kanbun-add-morpheme (entry word-superclass word-class
- word-subclass1 word-subclass2
- canonical-form
- ja-form ja-kana ja-conj-type)
- (let* (entry-cobj
- wc-cobj wc-name
- mm-name
- mj-name mj-id mj-cobj)
- (when entry
- (setq wc-name (format "%s,%s,%s,%s"
- word-superclass word-class
- word-subclass1 word-subclass2))
- (setq mm-name (format "%s (%s) [%s]"
- entry canonical-form wc-name))
- (setq mj-name (format "%s (%s (%s),%s)"
- mm-name ja-form ja-kana ja-conj-type))
- (unless (setq mj-cobj (concord-decode-object
- '=name mj-name 'morpheme@zh-classical))
- (setq mj-id (intern (concord-kanbun-encode-name-as-id mj-name)))
- (setq mj-cobj (concord-make-object 'morpheme@zh-classical mj-id))
- (concord-object-put mj-cobj '=name mj-name)
- (when (setq entry-cobj (concord-kanbun-add-morpheme-entry entry))
- (concord-object-put mj-cobj '->entry (list entry-cobj)))
- (when (setq wc-cobj (concord-kanbun-add-word-class
- word-superclass word-class
- word-subclass1 word-subclass2))
- (concord-object-put mj-cobj '->word-class (list wc-cobj)))
- (concord-object-put mj-cobj 'ja-form ja-form)
- (concord-object-put mj-cobj 'ja-kana ja-kana)
- (concord-object-put mj-cobj 'ja-conjugation-type ja-conj-type))
- mj-cobj)))
-
-(defun concord-kanbun-parse-corpus-line (string)
- (let* ((ret (split-string string "\t"))
- (ret2 (split-string (nth 1 ret) ",")))
- (list (car ret)
- (car ret2)(nth 1 ret2)(nth 2 ret2)(nth 3 ret2)
- (nth 6 ret2)
- (nth 7 ret2)(nth 8 ret2)(nth 9 ret2))))
-
-(defun concord-kanbun-add-corpus-line (string)
- (apply #'concord-kanbun-add-morpheme
- (concord-kanbun-parse-corpus-line string)))
-
-(defun concord-kanbun-read-sentence (sentence-number &optional source-name)
- (unless source-name
- (setq source-name (file-name-nondirectory buffer-file-name)))
- (let ((beg (point))
- end send
- ret
- sentence dest
- sentence-id-name sentence-id sentence-cobj
- sentence-entry-cobj)
- (prog1
- (save-excursion
- (when (search-forward "\nEOS\n" nil t)
- (setq end (match-beginning 0)
- send (match-end 0))
- (goto-char beg)
- (while (search-forward "\t" end t)
- (setq ret (concord-kanbun-parse-corpus-line
- (buffer-substring (point-at-bol)(point-at-eol))))
- (setq sentence (concat sentence (car ret)))
- (setq dest
- (cons (apply #'concord-kanbun-add-morpheme ret)
- dest)))
- ;; (setq sentence-id
- ;; (intern
- ;; (concord-kanbun-encode-name-as-id
- ;; (format "%s/%d" source-name sentence-number))))
- (setq sentence-id-name
- (format "%s/%d" source-name sentence-number))
- (setq sentence-id (intern sentence-id-name))
- (unless (setq sentence-cobj
- (concord-decode-object
- '=id sentence-id 'sentence@zh-classical))
- (setq sentence-cobj
- (concord-make-object
- 'sentence@zh-classical sentence-id))
- (concord-object-put
- sentence-cobj '=name (format "%s(%s)"
- sentence sentence-id-name)))
- (concord-object-put
- sentence-cobj 'source/file-name source-name)
- (concord-object-put
- sentence-cobj 'source/sentence-number sentence-number)
- (concord-object-put
- sentence-cobj '->morphemes (nreverse dest))
- (when (setq sentence-entry-cobj
- (concord-kanbun-add-sentence-entry sentence))
- (concord-object-put
- sentence-cobj '->entry (list sentence-entry-cobj)))
- sentence-cobj))
- (if send
- (goto-char send)))))
-
-(defun concord-kanbun-read-buffer (&optional source-name)
- (interactive)
- (save-excursion
- (goto-char (point-min))
- (unless source-name
- (setq source-name (file-name-nondirectory buffer-file-name)))
- (let ((i 1))
- (while (concord-kanbun-read-sentence i source-name)
- (setq i (1+ i))))))