From: MORIOKA Tomohiko Date: Wed, 4 Sep 2013 13:10:12 +0000 (+0900) Subject: Renamed. X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=c5b009ae7afd901c6ad64e3839fe112129fc77a9;p=chise%2Fconcord-kanbun.git Renamed. --- diff --git a/concord-kanbun-dic.el b/concord-kanbun-dic.el new file mode 100644 index 0000000..1cfcd70 --- /dev/null +++ b/concord-kanbun-dic.el @@ -0,0 +1,211 @@ +(concord-assign-genre 'morpheme-entry@zh-classical "/usr/local/var/kanbun/db") +(concord-assign-genre 'word-class@zh-classical "/usr/local/var/kanbun/db") +(concord-assign-genre 'morpheme@zh-classical "/usr/local/var/kanbun/db") +(concord-assign-genre 'sentence@zh-classical "/usr/local/var/kanbun/db") +(concord-assign-genre 'sentence-entry@zh-classical "/usr/local/var/kanbun/db") +(mount-char-attribute-table '*instance@morpheme-entry/zh-classical) + +(defun concord-kanbun-encode-name-as-id (name) + (let (ucs ret) + (mapconcat (lambda (c) + (cond + ((eq c ?\ ) + "_" + ) + ((or (and (<= ?A c)(<= c ?Z)) + (and (<= ?a c)(< c ?u)) + (and (< ?u c)(< c ?z)) + (and (<= ?0 c)(<= c ?9))) + (char-to-string c) + ) + ((setq ucs (or (encode-char c '=ucs@JP) + (char-ucs c))) + (format "u%04X" ucs) + ) + ((setq ret (encode-char c '=ruimoku-v6)) + (format "r%04X" ret) + ) + ((setq ret (encode-char c '=jef-china3)) + (format "j%04X" ret) + ) + (t + (error "Unknown character %c" c) + ;; (format "m%08X" (char-id c)) + ))) + name + ""))) + +(defun concord-kanbun-add-sentence-entry (entry) + (let (entry-id me-cobj) + (unless (setq me-cobj (concord-decode-object + '=name entry + 'sentence-entry@zh-classical)) + (setq entry-id (intern (concord-kanbun-encode-name-as-id entry))) + (setq me-cobj (concord-make-object + 'sentence-entry@zh-classical entry-id)) + (concord-object-put me-cobj '=name entry)) + me-cobj)) + +(defun concord-kanbun-add-morpheme-entry (entry) + (let ((len (length entry)) + (i 0) + entry-id me-cobj chr ret ucs) + (unless (setq me-cobj (concord-decode-object + '=name entry + 'morpheme-entry@zh-classical)) + (setq entry-id (intern (concord-kanbun-encode-name-as-id entry))) + (setq me-cobj (concord-make-object + 'morpheme-entry@zh-classical entry-id)) + (concord-object-put me-cobj '=name entry) + (if (= len 1) + (concord-object-put me-cobj 'character (list (aref entry 0)))) + (while (< i len) + (setq chr (aref entry i)) + (if (setq ucs (or (encode-char chr '=ucs@JP) + (char-ucs chr))) + (setq chr (decode-char '=ucs ucs))) + (setq ret (get-char-attribute + chr '*instance@morpheme-entry/zh-classical)) + (unless (member me-cobj ret) + (put-char-attribute chr '*instance@morpheme-entry/zh-classical + (cons me-cobj ret))) + (setq i (1+ i))) + (save-char-attribute-table '*instance@morpheme-entry/zh-classical)) + me-cobj)) + +;; (save-char-attribute-table '*instance@morpheme-entry/zh-classical) + +(defun concord-kanbun-add-word-class (word-superclass + word-class + word-subclass1 word-subclass2) + (let (wc-name wc-cobj wc-name-id) + (when (and word-superclass + (not (string= word-superclass "*"))) + (setq wc-name word-superclass) + (when (and word-class + (not (string= word-class "*"))) + (setq wc-name + (format "%s,%s" wc-name word-class)) + (when (and word-subclass1 + (not (string= word-subclass1 "*"))) + (setq wc-name + (format "%s,%s" wc-name word-subclass1)) + (when (and word-subclass2 + (not (string= word-subclass2 "*"))) + (setq wc-name + (format "%s,%s" wc-name word-subclass2)))))) + (unless (setq wc-cobj (concord-decode-object + '=name wc-name + 'word-class@zh-classical)) + (setq wc-name-id (intern (concord-kanbun-encode-name-as-id wc-name))) + (setq wc-cobj (concord-make-object + 'word-class@zh-classical wc-name-id)) + (concord-object-put wc-cobj '=name wc-name)) + wc-cobj)) + + +(defun concord-kanbun-add-morpheme (entry word-superclass word-class + word-subclass1 word-subclass2 + canonical-form + ja-form ja-kana ja-conj-type) + (let* (entry-cobj + wc-cobj wc-name + mm-name + mj-name mj-id mj-cobj) + (when entry + (setq wc-name (format "%s,%s,%s,%s" + word-superclass word-class + word-subclass1 word-subclass2)) + (setq mm-name (format "%s (%s) [%s]" + entry canonical-form wc-name)) + (setq mj-name (format "%s (%s (%s),%s)" + mm-name ja-form ja-kana ja-conj-type)) + (unless (setq mj-cobj (concord-decode-object + '=name mj-name 'morpheme@zh-classical)) + (setq mj-id (intern (concord-kanbun-encode-name-as-id mj-name))) + (setq mj-cobj (concord-make-object 'morpheme@zh-classical mj-id)) + (concord-object-put mj-cobj '=name mj-name) + (when (setq entry-cobj (concord-kanbun-add-morpheme-entry entry)) + (concord-object-put mj-cobj '->entry (list entry-cobj))) + (when (setq wc-cobj (concord-kanbun-add-word-class + word-superclass word-class + word-subclass1 word-subclass2)) + (concord-object-put mj-cobj '->word-class (list wc-cobj))) + (concord-object-put mj-cobj 'ja-form ja-form) + (concord-object-put mj-cobj 'ja-kana ja-kana) + (concord-object-put mj-cobj 'ja-conjugation-type ja-conj-type)) + mj-cobj))) + +(defun concord-kanbun-parse-corpus-line (string) + (let* ((ret (split-string string "\t")) + (ret2 (split-string (nth 1 ret) ","))) + (list (car ret) + (car ret2)(nth 1 ret2)(nth 2 ret2)(nth 3 ret2) + (nth 6 ret2) + (nth 7 ret2)(nth 8 ret2)(nth 9 ret2)))) + +(defun concord-kanbun-add-corpus-line (string) + (apply #'concord-kanbun-add-morpheme + (concord-kanbun-parse-corpus-line string))) + +(defun concord-kanbun-read-sentence (sentence-number &optional source-name) + (unless source-name + (setq source-name (file-name-nondirectory buffer-file-name))) + (let ((beg (point)) + end send + ret + sentence dest + sentence-id-name sentence-id sentence-cobj + sentence-entry-cobj) + (prog1 + (save-excursion + (when (search-forward "\nEOS\n" nil t) + (setq end (match-beginning 0) + send (match-end 0)) + (goto-char beg) + (while (search-forward "\t" end t) + (setq ret (concord-kanbun-parse-corpus-line + (buffer-substring (point-at-bol)(point-at-eol)))) + (setq sentence (concat sentence (car ret))) + (setq dest + (cons (apply #'concord-kanbun-add-morpheme ret) + dest))) + ;; (setq sentence-id + ;; (intern + ;; (concord-kanbun-encode-name-as-id + ;; (format "%s/%d" source-name sentence-number)))) + (setq sentence-id-name + (format "%s/%d" source-name sentence-number)) + (setq sentence-id (intern sentence-id-name)) + (unless (setq sentence-cobj + (concord-decode-object + '=id sentence-id 'sentence@zh-classical)) + (setq sentence-cobj + (concord-make-object + 'sentence@zh-classical sentence-id)) + (concord-object-put + sentence-cobj '=name (format "%s(%s)" + sentence sentence-id-name))) + (concord-object-put + sentence-cobj 'source/file-name source-name) + (concord-object-put + sentence-cobj 'source/sentence-number sentence-number) + (concord-object-put + sentence-cobj '->morphemes (nreverse dest)) + (when (setq sentence-entry-cobj + (concord-kanbun-add-sentence-entry sentence)) + (concord-object-put + sentence-cobj '->entry (list sentence-entry-cobj))) + sentence-cobj)) + (if send + (goto-char send))))) + +(defun concord-kanbun-read-buffer (&optional source-name) + (interactive) + (save-excursion + (goto-char (point-min)) + (unless source-name + (setq source-name (file-name-nondirectory buffer-file-name))) + (let ((i 1)) + (while (concord-kanbun-read-sentence i source-name) + (setq i (1+ i)))))) diff --git a/est-kanbun-dic.el b/est-kanbun-dic.el deleted file mode 100644 index 1cfcd70..0000000 --- a/est-kanbun-dic.el +++ /dev/null @@ -1,211 +0,0 @@ -(concord-assign-genre 'morpheme-entry@zh-classical "/usr/local/var/kanbun/db") -(concord-assign-genre 'word-class@zh-classical "/usr/local/var/kanbun/db") -(concord-assign-genre 'morpheme@zh-classical "/usr/local/var/kanbun/db") -(concord-assign-genre 'sentence@zh-classical "/usr/local/var/kanbun/db") -(concord-assign-genre 'sentence-entry@zh-classical "/usr/local/var/kanbun/db") -(mount-char-attribute-table '*instance@morpheme-entry/zh-classical) - -(defun concord-kanbun-encode-name-as-id (name) - (let (ucs ret) - (mapconcat (lambda (c) - (cond - ((eq c ?\ ) - "_" - ) - ((or (and (<= ?A c)(<= c ?Z)) - (and (<= ?a c)(< c ?u)) - (and (< ?u c)(< c ?z)) - (and (<= ?0 c)(<= c ?9))) - (char-to-string c) - ) - ((setq ucs (or (encode-char c '=ucs@JP) - (char-ucs c))) - (format "u%04X" ucs) - ) - ((setq ret (encode-char c '=ruimoku-v6)) - (format "r%04X" ret) - ) - ((setq ret (encode-char c '=jef-china3)) - (format "j%04X" ret) - ) - (t - (error "Unknown character %c" c) - ;; (format "m%08X" (char-id c)) - ))) - name - ""))) - -(defun concord-kanbun-add-sentence-entry (entry) - (let (entry-id me-cobj) - (unless (setq me-cobj (concord-decode-object - '=name entry - 'sentence-entry@zh-classical)) - (setq entry-id (intern (concord-kanbun-encode-name-as-id entry))) - (setq me-cobj (concord-make-object - 'sentence-entry@zh-classical entry-id)) - (concord-object-put me-cobj '=name entry)) - me-cobj)) - -(defun concord-kanbun-add-morpheme-entry (entry) - (let ((len (length entry)) - (i 0) - entry-id me-cobj chr ret ucs) - (unless (setq me-cobj (concord-decode-object - '=name entry - 'morpheme-entry@zh-classical)) - (setq entry-id (intern (concord-kanbun-encode-name-as-id entry))) - (setq me-cobj (concord-make-object - 'morpheme-entry@zh-classical entry-id)) - (concord-object-put me-cobj '=name entry) - (if (= len 1) - (concord-object-put me-cobj 'character (list (aref entry 0)))) - (while (< i len) - (setq chr (aref entry i)) - (if (setq ucs (or (encode-char chr '=ucs@JP) - (char-ucs chr))) - (setq chr (decode-char '=ucs ucs))) - (setq ret (get-char-attribute - chr '*instance@morpheme-entry/zh-classical)) - (unless (member me-cobj ret) - (put-char-attribute chr '*instance@morpheme-entry/zh-classical - (cons me-cobj ret))) - (setq i (1+ i))) - (save-char-attribute-table '*instance@morpheme-entry/zh-classical)) - me-cobj)) - -;; (save-char-attribute-table '*instance@morpheme-entry/zh-classical) - -(defun concord-kanbun-add-word-class (word-superclass - word-class - word-subclass1 word-subclass2) - (let (wc-name wc-cobj wc-name-id) - (when (and word-superclass - (not (string= word-superclass "*"))) - (setq wc-name word-superclass) - (when (and word-class - (not (string= word-class "*"))) - (setq wc-name - (format "%s,%s" wc-name word-class)) - (when (and word-subclass1 - (not (string= word-subclass1 "*"))) - (setq wc-name - (format "%s,%s" wc-name word-subclass1)) - (when (and word-subclass2 - (not (string= word-subclass2 "*"))) - (setq wc-name - (format "%s,%s" wc-name word-subclass2)))))) - (unless (setq wc-cobj (concord-decode-object - '=name wc-name - 'word-class@zh-classical)) - (setq wc-name-id (intern (concord-kanbun-encode-name-as-id wc-name))) - (setq wc-cobj (concord-make-object - 'word-class@zh-classical wc-name-id)) - (concord-object-put wc-cobj '=name wc-name)) - wc-cobj)) - - -(defun concord-kanbun-add-morpheme (entry word-superclass word-class - word-subclass1 word-subclass2 - canonical-form - ja-form ja-kana ja-conj-type) - (let* (entry-cobj - wc-cobj wc-name - mm-name - mj-name mj-id mj-cobj) - (when entry - (setq wc-name (format "%s,%s,%s,%s" - word-superclass word-class - word-subclass1 word-subclass2)) - (setq mm-name (format "%s (%s) [%s]" - entry canonical-form wc-name)) - (setq mj-name (format "%s (%s (%s),%s)" - mm-name ja-form ja-kana ja-conj-type)) - (unless (setq mj-cobj (concord-decode-object - '=name mj-name 'morpheme@zh-classical)) - (setq mj-id (intern (concord-kanbun-encode-name-as-id mj-name))) - (setq mj-cobj (concord-make-object 'morpheme@zh-classical mj-id)) - (concord-object-put mj-cobj '=name mj-name) - (when (setq entry-cobj (concord-kanbun-add-morpheme-entry entry)) - (concord-object-put mj-cobj '->entry (list entry-cobj))) - (when (setq wc-cobj (concord-kanbun-add-word-class - word-superclass word-class - word-subclass1 word-subclass2)) - (concord-object-put mj-cobj '->word-class (list wc-cobj))) - (concord-object-put mj-cobj 'ja-form ja-form) - (concord-object-put mj-cobj 'ja-kana ja-kana) - (concord-object-put mj-cobj 'ja-conjugation-type ja-conj-type)) - mj-cobj))) - -(defun concord-kanbun-parse-corpus-line (string) - (let* ((ret (split-string string "\t")) - (ret2 (split-string (nth 1 ret) ","))) - (list (car ret) - (car ret2)(nth 1 ret2)(nth 2 ret2)(nth 3 ret2) - (nth 6 ret2) - (nth 7 ret2)(nth 8 ret2)(nth 9 ret2)))) - -(defun concord-kanbun-add-corpus-line (string) - (apply #'concord-kanbun-add-morpheme - (concord-kanbun-parse-corpus-line string))) - -(defun concord-kanbun-read-sentence (sentence-number &optional source-name) - (unless source-name - (setq source-name (file-name-nondirectory buffer-file-name))) - (let ((beg (point)) - end send - ret - sentence dest - sentence-id-name sentence-id sentence-cobj - sentence-entry-cobj) - (prog1 - (save-excursion - (when (search-forward "\nEOS\n" nil t) - (setq end (match-beginning 0) - send (match-end 0)) - (goto-char beg) - (while (search-forward "\t" end t) - (setq ret (concord-kanbun-parse-corpus-line - (buffer-substring (point-at-bol)(point-at-eol)))) - (setq sentence (concat sentence (car ret))) - (setq dest - (cons (apply #'concord-kanbun-add-morpheme ret) - dest))) - ;; (setq sentence-id - ;; (intern - ;; (concord-kanbun-encode-name-as-id - ;; (format "%s/%d" source-name sentence-number)))) - (setq sentence-id-name - (format "%s/%d" source-name sentence-number)) - (setq sentence-id (intern sentence-id-name)) - (unless (setq sentence-cobj - (concord-decode-object - '=id sentence-id 'sentence@zh-classical)) - (setq sentence-cobj - (concord-make-object - 'sentence@zh-classical sentence-id)) - (concord-object-put - sentence-cobj '=name (format "%s(%s)" - sentence sentence-id-name))) - (concord-object-put - sentence-cobj 'source/file-name source-name) - (concord-object-put - sentence-cobj 'source/sentence-number sentence-number) - (concord-object-put - sentence-cobj '->morphemes (nreverse dest)) - (when (setq sentence-entry-cobj - (concord-kanbun-add-sentence-entry sentence)) - (concord-object-put - sentence-cobj '->entry (list sentence-entry-cobj))) - sentence-cobj)) - (if send - (goto-char send))))) - -(defun concord-kanbun-read-buffer (&optional source-name) - (interactive) - (save-excursion - (goto-char (point-min)) - (unless source-name - (setq source-name (file-name-nondirectory buffer-file-name))) - (let ((i 1)) - (while (concord-kanbun-read-sentence i source-name) - (setq i (1+ i))))))