X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=isd-turtle.el;h=c3df592aa78b6b64f607981e3c8b0d63095a697d;hb=7f9e82f111281a411570bb38f8360de681958fa6;hp=d6377274bad6abcd848bbbbcc1eda74d62bbd209;hpb=c32c31561dd67d5f577d5abfff2deb18a23c8fca;p=chise%2Fisd.git diff --git a/isd-turtle.el b/isd-turtle.el index d637727..c3df592 100644 --- a/isd-turtle.el +++ b/isd-turtle.el @@ -31,135 +31,232 @@ (setq est-coded-charset-priority-list '(; =ucs =mj - =adobe-japan1 + =adobe-japan1-0 + =adobe-japan1-1 + =adobe-japan1-2 + =adobe-japan1-3 + =adobe-japan1-4 + =adobe-japan1-5 + =adobe-japan1-6 =ucs@iso =jis-x0208 =jis-x0208@1990 =jis-x0213-1 =jis-x0213-1@2000 =jis-x0213-1@2004 =jis-x0213-2 =jis-x0212 + =gt + =hanyo-denshi/ks + =hanyo-denshi/tk =ucs-itaiji-001 =ucs-itaiji-002 =ucs-itaiji-003 + =ucs-itaiji-004 =ucs-itaiji-005 + =ucs-itaiji-006 + =ucs-itaiji-007 + =ucs-itaiji-008 + =ucs-itaiji-009 + =ucs-itaiji-010 + =ucs-itaiji-084 =ucs-var-001 - =gt + =ucs-var-002 + =ucs-var-003 + =ucs-var-004 + =ucs-var-005 =cns11643-1 =cns11643-2 =cns11643-3 =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7 =gb2312 =big5-cdp + =ks-x1001 =gt-k =ucs@unicode =ucs@JP/hanazono =gb12345 + =ucs@cns + =ucs@gb =zinbun-oracle =>zinbun-oracle =daikanwa =ruimoku-v6 =cbeta =jef-china3 + =daikanwa/+2p + =+>ucs@iso =+>ucs@unicode + =+>ucs@jis + =+>ucs@cns + =+>ucs@ks + =+>ucs@jis/1990 + =>mj =>jis-x0208 =>jis-x0213-1 =>jis-x0208@1997 =>ucs@iwds-1 + =>ucs@cognate =>ucs@component =>iwds-1 =>ucs@iso =>ucs@unicode - =+>ucs@iso =+>ucs@unicode =>ucs@jis =>ucs@cns =>ucs@ks + =>gt + =>gt-k =>>ucs@iso =>>ucs@unicode =>>ucs@jis =>>ucs@cns =>>ucs@ks + =>>gt-k + =>>hanyo-denshi/ks ==mj + ==ucs@iso + ==ucs@unicode + ==adobe-japan1-0 + ==adobe-japan1-1 + ==adobe-japan1-2 + ==adobe-japan1-3 + ==adobe-japan1-4 + ==adobe-japan1-5 + ==adobe-japan1-6 + ==ks-x1001 + ==hanyo-denshi/ks + ==hanyo-denshi/tk + ==ucs@jis + ==gt + ==cns11643-1 ==cns11643-2 ==cns11643-3 + ==cns11643-4 ==cns11643-5 ==cns11643-6 ==cns11643-7 + ==jis-x0212 + ==ucs@cns + ==koseki + ==daikanwa + ==gt-k + ==ucs@gb + ==ucs-itaiji-003 + ==ucs@JP/hanazono + ==daikanwa/+2p =>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2 =+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2 + =+>hanyo-denshi/jt =+>jis-x0208@1978 =>>gt =+>adobe-japan1 =>>adobe-japan1 =jis-x0208@1983 =jis-x0208@1978 + =>ucs-itaiji-001 + =>ucs-itaiji-002 + =>ucs-itaiji-003 + =>ucs-itaiji-004 =>ucs-itaiji-005 - ==ucs@unicode + =>ucs-itaiji-006 + =>ucs-itaiji-007 ==>ucs@bucs =big5 =>cbeta + ===mj + ===ucs@iso + ===ucs@unicode + ===hanyo-denshi/ks + ===ks-x1001 + ===gt + ===gt-k + ===ucs@ks + ===ucs@gb + =shinjigen + =shinjigen@rev + =shinjigen@1ed + =shinjigen/+p@rev + ==shinjigen + ==shinjigen@rev + ==daikanwa/+p + ==shinjigen@1ed + ===daikanwa/+p + =>daikanwa/ho + ===daikanwa/ho )) -(defvar isd-turtle-ccs-list nil) +;; (defvar isd-turtle-ccs-list nil) +(defvar chise-turtle-ccs-prefix-alist nil) (defun charset-code-point-format-spec (ccs) (cond ((memq ccs '(=ucs)) "0x%04X") - ((memq ccs '(=gt - =gt-k =daikanwa =adobe-japan1 - =cbeta =zinbun-oracle)) - "%05d") - ((memq ccs '(=hanyo-denshi/ks - =koseki =mj)) - "%06d") (t - "0x%X"))) + (let ((ccs-name (symbol-name ccs))) + (cond + ((string-match + "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)" + ccs-name) + "%04d") + ((string-match + "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)" + ccs-name) + "%05d") + ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name) + "%06d") + ((string-match "hanyo-denshi/tk" ccs-name) + "%08d") + (t + "0x%X")))))) -(defun isd-turtle-uri-encode-feature-name (feature-name) +;; (defun isd-turtle-uri-encode-feature-name (feature-name) +;; (cond +;; ((eq '=ucs feature-name) +;; "a.ucs") +;; ((eq '==>ucs@bucs feature-name) +;; "bucs") +;; (t +;; (mapconcat (lambda (c) +;; (if (eq c ?@) +;; "_" +;; (char-to-string c))) +;; (www-uri-encode-feature-name feature-name) +;; "")))) +(defun chise-turtle-uri-encode-ccs-name (feature-name) (cond ((eq '=ucs feature-name) "a.ucs") + ((eq '=big5 feature-name) + "a.big5") ((eq '==>ucs@bucs feature-name) "bucs") (t (mapconcat (lambda (c) - (if (eq c ?@) - "_" - (char-to-string c))) + (cond + ((eq c ?@) + "_") + ((eq c ?+) + "._.") + ((eq c ?=) + ".:.") + (t + (char-to-string c)))) (www-uri-encode-feature-name feature-name) "")))) -(defun isd-turtle-format-ccs-code-point (ccs code-point) - (format "%s:%s" - (isd-turtle-uri-encode-feature-name ccs) - (format (charset-code-point-format-spec ccs) - code-point))) - -;; (defun isd-turtle-encode-char (char) -;; (let ((ucs (encode-char char '=ucs))) -;; (if ucs -;; (format "ucs:0x%04X" ucs) -;; (www-uri-encode-object char)))) +;; (defun isd-turtle-format-ccs-code-point (ccs code-point) +;; (unless (memq ccs isd-turtle-ccs-list) +;; (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list))) +;; (format "%s:%s" +;; (isd-turtle-uri-encode-feature-name ccs) +;; (format (charset-code-point-format-spec ccs) +;; code-point))) +(defun chise-turtle-format-ccs-code-point (ccs code-point) + (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs))) + (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist) + (setq chise-turtle-ccs-prefix-alist + (cons (cons ccs-uri ccs) + chise-turtle-ccs-prefix-alist))) + (format "%s:%s" + ccs-uri + (format (charset-code-point-format-spec ccs) + code-point)))) (defun isd-turtle-encode-char (object) (let ((ccs-list est-coded-charset-priority-list) ccs ret) (if (setq ret (encode-char object '=ucs)) - (prog1 - ;; (format "a.ucs:0x%04X" ret) - (isd-turtle-format-ccs-code-point '=ucs ret) - (unless (memq '=ucs isd-turtle-ccs-list) - (setq isd-turtle-ccs-list (cons '=ucs isd-turtle-ccs-list)))) + (chise-turtle-format-ccs-code-point '=ucs ret) (while (and ccs-list (setq ccs (pop ccs-list)) (not (setq ret (encode-char object ccs 'defined-only))))) (cond (ret - (unless (memq ccs isd-turtle-ccs-list) - (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list))) - ;; (format (cond ((memq ccs '(=gt - ;; =gt-k =daikanwa =adobe-japan1 - ;; =cbeta =zinbun-oracle)) - ;; "%s:%05d") - ;; ((memq ccs '(=hanyo-denshi/ks - ;; =koseki - ;; =mj)) - ;; "%s:%06d") - ;; (t - ;; "%s:0x%X")) - ;; (isd-turtle-uri-encode-feature-name ccs) - ;; ret) - (isd-turtle-format-ccs-code-point ccs ret) + (chise-turtle-format-ccs-code-point ccs ret) ) ((and (setq ccs (car (split-char object))) (setq ret (encode-char object ccs))) - (unless (memq ccs isd-turtle-ccs-list) - (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list))) - ;; (format "%s:0x%X" - ;; (isd-turtle-uri-encode-feature-name ccs) - ;; ret) - (isd-turtle-format-ccs-code-point ccs ret) + (chise-turtle-format-ccs-code-point ccs ret) ) (t (format (if est-hide-cgi-mode @@ -168,7 +265,7 @@ (encode-char object 'system-char-id)) ))))) -(defun isd-turtle-format-component (component separator level) +(defun isd-turtle-format-component (component separator level prefix) (cond ((characterp component) (format "%s %c # %c" (isd-turtle-encode-char component) @@ -183,21 +280,31 @@ ((setq ret (assq 'ideographic-structure component)) (if (eq separator ?\;) (format "%s ;" - (isd-turtle-format-char nil (cdr ret) (1+ level))) - (isd-turtle-format-char nil (cdr ret) (1+ level))))))))) + (isd-turtle-format-char nil nil (cdr ret) (1+ level) + prefix)) + (isd-turtle-format-char nil nil (cdr ret) (1+ level) + prefix)))))))) -(defun isd-turtle-format-char (char &optional ids-list level) - (unless ids-list - (setq ids-list (get-char-attribute char 'ideographic-structure))) +(defun isd-turtle-format-char (ccs code-point &optional ids-list level + prefix without-head-char) (unless level (setq level 0)) + (unless prefix + (setq prefix "")) (let ((indent (make-string (* level 4) ?\ )) - (idc (car ids-list)) + char + idc p1 p2 p3 - (c1 (nth 1 ids-list)) - (c2 (nth 2 ids-list)) - (c3 (nth 3 ids-list)) + c1 c2 c3 ret) + (unless ids-list + (if (and ccs code-point + (setq char (decode-char ccs code-point))) + (setq ids-list (get-char-attribute char 'ideographic-structure)))) + (setq idc (car ids-list)) + (setq c1 (nth 1 ids-list) + c2 (nth 2 ids-list) + c3 (nth 3 ids-list)) (if (char-ref-p idc) (setq idc (plist-get idc :char))) (if (and (consp idc) @@ -242,80 +349,100 @@ (cond (p3 (format "%s -%s :structure [ a idc:%c ; -%s :%-8s %s -%s :%-8s %s -%s :%-8s %s +%s %s:structure [ a idc:%c ; +%s %s:%-8s %s +%s %s:%-8s %s +%s %s:%-8s %s %s ]%s" - (if char - (isd-turtle-format-component char ?\ 0) - "[") - indent idc - indent p1 (isd-turtle-format-component c1 ?\; (1+ level)) - indent p2 (isd-turtle-format-component c2 ?\; (1+ level)) - indent p3 (isd-turtle-format-component c3 ?\ (1+ level)) + (if without-head-char + "" + (if (and ccs code-point) + (format "%s # %c" + (chise-turtle-format-ccs-code-point ccs code-point) + char) + "[")) + indent prefix idc + indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix) + indent prefix p2 (isd-turtle-format-component c2 ?\; (1+ level) prefix) + indent prefix p3 (isd-turtle-format-component c3 ?\ (1+ level) prefix) indent - (if (null char) - (format "\n%s]" - indent) - "")) + (if without-head-char + "" + (if (null char) + (format "\n%s]" + indent) + ""))) ) (idc (format "%s -%s :structure [ a idc:%c ; -%s :%-8s %s -%s :%-8s %s +%s %s:structure [ a idc:%c ; +%s %s:%-8s %s +%s %s:%-8s %s %s ]%s" - (if char - (isd-turtle-format-component char ?\ 0) - "[") - indent idc - indent p1 (isd-turtle-format-component c1 ?\; (1+ level)) - indent p2 (isd-turtle-format-component c2 ?\ (1+ level)) + (if without-head-char + "" + (if (and ccs code-point) + (format "%s # %c" + (chise-turtle-format-ccs-code-point ccs code-point) + char) + "[")) + indent prefix idc + indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix) + indent prefix p2 (isd-turtle-format-component c2 ?\ (1+ level) prefix) indent - (if (null char) - (format "\n%s]" - indent) - "")))) + (if without-head-char + "" + (if (null char) + (format "\n%s]" + indent) + ""))))) )) -(defun isd-turtle-insert-char (char) - (let ((ret (isd-turtle-format-char char))) +(defun isd-turtle-insert-char (ccs code-point) + (let ((ret (isd-turtle-format-char ccs code-point))) (when ret (insert ret) (insert " .\n")))) (defun isd-turtle-insert-ccs-ranges (ccs &rest ranges) - (let (range code max-code char) + (let (range code max-code) (while ranges (setq range (car ranges)) (cond ((consp range) (setq code (car range) max-code (cdr range)) (while (<= code max-code) - (if (setq char (decode-char ccs code)) - (isd-turtle-insert-char char)) - (setq code (1+ code)))) + (isd-turtle-insert-char ccs code) + (setq code (1+ code))) + ) ((integerp range) - (if (setq char (decode-char ccs code)) - (isd-turtle-insert-char char))) + (isd-turtle-insert-char ccs range) + ) (t (error 'wrong-type-argument range))) (setq ranges (cdr ranges))))) (defun isd-turtle-dump-range (file path func &rest args) (with-temp-buffer (let ((coding-system-for-write 'utf-8-mcs-er) - isd-turtle-ccs-list) + ;; isd-turtle-ccs-list + chise-turtle-ccs-prefix-alist) (if (file-directory-p path) (setq path (expand-file-name file path))) (apply func args) (goto-char (point-min)) - (dolist (ccs (sort isd-turtle-ccs-list - #'char-attribute-name<)) - (insert (format "@prefix %s: <%s%s=> .\n" - (isd-turtle-uri-encode-feature-name ccs) - "http://www.chise.org/est/view/character/" - (www-uri-encode-feature-name ccs)))) + ;; (dolist (ccs (sort isd-turtle-ccs-list + ;; #'char-attribute-name<)) + ;; (insert (format "@prefix %s: <%s%s=> .\n" + ;; (isd-turtle-uri-encode-feature-name ccs) + ;; "http://www.chise.org/est/view/character/" + ;; (www-uri-encode-feature-name ccs)))) + (dolist (cell (sort chise-turtle-ccs-prefix-alist + (lambda (a b) + (char-attribute-name< (cdr a)(cdr b))))) + (insert (format "@prefix %s: <%s/%s=> .\n" + (car cell) + "http://www.chise.org/est/view/character" + (www-uri-encode-feature-name (cdr cell))))) (insert "\n") (goto-char (point-min)) (insert "# -*- coding: utf-8-mcs-er -*-\n") @@ -328,15 +455,149 @@ (interactive "Fdump ISD-UCS-Basic : ") (isd-turtle-dump-range "ISD-UCS-Basic.ttl" filename #'isd-turtle-insert-ccs-ranges - 'ucs '(#x4E00 . #x9FA5))) + '=ucs '(#x4E00 . #x9FA5))) ;;;###autoload (defun isd-turtle-dump-ucs-ext-a (filename) (interactive "Fdump ISD-UCS-Ext-A : ") (isd-turtle-dump-range "ISD-UCS-Ext-A.ttl" filename #'isd-turtle-insert-ccs-ranges - 'ucs '(#x3400 . #x4DB5) #xFA1F #xFA23)) + '=ucs '(#x3400 . #x4DB5) #xFA1F #xFA23)) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-b-1 (filename) + (interactive "Fdump IDS-UCS-Ext-B-1 : ") + (isd-turtle-dump-range "ISD-UCS-Ext-B-1.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x20000 . #x21FFF))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-b-2 (filename) + (interactive "Fdump IDS-UCS-Ext-B-2 : ") + (isd-turtle-dump-range "ISD-UCS-Ext-B-2.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x22000 . #x23FFF))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-b-3 (filename) + (interactive "Fdump IDS-UCS-Ext-B-3 : ") + (isd-turtle-dump-range "ISD-UCS-Ext-B-3.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x24000 . #x25FFF))) +;;;###autoload +(defun isd-turtle-dump-ucs-ext-b-4 (filename) + (interactive "Fdump IDS-UCS-Ext-B-4 : ") + (isd-turtle-dump-range "ISD-UCS-Ext-B-4.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x26000 . #x27FFF))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-b-5 (filename) + (interactive "Fdump IDS-UCS-Ext-B-5 : ") + (isd-turtle-dump-range "ISD-UCS-Ext-B-5.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x28000 . #x29FFF))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-b-6 (filename) + (interactive "Fdump IDS-UCS-Ext-B-6 : ") + (isd-turtle-dump-range "ISD-UCS-Ext-B-6.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x2A000 . #x2A6D6))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-c (filename) + (interactive "Fdump IDS-UCS-Ext-C : ") + (isd-turtle-dump-range "ISD-UCS-Ext-C.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x2A700 . #x2B734))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-d (filename) + (interactive "Fdump IDS-UCS-Ext-D : ") + (isd-turtle-dump-range "ISD-UCS-Ext-D.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x2B740 . #x2B81D))) + +;;;###autoload +(defun isd-turtle-dump-ucs-ext-e (filename) + (interactive "Fdump IDS-UCS-Ext-E : ") + (isd-turtle-dump-range "ISD-UCS-Ext-E.ttl" filename + #'isd-turtle-insert-ccs-ranges + 'ucs '(#x2B820 . #x2CEA1))) + +;;;###autoload +(defun isd-turtle-dump-mj-0 (filename) + (interactive "Fdump ISD-MJ-0 : ") + (isd-turtle-dump-range "ISD-MJ-0.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=mj '(1 . 9999))) + +;;;###autoload +(defun isd-turtle-dump-mj-1 (filename) + (interactive "Fdump ISD-MJ-1 : ") + (isd-turtle-dump-range "ISD-MJ-1.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=mj '(10000 . 19999))) + +;;;###autoload +(defun isd-turtle-dump-mj-2 (filename) + (interactive "Fdump ISD-MJ-2 : ") + (isd-turtle-dump-range "ISD-MJ-2.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=mj '(20000 . 29999))) + +;;;###autoload +(defun isd-turtle-dump-mj-3 (filename) + (interactive "Fdump ISD-MJ-3 : ") + (isd-turtle-dump-range "ISD-MJ-3.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=mj '(30000 . 39999))) + +;;;###autoload +(defun isd-turtle-dump-mj-4 (filename) + (interactive "Fdump ISD-MJ-4 : ") + (isd-turtle-dump-range "ISD-MJ-4.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=mj '(40000 . 49999))) + +;;;###autoload +(defun isd-turtle-dump-mj-5 (filename) + (interactive "Fdump ISD-MJ-5 : ") + (isd-turtle-dump-range "ISD-MJ-5.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=mj '(50000 . 59999))) + +;;;###autoload +(defun isd-turtle-dump-mj-6 (filename) + (interactive "Fdump ISD-MJ-6 : ") + (isd-turtle-dump-range "ISD-MJ-6.ttl" filename + #'isd-turtle-insert-ccs-ranges + '=mj '(60000 . 69999))) + +;;;###autoload +(defun isd-turtle-dump-all (directory) + (interactive "DISD directory : ") + (isd-turtle-dump-ucs-basic directory) + (isd-turtle-dump-ucs-ext-a directory) + (isd-turtle-dump-ucs-ext-b-1 directory) + (isd-turtle-dump-ucs-ext-b-2 directory) + (isd-turtle-dump-ucs-ext-b-3 directory) + (isd-turtle-dump-ucs-ext-b-4 directory) + (isd-turtle-dump-ucs-ext-b-5 directory) + (isd-turtle-dump-ucs-ext-b-6 directory) + (isd-turtle-dump-ucs-ext-c directory) + (isd-turtle-dump-ucs-ext-d directory) + (isd-turtle-dump-ucs-ext-e directory) + (isd-turtle-dump-mj-0 directory) + (isd-turtle-dump-mj-1 directory) + (isd-turtle-dump-mj-2 directory) + (isd-turtle-dump-mj-3 directory) + (isd-turtle-dump-mj-4 directory) + (isd-turtle-dump-mj-5 directory) + (isd-turtle-dump-mj-6 directory) + ) ;;; @ End.