X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=isd-turtle.el;h=a4f100122201ba5d1cbc10fc1d627c2a15481396;hb=0419f83175062eefeb94b7472dd2ae7601d0e5c9;hp=077d17450b8ed5d46e709e03a90df755e145c83f;hpb=f3dc5e0dc3defd43071cf187b59e941360806421;p=chise%2Fisd.git diff --git a/isd-turtle.el b/isd-turtle.el index 077d174..a4f1001 100644 --- a/isd-turtle.el +++ b/isd-turtle.el @@ -31,7 +31,13 @@ (setq est-coded-charset-priority-list '(; =ucs =mj - =adobe-japan1 + =adobe-japan1-0 + =adobe-japan1-1 + =adobe-japan1-2 + =adobe-japan1-3 + =adobe-japan1-4 + =adobe-japan1-5 + =adobe-japan1-6 =ucs@iso =jis-x0208 =jis-x0208@1990 =jis-x0213-1 @@ -39,42 +45,89 @@ =jis-x0213-2 =jis-x0212 =gt + =hanyo-denshi/ks =hanyo-denshi/tk =ucs-itaiji-001 =ucs-itaiji-002 =ucs-itaiji-003 + =ucs-itaiji-004 =ucs-itaiji-005 =ucs-itaiji-006 + =ucs-itaiji-007 + =ucs-itaiji-008 + =ucs-itaiji-009 + =ucs-itaiji-010 =ucs-itaiji-084 =ucs-var-001 =ucs-var-002 + =ucs-var-003 =ucs-var-004 =cns11643-1 =cns11643-2 =cns11643-3 =cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7 =gb2312 =big5-cdp + =ks-x1001 =gt-k =ucs@unicode =ucs@JP/hanazono =gb12345 + =ucs@cns + =ucs@gb =zinbun-oracle =>zinbun-oracle =daikanwa =ruimoku-v6 =cbeta =jef-china3 + =daikanwa/+2p + =+>ucs@iso =+>ucs@unicode + =+>ucs@jis + =+>ucs@cns + =+>ucs@ks + =+>ucs@jis/1990 + =>mj =>jis-x0208 =>jis-x0213-1 =>jis-x0208@1997 =>ucs@iwds-1 + =>ucs@cognate =>ucs@component =>iwds-1 =>ucs@iso =>ucs@unicode - =+>ucs@iso =+>ucs@unicode =>ucs@jis =>ucs@cns =>ucs@ks + =>gt + =>gt-k =>>ucs@iso =>>ucs@unicode =>>ucs@jis =>>ucs@cns =>>ucs@ks + =>>gt-k + =>>hanyo-denshi/ks ==mj + ==ucs@iso + ==ucs@unicode + ==adobe-japan1-0 + ==adobe-japan1-1 + ==adobe-japan1-2 + ==adobe-japan1-3 + ==adobe-japan1-4 + ==adobe-japan1-5 + ==adobe-japan1-6 + ==ks-x1001 + ==hanyo-denshi/ks + ==hanyo-denshi/tk + ==ucs@jis + ==gt + ==cns11643-1 ==cns11643-2 ==cns11643-3 + ==cns11643-4 ==cns11643-5 ==cns11643-6 ==cns11643-7 + ==jis-x0212 + ==ucs@cns + ==koseki + ==daikanwa + ==gt-k + ==ucs@gb + ==ucs-itaiji-003 + ==ucs@JP/hanazono + ==daikanwa/+2p =>>jis-x0208 =>>jis-x0213-1 =>>jis-x0213-2 =+>jis-x0208 =+>jis-x0213-1 =+>jis-x0213-2 + =+>hanyo-denshi/jt =+>jis-x0208@1978 =>>gt =+>adobe-japan1 @@ -82,66 +135,127 @@ =jis-x0208@1983 =jis-x0208@1978 =>ucs-itaiji-001 =>ucs-itaiji-002 + =>ucs-itaiji-003 + =>ucs-itaiji-004 =>ucs-itaiji-005 - ==ucs@unicode + =>ucs-itaiji-006 + =>ucs-itaiji-007 ==>ucs@bucs =big5 =>cbeta + ===mj + ===ucs@iso + ===ucs@unicode + ===hanyo-denshi/ks + ===ks-x1001 + ===gt + ===gt-k + ===ucs@ks + ===ucs@gb + =shinjigen + =shinjigen@rev + =shinjigen@1ed + =shinjigen/+p@rev + ==shinjigen + ==shinjigen@rev + ==daikanwa/+p + ==shinjigen@1ed + ===daikanwa/+p + =>daikanwa/ho + ===daikanwa/ho )) -(defvar isd-turtle-ccs-list nil) +;; (defvar isd-turtle-ccs-list nil) +(defvar chise-turtle-ccs-prefix-alist nil) (defun charset-code-point-format-spec (ccs) (cond ((memq ccs '(=ucs)) "0x%04X") - ((memq ccs '(=gt - =gt-k =daikanwa =adobe-japan1 - =cbeta =zinbun-oracle)) - "%05d") - ((memq ccs '(=hanyo-denshi/ks - =koseki =mj)) - "%06d") - ((memq ccs '(=hanyo-denshi/tk ==hanyo-denshi/tk)) - "%08d") (t - "0x%X"))) - -(defun isd-turtle-uri-encode-feature-name (feature-name) + (let ((ccs-name (symbol-name ccs))) + (cond + ((string-match + "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)" + ccs-name) + "%04d") + ((string-match + "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)" + ccs-name) + "%05d") + ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name) + "%06d") + ((string-match "hanyo-denshi/tk" ccs-name) + "%08d") + (t + "0x%X")))))) + +;; (defun isd-turtle-uri-encode-feature-name (feature-name) +;; (cond +;; ((eq '=ucs feature-name) +;; "a.ucs") +;; ((eq '==>ucs@bucs feature-name) +;; "bucs") +;; (t +;; (mapconcat (lambda (c) +;; (if (eq c ?@) +;; "_" +;; (char-to-string c))) +;; (www-uri-encode-feature-name feature-name) +;; "")))) +(defun chise-turtle-uri-encode-ccs-name (feature-name) (cond ((eq '=ucs feature-name) "a.ucs") + ((eq '=big5 feature-name) + "a.big5") ((eq '==>ucs@bucs feature-name) "bucs") (t (mapconcat (lambda (c) - (if (eq c ?@) - "_" - (char-to-string c))) + (cond + ((eq c ?@) + "_") + ((eq c ?+) + "._.") + ((eq c ?=) + ".:.") + (t + (char-to-string c)))) (www-uri-encode-feature-name feature-name) "")))) -(defun isd-turtle-format-ccs-code-point (ccs code-point) - (unless (memq ccs isd-turtle-ccs-list) - (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list))) - (format "%s:%s" - (isd-turtle-uri-encode-feature-name ccs) - (format (charset-code-point-format-spec ccs) - code-point))) +;; (defun isd-turtle-format-ccs-code-point (ccs code-point) +;; (unless (memq ccs isd-turtle-ccs-list) +;; (setq isd-turtle-ccs-list (cons ccs isd-turtle-ccs-list))) +;; (format "%s:%s" +;; (isd-turtle-uri-encode-feature-name ccs) +;; (format (charset-code-point-format-spec ccs) +;; code-point))) +(defun chise-turtle-format-ccs-code-point (ccs code-point) + (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs))) + (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist) + (setq chise-turtle-ccs-prefix-alist + (cons (cons ccs-uri ccs) + chise-turtle-ccs-prefix-alist))) + (format "%s:%s" + ccs-uri + (format (charset-code-point-format-spec ccs) + code-point)))) (defun isd-turtle-encode-char (object) (let ((ccs-list est-coded-charset-priority-list) ccs ret) (if (setq ret (encode-char object '=ucs)) - (isd-turtle-format-ccs-code-point '=ucs ret) + (chise-turtle-format-ccs-code-point '=ucs ret) (while (and ccs-list (setq ccs (pop ccs-list)) (not (setq ret (encode-char object ccs 'defined-only))))) (cond (ret - (isd-turtle-format-ccs-code-point ccs ret) + (chise-turtle-format-ccs-code-point ccs ret) ) ((and (setq ccs (car (split-char object))) (setq ret (encode-char object ccs))) - (isd-turtle-format-ccs-code-point ccs ret) + (chise-turtle-format-ccs-code-point ccs ret) ) (t (format (if est-hide-cgi-mode @@ -150,7 +264,7 @@ (encode-char object 'system-char-id)) ))))) -(defun isd-turtle-format-component (component separator level) +(defun isd-turtle-format-component (component separator level prefix) (cond ((characterp component) (format "%s %c # %c" (isd-turtle-encode-char component) @@ -165,12 +279,17 @@ ((setq ret (assq 'ideographic-structure component)) (if (eq separator ?\;) (format "%s ;" - (isd-turtle-format-char nil nil (cdr ret) (1+ level))) - (isd-turtle-format-char nil nil (cdr ret) (1+ level))))))))) + (isd-turtle-format-char nil nil (cdr ret) (1+ level) + prefix)) + (isd-turtle-format-char nil nil (cdr ret) (1+ level) + prefix)))))))) -(defun isd-turtle-format-char (ccs code-point &optional ids-list level) +(defun isd-turtle-format-char (ccs code-point &optional ids-list level + prefix without-head-char) (unless level (setq level 0)) + (unless prefix + (setq prefix "")) (let ((indent (make-string (* level 4) ?\ )) char idc @@ -229,45 +348,53 @@ (cond (p3 (format "%s -%s :structure [ a idc:%c ; -%s :%-8s %s -%s :%-8s %s -%s :%-8s %s +%s %s:structure [ a idc:%c ; +%s %s:%-8s %s +%s %s:%-8s %s +%s %s:%-8s %s %s ]%s" - (if (and ccs code-point) - (format "%s # %c" - (isd-turtle-format-ccs-code-point ccs code-point) - char) - "[") - indent idc - indent p1 (isd-turtle-format-component c1 ?\; (1+ level)) - indent p2 (isd-turtle-format-component c2 ?\; (1+ level)) - indent p3 (isd-turtle-format-component c3 ?\ (1+ level)) + (if without-head-char + "" + (if (and ccs code-point) + (format "%s # %c" + (chise-turtle-format-ccs-code-point ccs code-point) + char) + "[")) + indent prefix idc + indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix) + indent prefix p2 (isd-turtle-format-component c2 ?\; (1+ level) prefix) + indent prefix p3 (isd-turtle-format-component c3 ?\ (1+ level) prefix) indent - (if (null char) - (format "\n%s]" - indent) - "")) + (if without-head-char + "" + (if (null char) + (format "\n%s]" + indent) + ""))) ) (idc (format "%s -%s :structure [ a idc:%c ; -%s :%-8s %s -%s :%-8s %s +%s %s:structure [ a idc:%c ; +%s %s:%-8s %s +%s %s:%-8s %s %s ]%s" - (if (and ccs code-point) - (format "%s # %c" - (isd-turtle-format-ccs-code-point ccs code-point) - char) - "[") - indent idc - indent p1 (isd-turtle-format-component c1 ?\; (1+ level)) - indent p2 (isd-turtle-format-component c2 ?\ (1+ level)) + (if without-head-char + "" + (if (and ccs code-point) + (format "%s # %c" + (chise-turtle-format-ccs-code-point ccs code-point) + char) + "[")) + indent prefix idc + indent prefix p1 (isd-turtle-format-component c1 ?\; (1+ level) prefix) + indent prefix p2 (isd-turtle-format-component c2 ?\ (1+ level) prefix) indent - (if (null char) - (format "\n%s]" - indent) - "")))) + (if without-head-char + "" + (if (null char) + (format "\n%s]" + indent) + ""))))) )) (defun isd-turtle-insert-char (ccs code-point) @@ -296,17 +423,25 @@ (defun isd-turtle-dump-range (file path func &rest args) (with-temp-buffer (let ((coding-system-for-write 'utf-8-mcs-er) - isd-turtle-ccs-list) + ;; isd-turtle-ccs-list + chise-turtle-ccs-prefix-alist) (if (file-directory-p path) (setq path (expand-file-name file path))) (apply func args) (goto-char (point-min)) - (dolist (ccs (sort isd-turtle-ccs-list - #'char-attribute-name<)) - (insert (format "@prefix %s: <%s%s=> .\n" - (isd-turtle-uri-encode-feature-name ccs) - "http://www.chise.org/est/view/character/" - (www-uri-encode-feature-name ccs)))) + ;; (dolist (ccs (sort isd-turtle-ccs-list + ;; #'char-attribute-name<)) + ;; (insert (format "@prefix %s: <%s%s=> .\n" + ;; (isd-turtle-uri-encode-feature-name ccs) + ;; "http://www.chise.org/est/view/character/" + ;; (www-uri-encode-feature-name ccs)))) + (dolist (cell (sort chise-turtle-ccs-prefix-alist + (lambda (a b) + (char-attribute-name< (cdr a)(cdr b))))) + (insert (format "@prefix %s: <%s/%s=> .\n" + (car cell) + "http://www.chise.org/est/view/character" + (www-uri-encode-feature-name (cdr cell))))) (insert "\n") (goto-char (point-min)) (insert "# -*- coding: utf-8-mcs-er -*-\n")