New file.
authorMORIOKA Tomohiko <tomo.git@chise.org>
Sat, 15 Dec 2018 08:18:27 +0000 (17:18 +0900)
committerMORIOKA Tomohiko <tomo.git@chise.org>
Sat, 15 Dec 2018 08:18:27 +0000 (17:18 +0900)
chiset-common.el [new file with mode: 0644]

diff --git a/chiset-common.el b/chiset-common.el
new file mode 100644 (file)
index 0000000..ee1ee8b
--- /dev/null
@@ -0,0 +1,340 @@
+;;; chiset-common.el --- CHISET common utility -*- coding: utf-8-er; -*-
+
+;; Copyright (C) 2010,2011,2012,2013,2014,2015,2016,2017,2018 MORIOKA Tomohiko.
+
+;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
+;; Keywords: CHISE, RDF, Turtle, WWW
+
+;; This file is part of CHISET (CHISE/Turtle).
+
+;; XEmacs CHISE is free software; you can redistribute it and/or
+;; modify it under the terms of the GNU General Public License as
+;; published by the Free Software Foundation; either version 2, or (at
+;; your option) any later version.
+
+;; XEmacs CHISE is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with XEmacs CHISE; see the file COPYING.  If not, write to
+;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+;;; Code:
+
+(defun decode-uri-string (string &optional coding-system)
+  (if (> (length string) 0)
+      (let ((i 0)
+           dest)
+       (setq string
+             (mapconcat (lambda (char)
+                          (if (eq char ?+)
+                              " "
+                            (char-to-string char)))
+                        string ""))
+       (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
+         (setq dest (concat dest
+                            (substring string i (match-beginning 0))
+                            (char-to-string
+                             (int-char
+                              (string-to-int (match-string 1 string) 16))))
+               i (match-end 0)))
+       (decode-coding-string
+        (concat dest (substring string i))
+        coding-system))))
+
+
+;;; @ URI representation
+;;;
+
+(defun est-uri-decode-feature-name-body (uri-feature)
+  (let ((len (length uri-feature))
+       (i 0)
+       ch dest)
+    (while (< i len)
+      (setq dest
+           (concat
+            dest
+            (if (eq (aref uri-feature i) ?\.)
+                (if (and (< (+ i 2) len)
+                         (eq (aref uri-feature (+ i 2)) ?\.))
+                    (prog1
+                        (cond
+                         ((eq (setq ch (aref uri-feature (1+ i))) ?\.)
+                          "/")
+                         ((eq ch ?-)
+                          "*")
+                         (t
+                          (substring uri-feature i (+ i 3))
+                          ))
+                      (setq i (+ i 3)))
+                  (setq i (1+ i))
+                  ".")
+              (prog1
+                  (char-to-string (aref uri-feature i))
+                (setq i (1+ i)))))))
+    dest))
+
+(defun est-uri-encode-feature-name-body (feature)
+  (mapconcat (lambda (c)
+              (cond ((eq c ?*)
+                     ".-.")
+                    ((eq c ?/)
+                     "...")
+                    (t (char-to-string c))))
+            feature ""))
+
+(defun www-uri-decode-feature-name (uri-feature)
+  (let (feature)
+    (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er))
+    (cond
+     ((string-match "^from\\." uri-feature)
+      (intern (format "<-%s"
+                     (est-uri-decode-feature-name-body
+                      (substring uri-feature (match-end 0)))))
+      )
+     ((string-match "^to\\." uri-feature)
+      (intern (format "->%s"
+                     (est-uri-decode-feature-name-body
+                      (substring uri-feature (match-end 0)))))
+      )
+     ((string-match "^rep\\." uri-feature)
+      (intern (format "=%s"
+                     (est-uri-decode-feature-name-body
+                      (substring uri-feature (match-end 0)))))
+      )
+     ((string-match "^rep[2i]\\." uri-feature)
+      (intern (format "===%s"
+                     (est-uri-decode-feature-name-body
+                      (substring uri-feature (match-end 0)))))
+      )
+     ((string-match "^g\\." uri-feature)
+      (intern (format "=>>%s"
+                     (est-uri-decode-feature-name-body
+                      (substring uri-feature (match-end 0)))))
+      )
+     ((string-match "^g[i2]\\." uri-feature)
+      (intern (format "==%s"
+                     (est-uri-decode-feature-name-body
+                      (substring uri-feature (match-end 0)))))
+      )
+     ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
+      (intern (format "=>>%s%s"
+                     (make-string (string-to-int
+                                   (match-string 1 uri-feature))
+                                  ?>)
+                     (est-uri-decode-feature-name-body
+                      (substring uri-feature (match-end 0)))))
+      )
+     ((string-match "^o\\." uri-feature)
+      (intern (format "=+>%s"
+                     (est-uri-decode-feature-name-body
+                      (substring uri-feature (match-end 0)))))
+      )
+     ((string-match "^a\\." uri-feature)
+      (intern (format "=>%s"
+                     (est-uri-decode-feature-name-body
+                      (substring uri-feature (match-end 0)))))
+      )
+     ((string-match "^a\\([0-9]+\\)\\." uri-feature)
+      (intern (format "%s>%s"
+                     (make-string (string-to-int
+                                   (match-string 1 uri-feature))
+                                  ?=)
+                     (est-uri-decode-feature-name-body
+                      (substring uri-feature (match-end 0)))))
+      )
+     ((and (setq uri-feature (est-uri-decode-feature-name-body uri-feature))
+          (setq feature (intern (format "=>%s" uri-feature)))
+          (find-charset feature))
+      feature)
+     ((and (setq feature (intern (format "=>>%s" uri-feature)))
+          (find-charset feature))
+      feature)
+     ((and (setq feature (intern (format "=>>>%s" uri-feature)))
+          (find-charset feature))
+      feature)
+     ((and (setq feature (intern (format "=%s" uri-feature)))
+          (find-charset feature))
+      feature)
+     (t (intern uri-feature)))))
+
+(defun www-uri-encode-feature-name (feature-name)
+  (setq feature-name (symbol-name feature-name))
+  (cond
+   ((string-match "^=\\+>\\([^=>]+\\)" feature-name)
+    (concat "o."
+           (est-uri-encode-feature-name-body
+            (substring feature-name (match-beginning 1))))
+    )
+   ((string-match "^=\\([^=>]+\\)" feature-name)
+    (concat "rep."
+           (est-uri-encode-feature-name-body
+            (substring feature-name (match-beginning 1))))
+    )
+   ((string-match "^==\\([^=>]+\\)" feature-name)
+    (concat "g2."
+           (est-uri-encode-feature-name-body
+            (substring feature-name (match-beginning 1))))
+    )
+   ((string-match "^===\\([^=>]+\\)" feature-name)
+    (concat "repi."
+           (est-uri-encode-feature-name-body
+            (substring feature-name (match-beginning 1))))
+    )
+   ((string-match "^=>>\\([^=>]+\\)" feature-name)
+    (concat "g."
+           (est-uri-encode-feature-name-body
+            (substring feature-name (match-beginning 1))))
+    )
+   ((string-match "^=>>>\\([^=>]+\\)" feature-name)
+    (concat "gi."
+           (est-uri-encode-feature-name-body
+            (substring feature-name (match-beginning 1))))
+    )
+   ((string-match "^=>>\\(>+\\)" feature-name)
+    (format "gi%d.%s"
+           (length (match-string 1 feature-name))
+           (est-uri-encode-feature-name-body
+            (substring feature-name (match-end 1))))
+    )
+   ((string-match "^=>\\([^=>]+\\)" feature-name)
+    (concat "a."
+           (est-uri-encode-feature-name-body
+            (substring feature-name (match-beginning 1))))
+    )
+   ((string-match "^\\(=+\\)>" feature-name)
+    (format "a%d.%s"
+           (length (match-string 1 feature-name))
+           (est-uri-encode-feature-name-body
+            (substring feature-name (match-end 0))))
+    )
+   ((string-match "^->" feature-name)
+    (concat "to."
+           (est-uri-encode-feature-name-body
+            (substring feature-name (match-end 0))))
+    )
+   ((string-match "^<-" feature-name)
+    (concat "from."
+           (est-uri-encode-feature-name-body
+            (substring feature-name (match-end 0))))
+    )
+   (t (est-uri-encode-feature-name-body feature-name))))
+
+
+(defvar chise-turtle-ccs-prefix-alist nil)
+
+(defun chise-turtle-uri-decode-feature-name (uri-feature)
+  (cond ((string= "a.ucs" uri-feature)
+        '=ucs)
+       ((string= "a.big5" uri-feature)
+        '=big5)
+       (t
+        (www-uri-decode-feature-name uri-feature))))
+
+(defun chise-turtle-uri-encode-ccs-name (feature-name)
+  (cond
+   ((eq '=ucs feature-name)
+    "a.ucs")
+   ((eq '=big5 feature-name)
+    "a.big5")
+   ((eq '==>ucs@bucs feature-name)
+    "bucs")
+   (t
+    (mapconcat (lambda (c)
+                (cond
+                 ((eq c ?@)
+                  "_")
+                 ((eq c ?+)
+                  "._.")
+                 ((eq c ?=)
+                  ".:.")
+                 ((eq c ?|)
+                  "._cmp_.")
+                 (t
+                  (char-to-string c))))
+              (www-uri-encode-feature-name feature-name)
+              ""))))
+
+(defun charset-code-point-format-spec (ccs)
+  (cond ((memq ccs '(=ucs))
+        "0x%04X")
+       (t
+        (let ((ccs-name (symbol-name ccs)))
+          (cond
+           ((string-match
+             "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
+             ccs-name)
+            "%04d")
+           ((string-match
+             "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
+             ccs-name)
+            "%05d")
+           ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
+            "%06d")
+           ((string-match "hanyo-denshi/tk" ccs-name)
+            "%08d")
+           (t
+            "0x%X"))))))
+
+(defun chise-turtle-format-ccs-code-point (ccs code-point)
+  (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
+    (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
+      (setq chise-turtle-ccs-prefix-alist
+           (cons (cons ccs-uri ccs)
+                 chise-turtle-ccs-prefix-alist)))
+    (format "%s:%s"
+           ccs-uri
+           (format (charset-code-point-format-spec ccs)
+                   code-point))))
+
+(defun chise-turtle-encode-char (object)
+  (let (spec cell dest
+       ccs ret ret2)
+    (if (setq ret (encode-char object '=ucs))
+       (chise-turtle-format-ccs-code-point '=ucs ret)
+      (setq spec (char-attribute-alist object))
+      (while (and spec
+                 (setq cell (pop spec)))
+       (if (and (find-charset (car cell))
+                (setq ret (cdr cell)))
+           (setq dest (cons cell dest))))
+      (setq ret (car (sort dest (lambda (a b)
+                                 (char-attribute-name< (car a)(car b)))))
+           ccs (car ret)
+           ret (cdr ret))
+      (cond (ret
+            (chise-turtle-format-ccs-code-point ccs ret)
+            )
+           ((and (setq ccs (car (split-char object)))
+                 (setq ret (encode-char object ccs)))
+            (chise-turtle-format-ccs-code-point ccs ret)
+            )
+           ((setq ret (get-char-attribute object 'ideographic-combination))
+            (format "ideocomb:%s"
+                    (mapconcat (lambda (cell)
+                                 (cond ((characterp cell)
+                                        (char-to-string cell)
+                                        )
+                                       ((setq ret2 (find-char cell))
+                                        (char-to-string ret2)
+                                        )
+                                       (t
+                                        (format "%S" cell)
+                                        )))
+                               ret ""))
+            )
+           (t
+            (format "system-char-id:0x%X"
+                    (encode-char object 'system-char-id))
+            )))))
+
+
+;;; @ end
+;;;
+
+(provide 'chiset-common)
+
+;;; chiset-common.el ends here