--- /dev/null
+;;; chiset-common.el --- CHISET common utility -*- coding: utf-8-er; -*-
+
+;; Copyright (C) 2010,2011,2012,2013,2014,2015,2016,2017,2018 MORIOKA Tomohiko.
+
+;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
+;; Keywords: CHISE, RDF, Turtle, WWW
+
+;; This file is part of CHISET (CHISE/Turtle).
+
+;; XEmacs CHISE is free software; you can redistribute it and/or
+;; modify it under the terms of the GNU General Public License as
+;; published by the Free Software Foundation; either version 2, or (at
+;; your option) any later version.
+
+;; XEmacs CHISE is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with XEmacs CHISE; see the file COPYING. If not, write to
+;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+;;; Code:
+
+(defun decode-uri-string (string &optional coding-system)
+ (if (> (length string) 0)
+ (let ((i 0)
+ dest)
+ (setq string
+ (mapconcat (lambda (char)
+ (if (eq char ?+)
+ " "
+ (char-to-string char)))
+ string ""))
+ (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
+ (setq dest (concat dest
+ (substring string i (match-beginning 0))
+ (char-to-string
+ (int-char
+ (string-to-int (match-string 1 string) 16))))
+ i (match-end 0)))
+ (decode-coding-string
+ (concat dest (substring string i))
+ coding-system))))
+
+
+;;; @ URI representation
+;;;
+
+(defun est-uri-decode-feature-name-body (uri-feature)
+ (let ((len (length uri-feature))
+ (i 0)
+ ch dest)
+ (while (< i len)
+ (setq dest
+ (concat
+ dest
+ (if (eq (aref uri-feature i) ?\.)
+ (if (and (< (+ i 2) len)
+ (eq (aref uri-feature (+ i 2)) ?\.))
+ (prog1
+ (cond
+ ((eq (setq ch (aref uri-feature (1+ i))) ?\.)
+ "/")
+ ((eq ch ?-)
+ "*")
+ (t
+ (substring uri-feature i (+ i 3))
+ ))
+ (setq i (+ i 3)))
+ (setq i (1+ i))
+ ".")
+ (prog1
+ (char-to-string (aref uri-feature i))
+ (setq i (1+ i)))))))
+ dest))
+
+(defun est-uri-encode-feature-name-body (feature)
+ (mapconcat (lambda (c)
+ (cond ((eq c ?*)
+ ".-.")
+ ((eq c ?/)
+ "...")
+ (t (char-to-string c))))
+ feature ""))
+
+(defun www-uri-decode-feature-name (uri-feature)
+ (let (feature)
+ (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er))
+ (cond
+ ((string-match "^from\\." uri-feature)
+ (intern (format "<-%s"
+ (est-uri-decode-feature-name-body
+ (substring uri-feature (match-end 0)))))
+ )
+ ((string-match "^to\\." uri-feature)
+ (intern (format "->%s"
+ (est-uri-decode-feature-name-body
+ (substring uri-feature (match-end 0)))))
+ )
+ ((string-match "^rep\\." uri-feature)
+ (intern (format "=%s"
+ (est-uri-decode-feature-name-body
+ (substring uri-feature (match-end 0)))))
+ )
+ ((string-match "^rep[2i]\\." uri-feature)
+ (intern (format "===%s"
+ (est-uri-decode-feature-name-body
+ (substring uri-feature (match-end 0)))))
+ )
+ ((string-match "^g\\." uri-feature)
+ (intern (format "=>>%s"
+ (est-uri-decode-feature-name-body
+ (substring uri-feature (match-end 0)))))
+ )
+ ((string-match "^g[i2]\\." uri-feature)
+ (intern (format "==%s"
+ (est-uri-decode-feature-name-body
+ (substring uri-feature (match-end 0)))))
+ )
+ ((string-match "^gi\\([0-9]+\\)\\." uri-feature)
+ (intern (format "=>>%s%s"
+ (make-string (string-to-int
+ (match-string 1 uri-feature))
+ ?>)
+ (est-uri-decode-feature-name-body
+ (substring uri-feature (match-end 0)))))
+ )
+ ((string-match "^o\\." uri-feature)
+ (intern (format "=+>%s"
+ (est-uri-decode-feature-name-body
+ (substring uri-feature (match-end 0)))))
+ )
+ ((string-match "^a\\." uri-feature)
+ (intern (format "=>%s"
+ (est-uri-decode-feature-name-body
+ (substring uri-feature (match-end 0)))))
+ )
+ ((string-match "^a\\([0-9]+\\)\\." uri-feature)
+ (intern (format "%s>%s"
+ (make-string (string-to-int
+ (match-string 1 uri-feature))
+ ?=)
+ (est-uri-decode-feature-name-body
+ (substring uri-feature (match-end 0)))))
+ )
+ ((and (setq uri-feature (est-uri-decode-feature-name-body uri-feature))
+ (setq feature (intern (format "=>%s" uri-feature)))
+ (find-charset feature))
+ feature)
+ ((and (setq feature (intern (format "=>>%s" uri-feature)))
+ (find-charset feature))
+ feature)
+ ((and (setq feature (intern (format "=>>>%s" uri-feature)))
+ (find-charset feature))
+ feature)
+ ((and (setq feature (intern (format "=%s" uri-feature)))
+ (find-charset feature))
+ feature)
+ (t (intern uri-feature)))))
+
+(defun www-uri-encode-feature-name (feature-name)
+ (setq feature-name (symbol-name feature-name))
+ (cond
+ ((string-match "^=\\+>\\([^=>]+\\)" feature-name)
+ (concat "o."
+ (est-uri-encode-feature-name-body
+ (substring feature-name (match-beginning 1))))
+ )
+ ((string-match "^=\\([^=>]+\\)" feature-name)
+ (concat "rep."
+ (est-uri-encode-feature-name-body
+ (substring feature-name (match-beginning 1))))
+ )
+ ((string-match "^==\\([^=>]+\\)" feature-name)
+ (concat "g2."
+ (est-uri-encode-feature-name-body
+ (substring feature-name (match-beginning 1))))
+ )
+ ((string-match "^===\\([^=>]+\\)" feature-name)
+ (concat "repi."
+ (est-uri-encode-feature-name-body
+ (substring feature-name (match-beginning 1))))
+ )
+ ((string-match "^=>>\\([^=>]+\\)" feature-name)
+ (concat "g."
+ (est-uri-encode-feature-name-body
+ (substring feature-name (match-beginning 1))))
+ )
+ ((string-match "^=>>>\\([^=>]+\\)" feature-name)
+ (concat "gi."
+ (est-uri-encode-feature-name-body
+ (substring feature-name (match-beginning 1))))
+ )
+ ((string-match "^=>>\\(>+\\)" feature-name)
+ (format "gi%d.%s"
+ (length (match-string 1 feature-name))
+ (est-uri-encode-feature-name-body
+ (substring feature-name (match-end 1))))
+ )
+ ((string-match "^=>\\([^=>]+\\)" feature-name)
+ (concat "a."
+ (est-uri-encode-feature-name-body
+ (substring feature-name (match-beginning 1))))
+ )
+ ((string-match "^\\(=+\\)>" feature-name)
+ (format "a%d.%s"
+ (length (match-string 1 feature-name))
+ (est-uri-encode-feature-name-body
+ (substring feature-name (match-end 0))))
+ )
+ ((string-match "^->" feature-name)
+ (concat "to."
+ (est-uri-encode-feature-name-body
+ (substring feature-name (match-end 0))))
+ )
+ ((string-match "^<-" feature-name)
+ (concat "from."
+ (est-uri-encode-feature-name-body
+ (substring feature-name (match-end 0))))
+ )
+ (t (est-uri-encode-feature-name-body feature-name))))
+
+
+(defvar chise-turtle-ccs-prefix-alist nil)
+
+(defun chise-turtle-uri-decode-feature-name (uri-feature)
+ (cond ((string= "a.ucs" uri-feature)
+ '=ucs)
+ ((string= "a.big5" uri-feature)
+ '=big5)
+ (t
+ (www-uri-decode-feature-name uri-feature))))
+
+(defun chise-turtle-uri-encode-ccs-name (feature-name)
+ (cond
+ ((eq '=ucs feature-name)
+ "a.ucs")
+ ((eq '=big5 feature-name)
+ "a.big5")
+ ((eq '==>ucs@bucs feature-name)
+ "bucs")
+ (t
+ (mapconcat (lambda (c)
+ (cond
+ ((eq c ?@)
+ "_")
+ ((eq c ?+)
+ "._.")
+ ((eq c ?=)
+ ".:.")
+ ((eq c ?|)
+ "._cmp_.")
+ (t
+ (char-to-string c))))
+ (www-uri-encode-feature-name feature-name)
+ ""))))
+
+(defun charset-code-point-format-spec (ccs)
+ (cond ((memq ccs '(=ucs))
+ "0x%04X")
+ (t
+ (let ((ccs-name (symbol-name ccs)))
+ (cond
+ ((string-match
+ "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)"
+ ccs-name)
+ "%04d")
+ ((string-match
+ "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)"
+ ccs-name)
+ "%05d")
+ ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name)
+ "%06d")
+ ((string-match "hanyo-denshi/tk" ccs-name)
+ "%08d")
+ (t
+ "0x%X"))))))
+
+(defun chise-turtle-format-ccs-code-point (ccs code-point)
+ (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs)))
+ (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist)
+ (setq chise-turtle-ccs-prefix-alist
+ (cons (cons ccs-uri ccs)
+ chise-turtle-ccs-prefix-alist)))
+ (format "%s:%s"
+ ccs-uri
+ (format (charset-code-point-format-spec ccs)
+ code-point))))
+
+(defun chise-turtle-encode-char (object)
+ (let (spec cell dest
+ ccs ret ret2)
+ (if (setq ret (encode-char object '=ucs))
+ (chise-turtle-format-ccs-code-point '=ucs ret)
+ (setq spec (char-attribute-alist object))
+ (while (and spec
+ (setq cell (pop spec)))
+ (if (and (find-charset (car cell))
+ (setq ret (cdr cell)))
+ (setq dest (cons cell dest))))
+ (setq ret (car (sort dest (lambda (a b)
+ (char-attribute-name< (car a)(car b)))))
+ ccs (car ret)
+ ret (cdr ret))
+ (cond (ret
+ (chise-turtle-format-ccs-code-point ccs ret)
+ )
+ ((and (setq ccs (car (split-char object)))
+ (setq ret (encode-char object ccs)))
+ (chise-turtle-format-ccs-code-point ccs ret)
+ )
+ ((setq ret (get-char-attribute object 'ideographic-combination))
+ (format "ideocomb:%s"
+ (mapconcat (lambda (cell)
+ (cond ((characterp cell)
+ (char-to-string cell)
+ )
+ ((setq ret2 (find-char cell))
+ (char-to-string ret2)
+ )
+ (t
+ (format "%S" cell)
+ )))
+ ret ""))
+ )
+ (t
+ (format "system-char-id:0x%X"
+ (encode-char object 'system-char-id))
+ )))))
+
+
+;;; @ end
+;;;
+
+(provide 'chiset-common)
+
+;;; chiset-common.el ends here