From f942183448ed252c573819f675b16f7e6fde3b91 Mon Sep 17 00:00:00 2001 From: MORIOKA Tomohiko Date: Sat, 15 Dec 2018 17:18:27 +0900 Subject: [PATCH] New file. --- chiset-common.el | 340 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 340 insertions(+) create mode 100644 chiset-common.el diff --git a/chiset-common.el b/chiset-common.el new file mode 100644 index 0000000..ee1ee8b --- /dev/null +++ b/chiset-common.el @@ -0,0 +1,340 @@ +;;; chiset-common.el --- CHISET common utility -*- coding: utf-8-er; -*- + +;; Copyright (C) 2010,2011,2012,2013,2014,2015,2016,2017,2018 MORIOKA Tomohiko. + +;; Author: MORIOKA Tomohiko +;; Keywords: CHISE, RDF, Turtle, WWW + +;; This file is part of CHISET (CHISE/Turtle). + +;; XEmacs CHISE is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2, or (at +;; your option) any later version. + +;; XEmacs CHISE is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with XEmacs CHISE; see the file COPYING. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Code: + +(defun decode-uri-string (string &optional coding-system) + (if (> (length string) 0) + (let ((i 0) + dest) + (setq string + (mapconcat (lambda (char) + (if (eq char ?+) + " " + (char-to-string char))) + string "")) + (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i) + (setq dest (concat dest + (substring string i (match-beginning 0)) + (char-to-string + (int-char + (string-to-int (match-string 1 string) 16)))) + i (match-end 0))) + (decode-coding-string + (concat dest (substring string i)) + coding-system)))) + + +;;; @ URI representation +;;; + +(defun est-uri-decode-feature-name-body (uri-feature) + (let ((len (length uri-feature)) + (i 0) + ch dest) + (while (< i len) + (setq dest + (concat + dest + (if (eq (aref uri-feature i) ?\.) + (if (and (< (+ i 2) len) + (eq (aref uri-feature (+ i 2)) ?\.)) + (prog1 + (cond + ((eq (setq ch (aref uri-feature (1+ i))) ?\.) + "/") + ((eq ch ?-) + "*") + (t + (substring uri-feature i (+ i 3)) + )) + (setq i (+ i 3))) + (setq i (1+ i)) + ".") + (prog1 + (char-to-string (aref uri-feature i)) + (setq i (1+ i))))))) + dest)) + +(defun est-uri-encode-feature-name-body (feature) + (mapconcat (lambda (c) + (cond ((eq c ?*) + ".-.") + ((eq c ?/) + "...") + (t (char-to-string c)))) + feature "")) + +(defun www-uri-decode-feature-name (uri-feature) + (let (feature) + (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er)) + (cond + ((string-match "^from\\." uri-feature) + (intern (format "<-%s" + (est-uri-decode-feature-name-body + (substring uri-feature (match-end 0))))) + ) + ((string-match "^to\\." uri-feature) + (intern (format "->%s" + (est-uri-decode-feature-name-body + (substring uri-feature (match-end 0))))) + ) + ((string-match "^rep\\." uri-feature) + (intern (format "=%s" + (est-uri-decode-feature-name-body + (substring uri-feature (match-end 0))))) + ) + ((string-match "^rep[2i]\\." uri-feature) + (intern (format "===%s" + (est-uri-decode-feature-name-body + (substring uri-feature (match-end 0))))) + ) + ((string-match "^g\\." uri-feature) + (intern (format "=>>%s" + (est-uri-decode-feature-name-body + (substring uri-feature (match-end 0))))) + ) + ((string-match "^g[i2]\\." uri-feature) + (intern (format "==%s" + (est-uri-decode-feature-name-body + (substring uri-feature (match-end 0))))) + ) + ((string-match "^gi\\([0-9]+\\)\\." uri-feature) + (intern (format "=>>%s%s" + (make-string (string-to-int + (match-string 1 uri-feature)) + ?>) + (est-uri-decode-feature-name-body + (substring uri-feature (match-end 0))))) + ) + ((string-match "^o\\." uri-feature) + (intern (format "=+>%s" + (est-uri-decode-feature-name-body + (substring uri-feature (match-end 0))))) + ) + ((string-match "^a\\." uri-feature) + (intern (format "=>%s" + (est-uri-decode-feature-name-body + (substring uri-feature (match-end 0))))) + ) + ((string-match "^a\\([0-9]+\\)\\." uri-feature) + (intern (format "%s>%s" + (make-string (string-to-int + (match-string 1 uri-feature)) + ?=) + (est-uri-decode-feature-name-body + (substring uri-feature (match-end 0))))) + ) + ((and (setq uri-feature (est-uri-decode-feature-name-body uri-feature)) + (setq feature (intern (format "=>%s" uri-feature))) + (find-charset feature)) + feature) + ((and (setq feature (intern (format "=>>%s" uri-feature))) + (find-charset feature)) + feature) + ((and (setq feature (intern (format "=>>>%s" uri-feature))) + (find-charset feature)) + feature) + ((and (setq feature (intern (format "=%s" uri-feature))) + (find-charset feature)) + feature) + (t (intern uri-feature))))) + +(defun www-uri-encode-feature-name (feature-name) + (setq feature-name (symbol-name feature-name)) + (cond + ((string-match "^=\\+>\\([^=>]+\\)" feature-name) + (concat "o." + (est-uri-encode-feature-name-body + (substring feature-name (match-beginning 1)))) + ) + ((string-match "^=\\([^=>]+\\)" feature-name) + (concat "rep." + (est-uri-encode-feature-name-body + (substring feature-name (match-beginning 1)))) + ) + ((string-match "^==\\([^=>]+\\)" feature-name) + (concat "g2." + (est-uri-encode-feature-name-body + (substring feature-name (match-beginning 1)))) + ) + ((string-match "^===\\([^=>]+\\)" feature-name) + (concat "repi." + (est-uri-encode-feature-name-body + (substring feature-name (match-beginning 1)))) + ) + ((string-match "^=>>\\([^=>]+\\)" feature-name) + (concat "g." + (est-uri-encode-feature-name-body + (substring feature-name (match-beginning 1)))) + ) + ((string-match "^=>>>\\([^=>]+\\)" feature-name) + (concat "gi." + (est-uri-encode-feature-name-body + (substring feature-name (match-beginning 1)))) + ) + ((string-match "^=>>\\(>+\\)" feature-name) + (format "gi%d.%s" + (length (match-string 1 feature-name)) + (est-uri-encode-feature-name-body + (substring feature-name (match-end 1)))) + ) + ((string-match "^=>\\([^=>]+\\)" feature-name) + (concat "a." + (est-uri-encode-feature-name-body + (substring feature-name (match-beginning 1)))) + ) + ((string-match "^\\(=+\\)>" feature-name) + (format "a%d.%s" + (length (match-string 1 feature-name)) + (est-uri-encode-feature-name-body + (substring feature-name (match-end 0)))) + ) + ((string-match "^->" feature-name) + (concat "to." + (est-uri-encode-feature-name-body + (substring feature-name (match-end 0)))) + ) + ((string-match "^<-" feature-name) + (concat "from." + (est-uri-encode-feature-name-body + (substring feature-name (match-end 0)))) + ) + (t (est-uri-encode-feature-name-body feature-name)))) + + +(defvar chise-turtle-ccs-prefix-alist nil) + +(defun chise-turtle-uri-decode-feature-name (uri-feature) + (cond ((string= "a.ucs" uri-feature) + '=ucs) + ((string= "a.big5" uri-feature) + '=big5) + (t + (www-uri-decode-feature-name uri-feature)))) + +(defun chise-turtle-uri-encode-ccs-name (feature-name) + (cond + ((eq '=ucs feature-name) + "a.ucs") + ((eq '=big5 feature-name) + "a.big5") + ((eq '==>ucs@bucs feature-name) + "bucs") + (t + (mapconcat (lambda (c) + (cond + ((eq c ?@) + "_") + ((eq c ?+) + "._.") + ((eq c ?=) + ".:.") + ((eq c ?|) + "._cmp_.") + (t + (char-to-string c)))) + (www-uri-encode-feature-name feature-name) + "")))) + +(defun charset-code-point-format-spec (ccs) + (cond ((memq ccs '(=ucs)) + "0x%04X") + (t + (let ((ccs-name (symbol-name ccs))) + (cond + ((string-match + "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)" + ccs-name) + "%04d") + ((string-match + "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)" + ccs-name) + "%05d") + ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name) + "%06d") + ((string-match "hanyo-denshi/tk" ccs-name) + "%08d") + (t + "0x%X")))))) + +(defun chise-turtle-format-ccs-code-point (ccs code-point) + (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs))) + (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist) + (setq chise-turtle-ccs-prefix-alist + (cons (cons ccs-uri ccs) + chise-turtle-ccs-prefix-alist))) + (format "%s:%s" + ccs-uri + (format (charset-code-point-format-spec ccs) + code-point)))) + +(defun chise-turtle-encode-char (object) + (let (spec cell dest + ccs ret ret2) + (if (setq ret (encode-char object '=ucs)) + (chise-turtle-format-ccs-code-point '=ucs ret) + (setq spec (char-attribute-alist object)) + (while (and spec + (setq cell (pop spec))) + (if (and (find-charset (car cell)) + (setq ret (cdr cell))) + (setq dest (cons cell dest)))) + (setq ret (car (sort dest (lambda (a b) + (char-attribute-name< (car a)(car b))))) + ccs (car ret) + ret (cdr ret)) + (cond (ret + (chise-turtle-format-ccs-code-point ccs ret) + ) + ((and (setq ccs (car (split-char object))) + (setq ret (encode-char object ccs))) + (chise-turtle-format-ccs-code-point ccs ret) + ) + ((setq ret (get-char-attribute object 'ideographic-combination)) + (format "ideocomb:%s" + (mapconcat (lambda (cell) + (cond ((characterp cell) + (char-to-string cell) + ) + ((setq ret2 (find-char cell)) + (char-to-string ret2) + ) + (t + (format "%S" cell) + ))) + ret "")) + ) + (t + (format "system-char-id:0x%X" + (encode-char object 'system-char-id)) + ))))) + + +;;; @ end +;;; + +(provide 'chiset-common) + +;;; chiset-common.el ends here -- 1.7.10.4