;;; chiset-common.el --- CHISET common utility -*- coding: utf-8-er; -*- ;; Copyright (C) 2010,2011,2012,2013,2014,2015,2016,2017,2018 MORIOKA Tomohiko. ;; Author: MORIOKA Tomohiko ;; Keywords: CHISE, RDF, Turtle, WWW ;; This file is part of CHISET (CHISE/Turtle). ;; XEmacs CHISE is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as ;; published by the Free Software Foundation; either version 2, or (at ;; your option) any later version. ;; XEmacs CHISE is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; You should have received a copy of the GNU General Public License ;; along with XEmacs CHISE; see the file COPYING. If not, write to ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, ;; Boston, MA 02111-1307, USA. ;;; Code: (defun decode-uri-string (string &optional coding-system) (if (> (length string) 0) (let ((i 0) dest) (setq string (mapconcat (lambda (char) (if (eq char ?+) " " (char-to-string char))) string "")) (while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i) (setq dest (concat dest (substring string i (match-beginning 0)) (char-to-string (int-char (string-to-int (match-string 1 string) 16)))) i (match-end 0))) (decode-coding-string (concat dest (substring string i)) coding-system)))) ;;; @ URI representation ;;; (defun est-uri-decode-feature-name-body (uri-feature) (let ((len (length uri-feature)) (i 0) ch dest) (while (< i len) (setq dest (concat dest (if (eq (aref uri-feature i) ?\.) (if (and (< (+ i 2) len) (eq (aref uri-feature (+ i 2)) ?\.)) (prog1 (cond ((eq (setq ch (aref uri-feature (1+ i))) ?\.) "/") ((eq ch ?-) "*") (t (substring uri-feature i (+ i 3)) )) (setq i (+ i 3))) (setq i (1+ i)) ".") (prog1 (char-to-string (aref uri-feature i)) (setq i (1+ i))))))) dest)) (defun est-uri-encode-feature-name-body (feature) (mapconcat (lambda (c) (cond ((eq c ?*) ".-.") ((eq c ?/) "...") (t (char-to-string c)))) feature "")) (defun www-uri-decode-feature-name (uri-feature) (let (feature) (setq uri-feature (decode-uri-string uri-feature 'utf-8-mcs-er)) (cond ((string-match "^from\\." uri-feature) (intern (format "<-%s" (est-uri-decode-feature-name-body (substring uri-feature (match-end 0))))) ) ((string-match "^to\\." uri-feature) (intern (format "->%s" (est-uri-decode-feature-name-body (substring uri-feature (match-end 0))))) ) ((string-match "^rep\\." uri-feature) (intern (format "=%s" (est-uri-decode-feature-name-body (substring uri-feature (match-end 0))))) ) ((string-match "^rep[2i]\\." uri-feature) (intern (format "===%s" (est-uri-decode-feature-name-body (substring uri-feature (match-end 0))))) ) ((string-match "^g\\." uri-feature) (intern (format "=>>%s" (est-uri-decode-feature-name-body (substring uri-feature (match-end 0))))) ) ((string-match "^g[i2]\\." uri-feature) (intern (format "==%s" (est-uri-decode-feature-name-body (substring uri-feature (match-end 0))))) ) ((string-match "^gi\\([0-9]+\\)\\." uri-feature) (intern (format "=>>%s%s" (make-string (string-to-int (match-string 1 uri-feature)) ?>) (est-uri-decode-feature-name-body (substring uri-feature (match-end 0))))) ) ((string-match "^o\\." uri-feature) (intern (format "=+>%s" (est-uri-decode-feature-name-body (substring uri-feature (match-end 0))))) ) ((string-match "^a\\." uri-feature) (intern (format "=>%s" (est-uri-decode-feature-name-body (substring uri-feature (match-end 0))))) ) ((string-match "^a\\([0-9]+\\)\\." uri-feature) (intern (format "%s>%s" (make-string (string-to-int (match-string 1 uri-feature)) ?=) (est-uri-decode-feature-name-body (substring uri-feature (match-end 0))))) ) ((and (setq uri-feature (est-uri-decode-feature-name-body uri-feature)) (setq feature (intern (format "=>%s" uri-feature))) (find-charset feature)) feature) ((and (setq feature (intern (format "=>>%s" uri-feature))) (find-charset feature)) feature) ((and (setq feature (intern (format "=>>>%s" uri-feature))) (find-charset feature)) feature) ((and (setq feature (intern (format "=%s" uri-feature))) (find-charset feature)) feature) (t (intern uri-feature))))) (defun www-uri-encode-feature-name (feature-name) (setq feature-name (symbol-name feature-name)) (cond ((string-match "^=\\+>\\([^=>]+\\)" feature-name) (concat "o." (est-uri-encode-feature-name-body (substring feature-name (match-beginning 1)))) ) ((string-match "^=\\([^=>]+\\)" feature-name) (concat "rep." (est-uri-encode-feature-name-body (substring feature-name (match-beginning 1)))) ) ((string-match "^==\\([^=>]+\\)" feature-name) (concat "g2." (est-uri-encode-feature-name-body (substring feature-name (match-beginning 1)))) ) ((string-match "^===\\([^=>]+\\)" feature-name) (concat "repi." (est-uri-encode-feature-name-body (substring feature-name (match-beginning 1)))) ) ((string-match "^=>>\\([^=>]+\\)" feature-name) (concat "g." (est-uri-encode-feature-name-body (substring feature-name (match-beginning 1)))) ) ((string-match "^=>>>\\([^=>]+\\)" feature-name) (concat "gi." (est-uri-encode-feature-name-body (substring feature-name (match-beginning 1)))) ) ((string-match "^=>>\\(>+\\)" feature-name) (format "gi%d.%s" (length (match-string 1 feature-name)) (est-uri-encode-feature-name-body (substring feature-name (match-end 1)))) ) ((string-match "^=>\\([^=>]+\\)" feature-name) (concat "a." (est-uri-encode-feature-name-body (substring feature-name (match-beginning 1)))) ) ((string-match "^\\(=+\\)>" feature-name) (format "a%d.%s" (length (match-string 1 feature-name)) (est-uri-encode-feature-name-body (substring feature-name (match-end 0)))) ) ((string-match "^->" feature-name) (concat "to." (est-uri-encode-feature-name-body (substring feature-name (match-end 0)))) ) ((string-match "^<-" feature-name) (concat "from." (est-uri-encode-feature-name-body (substring feature-name (match-end 0)))) ) (t (est-uri-encode-feature-name-body feature-name)))) (defvar chise-turtle-ccs-prefix-alist nil) (defun chise-turtle-uri-decode-feature-name (uri-feature) (cond ((string= "a.ucs" uri-feature) '=ucs) ((string= "a.big5" uri-feature) '=big5) (t (www-uri-decode-feature-name uri-feature)))) (defun chise-turtle-uri-encode-ccs-name (feature-name) (cond ((eq '=ucs feature-name) "a.ucs") ((eq '=big5 feature-name) "a.big5") ((eq '==>ucs@bucs feature-name) "bucs") (t (mapconcat (lambda (c) (cond ((eq c ?@) "_") ((eq c ?+) "._.") ((eq c ?=) ".:.") ((eq c ?|) "._cmp_.") (t (char-to-string c)))) (www-uri-encode-feature-name feature-name) "")))) (defun charset-code-point-format-spec (ccs) (cond ((memq ccs '(=ucs)) "0x%04X") (t (let ((ccs-name (symbol-name ccs))) (cond ((string-match "\\(shinjigen\\|daikanwa/ho\\|=>iwds-1\\)" ccs-name) "%04d") ((string-match "\\(gt\\|daikanwa\\|adobe-japan1\\|cbeta\\|zinbun-oracle\\|hng\\)" ccs-name) "%05d") ((string-match "\\(hanyo-denshi/ks\\|koseki\\|mj\\)" ccs-name) "%06d") ((string-match "hanyo-denshi/tk" ccs-name) "%08d") (t "0x%X")))))) (defun chise-turtle-format-ccs-code-point (ccs code-point) (let ((ccs-uri (chise-turtle-uri-encode-ccs-name ccs))) (unless (assoc ccs-uri chise-turtle-ccs-prefix-alist) (setq chise-turtle-ccs-prefix-alist (cons (cons ccs-uri ccs) chise-turtle-ccs-prefix-alist))) (format "%s:%s" ccs-uri (format (charset-code-point-format-spec ccs) code-point)))) (defun chise-turtle-encode-char (object) (let (spec cell dest ccs ret ret2) (if (setq ret (encode-char object '=ucs)) (chise-turtle-format-ccs-code-point '=ucs ret) (setq spec (char-attribute-alist object)) (while (and spec (setq cell (pop spec))) (if (and (find-charset (car cell)) (setq ret (cdr cell))) (setq dest (cons cell dest)))) (setq ret (car (sort dest (lambda (a b) (char-attribute-name< (car a)(car b))))) ccs (car ret) ret (cdr ret)) (cond (ret (chise-turtle-format-ccs-code-point ccs ret) ) ((and (setq ccs (car (split-char object))) (setq ret (encode-char object ccs))) (chise-turtle-format-ccs-code-point ccs ret) ) ((setq ret (get-char-attribute object 'ideographic-combination)) (format "ideocomb:%s" (mapconcat (lambda (cell) (cond ((characterp cell) (char-to-string cell) ) ((setq ret2 (find-char cell)) (char-to-string ret2) ) (t (format "%S" cell) ))) ret "")) ) (t (format "system-char-id:0x%X" (encode-char object 'system-char-id)) ))))) ;;; @ end ;;; (provide 'chiset-common) ;;; chiset-common.el ends here