;; -*- coding: utf-8-mcs-er -*-
(require 'char-db-util)
(defvar chise-wiki-view-url "view.cgi")
(defvar chise-wiki-edit-url "edit/edit.cgi")
(defvar chise-wiki-bitmap-glyphs-url
"http://chise.zinbun.kyoto-u.ac.jp/glyphs")
(defvar chise-wiki-glyph-cgi-url
"http://chise.zinbun.kyoto-u.ac.jp/chisewiki/glyph.cgi")
(defun decode-uri-string (string &optional coding-system)
(if (> (length string) 0)
(let ((i 0)
dest)
(setq string
(mapconcat (lambda (char)
(if (eq char ?+)
" "
(char-to-string char)))
string ""))
(while (string-match "%\\([0-9A-F][0-9A-F]\\)" string i)
(setq dest (concat dest
(substring string i (match-beginning 0))
(char-to-string
(int-char
(string-to-int (match-string 1 string) 16))))
i (match-end 0)))
(decode-coding-string
(concat dest (substring string i))
coding-system))))
(defun www-feature-type (feature-name)
(or (char-feature-property feature-name 'type)
(let ((str (symbol-name feature-name)))
(cond
((string-match "^\\(->\\|<-\\)" str)
'relation)
((string-match "^ideographic-structure\\(@\\|$\\)" str)
'structure)
))))
(defun www-feature-value-format (feature-name)
(or (char-feature-property feature-name 'value-format)
(let ((type (www-feature-type feature-name)))
(cond ((eq type 'relation)
'space-separated-char-list)
((eq type 'structure)
'space-separated-ids)))
(if (find-charset feature-name)
(if (and (= (charset-dimension feature-name) 2)
(= (charset-chars feature-name) 94))
'("0x" (HEX)
" (" (decimal) ") <" (ku-ten) ">")
'("0x" (HEX) " (" (decimal) ")")))))
(defun char-feature-name-at-domain (feature-name domain)
(let ((name (symbol-name feature-name)))
(cond
((string-match "@[^*]+$" name)
(intern (format "%s/%s" name domain))
)
(t
(intern (format "%s@%s" name domain))
))))
(defun www-char-feature (character feature)
(let ((latest-feature (char-feature-name-at-domain feature '$rev=latest)))
(mount-char-attribute-table latest-feature)
(or (char-feature character latest-feature)
(char-feature character feature))))
;;; @ URI representation
;;;
(defun www-uri-decode-feature-name (uri-feature)
(let (feature)
(cond
((string-match "^from\\." uri-feature)
(intern (format "<-%s" (substring uri-feature (match-end 0))))
)
((string-match "^to\\." uri-feature)
(intern (format "->%s" (substring uri-feature (match-end 0))))
)
((string-match "^rep\\." uri-feature)
(intern (format "=%s" (substring uri-feature (match-end 0))))
)
((string-match "^g\\." uri-feature)
(intern (format "=>>%s" (substring uri-feature (match-end 0))))
)
((string-match "^gi\\." uri-feature)
(intern (format "=>>>%s" (substring uri-feature (match-end 0))))
)
((string-match "^gi\\([0-9]+\\)\\." uri-feature)
(intern (format "=>>%s%s"
(make-string (string-to-int
(match-string 1 uri-feature))
?>)
(substring uri-feature (match-end 0))))
)
((string-match "^a\\." uri-feature)
(intern (format "=>%s" (substring uri-feature (match-end 0))))
)
((string-match "^a\\([0-9]+\\)\\." uri-feature)
(intern (format "%s>%s"
(make-string (string-to-int
(match-string 1 uri-feature))
?=)
(substring uri-feature (match-end 0))))
)
((and (setq feature (intern (format "=>%s" uri-feature)))
(find-charset feature))
feature)
((and (setq feature (intern (format "=>>%s" uri-feature)))
(find-charset feature))
feature)
((and (setq feature (intern (format "=>>>%s" uri-feature)))
(find-charset feature))
feature)
((and (setq feature (intern (format "=%s" uri-feature)))
(find-charset feature))
feature)
(t (intern uri-feature)))))
(defun www-uri-encode-feature-name (feature-name)
(setq feature-name (symbol-name feature-name))
(cond
((string-match "^=\\([^=>]+\\)" feature-name)
(concat "rep." (substring feature-name (match-beginning 1)))
)
((string-match "^=>>\\([^=>]+\\)" feature-name)
(concat "g." (substring feature-name (match-beginning 1)))
)
((string-match "^=>>>\\([^=>]+\\)" feature-name)
(concat "gi." (substring feature-name (match-beginning 1)))
)
((string-match "^=>>\\(>+\\)" feature-name)
(format "gi%d.%s"
(length (match-string 1 feature-name))
(substring feature-name (match-end 1)))
)
((string-match "^=>\\([^=>]+\\)" feature-name)
(concat "a." (substring feature-name (match-beginning 1)))
)
((string-match "^\\(=+\\)>" feature-name)
(format "a%d.%s"
(length (match-string 1 feature-name))
(substring feature-name (match-end 0)))
)
((string-match "^->" feature-name)
(concat "to." (substring feature-name (match-end 0)))
)
((string-match "^<-" feature-name)
(concat "from." (substring feature-name (match-end 0)))
)
(t feature-name)))
(defun www-uri-decode-char (char-rep)
(let (ccs cpos)
(cond
((string-match "\\(%3A\\|:\\)" char-rep)
(setq ccs (substring char-rep 0 (match-beginning 0))
cpos (substring char-rep (match-end 0)))
(setq ccs (www-uri-decode-feature-name ccs))
(cond
((string-match "^0x" cpos)
(setq cpos
(string-to-number (substring cpos (match-end 0)) 16))
)
(t
(setq cpos (string-to-number cpos))
))
(if (numberp cpos)
(decode-char ccs cpos))
)
(t
(setq char-rep (decode-uri-string char-rep 'utf-8-mcs-er))
(when (= (length char-rep) 1)
(aref char-rep 0))
))))
(defun www-uri-encode-char (char)
(if (encode-char char '=ucs)
(mapconcat
(lambda (byte)
(format "%%%02X" byte))
(encode-coding-string (char-to-string char) 'utf-8-mcs-er)
"")
(let ((ccs-list '(; =ucs
=cns11643-1 =cns11643-2 =cns11643-3
=cns11643-4 =cns11643-5 =cns11643-6 =cns11643-7
=gb2312 =gb12345
=jis-x0208 =jis-x0208@1990
=jis-x0212
=cbeta =jef-china3
=jis-x0213-1@2000 =jis-x0213-1@2004
=jis-x0208@1983 =jis-x0208@1978
=zinbun-oracle
=daikanwa
=gt =gt-k
=>>jis-x0208 =>>jis-x0213-1
=>jis-x0208 =>jis-x0213-1
=>>gt
=big5
=big5-cdp))
ccs ret)
(while (and ccs-list
(setq ccs (pop ccs-list))
(not (setq ret (encode-char char ccs 'defined-only)))))
(cond (ret
(format "%s:0x%X"
(www-uri-encode-feature-name ccs)
ret))
((and (setq ccs (car (split-char char)))
(setq ret (encode-char char ccs)))
(format "%s:0x%X"
(www-uri-encode-feature-name ccs)
ret))
(t
(format "system-char-id:0x%X"
(encode-char char 'system-char-id))
)))))
;;; @ Feature name presentation
;;;
(defun www-format-feature-name-default (feature-name)
(mapconcat
#'capitalize
(split-string
(symbol-name feature-name)
"-")
" "))
(defun www-format-feature-name-as-rel-to (feature-name)
(concat "\u2192" (substring (symbol-name feature-name) 2)))
(defun www-format-feature-name-as-rel-from (feature-name)
(concat "\u2190" (substring (symbol-name feature-name) 2)))
(defun www-format-feature-name-as-CCS (feature-name)
(let* ((rest
(split-string
(symbol-name feature-name)
"-"))
(dest (upcase (pop rest))))
(when (string-match "^=+>*" dest)
(setq dest (concat (substring dest 0 (match-end 0))
" "
(substring dest (match-end 0)))))
(cond
(rest
(while (cdr rest)
(setq dest (concat dest " " (upcase (pop rest)))))
(if (string-match "^[0-9]+$" (car rest))
(concat dest "-" (car rest))
(concat dest " " (upcase (car rest))))
)
(t dest))))
(defun www-format-feature-name* (feature-name &optional lang)
(let (name)
(cond
((or (and lang
(char-feature-property
feature-name
(intern (format "name@%s" lang))))
(char-feature-property
feature-name 'name)))
((find-charset feature-name)
(www-format-feature-name-as-CCS feature-name))
((and (setq name (symbol-name feature-name))
(string-match "^\\(->\\)" name))
(www-format-feature-name-as-rel-to feature-name))
((string-match "^\\(<-\\)" name)
(www-format-feature-name-as-rel-from feature-name))
(t
(www-format-feature-name-default feature-name)))))
(defun www-format-feature-name (feature-name &optional lang)
(www-format-encode-string
(www-format-feature-name* feature-name lang)))
;;; @ Feature value presentation
;;;
(defun www-format-value-as-kuten (value)
(format "%02d-%02d"
(- (lsh value -8) 32)
(- (logand value 255) 32)))
(defun www-format-value-as-char-list (value &optional without-tags)
(if (listp value)
(mapconcat
(if without-tags
(lambda (unit)
(www-format-encode-string
(format (if (characterp unit)
"%c"
"%s")
unit)
'without-tags))
(lambda (unit)
(if (characterp unit)
(format "%s"
chise-wiki-view-url
(www-uri-encode-char unit)
(www-format-encode-string (char-to-string unit)))
(www-format-encode-string (format "%s" unit)))))
value " ")
(www-format-encode-string (format "%s" value) without-tags)))
(defun www-format-value-as-ids (value &optional without-tags)
(if (listp value)
(mapconcat
(if without-tags
(lambda (unit)
(www-format-encode-string
(format (if (characterp unit)
"%c"
"%s")
unit)
'without-tags))
(lambda (unit)
(if (characterp unit)
(format "%s"
chise-wiki-view-url
(www-uri-encode-char unit)
(www-format-encode-string (char-to-string unit)))
(www-format-encode-string (format "%s" unit)))))
(ideographic-structure-to-ids value) " ")
(www-format-encode-string (format "%s" value) without-tags)))
(defun www-format-value-as-S-exp (value &optional without-tags)
(www-format-encode-string (format "%S" value) without-tags))
(defun www-format-value-as-HEX (value)
(if (integerp value)
(format "%X" value)
(www-format-value-as-S-exp value)))
(defun www-format-value-as-CCS-default (value)
(if (integerp value)
(format "0x%s (%d)"
(www-format-value-as-HEX value)
value)
(www-format-value-as-S-exp value)))
(defun www-format-value-as-CCS-94x94 (value)
(if (integerp value)
(format "0x%s [%s] (%d)"
(www-format-value-as-HEX value)
(www-format-value-as-kuten value)
value)
(www-format-value-as-S-exp value)))
(defun www-format-value (value &optional feature-name format without-tags)
;; (cond
;; ((find-charset feature-name)
;; (cond
;; ((and (= (charset-chars feature-name) 94)
;; (= (charset-dimension feature-name) 2))
;; (www-format-value-as-CCS-94x94 value))
;; (t
;; (www-format-value-as-CCS-default value)))
;; )
;; (t
;; (www-format-value-as-S-exp value)))
(www-format-apply-value format nil value nil nil without-tags)
)
;;; @ format evaluator
;;;
(defun www-format-encode-string (string &optional without-tags)
(with-temp-buffer
(insert string)
(let (plane code start end char variants ret)
(goto-char (point-min))
(while (search-forward "<" nil t)
(replace-match "<" nil t))
(goto-char (point-min))
(while (search-forward ">" nil t)
(replace-match ">" nil t))
(if without-tags
(encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
(let ((coded-charset-entity-reference-alist
(list*
'(=cns11643-1 "C1-" 4 X)
'(=cns11643-2 "C2-" 4 X)
'(=cns11643-3 "C3-" 4 X)
'(=cns11643-4 "C4-" 4 X)
'(=cns11643-5 "C5-" 4 X)
'(=cns11643-6 "C6-" 4 X)
'(=cns11643-7 "C7-" 4 X)
'(=gb2312 "G0-" 4 X)
'(=gb12345 "G1-" 4 X)
'(=jis-x0208@1990 "J90-" 4 X)
'(=jis-x0212 "JSP-" 4 X)
'(=cbeta "CB" 5 d)
'(=jis-x0208@1997 "J97-" 4 X)
'(=jis-x0208@1978 "J78-" 4 X)
'(=jis-x0208@1983 "J83-" 4 X)
'(=gt "GT-" 5 d)
'(=zinbun-oracle "ZOB-" 4 d)
'(=jef-china3 "JC3-" 4 X)
'(=daikanwa "M-" 5 d)
coded-charset-entity-reference-alist)))
(encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
(goto-char (point-min))
(while (re-search-forward "&CB\\([0-9]+\\);" nil t)
(setq code (string-to-int (match-string 1)))
(replace-match
(format ""
code
chise-wiki-bitmap-glyphs-url
(/ code 1000) code)
t 'literal))
(goto-char (point-min))
(while (re-search-forward "&J\\(78\\|83\\|90\\|97\\|SP\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
(setq plane (match-string 1)
code (string-to-int (match-string 2) 16))
(replace-match
(format "
"
plane code
chise-wiki-bitmap-glyphs-url
plane
(- (lsh code -8) 32)
(- (logand code 255) 32))
t 'literal))
(goto-char (point-min))
(while (re-search-forward "&G\\([01]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
(setq plane (string-to-int (match-string 1))
code (string-to-int (match-string 2) 16))
(replace-match
(format "
"
plane code
chise-wiki-bitmap-glyphs-url
plane
(- (lsh code -8) 32)
(- (logand code 255) 32))
t 'literal))
(goto-char (point-min))
(while (re-search-forward "&C\\([1-7]\\)-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
(setq plane (string-to-int (match-string 1))
code (string-to-int (match-string 2) 16))
(replace-match
(format "
"
plane code
chise-wiki-bitmap-glyphs-url
plane code)
t 'literal))
(goto-char (point-min))
(while (re-search-forward "&JC3-\\([0-9A-F][0-9A-F][0-9A-F][0-9A-F]\\);" nil t)
(setq code (string-to-int (match-string 1) 16))
(replace-match
(format "
"
code code)
t 'literal))
(goto-char (point-min))
(while (re-search-forward "&ZOB-\\([0-9]+\\);" nil t)
(setq code (string-to-int (match-string 1)))
(replace-match
(format "
"
code
chise-wiki-bitmap-glyphs-url
code)
t 'literal))
(goto-char (point-min))
(while (re-search-forward "&\\(G-\\)?GT-\\([0-9]+\\);" nil t)
(setq code (string-to-int (match-string 2)))
(replace-match
(format "
"
code
chise-wiki-glyph-cgi-url
code)
t 'literal))
(goto-char (point-min))
(while (re-search-forward "&B-\\([0-9A-F]+\\);" nil t)
(setq code (string-to-int (match-string 1) 16))
(replace-match
(format "
"
code
chise-wiki-glyph-cgi-url
code)
t 'literal))
(goto-char (point-min))
(while (re-search-forward "&CDP-\\([0-9A-F]+\\);" nil t)
(setq code (string-to-int (match-string 1) 16))
(replace-match
(format "
"
code
chise-wiki-glyph-cgi-url
code)
t 'literal))
(goto-char (point-min))
(while (re-search-forward "&UU\\+\\([0-9A-F]+\\);" nil t)
(setq code (string-to-int (match-string 1) 16))
(replace-match
(format "
"
code
code)
t 'literal))
(goto-char (point-min))
(while (re-search-forward "&MCS-\\([0-9A-F]+\\);" nil t)
(setq code (string-to-int (match-string 1) 16))
(setq start (match-beginning 0)
end (match-end 0))
(setq char (decode-char 'system-char-id code))
(setq variants (or (www-char-feature char '->subsumptive)
(www-char-feature char '->denotational)))
(while (and variants
(setq ret (www-format-encode-string
(char-to-string (car variants))))
(string-match "&MCS-\\([0-9A-F]+\\);" ret))
(setq variants (cdr variants)))
(unless (string-match "&MCS-\\([0-9A-F]+\\);" ret)
(goto-char start)
(delete-region start end)
(insert ret)))
))
;; (goto-char (point-min))
;; (while (search-forward ">-" nil t)
;; (replace-match ">-" t 'literal))
(buffer-string))))
(defun www-format-props-to-string (props &optional format)
(unless format
(setq format (plist-get props :format)))
(concat "%"
(plist-get props :flag)
(if (plist-get props :zero-padding)
"0")
(if (plist-get props :len)
(format "%d" (plist-get props :len)))
(cond
((eq format 'decimal) "d")
((eq format 'hex) "x")
((eq format 'HEX) "X")
((eq format 'S-exp) "S")
(t "s"))))
(defun www-format-apply-value (format props value
&optional uri-char uri-feature
without-tags)
(let (ret)
(setq ret
(cond
((memq format '(decimal hex HEX))
(if (integerp value)
(format (www-format-props-to-string props format)
value)
(www-format-encode-string
(format "%s" value)
without-tags))
)
((eq format 'S-exp)
(www-format-encode-string
(format (www-format-props-to-string props format)
value)
without-tags))
((eq format 'ku-ten)
(www-format-value-as-kuten value))
((eq format 'space-separated-char-list)
(www-format-value-as-char-list value without-tags))
((eq format 'space-separated-ids)
(www-format-value-as-ids value without-tags))
(t
(setq format 'default)
(www-format-encode-string
(format (www-format-props-to-string props 'default)
value)
without-tags))))
(if (or without-tags (eq (plist-get props :mode) 'peek))
ret
(format "%s "
ret
chise-wiki-edit-url
uri-char uri-feature format))))
(defun www-format-eval-feature-value (char
feature-name
&optional format lang uri-char value)
(unless value
(setq value (www-char-feature char feature-name)))
(unless format
(setq format (www-feature-value-format feature-name)))
(cond
((symbolp format)
(www-format-apply-value
format nil value
uri-char (www-uri-encode-feature-name feature-name))
)
((consp format)
(cond ((null (cdr format))
(setq format (car format))
(www-format-apply-value
(car format) (nth 1 format) value
uri-char (www-uri-encode-feature-name feature-name))
)
(t
(www-format-eval-list format char feature-name lang uri-char)
)))))
(defun www-format-eval-unit (exp char feature-name
&optional lang uri-char value)
(unless value
(setq value (www-char-feature char feature-name)))
(unless uri-char
(setq uri-char (www-uri-encode-char char)))
(cond
((stringp exp) (www-format-encode-string exp))
((null exp) "")
((consp exp)
(cond
((memq (car exp) '(value decimal hex HEX ku-ten S-exp default))
(if (eq (car exp) 'value)
(www-format-eval-feature-value char feature-name
(plist-get (nth 1 exp) :format)
lang uri-char value)
(www-format-apply-value
(car exp) (nth 1 exp) value
uri-char (www-uri-encode-feature-name feature-name)))
)
((eq (car exp) 'name)
(format "%s"
chise-wiki-view-url
(www-uri-encode-feature-name feature-name)
uri-char
(www-format-feature-name feature-name lang))
)
((eq (car exp) 'link)
(format "%s"
(www-format-eval-list (plist-get (nth 1 exp) :ref)
char feature-name lang uri-char)
(www-format-eval-list (nthcdr 2 exp)
char feature-name lang uri-char)))
(t
(format "<%s
>%s%s
>"
(car exp)
(www-format-eval-list (nthcdr 2 exp) char feature-name
lang uri-char)
(car exp)))))))
(defun www-format-eval-list (format-list char feature-name
&optional lang uri-char)
(if (consp format-list)
(mapconcat
(lambda (exp)
(www-format-eval-unit exp char feature-name lang uri-char))
format-list "")
(www-format-eval-unit format-list char feature-name lang uri-char)))
;;; @ HTML generator
;;;
(defun www-html-display-text (text)
(princ
(with-temp-buffer
(insert text)
(goto-char (point-min))
(while (search-forward "<" nil t)
(replace-match "<" nil t))
(goto-char (point-min))
(while (search-forward ">" nil t)
(replace-match ">" nil t))
(goto-char (point-min))
(while (re-search-forward "\\[\\[\\([^]|[]+\\)|\\([^][]+\\)\\]\\]" nil t)
(replace-match
(format "%s"
(match-string 2)
(match-string 1))
nil t))
(encode-coding-region (point-min)(point-max) 'utf-8-mcs-er)
(goto-char (point-min))
(while (search-forward ">-" nil t)
(replace-match ">-" nil t))
(buffer-string))))
(defun www-html-display-paragraph (text)
(princ "
") (www-html-display-text text) (princ "
\n")) (provide 'cwiki-common)