1 ;;; char-db-json.el --- Character Database utility -*- coding: utf-8-er; -*-
3 ;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,
4 ;; 2008,2009,2010,2011,2012,2013,2014,2015,2016 MORIOKA Tomohiko.
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
9 ;; This file is part of XEmacs CHISE.
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;; General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING. If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
28 (require 'char-db-util)
29 (require 'cwiki-format)
31 (setq char-db-ignored-attributes
32 '(ideographic-products
34 *instance@ruimoku/bibliography/title
35 *instance@morpheme-entry/zh-classical))
38 ;;; @ char-db formatters
41 (defun char-db-json-insert-char-spec (char &optional readable column
44 (setq column (current-column)))
45 (let (char-spec temp-char)
46 (setq char-spec (char-db-make-char-spec char))
47 (unless (or (characterp char) ; char
49 (setq char (find-char char-spec))
51 ;; define temporary character
52 ;; Current implementation is dirty.
53 (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
55 (remove-char-attribute temp-char 'ideograph-daikanwa)
56 (setq char temp-char))
57 (char-db-json-insert-char-features char
59 (union (mapcar #'car char-spec)
63 ;; undefine temporary character
64 ;; Current implementation is dirty.
65 (setq char-spec (char-attribute-alist temp-char))
67 (remove-char-attribute temp-char (car (car char-spec)))
68 (setq char-spec (cdr char-spec))))))
70 (defun char-db-json-insert-alist (alist &optional readable column)
72 (setq column (current-column)))
74 (concat "\n" (make-string (1+ column) ?\ )))
78 lbs cell rest separator)
81 (setq name (car (car alist))
82 value (cdr (car alist)))
83 (cond ((eq name 'char)
85 (if (setq ret (condition-case nil
93 (setq key (car (car value)))
94 ;; (if (find-charset key)
95 ;; (setq cal (cons key cal))
96 (setq al (cons key al))
98 (setq value (cdr value)))
99 (insert-char-attributes ret
103 (insert (prin1-to-string value)))
105 (insert line-breaking))
107 (insert (format "(%-18s " name))
108 (setq lbs (concat "\n" (make-string (current-column) ?\ )))
110 (setq cell (car value))
111 (if (and (consp cell)
113 (setq ret (condition-case nil
123 (setq key (car (car rest)))
124 ;; (if (find-charset key)
125 ;; (setq cal (cons key cal))
126 (setq al (cons key al))
128 (setq rest (cdr rest)))
131 (insert-char-attributes ret
135 (setq separator lbs))
138 (insert (prin1-to-string cell))
139 (setq separator " "))
140 (setq value (cdr value)))
142 (insert line-breaking))
144 (insert (format "(%-18s . %S)%s"
147 (setq alist (cdr alist))))
150 (defun char-db-json-insert-char-reference (plist &optional readable column)
152 (setq column (current-column)))
154 (concat "\n" (make-string (1+ column) ?\ )))
159 (setq name (pop plist))
160 (setq value (pop plist))
161 (cond ((eq name :char)
163 (insert "\"char\":\t")
164 (cond ((numberp value)
165 (setq value (decode-char '=ucs value)))
167 ;; (setq value (or (find-char value)
170 (char-db-json-insert-char-spec value readable)
171 (insert line-breaking)
174 (insert (format "%s%s\t%d, \"_comment\": \"%c\"%s"
177 (ideographic-radical value)
181 (insert (format "%s%s\t%S" separator name value))
182 (setq separator line-breaking)))
186 (defun char-db-json-insert-ccs-feature (name value line-breaking)
192 ((memq name '(=>iwds-1
195 =shinjigen@1ed ==shinjigen@1ed
196 =shinjigen@rev ==shinjigen@rev
197 =shinjigen/+p@rev ==shinjigen/+p@rev
198 ===daikanwa/ho ==daikanwa/ho
199 =daikanwa/ho =>>daikanwa/ho =>daikanwa/ho))
200 " %-20s %4d,\t\"_comment\": \"%c")
201 ((eq name '=shinjigen@1ed/24pr)
202 " %-20s %4d,\t\"_comment\": \"%c")
206 ==daikanwa =daikanwa =>>daikanwa =>daikanwa
207 =daikanwa@rev1 =daikanwa@rev2
208 =daikanwa/+p ==daikanwa/+p ===daikanwa/+p
210 =daikanwa/+2p =>>daikanwa/+2p
215 =adobe-japan1-0 ==adobe-japan1-0 ===adobe-japan1-0
216 =adobe-japan1-1 ==adobe-japan1-1 ===adobe-japan1-1
217 =adobe-japan1-2 ==adobe-japan1-2 ===adobe-japan1-2
218 =adobe-japan1-3 ==adobe-japan1-3 ===adobe-japan1-3
219 =adobe-japan1-4 ==adobe-japan1-4 ===adobe-japan1-4
220 =adobe-japan1-5 ==adobe-japan1-5 ===adobe-japan1-5
221 =adobe-japan1-6 ==adobe-japan1-6 ===adobe-japan1-6
222 =>>adobe-japan1-0 =+>adobe-japan1-0
223 =>>adobe-japan1-1 =+>adobe-japan1-1
224 =>>adobe-japan1-2 =+>adobe-japan1-2
225 =>>adobe-japan1-3 =+>adobe-japan1-3
226 =>>adobe-japan1-4 =+>adobe-japan1-4
227 =>>adobe-japan1-5 =+>adobe-japan1-5
228 =>>adobe-japan1-6 =+>adobe-japan1-6
229 =>cbeta =cbeta =>>cbeta ==cbeta ===cbeta
230 =zinbun-oracle =>zinbun-oracle
231 ===hng-jou ===hng-keg ===hng-dng ===hng-mam
232 ===hng-drt ===hng-kgk ===hng-myz ===hng-kda
233 ===hng-khi ===hng-khm ===hng-hok ===hng-kyd ===hng-sok
234 ===hng-yhk ===hng-kak ===hng-kar ===hng-kae
235 ===hng-sys ===hng-tsu ===hng-tzj
236 ===hng-hos ===hng-nak ===hng-jhk
237 ===hng-hod ===hng-gok ===hng-ink ===hng-nto
238 ===hng-nkm ===hng-k24 ===hng-nkk
239 ===hng-kcc ===hng-kcj ===hng-kbk ===hng-sik
240 ===hng-skk ===hng-kyu ===hng-ksk ===hng-wan
241 ===hng-okd ===hng-wad ===hng-kmi
242 ===hng-zkd ===hng-doh ===hng-jyu
243 ===hng-tzs ===hng-kss ===hng-kyo
245 ;; (string-match "^=adobe-" (symbol-name name))
247 " %-20s %5d,\t\"_comment\": \"%c")
248 ((memq name '(=hanyo-denshi/ks
249 ==hanyo-denshi/ks ===hanyo-denshi/ks
252 =mj ==mj ===mj =>>mj =>mj
254 " %-19s %6d,\t\"_comment\": \"%c")
255 ((memq name '(=hanyo-denshi/tk ==hanyo-denshi/tk))
256 " %-20s %8d,\t\"_comment\": \"%c")
257 ((>= (charset-dimension name) 2)
258 " %-20s %5d,\t\"_comment\": \"%c")
260 " %-20s %3d,\t\"_comment\": \"%c"))
261 (format "\"%s\":" name)
262 (if (= (charset-iso-graphic-plane name) 1)
264 (cond ((= (charset-dimension name) 1)
266 ((= (charset-dimension name) 2)
268 ((= (charset-dimension name) 3)
272 (char-db-decode-isolated-char name value)))
273 (if (and (= (charset-chars name) 94)
274 (= (charset-dimension name) 2))
275 (insert (format " [%02d-%02d]\""
276 (- (lsh value -8) 32)
277 (- (logand value 255) 32)))
281 (insert (format " %-20s %s"
282 (format "\"%s\":" name) value))
286 (defun char-db-json-insert-relation-feature (char name value line-breaking
288 (insert (format " %-20s [%s "
289 (format "\"%s\":" name) line-breaking))
290 (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
291 separator cell sources required-features
294 (setq cell (car value))
296 (setq cell (decode-char '=ucs cell)))
298 ((eq name '->subsumptive)
299 (when (or (not (some (lambda (atr)
300 (get-char-attribute cell atr))
301 char-db-ignored-attributes))
303 (encode-char cell ccs 'defined-only))
307 (setq separator (format ",%s" lbs)))
308 (let ((char-db-ignored-attributes
310 char-db-ignored-attributes)))
311 (char-db-json-insert-char-features
312 cell readable nil nil 'for-sub-node))
318 char (intern (format "%s*sources" name))))
319 (setq required-features nil)
320 (dolist (source sources)
324 shinjigen shinjigen@1ed shinjigen@rev))
325 (setq required-features
326 (union required-features
338 =shinjigen/+p@rev))))
340 (setq required-features
341 (union required-features
346 ((find-charset (setq ret (intern (format "=%s" source))))
347 (setq required-features
348 (cons ret required-features)))
349 (t (setq required-features
350 (cons source required-features)))))
351 (cond ((string-match "@JP" (symbol-name name))
352 (setq required-features
353 (union required-features
361 ((string-match "@CN" (symbol-name name))
362 (setq required-features
363 (union required-features
369 (setq separator (format ",%s" lbs)))
371 (insert (format "%S" cell))
372 (char-db-json-insert-char-spec cell readable
379 (setq separator (format ",%s" lbs)))
380 (if (consp (car cell))
381 (char-db-json-insert-char-spec cell readable)
382 (char-db-json-insert-char-reference cell readable))
388 (insert (prin1-to-string cell))
389 (setq separator " ")))
390 (setq value (cdr value)))
393 (defun char-db-json-insert-char-features (char
394 &optional readable attributes column
397 (setq column (current-column)))
398 (let ((est-view-url-prefix "http://chise.org/est/view")
400 name value ; has-long-ccs-name
404 (concat "\n" (make-string (1+ column) ?\ )))
406 lbs cell separator ret
408 dest-ccss ; sources required-features
413 (if (consp attributes)
415 (dolist (name attributes)
416 (unless (memq name char-db-ignored-attributes)
417 (if (find-charset name)
421 (dolist (name (char-attribute-list))
422 (unless (memq name char-db-ignored-attributes)
423 (if (find-charset name)
427 #'char-attribute-name<)))
429 (format "{ \"@context\": \"%s/genre/character/context.json\""
430 est-view-url-prefix))
431 (setq line-separator (format ",%s" line-breaking))
432 (setq id (www-uri-make-object-url char))
433 (insert (format "%s \"@id\": \"%s\"" line-separator id))
434 (setq obj-id (file-name-nondirectory id))
437 ((string-match "^a2\\." obj-id)
438 "chise:super-abstract-character")
439 ((string-match "^a\\." obj-id)
440 "chise:abstract-character")
441 ((string-match "^o\\." obj-id)
442 "chise:unified-glyph")
443 ((string-match "^rep\\." obj-id)
444 "chise:abstract-glyph")
445 ((string-match "^g\\." obj-id)
446 "chise:detailed-glyph")
447 ((string-match "^g2\\." obj-id)
448 "chise:abstract-glyph-form")
449 ((string-match "^gi\\." obj-id)
450 "chise:abstract-glyph-form")
451 ((string-match "^repi\\." obj-id)
456 (insert (format "%s \"@type\": \"%s\"" line-separator type))
457 (when (memq '<-subsumptive attributes)
458 (when (or readable (not for-sub-node))
459 (when (setq value (get-char-attribute char '<-subsumptive))
460 (insert line-separator)
461 (char-db-json-insert-relation-feature char '<-subsumptive value
465 (setq attributes (delq '<-subsumptive attributes))
467 (when (and (memq '<-denotational attributes)
468 (setq value (get-char-attribute char '<-denotational)))
469 (insert line-separator)
470 (char-db-json-insert-relation-feature char '<-denotational value
473 (setq attributes (delq '<-denotational attributes)))
474 (when (and (memq '<-denotational@component attributes)
476 (get-char-attribute char '<-denotational@component)))
477 (insert line-separator)
478 (char-db-json-insert-relation-feature
479 char '<-denotational@component value
482 (setq attributes (delq '<-denotational@component attributes)))
483 (when (and (memq 'name attributes)
484 (setq value (get-char-attribute char 'name)))
485 (insert line-separator)
487 (if (> (+ (current-column) (length value)) 48)
491 (setq attributes (delq 'name attributes))
493 (when (and (memq 'name* attributes)
494 (setq value (get-char-attribute char 'name*)))
495 (insert line-separator)
497 (if (> (+ (current-column) (length value)) 48)
501 (setq attributes (delq 'name* attributes))
503 (when (and (memq 'script attributes)
504 (setq value (get-char-attribute char 'script)))
505 (insert line-separator)
506 (insert (format " \"script\":\t\t %s"
507 (mapconcat (function prin1-to-string)
509 (setq attributes (delq 'script attributes))
511 (dolist (name '(=>ucs =>ucs*))
512 (when (and (memq name attributes)
513 (setq value (get-char-attribute char name)))
514 (insert line-separator)
515 (insert (format " \"%-20s\": #x%04X,\t\"_comment\": \"%c\"%s"
516 name value (decode-char '=ucs value)
518 (setq attributes (delq name attributes))))
519 (dolist (name '(=>ucs@gb =>ucs@big5))
520 (when (and (memq name attributes)
521 (setq value (get-char-attribute char name)))
522 (insert line-separator)
523 (insert (format " \"%-20s\": #x%04X,\t\"_comment\": \"%c\"%s"
528 (symbol-name name) 2)))
531 (setq attributes (delq name attributes))
533 (when (and (memq 'general-category attributes)
534 (setq value (get-char-attribute char 'general-category)))
535 (insert line-separator)
537 " \"general-category\":\t [ %s ], \"_comment\": \"%s\""
538 (mapconcat (lambda (cell)
541 (cond ((rassoc value unidata-normative-category-alist)
542 "Normative Category")
543 ((rassoc value unidata-informative-category-alist)
544 "Informative Category")
548 (setq attributes (delq 'general-category attributes))
550 (when (and (memq 'bidi-category attributes)
551 (setq value (get-char-attribute char 'bidi-category)))
552 (insert line-separator)
553 (insert (format " \"bidi-category\":\t %S"
555 (setq attributes (delq 'bidi-category attributes))
557 (unless (or (not (memq 'mirrored attributes))
558 (eq (setq value (get-char-attribute char 'mirrored 'empty))
560 (insert line-separator)
561 (insert (format " \"mirrored\":\t\t %S"
563 (setq attributes (delq 'mirrored attributes))
566 ((and (memq 'decimal-digit-value attributes)
567 (setq value (get-char-attribute char 'decimal-digit-value)))
568 (insert line-separator)
569 (insert (format " \"decimal-digit-value\": %S"
571 (setq attributes (delq 'decimal-digit-value attributes))
572 (when (and (memq 'digit-value attributes)
573 (setq value (get-char-attribute char 'digit-value)))
574 (insert line-separator)
575 (insert (format " \"digit-value\":\t %S"
577 (setq attributes (delq 'digit-value attributes))
579 (when (and (memq 'numeric-value attributes)
580 (setq value (get-char-attribute char 'numeric-value)))
581 (insert line-separator)
582 (insert (format " \"numeric-value\":\t %S"
584 (setq attributes (delq 'numeric-value attributes))
588 (when (and (memq 'digit-value attributes)
589 (setq value (get-char-attribute char 'digit-value)))
590 (insert line-separator)
591 (insert (format " \"digit-value\":\t %S"
593 (setq attributes (delq 'digit-value attributes))
595 (when (and (memq 'numeric-value attributes)
596 (setq value (get-char-attribute char 'numeric-value)))
597 (insert line-separator)
598 (insert (format " \"numeric-value\":\t %S"
600 (setq attributes (delq 'numeric-value attributes))
602 (when (and (memq 'iso-10646-comment attributes)
603 (setq value (get-char-attribute char 'iso-10646-comment)))
604 (insert line-separator)
605 (insert (format "{\"iso-10646-comment\":\t %S}%s"
608 (setq attributes (delq 'iso-10646-comment attributes))
610 (when (and (memq 'morohashi-daikanwa attributes)
611 (setq value (get-char-attribute char 'morohashi-daikanwa)))
612 (insert line-separator)
613 (insert (format "{\"morohashi-daikanwa\":\t%s}%s"
614 (mapconcat (function prin1-to-string) value " ")
616 (setq attributes (delq 'morohashi-daikanwa attributes))
620 (when (and (memq 'ideographic-radical attributes)
621 (setq value (get-char-attribute char 'ideographic-radical)))
623 (insert line-separator)
624 (insert (format " \"ideographic-radical\": %S,\t\"_comment\": \"%c\""
626 (ideographic-radical radical)
628 (setq attributes (delq 'ideographic-radical attributes))
630 (when (and (memq 'shuowen-radical attributes)
631 (setq value (get-char-attribute char 'shuowen-radical)))
632 (insert line-separator)
633 (insert (format " \"shuowen-radical\":\t %S,\t\"_comment\": \"%c\""
635 (shuowen-radical value)))
636 (setq attributes (delq 'shuowen-radical attributes))
641 char-db-feature-domains
643 (dolist (feature (char-attribute-list))
644 (setq feature (symbol-name feature))
646 "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
648 (setq domain (intern (match-string 2 feature)))
649 (unless (memq domain dest)
650 (setq dest (cons domain dest)))))
651 (sort dest #'string<))))
652 (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
653 (when (and (memq key attributes)
654 (setq value (get-char-attribute char key)))
656 (insert line-separator)
657 (insert (format "{\"%s\": %S},\t\"_comment\": \"%c\"%s"
660 (ideographic-radical radical)
662 (setq attributes (delq key attributes))
664 (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
665 (when (and (memq key attributes)
666 (setq value (get-char-attribute char key)))
668 (insert line-separator)
669 (insert (format " \"%s\": %S"
672 (setq attributes (delq key attributes))
674 (setq key (intern (format "%s@%s" 'total-strokes domain)))
675 (when (and (memq key attributes)
676 (setq value (get-char-attribute char key)))
677 (insert line-separator)
678 (insert (format " \"%s\": %S"
682 (setq attributes (delq key attributes))
684 (dolist (feature '(ideographic-radical
687 (setq key (intern (format "%s@%s*sources" feature domain)))
688 (when (and (memq key attributes)
689 (setq value (get-char-attribute char key)))
690 (insert line-separator)
691 (insert (format " \"%s\":%s" key line-breaking))
693 (insert (format " %s" cell)))
694 (setq attributes (delq key attributes))
697 (when (and (memq 'ideographic-strokes attributes)
698 (setq value (get-char-attribute char 'ideographic-strokes)))
700 (insert line-separator)
701 (insert (format " \"ideographic-strokes\": %S"
704 (setq attributes (delq 'ideographic-strokes attributes))
706 (when (and (memq 'kangxi-radical attributes)
707 (setq value (get-char-attribute char 'kangxi-radical)))
708 (unless (eq value radical)
709 (insert line-separator)
710 (insert (format "{\"kangxi-radical\":\t%S},\t\"_comment\": \"%c\"%s"
712 (ideographic-radical value)
715 (setq radical value)))
716 (setq attributes (delq 'kangxi-radical attributes))
718 (when (and (memq 'kangxi-strokes attributes)
719 (setq value (get-char-attribute char 'kangxi-strokes)))
720 (unless (eq value strokes)
721 (insert line-separator)
722 (insert (format "{\"kangxi-strokes\":\t%S}%s"
726 (setq strokes value)))
727 (setq attributes (delq 'kangxi-strokes attributes))
729 (when (and (memq 'japanese-radical attributes)
730 (setq value (get-char-attribute char 'japanese-radical)))
731 (unless (eq value radical)
732 (insert line-separator)
733 (insert (format "{\"japanese-radical\":\t%S},\t\"_comment\": \"%c\"%s"
735 (ideographic-radical value)
738 (setq radical value)))
739 (setq attributes (delq 'japanese-radical attributes))
741 (when (and (memq 'japanese-strokes attributes)
742 (setq value (get-char-attribute char 'japanese-strokes)))
743 (unless (eq value strokes)
744 (insert line-separator)
745 (insert (format "{\"japanese-strokes\":\t%S}%s"
749 (setq strokes value)))
750 (setq attributes (delq 'japanese-strokes attributes))
752 (when (and (memq 'cns-radical attributes)
753 (setq value (get-char-attribute char 'cns-radical)))
754 (insert line-separator)
755 (insert (format "{\"cns-radical\":\t%S},\t\"_comment\": \"%c\"%s"
757 (ideographic-radical value)
759 (setq attributes (delq 'cns-radical attributes))
761 (when (and (memq 'cns-strokes attributes)
762 (setq value (get-char-attribute char 'cns-strokes)))
763 (unless (eq value strokes)
764 (insert line-separator)
765 (insert (format "{\"cns-strokes\":\t%S}%s"
769 (setq strokes value)))
770 (setq attributes (delq 'cns-strokes attributes))
772 (when (and (memq 'ideographic- attributes)
773 (setq value (get-char-attribute char 'ideographic-)))
774 (insert line-separator)
775 (insert "{\"ideographic-\": ")
776 (setq lbs (concat "\n" (make-string (current-column) ?\ ))
779 (setq cell (car value))
781 (setq cell (decode-char '=ucs cell)))
782 (cond ((characterp cell)
786 (insert (format "%S" cell))
787 (char-db-json-insert-char-spec cell readable))
788 (setq separator lbs))
792 (if (consp (car cell))
793 (char-db-json-insert-char-spec cell readable)
794 (char-db-json-insert-char-reference cell readable))
795 (setq separator lbs))
799 (insert (prin1-to-string cell))
800 (setq separator " ")))
801 (setq value (cdr value)))
803 (insert line-breaking)
804 (setq attributes (delq 'ideographic- attributes)))
805 (when (and (memq 'total-strokes attributes)
806 (setq value (get-char-attribute char 'total-strokes)))
807 (insert line-separator)
808 (insert (format " \"total-strokes\": %S"
811 (setq attributes (delq 'total-strokes attributes))
813 (when (and (memq '->ideograph attributes)
814 (setq value (get-char-attribute char '->ideograph)))
815 (insert line-separator)
816 (insert (format "{\"->ideograph\":\t%s}%s"
817 (mapconcat (lambda (code)
818 (cond ((symbolp code)
821 (format "#x%04X" code))
824 line-breaking code))))
827 (setq attributes (delq '->ideograph attributes))
829 (if (equal (get-char-attribute char '->titlecase)
830 (get-char-attribute char '->uppercase))
831 (setq attributes (delq '->titlecase attributes)))
833 (dolist (ignored '(composition
834 ->denotational <-subsumptive ->ucs-unified
835 ->ideographic-component-forms))
836 (setq attributes (delq ignored attributes))))
838 (setq name (car attributes))
839 (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
841 (cond ((setq ret (find-charset name))
842 (setq name (charset-name ret))
843 (when (not (memq name dest-ccss))
844 (setq dest-ccss (cons name dest-ccss))
845 (insert line-separator)
846 (char-db-json-insert-ccs-feature name value line-breaking))
848 ((string-match "^=>ucs@" (symbol-name name))
849 (insert line-separator)
850 (insert (format "{\"%-20s\": #x%04X},\t\"_comment\": \"%c\"%s"
851 name value (decode-char '=ucs value)
854 ((eq name 'jisx0208-1978/4X)
855 (insert line-separator)
856 (insert (format "{\"%-20s\": #x%04X}%s"
862 (not (eq name '->subsumptive))
863 (not (eq name '->uppercase))
864 (not (eq name '->lowercase))
865 (not (eq name '->titlecase))
866 (not (eq name '->canonical))
867 (not (eq name '->Bopomofo))
868 (not (eq name '->mistakable))
869 (not (eq name '->ideographic-variants))
870 (null (get-char-attribute
871 char (intern (format "%s*sources" name))))
872 (not (string-match "\\*sources$" (symbol-name name)))
873 (null (get-char-attribute
874 char (intern (format "%s*note" name))))
875 (not (string-match "\\*note$" (symbol-name name)))
876 (or (eq name '<-identical)
877 (eq name '<-uppercase)
878 (eq name '<-lowercase)
879 (eq name '<-titlecase)
880 (eq name '<-canonical)
881 (eq name '<-ideographic-variants)
882 ;; (eq name '<-synonyms)
883 (string-match "^<-synonyms" (symbol-name name))
884 (eq name '<-mistakable)
885 (when (string-match "^->" (symbol-name name))
887 ((string-match "^->fullwidth" (symbol-name name))
888 (not (and (consp value)
889 (characterp (car value))
891 (car value) '=ucs 'defined-only)))
896 ((or (eq name 'ideographic-structure)
897 (eq name 'ideographic-combination)
898 (eq name 'ideographic-)
899 (eq name '=decomposition)
900 (char-feature-base-name= '=decomposition name)
901 (char-feature-base-name= '=>decomposition name)
902 ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
903 ;; (symbol-name name))
904 (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
905 (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
908 (insert line-separator)
909 (char-db-json-insert-relation-feature char name value
912 ((memq name '(ideograph=
913 original-ideograph-of
917 ;; simplified-ideograph-of
919 ;; ideographic-different-form-of
921 (insert line-separator)
922 (insert (format "{\"%-20s\":%s " name line-breaking))
923 (setq lbs (concat "\n" (make-string (current-column) ?\ ))
926 (setq cell (car value))
927 (if (and (consp cell)
932 (char-db-json-insert-alist cell readable)
933 (setq separator lbs))
936 (insert (prin1-to-string cell))
937 (setq separator " "))
938 (setq value (cdr value)))
940 (insert line-breaking))
942 (insert line-separator)
943 (insert (format " %-20s [ "
944 (format "\"%s\":" name)))
945 (setq lbs (concat "\n" (make-string (current-column) ?\ ))
948 (setq cell (car value))
949 (if (and (consp cell)
951 (setq ret (condition-case nil
959 (setq key (car (car rest)))
960 (if (find-charset key)
961 (setq cal (cons key cal))
962 (setq al (cons key al)))
963 (setq rest (cdr rest)))
966 (char-db-json-insert-char-features ret
970 (setq separator lbs))
971 (setq ret (prin1-to-string cell))
973 (if (< (+ (current-column)
980 (setq separator " "))
981 (setq value (cdr value)))
985 (insert line-separator)
986 (insert (format " %-20s "
987 (format "\"%s\":" name)))
988 (setq ret (prin1-to-string value))
989 (unless (< (+ (current-column)
993 (insert line-breaking))
997 (setq attributes (cdr attributes)))
998 (insert "\n" (make-string column ?\ ) "}")))
1000 (defun char-db-json-char-data (char &optional readable
1003 (setq column (current-column)))
1005 (narrow-to-region (point)(point))
1006 (char-db-json-insert-char-features char readable attributes column)
1007 (goto-char (point-min))
1008 (while (re-search-forward "[ \t]+$" nil t)
1011 (goto-char (point-min))
1012 (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1013 (let ((column (current-column))
1014 (indent-tabs-mode t))
1015 (delete-region (match-beginning 0) (point))
1016 (indent-to column)))
1017 (goto-char (point-max))
1018 ;; (tabify (point-min)(point-max))
1021 (defun char-db-json-char-data-with-variant (char &optional printable
1023 script excluded-script)
1025 (char-db-json-char-data char printable)
1026 (let ((variants (char-variants char))
1030 (setq variants (sort variants #'<))
1031 (setq rest variants)
1032 (setq variants (cons char variants))
1034 (setq variant (car rest))
1035 (unless (get-char-attribute variant '<-subsumptive)
1036 (if (and (or (null script)
1037 (null (setq vs (get-char-attribute variant 'script)))
1039 (or (null excluded-script)
1040 (null (setq vs (get-char-attribute variant 'script)))
1041 (not (memq excluded-script vs))))
1042 (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1044 (char-db-json-char-data variant printable)
1045 (if (setq ret (char-variants variant))
1047 (or (memq (car ret) variants)
1048 ;; (get-char-attribute (car ret) '<-subsumptive)
1049 (setq rest (nconc rest (list (car ret)))))
1050 (setq ret (cdr ret)))))))
1051 (setq rest (cdr rest)))
1054 (defun char-db-json-insert-char-range-data (min max
1059 (while (<= code max)
1060 (setq char (decode-char '=ucs code))
1061 (if (encode-char char '=ucs 'defined-only)
1062 (char-db-json-char-data-with-variant char nil 'no-ucs-unified
1063 script excluded-script))
1064 (setq code (1+ code)))))
1066 (defun write-char-range-data-to-json-file (min max file
1069 (let ((coding-system-for-write char-db-file-coding-system))
1071 (insert (format "// -*- coding: %s -*-\n"
1072 char-db-file-coding-system))
1073 (char-db-json-insert-char-range-data min max script excluded-script)
1074 (write-region (point-min)(point-max) file))))
1077 (defun what-char-definition-json (char)
1078 (interactive (list (char-after)))
1079 (let ((buf (get-buffer-create "*Character Description*"))
1080 (the-buf (current-buffer))
1081 (win-conf (current-window-configuration)))
1083 (make-local-variable 'what-character-original-window-configuration)
1084 (setq what-character-original-window-configuration win-conf)
1085 (setq buffer-read-only nil)
1089 (char-db-json-char-data-with-variant char nil)
1090 (unless (char-attribute-alist char)
1091 (insert (format "// = %c\n"
1092 (let* ((rest (split-char char))
1096 (setq code (logior (lsh code 8)
1098 (decode-char ccs code)))))
1099 ;; (char-db-update-comment)
1100 (set-buffer-modified-p nil)
1101 (view-mode the-buf (lambda (buf)
1102 (set-window-configuration
1103 what-character-original-window-configuration)
1105 (goto-char (point-min)))
1107 (set-window-configuration
1108 what-character-original-window-configuration)
1109 (signal (car err) (cdr err)))))))
1111 (defun char-db-json-batch-view ()
1112 (setq terminal-coding-system 'binary)
1114 (let* ((target (pop command-line-args-left))
1117 (princ "Content-Type: application/json; charset=UTF-8
1122 (when (string-match "^char=\\(&[^&;]+;\\)" target)
1123 (setq ret (match-end 0))
1126 (www-uri-encode-object
1127 (www-uri-decode-object
1128 'character (match-string 1 target)))
1129 (substring target ret))))
1131 (mapcar (lambda (cell)
1132 (if (string-match "=" cell)
1134 (setq genre (substring cell 0 (match-beginning 0))
1135 ret (substring cell (match-end 0)))
1138 (decode-uri-string genre 'utf-8-mcs-er))
1140 (list (decode-uri-string cell 'utf-8-mcs-er))))
1141 (split-string target "&")))
1142 (setq ret (car target))
1143 (cond ((eq (car ret) 'char)
1144 (setq object (www-uri-decode-object (car ret)(cdr ret)))
1145 (when (characterp object)
1147 (char-db-json-char-data object)
1148 (encode-coding-region (point-min)(point-max)
1149 char-db-file-coding-system)
1150 (princ (buffer-string))
1153 ((eq (car ret) 'character)
1154 (setq object (www-uri-decode-object (car ret)(cdr ret)))
1155 (when (characterp object)
1157 (char-db-json-char-data object)
1158 (encode-coding-region (point-min)(point-max)
1159 char-db-file-coding-system)
1160 (princ (buffer-string))
1166 (princ (format "%S" err)))
1173 (provide 'char-db-json)
1175 ;;; char-db-json.el ends here