update.
[chise/est.git] / char-db-json.el
1 ;;; char-db-json.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,
4 ;;   2008,2009,2010,2011,2012,2013,2014,2015,2016 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (require 'char-db-util)
29 (require 'cwiki-format)
30
31 (setq char-db-ignored-attributes
32       '(ideographic-products
33         ->HNG
34         *instance@ruimoku/bibliography/title
35         *instance@morpheme-entry/zh-classical))
36
37
38 ;;; @ char-db formatters
39 ;;;
40     
41 (defun char-db-json-insert-char-spec (char &optional readable column
42                                            required-features)
43   (unless column
44     (setq column (current-column)))
45   (let (char-spec temp-char)
46     (setq char-spec (char-db-make-char-spec char))
47     (unless (or (characterp char) ; char
48                 (condition-case nil
49                     (setq char (find-char char-spec))
50                   (error nil)))
51       ;; define temporary character
52       ;;   Current implementation is dirty.
53       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
54                                          char-spec)))
55       (remove-char-attribute temp-char 'ideograph-daikanwa)
56       (setq char temp-char))
57     (char-db-json-insert-char-features char
58                                        readable
59                                        (union (mapcar #'car char-spec)
60                                               required-features)
61                                        nil 'for-sub-node)
62     (when temp-char
63       ;; undefine temporary character
64       ;;   Current implementation is dirty.
65       (setq char-spec (char-attribute-alist temp-char))
66       (while char-spec
67         (remove-char-attribute temp-char (car (car char-spec)))
68         (setq char-spec (cdr char-spec))))))
69
70 (defun char-db-json-insert-alist (alist &optional readable column)
71   (unless column
72     (setq column (current-column)))
73   (let ((line-breaking
74          (concat "\n" (make-string (1+ column) ?\ )))
75         name value
76         ret al ; cal
77         key
78         lbs cell rest separator)
79     (insert "(")
80     (while alist
81       (setq name (car (car alist))
82             value (cdr (car alist)))
83       (cond ((eq name 'char)
84              (insert "(char . ")
85              (if (setq ret (condition-case nil
86                                (find-char value)
87                              (error nil)))
88                  (progn
89                    (setq al nil
90                          ;; cal nil
91                          )
92                    (while value
93                      (setq key (car (car value)))
94                      ;; (if (find-charset key)
95                      ;;     (setq cal (cons key cal))
96                      (setq al (cons key al))
97                      ;; )
98                      (setq value (cdr value)))
99                    (insert-char-attributes ret
100                                            readable
101                                            (or al 'none) ; cal
102                                            nil 'for-sub-node))
103                (insert (prin1-to-string value)))
104              (insert ")")
105              (insert line-breaking))
106             ((consp value)
107              (insert (format "(%-18s " name))
108              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
109              (while (consp value)
110                (setq cell (car value))
111                (if (and (consp cell)
112                         (consp (car cell))
113                         (setq ret (condition-case nil
114                                       (find-char cell)
115                                     (error nil)))
116                         )
117                    (progn
118                      (setq rest cell
119                            al nil
120                            ;; cal nil
121                            )
122                      (while rest
123                        (setq key (car (car rest)))
124                        ;; (if (find-charset key)
125                        ;;     (setq cal (cons key cal))
126                        (setq al (cons key al))
127                        ;; )
128                        (setq rest (cdr rest)))
129                      (if separator
130                          (insert lbs))
131                      (insert-char-attributes ret
132                                              readable
133                                              al ; cal
134                                              nil 'for-sub-node)
135                      (setq separator lbs))
136                  (if separator
137                      (insert separator))
138                  (insert (prin1-to-string cell))
139                  (setq separator " "))
140                (setq value (cdr value)))
141              (insert ")")
142              (insert line-breaking))
143             (t
144              (insert (format "(%-18s . %S)%s"
145                              name value
146                              line-breaking))))
147       (setq alist (cdr alist))))
148   (insert ")"))
149
150 (defun char-db-json-insert-char-reference (plist &optional readable column)
151   (unless column
152     (setq column (current-column)))
153   (let ((line-breaking
154          (concat "\n" (make-string (1+ column) ?\ )))
155         (separator "")
156         name value)
157     (insert "{")
158     (while plist
159       (setq name (pop plist))
160       (setq value (pop plist))
161       (cond ((eq name :char)
162              (insert separator)
163              (insert "\"char\":\t")
164              (cond ((numberp value)
165                     (setq value (decode-char '=ucs value)))
166                    ;; ((consp value)
167                    ;;  (setq value (or (find-char value)
168                    ;;                  value)))
169                    )
170              (char-db-json-insert-char-spec value readable)
171              (insert line-breaking)
172              (setq separator ""))
173             ((eq name :radical)
174              (insert (format "%s%s\t%d, \"_comment\": \"%c\"%s"
175                              separator
176                              name value
177                              (ideographic-radical value)
178                              line-breaking))
179              (setq separator ""))
180             (t
181              (insert (format "%s%s\t%S" separator name value))
182              (setq separator line-breaking)))
183       ))
184   (insert " }"))
185
186 (defun char-db-json-insert-ccs-feature (name value line-breaking)
187   (cond
188    ((integerp value)
189     (insert
190      (format
191       (cond
192        ((memq name '(=>iwds-1
193                      ==shinjigen
194                      =shinjigen
195                      =shinjigen@1ed ==shinjigen@1ed
196                      =shinjigen@rev ==shinjigen@rev
197                      =shinjigen/+p@rev ==shinjigen/+p@rev
198                      ===daikanwa/ho ==daikanwa/ho
199                      =daikanwa/ho =>>daikanwa/ho =>daikanwa/ho))
200         " %-18s  %4d,\t\"_comment\": \"%c")
201        ((eq name '=shinjigen@1ed/24pr)
202         " %-18s  %4d,\t\"_comment\": \"%c")
203        ((or
204          (memq name
205                '(===daikanwa
206                  ==daikanwa =daikanwa =>>daikanwa =>daikanwa
207                  =daikanwa@rev1 =daikanwa@rev2
208                  =daikanwa/+p ==daikanwa/+p ===daikanwa/+p
209                  =>>daikanwa/+p
210                  =daikanwa/+2p =>>daikanwa/+2p
211                  =gt ==gt ===gt
212                  =>>gt =+>gt =>gt
213                  =gt-k ==gt-k ===gt-k
214                  =>>gt-k =>gt-k
215                  =adobe-japan1-0 ==adobe-japan1-0 ===adobe-japan1-0
216                  =adobe-japan1-1 ==adobe-japan1-1 ===adobe-japan1-1
217                  =adobe-japan1-2 ==adobe-japan1-2 ===adobe-japan1-2
218                  =adobe-japan1-3 ==adobe-japan1-3 ===adobe-japan1-3
219                  =adobe-japan1-4 ==adobe-japan1-4 ===adobe-japan1-4
220                  =adobe-japan1-5 ==adobe-japan1-5 ===adobe-japan1-5
221                  =adobe-japan1-6 ==adobe-japan1-6 ===adobe-japan1-6
222                  =>>adobe-japan1-0 =+>adobe-japan1-0
223                  =>>adobe-japan1-1 =+>adobe-japan1-1
224                  =>>adobe-japan1-2 =+>adobe-japan1-2
225                  =>>adobe-japan1-3 =+>adobe-japan1-3
226                  =>>adobe-japan1-4 =+>adobe-japan1-4
227                  =>>adobe-japan1-5 =+>adobe-japan1-5
228                  =>>adobe-japan1-6 =+>adobe-japan1-6
229                  =>cbeta =cbeta =>>cbeta ==cbeta ===cbeta
230                  =zinbun-oracle =>zinbun-oracle
231                  ===hng-jou ===hng-keg ===hng-dng ===hng-mam
232                  ===hng-drt ===hng-kgk ===hng-myz ===hng-kda
233                  ===hng-khi ===hng-khm ===hng-hok ===hng-kyd ===hng-sok
234                  ===hng-yhk ===hng-kak ===hng-kar ===hng-kae
235                  ===hng-sys ===hng-tsu ===hng-tzj
236                  ===hng-hos ===hng-nak ===hng-jhk
237                  ===hng-hod ===hng-gok ===hng-ink ===hng-nto
238                  ===hng-nkm ===hng-k24 ===hng-nkk
239                  ===hng-kcc ===hng-kcj ===hng-kbk ===hng-sik
240                  ===hng-skk ===hng-kyu ===hng-ksk ===hng-wan
241                  ===hng-okd ===hng-wad ===hng-kmi
242                  ===hng-zkd ===hng-doh ===hng-jyu
243                  ===hng-tzs ===hng-kss ===hng-kyo
244                  ===hng-smk))
245          ;; (string-match "^=adobe-" (symbol-name name))
246          )
247         " %-18s %5d,\t\"_comment\": \"%c")
248        ((memq name '(=hanyo-denshi/ks
249                      ==hanyo-denshi/ks ===hanyo-denshi/ks
250                      =>>hanyo-denshi/ks
251                      =koseki ==koseki
252                      =mj ==mj ===mj =>>mj =>mj
253                      =zihai mojikyo))
254         " %-18s %6d,\t\"_comment\": \"%c")
255        ((memq name '(=hanyo-denshi/tk ==hanyo-denshi/tk))
256         " %-18s %8d,\t\"_comment\": \"%c")
257        ((>= (charset-dimension name) 2)
258         " %-18s %5d,\t\"_comment\": \"%c")
259        (t
260         " %-18s %3d,\t\"_comment\": \"%c"))
261       (format "\"%s\":" name)
262       (if (= (charset-iso-graphic-plane name) 1)
263           (logior value
264                   (cond ((= (charset-dimension name) 1)
265                          #x80)
266                         ((= (charset-dimension name) 2)
267                          #x8080)
268                         ((= (charset-dimension name) 3)
269                          #x808080)
270                         (t 0)))
271         value)
272       (char-db-decode-isolated-char name value)))
273     (if (and (= (charset-chars name) 94)
274              (= (charset-dimension name) 2))
275         (insert (format " [%02d-%02d]\""
276                         (- (lsh value -8) 32)
277                         (- (logand value 255) 32)))
278       (insert "\""))
279     )
280    (t
281     (insert (format " %-18s %s"
282                     (format "\"%s\":" name) value))
283     ))
284   )
285
286 (defun char-db-json-insert-relation-feature (char name value line-breaking
287                                              ccss readable)
288   (insert (format " %-18s [%s    "
289                   (format "\"%s\":" name) line-breaking))
290   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
291         separator cell sources required-features
292         ret)
293     (while (consp value)
294       (setq cell (car value))
295       (if (integerp cell)
296           (setq cell (decode-char '=ucs cell)))
297       (cond
298        ((eq name '->subsumptive)
299         (when (or (not (some (lambda (atr)
300                                (get-char-attribute cell atr))
301                              char-db-ignored-attributes))
302                   (some (lambda (ccs)
303                           (encode-char cell ccs 'defined-only))
304                         ccss))
305           (if separator
306               (insert separator)
307             (setq separator (format ",%s" lbs)))
308           (let ((char-db-ignored-attributes
309                  (cons '<-subsumptive
310                        char-db-ignored-attributes)))
311             (char-db-json-insert-char-features
312              cell readable nil nil 'for-sub-node))
313           )
314         )
315        ((characterp cell)
316         (setq sources
317               (get-char-attribute
318                char (intern (format "%s*sources" name))))
319         (setq required-features nil)
320         (dolist (source sources)
321           (cond
322            ((memq source '(JP
323                            JP/Jouyou
324                            shinjigen shinjigen@1ed shinjigen@rev))
325             (setq required-features
326                   (union required-features
327                          '(=jis-x0208
328                            =jis-x0208@1990
329                            =jis-x0213-1@2000
330                            =jis-x0213-1@2004
331                            =jis-x0213-2
332                            =jis-x0212
333                            =jis-x0208@1983
334                            =jis-x0208@1978
335                            =shinjigen
336                            =shinjigen@1ed
337                            =shinjigen@rev
338                            =shinjigen/+p@rev))))
339            ((eq source 'CN)
340             (setq required-features
341                   (union required-features
342                          '(=gb2312
343                            =gb12345
344                            =iso-ir165)))))
345           (cond
346            ((find-charset (setq ret (intern (format "=%s" source))))
347             (setq required-features
348                   (cons ret required-features)))
349            (t (setq required-features
350                     (cons source required-features)))))
351         (cond ((string-match "@JP" (symbol-name name))
352                (setq required-features
353                      (union required-features
354                             '(=jis-x0208
355                               =jis-x0208@1990
356                               =jis-x0213-1-2000
357                               =jis-x0213-2-2000
358                               =jis-x0212
359                               =jis-x0208@1983
360                               =jis-x0208@1978))))
361               ((string-match "@CN" (symbol-name name))
362                (setq required-features
363                      (union required-features
364                             '(=gb2312
365                               =gb12345
366                               =iso-ir165)))))
367         (if separator
368             (insert separator)
369           (setq separator (format ",%s" lbs)))
370         (if readable
371             (insert (format "%S" cell))
372           (char-db-json-insert-char-spec cell readable
373                                          nil
374                                          required-features))
375         )
376        ((consp cell)
377         (if separator
378             (insert separator)
379           (setq separator (format ",%s" lbs)))
380         (if (consp (car cell))
381             (char-db-json-insert-char-spec cell readable)
382           (char-db-json-insert-char-reference cell readable))
383         )
384        (t
385         (if separator
386             (insert separator)
387           )
388         (insert (prin1-to-string cell))
389         (setq separator " ")))
390       (setq value (cdr value)))
391     (insert " ]")))
392
393 (defun char-db-json-insert-char-features (char
394                                           &optional readable attributes column
395                                           for-sub-node)
396   (unless column
397     (setq column (current-column)))
398   (let (name value ; has-long-ccs-name
399         rest
400         radical strokes
401         (line-breaking
402          (concat "\n" (make-string (1+ column) ?\ )))
403         (line-separator nil)
404         lbs cell separator ret
405         key al cal
406         dest-ccss ; sources required-features
407         ccss)
408     (let (atr-d)
409       (setq attributes
410             (sort (if attributes
411                       (if (consp attributes)
412                           (progn
413                             (dolist (name attributes)
414                               (unless (memq name char-db-ignored-attributes)
415                                 (if (find-charset name)
416                                     (push name ccss))
417                                 (push name atr-d)))
418                             atr-d))
419                     (dolist (name (char-attribute-list))
420                       (unless (memq name char-db-ignored-attributes)
421                         (if (find-charset name)
422                             (push name ccss))
423                         (push name atr-d)))
424                     atr-d)
425                   #'char-attribute-name<)))
426     (insert "{")
427     (when (memq '<-subsumptive attributes)
428       (when (or readable (not for-sub-node))
429         (when (setq value (get-char-attribute char '<-subsumptive))
430           (char-db-json-insert-relation-feature char '<-subsumptive value
431                                                 line-breaking
432                                                 ccss readable)
433           (setq line-separator (format ",%s" line-breaking))
434           ))
435       (setq attributes (delq '<-subsumptive attributes))
436       )
437     (when (and (memq '<-denotational attributes)
438                (setq value (get-char-attribute char '<-denotational)))
439       (if line-separator
440           (insert line-separator)
441         (setq line-separator (format ",%s" line-breaking)))
442       (char-db-json-insert-relation-feature char '<-denotational value
443                                             line-breaking
444                                             ccss readable)
445       (setq attributes (delq '<-denotational attributes)))
446     (when (and (memq '<-denotational@component attributes)
447                (setq value
448                      (get-char-attribute char '<-denotational@component)))
449       (if line-separator
450           (insert line-separator)
451         (setq line-separator (format ",%s" line-breaking)))
452       (char-db-json-insert-relation-feature
453        char '<-denotational@component value
454        line-breaking
455        ccss readable)
456       (setq attributes (delq '<-denotational@component attributes)))
457     (when (and (memq 'name attributes)
458                (setq value (get-char-attribute char 'name)))
459       (if line-separator
460           (insert line-separator)
461         (setq line-separator (format ",%s" line-breaking)))
462       (insert (format
463                (if (> (+ (current-column) (length value)) 48)
464                    "\"name\": %S"
465                  "\"name\":               %S")
466                value))
467       (setq attributes (delq 'name attributes))
468       )
469     (when (and (memq 'name* attributes)
470                (setq value (get-char-attribute char 'name*)))
471       (if line-separator
472           (insert line-separator)
473         (setq line-separator (format ",%s" line-breaking)))
474       (insert (format
475                (if (> (+ (current-column) (length value)) 48)
476                    "\"name*\": %S"
477                  "{\"name*\":               %S")
478                value))
479       (setq attributes (delq 'name* attributes))
480       )
481     (when (and (memq 'script attributes)
482                (setq value (get-char-attribute char 'script)))
483       (if line-separator
484           (insert line-separator)
485         (setq line-separator (format ",%s" line-breaking)))
486       (insert (format "{\"script\":\t\t%s}%s"
487                       (mapconcat (function prin1-to-string)
488                                  value " ")
489                       line-breaking))
490       (setq attributes (delq 'script attributes))
491       )
492     (dolist (name '(=>ucs =>ucs*))
493       (when (and (memq name attributes)
494                  (setq value (get-char-attribute char name)))
495         (if line-separator
496             (insert line-separator)
497           (setq line-separator (format ",%s" line-breaking)))
498         (insert (format "{\"%-18s\":  #x%04X},\t\"_comment\": \"%c\"%s"
499                         name value (decode-char '=ucs value)
500                         line-breaking))
501         (setq attributes (delq name attributes))))
502     (dolist (name '(=>ucs@gb =>ucs@big5))
503       (when (and (memq name attributes)
504                  (setq value (get-char-attribute char name)))
505         (if line-separator
506             (insert line-separator)
507           (setq line-separator (format ",%s" line-breaking)))
508         (insert (format "{\"%-18s\":  #x%04X},\t\"_comment\": \"%c\"%s"
509                         name value
510                         (decode-char (intern
511                                       (concat "="
512                                               (substring
513                                                (symbol-name name) 2)))
514                                      value)
515                         line-breaking))
516         (setq attributes (delq name attributes))
517         ))
518     (when (and (memq 'general-category attributes)
519                (setq value (get-char-attribute char 'general-category)))
520       (if line-separator
521           (insert line-separator)
522         (setq line-separator (format ",%s" line-breaking)))
523       (insert (format
524                "{\"general-category\":\t%s} // %s%s"
525                (mapconcat (lambda (cell)
526                             (format "%S" cell))
527                           value " ")
528                (cond ((rassoc value unidata-normative-category-alist)
529                       "Normative Category")
530                      ((rassoc value unidata-informative-category-alist)
531                       "Informative Category")
532                      (t
533                       "Unknown Category"))
534                line-breaking))
535       (setq attributes (delq 'general-category attributes))
536       )
537     (when (and (memq 'bidi-category attributes)
538                (setq value (get-char-attribute char 'bidi-category)))
539       (if line-separator
540           (insert line-separator)
541         (setq line-separator (format ",%s" line-breaking)))
542       (insert (format "{\"bidi-category\":\t %S}%s"
543                       value
544                       line-breaking))
545       (setq attributes (delq 'bidi-category attributes))
546       )
547     (unless (or (not (memq 'mirrored attributes))
548                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
549                     'empty))
550       (if line-separator
551           (insert line-separator)
552         (setq line-separator (format ",%s" line-breaking)))
553       (insert (format "{\"mirrored\":\t\t %S}%s"
554                       value
555                       line-breaking))
556       (setq attributes (delq 'mirrored attributes))
557       )
558     (cond
559      ((and (memq 'decimal-digit-value attributes)
560            (setq value (get-char-attribute char 'decimal-digit-value)))
561       (if line-separator
562           (insert line-separator)
563         (setq line-separator (format ",%s" line-breaking)))
564       (insert (format "{\"decimal-digit-value\":  %S}%s"
565                       value
566                       line-breaking))
567       (setq attributes (delq 'decimal-digit-value attributes))
568       (when (and (memq 'digit-value attributes)
569                  (setq value (get-char-attribute char 'digit-value)))
570         (if line-separator
571             (insert line-separator)
572           (setq line-separator (format ",%s" line-breaking)))
573         (insert (format "{\"digit-value\":\t  %S}%s"
574                         value
575                         line-breaking))
576         (setq attributes (delq 'digit-value attributes))
577         )
578       (when (and (memq 'numeric-value attributes)
579                  (setq value (get-char-attribute char 'numeric-value)))
580         (if line-separator
581             (insert line-separator)
582           (setq line-separator (format ",%s" line-breaking)))
583         (insert (format "{\"numeric-value\":\t  %S}%s"
584                         value
585                         line-breaking))
586         (setq attributes (delq 'numeric-value attributes))
587         )
588       )
589      (t
590       (when (and (memq 'digit-value attributes)
591                  (setq value (get-char-attribute char 'digit-value)))
592         (if line-separator
593             (insert line-separator)
594           (setq line-separator (format ",%s" line-breaking)))
595         (insert (format "{\"digit-value\":\t %S}%s"
596                         value
597                         line-breaking))
598         (setq attributes (delq 'digit-value attributes))
599         )
600       (when (and (memq 'numeric-value attributes)
601                  (setq value (get-char-attribute char 'numeric-value)))
602         (if line-separator
603             (insert line-separator)
604           (setq line-separator (format ",%s" line-breaking)))
605         (insert (format "{\"numeric-value\":\t %S}%s"
606                         value
607                         line-breaking))
608         (setq attributes (delq 'numeric-value attributes))
609         )))
610     (when (and (memq 'iso-10646-comment attributes)
611                (setq value (get-char-attribute char 'iso-10646-comment)))
612       (if line-separator
613           (insert line-separator)
614         (setq line-separator (format ",%s" line-breaking)))
615       (insert (format "{\"iso-10646-comment\":\t %S}%s"
616                       value
617                       line-breaking))
618       (setq attributes (delq 'iso-10646-comment attributes))
619       )
620     (when (and (memq 'morohashi-daikanwa attributes)
621                (setq value (get-char-attribute char 'morohashi-daikanwa)))
622       (if line-separator
623           (insert line-separator)
624         (setq line-separator (format ",%s" line-breaking)))
625       (insert (format "{\"morohashi-daikanwa\":\t%s}%s"
626                       (mapconcat (function prin1-to-string) value " ")
627                       line-breaking))
628       (setq attributes (delq 'morohashi-daikanwa attributes))
629       )
630     (setq radical nil
631           strokes nil)
632     (when (and (memq 'ideographic-radical attributes)
633                (setq value (get-char-attribute char 'ideographic-radical)))
634       (setq radical value)
635       (if line-separator
636           (insert line-separator)
637         (setq line-separator (format ",%s" line-breaking)))
638       (insert (format " \"ideographic-radical\": %S,\t\"_comment\": \"%c\""
639                       radical
640                       (ideographic-radical radical)
641                       ))
642       (setq attributes (delq 'ideographic-radical attributes))
643       )
644     (when (and (memq 'shuowen-radical attributes)
645                (setq value (get-char-attribute char 'shuowen-radical)))
646       (if line-separator
647           (insert line-separator)
648         (setq line-separator (format ",%s" line-breaking)))
649       (insert (format "{\"shuowen-radical\":\t %S},\t\"_comment\": \"%c\"%s"
650                       value
651                       (shuowen-radical value)
652                       line-breaking))
653       (setq attributes (delq 'shuowen-radical attributes))
654       )
655     (let (key)
656       (dolist (domain
657                (append
658                 char-db-feature-domains
659                 (let (dest domain)
660                   (dolist (feature (char-attribute-list))
661                     (setq feature (symbol-name feature))
662                     (when (string-match
663                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
664                            feature)
665                       (setq domain (intern (match-string 2 feature)))
666                      (unless (memq domain dest)
667                        (setq dest (cons domain dest)))))
668                   (sort dest #'string<))))
669         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
670         (when (and (memq key attributes)
671                    (setq value (get-char-attribute char key)))
672           (setq radical value)
673           (if line-separator
674               (insert line-separator)
675             (setq line-separator (format ",%s" line-breaking)))
676           (insert (format "{\"%s\": %S},\t\"_comment\": \"%c\"%s"
677                           key
678                           radical
679                           (ideographic-radical radical)
680                           line-breaking))
681           (setq attributes (delq key attributes))
682           )
683         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
684         (when (and (memq key attributes)
685                    (setq value (get-char-attribute char key)))
686           (setq strokes value)
687           (if line-separator
688               (insert line-separator)
689             (setq line-separator (format ",%s" line-breaking)))
690           (insert (format " \"%s\": %S"
691                           key
692                           strokes))
693           (setq attributes (delq key attributes))
694           )
695         (setq key (intern (format "%s@%s" 'total-strokes domain)))
696         (when (and (memq key attributes)
697                    (setq value (get-char-attribute char key)))
698           (if line-separator
699               (insert line-separator)
700             (setq line-separator (format ",%s" line-breaking)))
701           (insert (format " \"%s\":       %S"
702                           key
703                           value
704                           ))
705           (setq attributes (delq key attributes))
706           )
707         (dolist (feature '(ideographic-radical
708                            ideographic-strokes
709                            total-strokes))
710           (setq key (intern (format "%s@%s*sources" feature domain)))
711           (when (and (memq key attributes)
712                      (setq value (get-char-attribute char key)))
713             (if line-separator
714                 (insert line-separator)
715               (setq line-separator (format ",%s" line-breaking)))
716             (insert (format " \"%s\":%s" key line-breaking))
717             (dolist (cell value)
718               (insert (format " %s" cell)))
719             (setq attributes (delq key attributes))
720             ))
721         ))
722     (when (and (memq 'ideographic-strokes attributes)
723                (setq value (get-char-attribute char 'ideographic-strokes)))
724       (setq strokes value)
725       (if line-separator
726           (insert line-separator)
727         (setq line-separator (format ",%s" line-breaking)))
728       (insert (format " \"ideographic-strokes\": %S"
729                       strokes
730                       ))
731       (setq attributes (delq 'ideographic-strokes attributes))
732       )
733     (when (and (memq 'kangxi-radical attributes)
734                (setq value (get-char-attribute char 'kangxi-radical)))
735       (unless (eq value radical)
736         (if line-separator
737             (insert line-separator)
738           (setq line-separator (format ",%s" line-breaking)))
739         (insert (format "{\"kangxi-radical\":\t%S},\t\"_comment\": \"%c\"%s"
740                         value
741                         (ideographic-radical value)
742                         line-breaking))
743         (or radical
744             (setq radical value)))
745       (setq attributes (delq 'kangxi-radical attributes))
746       )
747     (when (and (memq 'kangxi-strokes attributes)
748                (setq value (get-char-attribute char 'kangxi-strokes)))
749       (unless (eq value strokes)
750         (if line-separator
751             (insert line-separator)
752           (setq line-separator (format ",%s" line-breaking)))
753         (insert (format "{\"kangxi-strokes\":\t%S}%s"
754                         value
755                         line-breaking))
756         (or strokes
757             (setq strokes value)))
758       (setq attributes (delq 'kangxi-strokes attributes))
759       )
760     (when (and (memq 'japanese-radical attributes)
761                (setq value (get-char-attribute char 'japanese-radical)))
762       (unless (eq value radical)
763         (if line-separator
764             (insert line-separator)
765           (setq line-separator (format ",%s" line-breaking)))
766         (insert (format "{\"japanese-radical\":\t%S},\t\"_comment\": \"%c\"%s"
767                         value
768                         (ideographic-radical value)
769                         line-breaking))
770         (or radical
771             (setq radical value)))
772       (setq attributes (delq 'japanese-radical attributes))
773       )
774     (when (and (memq 'japanese-strokes attributes)
775                (setq value (get-char-attribute char 'japanese-strokes)))
776       (unless (eq value strokes)
777         (if line-separator
778             (insert line-separator)
779           (setq line-separator (format ",%s" line-breaking)))
780         (insert (format "{\"japanese-strokes\":\t%S}%s"
781                         value
782                         line-breaking))
783         (or strokes
784             (setq strokes value)))
785       (setq attributes (delq 'japanese-strokes attributes))
786       )
787     (when (and (memq 'cns-radical attributes)
788                (setq value (get-char-attribute char 'cns-radical)))
789       (if line-separator
790           (insert line-separator)
791         (setq line-separator (format ",%s" line-breaking)))
792       (insert (format "{\"cns-radical\":\t%S},\t\"_comment\": \"%c\"%s"
793                       value
794                       (ideographic-radical value)
795                       line-breaking))
796       (setq attributes (delq 'cns-radical attributes))
797       )
798     (when (and (memq 'cns-strokes attributes)
799                (setq value (get-char-attribute char 'cns-strokes)))
800       (unless (eq value strokes)
801         (if line-separator
802             (insert line-separator)
803           (setq line-separator (format ",%s" line-breaking)))
804         (insert (format "{\"cns-strokes\":\t%S}%s"
805                         value
806                         line-breaking))
807         (or strokes
808             (setq strokes value)))
809       (setq attributes (delq 'cns-strokes attributes))
810       )
811     (when (and (memq 'ideographic- attributes)
812                (setq value (get-char-attribute char 'ideographic-)))
813       (if line-separator
814           (insert line-separator)
815         (setq line-separator (format ",%s" line-breaking)))
816       (insert "{\"ideographic-\":       ")
817       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
818             separator nil)
819       (while (consp value)
820         (setq cell (car value))
821         (if (integerp cell)
822             (setq cell (decode-char '=ucs cell)))
823         (cond ((characterp cell)
824                (if separator
825                    (insert lbs))
826                (if readable
827                    (insert (format "%S" cell))
828                  (char-db-json-insert-char-spec cell readable))
829                (setq separator lbs))
830               ((consp cell)
831                (if separator
832                    (insert lbs))
833                (if (consp (car cell))
834                    (char-db-json-insert-char-spec cell readable)
835                  (char-db-json-insert-char-reference cell readable))
836                (setq separator lbs))
837               (t
838                (if separator
839                    (insert separator))
840                (insert (prin1-to-string cell))
841                (setq separator " ")))
842         (setq value (cdr value)))
843       (insert " }")
844       (insert line-breaking)
845       (setq attributes (delq 'ideographic- attributes)))
846     (when (and (memq 'total-strokes attributes)
847                (setq value (get-char-attribute char 'total-strokes)))
848       (if line-separator
849           (insert line-separator)
850         (setq line-separator (format ",%s" line-breaking)))
851       (insert (format " \"total-strokes\":       %S"
852                       value
853                       ))
854       (setq attributes (delq 'total-strokes attributes))
855       )
856     (when (and (memq '->ideograph attributes)
857                (setq value (get-char-attribute char '->ideograph)))
858       (if line-separator
859           (insert line-separator)
860         (setq line-separator (format ",%s" line-breaking)))
861       (insert (format "{\"->ideograph\":\t%s}%s"
862                       (mapconcat (lambda (code)
863                                    (cond ((symbolp code)
864                                           (symbol-name code))
865                                          ((integerp code)
866                                           (format "#x%04X" code))
867                                          (t
868                                           (format "%s %S"
869                                                   line-breaking code))))
870                                  value " ")
871                       line-breaking))
872       (setq attributes (delq '->ideograph attributes))
873       )
874     (if (equal (get-char-attribute char '->titlecase)
875                (get-char-attribute char '->uppercase))
876         (setq attributes (delq '->titlecase attributes)))
877     (unless readable
878       (dolist (ignored '(composition
879                          ->denotational <-subsumptive ->ucs-unified
880                          ->ideographic-component-forms))
881         (setq attributes (delq ignored attributes))))
882     (while attributes
883       (setq name (car attributes))
884       (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
885                   'value-is-empty)
886         (cond ((setq ret (find-charset name))
887                (setq name (charset-name ret))
888                (when (not (memq name dest-ccss))
889                  (setq dest-ccss (cons name dest-ccss))
890                  (if line-separator
891                      (insert line-separator)
892                    (setq line-separator (format ",%s" line-breaking)))
893                  (char-db-json-insert-ccs-feature name value line-breaking))
894                )
895               ((string-match "^=>ucs@" (symbol-name name))
896                (if line-separator
897                    (insert line-separator)
898                  (setq line-separator (format ",%s" line-breaking)))
899                (insert (format "{\"%-18s\": #x%04X},\t\"_comment\": \"%c\"%s"
900                                name value (decode-char '=ucs value)
901                                line-breaking))
902                )
903               ((eq name 'jisx0208-1978/4X)
904                (if line-separator
905                    (insert line-separator)
906                  (setq line-separator (format ",%s" line-breaking)))
907                (insert (format "{\"%-18s\": #x%04X}%s"
908                                name value
909                                line-breaking))
910                )
911               ((and
912                 (not readable)
913                 (not (eq name '->subsumptive))
914                 (not (eq name '->uppercase))
915                 (not (eq name '->lowercase))
916                 (not (eq name '->titlecase))
917                 (not (eq name '->canonical))
918                 (not (eq name '->Bopomofo))
919                 (not (eq name '->mistakable))
920                 (not (eq name '->ideographic-variants))
921                 (null (get-char-attribute
922                        char (intern (format "%s*sources" name))))
923                 (not (string-match "\\*sources$" (symbol-name name)))
924                 (null (get-char-attribute
925                        char (intern (format "%s*note" name))))
926                 (not (string-match "\\*note$" (symbol-name name)))
927                 (or (eq name '<-identical)
928                     (eq name '<-uppercase)
929                     (eq name '<-lowercase)
930                     (eq name '<-titlecase)
931                     (eq name '<-canonical)
932                     (eq name '<-ideographic-variants)
933                     ;; (eq name '<-synonyms)
934                     (string-match "^<-synonyms" (symbol-name name))
935                     (eq name '<-mistakable)
936                     (when (string-match "^->" (symbol-name name))
937                       (cond
938                        ((string-match "^->fullwidth" (symbol-name name))
939                         (not (and (consp value)
940                                   (characterp (car value))
941                                   (encode-char
942                                    (car value) '=ucs 'defined-only)))
943                         )
944                        (t)))
945                     ))
946                )
947               ((or (eq name 'ideographic-structure)
948                    (eq name 'ideographic-combination)
949                    (eq name 'ideographic-)
950                    (eq name '=decomposition)
951                    (char-feature-base-name= '=decomposition name)
952                    (char-feature-base-name= '=>decomposition name)
953                    ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
954                    ;;               (symbol-name name))
955                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
956                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
957                                  (symbol-name name))
958                    )
959                (if line-separator
960                    (insert line-separator)
961                  (setq line-separator (format ",%s" line-breaking)))
962                (char-db-json-insert-relation-feature char name value
963                                                      line-breaking
964                                                      ccss readable))
965               ((memq name '(ideograph=
966                             original-ideograph-of
967                             ancient-ideograph-of
968                             vulgar-ideograph-of
969                             wrong-ideograph-of
970                             ;; simplified-ideograph-of
971                             ideographic-variants
972                             ;; ideographic-different-form-of
973                             ))
974                (if line-separator
975                    (insert line-separator)
976                  (setq line-separator (format ",%s" line-breaking)))
977                (insert (format "{\"%-18s\":%s " name line-breaking))
978                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
979                      separator nil)
980                (while (consp value)
981                  (setq cell (car value))
982                  (if (and (consp cell)
983                           (consp (car cell)))
984                      (progn
985                        (if separator
986                            (insert lbs))
987                        (char-db-json-insert-alist cell readable)
988                        (setq separator lbs))
989                    (if separator
990                        (insert separator))
991                    (insert (prin1-to-string cell))
992                    (setq separator " "))
993                  (setq value (cdr value)))
994                (insert " }")
995                (insert line-breaking))
996               ((consp value)
997                (if line-separator
998                    (insert line-separator)
999                  (setq line-separator (format ",%s" line-breaking)))
1000                (insert (format "{\"%-18s\": " name))
1001                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
1002                      separator nil)
1003                (while (consp value)
1004                  (setq cell (car value))
1005                  (if (and (consp cell)
1006                           (consp (car cell))
1007                           (setq ret (condition-case nil
1008                                         (find-char cell)
1009                                       (error nil))))
1010                      (progn
1011                        (setq rest cell
1012                              al nil
1013                              cal nil)
1014                        (while rest
1015                          (setq key (car (car rest)))
1016                          (if (find-charset key)
1017                              (setq cal (cons key cal))
1018                            (setq al (cons key al)))
1019                          (setq rest (cdr rest)))
1020                        (if separator
1021                            (insert lbs))
1022                        (char-db-json-insert-char-features ret
1023                                                           readable
1024                                                           al
1025                                                           nil 'for-sub-node)
1026                        (setq separator lbs))
1027                    (setq ret (prin1-to-string cell))
1028                    (if separator
1029                        (if (< (+ (current-column)
1030                                  (length ret)
1031                                  (length separator))
1032                               76)
1033                            (insert separator)
1034                          (insert lbs)))
1035                    (insert ret)
1036                    (setq separator " "))
1037                  (setq value (cdr value)))
1038                (insert " }")
1039                (insert line-breaking))
1040               (t
1041                (if line-separator
1042                    (insert line-separator)
1043                  (setq line-separator (format ",%s" line-breaking)))
1044                (insert (format "{\"%-18s\":" name))
1045                (setq ret (prin1-to-string value))
1046                (unless (< (+ (current-column)
1047                              (length ret)
1048                              3)
1049                           76)
1050                  (insert line-breaking))
1051                (insert ret " }" line-breaking)
1052                ;; (insert (format "(%-18s . %S)%s"
1053                ;;                 name value
1054                ;;                 line-breaking))
1055                )
1056               ))
1057       (setq attributes (cdr attributes)))
1058     (insert " }")))
1059
1060 (defun char-db-json-char-data (char &optional readable
1061                                     attributes column)
1062   (unless column
1063     (setq column (current-column)))
1064   (save-restriction
1065     (narrow-to-region (point)(point))
1066     (char-db-json-insert-char-features char readable attributes column)
1067     (goto-char (point-min))
1068     (while (re-search-forward "[ \t]+$" nil t)
1069       (replace-match ""))
1070     ;; from tabify.
1071     (goto-char (point-min))
1072     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1073       (let ((column (current-column))
1074             (indent-tabs-mode t))
1075         (delete-region (match-beginning 0) (point))
1076         (indent-to column)))
1077     (goto-char (point-max))
1078     ;; (tabify (point-min)(point-max))
1079     ))
1080
1081 (defun char-db-json-char-data-with-variant (char &optional printable
1082                                            no-ucs-unified
1083                                            script excluded-script)
1084   (insert "[ ")
1085   (char-db-json-char-data char printable)
1086   (let ((variants (char-variants char))
1087         rest
1088         variant vs ret
1089         )
1090     (setq variants (sort variants #'<))
1091     (setq rest variants)
1092     (setq variants (cons char variants))
1093     (while rest
1094       (setq variant (car rest))
1095       (unless (get-char-attribute variant '<-subsumptive)
1096         (if (and (or (null script)
1097                      (null (setq vs (get-char-attribute variant 'script)))
1098                      (memq script vs))
1099                  (or (null excluded-script)
1100                      (null (setq vs (get-char-attribute variant 'script)))
1101                      (not (memq excluded-script vs))))
1102             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1103               (insert ",\n  ")
1104               (char-db-json-char-data variant printable)
1105               (if (setq ret (char-variants variant))
1106                   (while ret
1107                     (or (memq (car ret) variants)
1108                         ;; (get-char-attribute (car ret) '<-subsumptive)
1109                         (setq rest (nconc rest (list (car ret)))))
1110                     (setq ret (cdr ret)))))))
1111       (setq rest (cdr rest)))
1112     (insert "\n]\n")))
1113
1114 (defun char-db-json-insert-char-range-data (min max
1115                                                 &optional script
1116                                                 excluded-script)
1117   (let ((code min)
1118         char)
1119     (while (<= code max)
1120       (setq char (decode-char '=ucs code))
1121       (if (encode-char char '=ucs 'defined-only)
1122           (char-db-json-char-data-with-variant char nil 'no-ucs-unified
1123                                          script excluded-script))
1124       (setq code (1+ code)))))
1125
1126 (defun write-char-range-data-to-json-file (min max file
1127                                                &optional script
1128                                                excluded-script)
1129   (let ((coding-system-for-write char-db-file-coding-system))
1130     (with-temp-buffer
1131       (insert (format "// -*- coding: %s -*-\n"
1132                       char-db-file-coding-system))
1133       (char-db-json-insert-char-range-data min max script excluded-script)
1134       (write-region (point-min)(point-max) file))))
1135
1136 ;;;###autoload
1137 (defun what-char-definition-json (char)
1138   (interactive (list (char-after)))
1139   (let ((buf (get-buffer-create "*Character Description*"))
1140         (the-buf (current-buffer))
1141         (win-conf (current-window-configuration)))
1142     (pop-to-buffer buf)
1143     (make-local-variable 'what-character-original-window-configuration)
1144     (setq what-character-original-window-configuration win-conf)
1145     (setq buffer-read-only nil)
1146     (erase-buffer)
1147     (condition-case err
1148         (progn
1149           (char-db-json-char-data-with-variant char nil)
1150           (unless (char-attribute-alist char)
1151             (insert (format "// = %c\n"
1152                             (let* ((rest (split-char char))
1153                                    (ccs (pop rest))
1154                                    (code (pop rest)))
1155                               (while rest
1156                                 (setq code (logior (lsh code 8)
1157                                                    (pop rest))))
1158                               (decode-char ccs code)))))
1159           ;; (char-db-update-comment)
1160           (set-buffer-modified-p nil)
1161           (view-mode the-buf (lambda (buf)
1162                                (set-window-configuration
1163                                 what-character-original-window-configuration)
1164                                ))
1165           (goto-char (point-min)))
1166       (error (progn
1167                (set-window-configuration
1168                 what-character-original-window-configuration)
1169                (signal (car err) (cdr err)))))))
1170
1171 (defun char-db-json-batch-view ()
1172   (setq terminal-coding-system 'binary)
1173   (condition-case err
1174       (let* ((target (pop command-line-args-left))
1175              ret genre
1176              object)
1177         (princ "Content-Type: application/json; charset=UTF-8
1178
1179 ")
1180         (cond
1181          ((stringp target)
1182           (when (string-match "^char=\\(&[^&;]+;\\)" target)
1183             (setq ret (match-end 0))
1184             (setq target
1185                   (concat "char="
1186                           (www-uri-encode-object
1187                            (www-uri-decode-object
1188                             'character (match-string 1 target)))
1189                           (substring target ret))))
1190           (setq target
1191                 (mapcar (lambda (cell)
1192                           (if (string-match "=" cell)
1193                               (progn
1194                                 (setq genre (substring cell 0 (match-beginning 0))
1195                                       ret (substring cell (match-end 0)))
1196                                 (cons
1197                                  (intern
1198                                   (decode-uri-string genre 'utf-8-mcs-er))
1199                                  ret))
1200                             (list (decode-uri-string cell 'utf-8-mcs-er))))
1201                         (split-string target "&")))
1202           (setq ret (car target))
1203           (cond ((eq (car ret) 'char)
1204                  (setq object (www-uri-decode-object (car ret)(cdr ret)))
1205                  (when (characterp object)
1206                    (with-temp-buffer
1207                      (char-db-json-char-data object)
1208                      (encode-coding-region (point-min)(point-max)
1209                                            char-db-file-coding-system)
1210                      (princ (buffer-string))
1211                      ))
1212                  )
1213                 ((eq (car ret) 'character)
1214                  (setq object (www-uri-decode-object (car ret)(cdr ret)))
1215                  (when (characterp object)
1216                    (with-temp-buffer
1217                      (char-db-json-char-data object)
1218                      (encode-coding-region (point-min)(point-max)
1219                                            char-db-file-coding-system)
1220                      (princ (buffer-string))
1221                      ))
1222                  ))
1223           ))
1224         )
1225     (error nil
1226            (princ (format "%S" err)))
1227     ))
1228
1229
1230 ;;; @ end
1231 ;;;
1232
1233 (provide 'char-db-json)
1234
1235 ;;; char-db-json.el ends here