update.
[chise/est.git] / char-db-json.el
1 ;;; char-db-json.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,
4 ;;   2008,2009,2010,2011,2012,2013,2014,2015,2016 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (require 'char-db-util)
29 (require 'cwiki-format)
30
31 (setq char-db-ignored-attributes
32       '(ideographic-products
33         composition
34         ;; ->HNG
35         *instance@ruimoku/bibliography/title
36         *instance@morpheme-entry/zh-classical))
37
38
39 ;;; @ char-db formatters
40 ;;;
41     
42 (defun char-db-json-insert-char-spec (char &optional readable column
43                                            required-features)
44   (unless column
45     (setq column (current-column)))
46   (let (char-spec temp-char)
47     (setq char-spec (char-db-make-char-spec char))
48     (unless (or (characterp char) ; char
49                 (condition-case nil
50                     (setq char (find-char char-spec))
51                   (error nil)))
52       ;; define temporary character
53       ;;   Current implementation is dirty.
54       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
55                                          char-spec)))
56       (remove-char-attribute temp-char 'ideograph-daikanwa)
57       (setq char temp-char))
58     (char-db-json-insert-char-features char
59                                        readable
60                                        (union (mapcar #'car char-spec)
61                                               required-features)
62                                        nil 'for-sub-node)
63     (when temp-char
64       ;; undefine temporary character
65       ;;   Current implementation is dirty.
66       (setq char-spec (char-attribute-alist temp-char))
67       (while char-spec
68         (remove-char-attribute temp-char (car (car char-spec)))
69         (setq char-spec (cdr char-spec))))))
70
71 (defun char-db-json-insert-alist (alist &optional readable column)
72   (unless column
73     (setq column (current-column)))
74   (let ((line-breaking
75          (concat "\n" (make-string (1+ column) ?\ )))
76         name value
77         ret al ; cal
78         key
79         lbs cell rest separator)
80     (insert "(")
81     (while alist
82       (setq name (car (car alist))
83             value (cdr (car alist)))
84       (cond ((eq name 'char)
85              (insert "(char . ")
86              (if (setq ret (condition-case nil
87                                (find-char value)
88                              (error nil)))
89                  (progn
90                    (setq al nil
91                          ;; cal nil
92                          )
93                    (while value
94                      (setq key (car (car value)))
95                      ;; (if (find-charset key)
96                      ;;     (setq cal (cons key cal))
97                      (setq al (cons key al))
98                      ;; )
99                      (setq value (cdr value)))
100                    (insert-char-attributes ret
101                                            readable
102                                            (or al 'none) ; cal
103                                            nil 'for-sub-node))
104                (insert (prin1-to-string value)))
105              (insert ")")
106              (insert line-breaking))
107             ((consp value)
108              (insert (format "(%-18s " name))
109              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
110              (while (consp value)
111                (setq cell (car value))
112                (if (and (consp cell)
113                         (consp (car cell))
114                         (setq ret (condition-case nil
115                                       (find-char cell)
116                                     (error nil)))
117                         )
118                    (progn
119                      (setq rest cell
120                            al nil
121                            ;; cal nil
122                            )
123                      (while rest
124                        (setq key (car (car rest)))
125                        ;; (if (find-charset key)
126                        ;;     (setq cal (cons key cal))
127                        (setq al (cons key al))
128                        ;; )
129                        (setq rest (cdr rest)))
130                      (if separator
131                          (insert lbs))
132                      (insert-char-attributes ret
133                                              readable
134                                              al ; cal
135                                              nil 'for-sub-node)
136                      (setq separator lbs))
137                  (if separator
138                      (insert separator))
139                  (insert (prin1-to-string cell))
140                  (setq separator " "))
141                (setq value (cdr value)))
142              (insert ")")
143              (insert line-breaking))
144             (t
145              (insert (format "(%-18s . %S)%s"
146                              name value
147                              line-breaking))))
148       (setq alist (cdr alist))))
149   (insert ")"))
150
151 (defun char-db-json-insert-char-reference (plist &optional readable column)
152   (unless column
153     (setq column (current-column)))
154   (let ((line-breaking
155          (concat "\n" (make-string (1+ column) ?\ )))
156         (separator "")
157         name value)
158     (insert "{")
159     (while plist
160       (setq name (pop plist))
161       (setq value (pop plist))
162       (cond ((eq name :char)
163              (insert separator)
164              (insert "\"char\":\t")
165              (cond ((numberp value)
166                     (setq value (decode-char '=ucs value)))
167                    ;; ((consp value)
168                    ;;  (setq value (or (find-char value)
169                    ;;                  value)))
170                    )
171              (char-db-json-insert-char-spec value readable)
172              (insert line-breaking)
173              (setq separator ""))
174             ((eq name :radical)
175              (insert (format "%s%s\t%d, \"_comment\": \"%c\"%s"
176                              separator
177                              name value
178                              (ideographic-radical value)
179                              line-breaking))
180              (setq separator ""))
181             (t
182              (insert (format "%s%s\t%S" separator name value))
183              (setq separator line-breaking)))
184       ))
185   (insert " }"))
186
187 (defun char-db-json-insert-ccs-feature (name value line-breaking)
188   (cond
189    ((integerp value)
190     (insert
191      (format
192       (cond
193        ((memq name '(=>iwds-1
194                      ==shinjigen
195                      =shinjigen
196                      =shinjigen@1ed ==shinjigen@1ed
197                      =shinjigen@rev ==shinjigen@rev
198                      =shinjigen/+p@rev ==shinjigen/+p@rev
199                      ===daikanwa/ho ==daikanwa/ho
200                      =daikanwa/ho =>>daikanwa/ho =>daikanwa/ho))
201         " %-20s  %4d,\t\"_comment\": \"%c")
202        ((eq name '=shinjigen@1ed/24pr)
203         " %-20s  %4d,\t\"_comment\": \"%c")
204        ((or
205          (memq name
206                '(===daikanwa
207                  ==daikanwa =daikanwa =>>daikanwa =>daikanwa
208                  =daikanwa@rev1 =daikanwa@rev2
209                  =daikanwa/+p ==daikanwa/+p ===daikanwa/+p
210                  =>>daikanwa/+p
211                  =daikanwa/+2p =>>daikanwa/+2p
212                  =gt ==gt ===gt
213                  =>>gt =+>gt =>gt
214                  =gt-k ==gt-k ===gt-k
215                  =>>gt-k =>gt-k
216                  =adobe-japan1-0 ==adobe-japan1-0 ===adobe-japan1-0
217                  =adobe-japan1-1 ==adobe-japan1-1 ===adobe-japan1-1
218                  =adobe-japan1-2 ==adobe-japan1-2 ===adobe-japan1-2
219                  =adobe-japan1-3 ==adobe-japan1-3 ===adobe-japan1-3
220                  =adobe-japan1-4 ==adobe-japan1-4 ===adobe-japan1-4
221                  =adobe-japan1-5 ==adobe-japan1-5 ===adobe-japan1-5
222                  =adobe-japan1-6 ==adobe-japan1-6 ===adobe-japan1-6
223                  =>>adobe-japan1-0 =+>adobe-japan1-0
224                  =>>adobe-japan1-1 =+>adobe-japan1-1
225                  =>>adobe-japan1-2 =+>adobe-japan1-2
226                  =>>adobe-japan1-3 =+>adobe-japan1-3
227                  =>>adobe-japan1-4 =+>adobe-japan1-4
228                  =>>adobe-japan1-5 =+>adobe-japan1-5
229                  =>>adobe-japan1-6 =+>adobe-japan1-6
230                  =>cbeta =cbeta =>>cbeta ==cbeta ===cbeta
231                  =zinbun-oracle =>zinbun-oracle
232                  ===hng-jou ===hng-keg ===hng-dng ===hng-mam
233                  ===hng-drt ===hng-kgk ===hng-myz ===hng-kda
234                  ===hng-khi ===hng-khm ===hng-hok ===hng-kyd ===hng-sok
235                  ===hng-yhk ===hng-kak ===hng-kar ===hng-kae
236                  ===hng-sys ===hng-tsu ===hng-tzj
237                  ===hng-hos ===hng-nak ===hng-jhk
238                  ===hng-hod ===hng-gok ===hng-ink ===hng-nto
239                  ===hng-nkm ===hng-k24 ===hng-nkk
240                  ===hng-kcc ===hng-kcj ===hng-kbk ===hng-sik
241                  ===hng-skk ===hng-kyu ===hng-ksk ===hng-wan
242                  ===hng-okd ===hng-wad ===hng-kmi
243                  ===hng-zkd ===hng-doh ===hng-jyu
244                  ===hng-tzs ===hng-kss ===hng-kyo
245                  ===hng-smk))
246          ;; (string-match "^=adobe-" (symbol-name name))
247          )
248         " %-20s %5d,\t\"_comment\": \"%c")
249        ((memq name '(=hanyo-denshi/ks
250                      ==hanyo-denshi/ks ===hanyo-denshi/ks
251                      =>>hanyo-denshi/ks
252                      =koseki ==koseki
253                      =mj ==mj ===mj =>>mj =>mj
254                      =zihai mojikyo))
255         " %-19s %6d,\t\"_comment\": \"%c")
256        ((memq name '(=hanyo-denshi/tk ==hanyo-denshi/tk))
257         " %-20s %8d,\t\"_comment\": \"%c")
258        ((>= (charset-dimension name) 2)
259         " %-20s %5d,\t\"_comment\": \"%c")
260        (t
261         " %-20s   %3d,\t\"_comment\": \"%c"))
262       (format "\"%s\":" name)
263       (if (= (charset-iso-graphic-plane name) 1)
264           (logior value
265                   (cond ((= (charset-dimension name) 1)
266                          #x80)
267                         ((= (charset-dimension name) 2)
268                          #x8080)
269                         ((= (charset-dimension name) 3)
270                          #x808080)
271                         (t 0)))
272         value)
273       (char-db-decode-isolated-char name value)))
274     (if (and (= (charset-chars name) 94)
275              (= (charset-dimension name) 2))
276         (insert (format " [%02d-%02d]\""
277                         (- (lsh value -8) 32)
278                         (- (logand value 255) 32)))
279       (insert "\""))
280     )
281    (t
282     (insert (format " %-20s %s"
283                     (format "\"%s\":" name) value))
284     ))
285   )
286
287 (defun char-db-json-insert-relation-feature (char name value line-breaking
288                                              ccss readable)
289   (insert (format " %-20s [%s    "
290                   (format "\"%s\":" name) line-breaking))
291   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
292         separator cell sources required-features
293         ret)
294     (if (characterp value)
295         (setq value (list value)))
296     (while (consp value)
297       (setq cell (car value))
298       (if (integerp cell)
299           (setq cell (decode-char '=ucs cell)))
300       (cond
301        ((eq name '->subsumptive)
302         (when (or (not (some (lambda (atr)
303                                (get-char-attribute cell atr))
304                              char-db-ignored-attributes))
305                   (some (lambda (ccs)
306                           (encode-char cell ccs 'defined-only))
307                         ccss))
308           (if separator
309               (insert separator)
310             (setq separator (format ",%s" lbs)))
311           (let ((char-db-ignored-attributes
312                  (cons '<-subsumptive
313                        char-db-ignored-attributes)))
314             (char-db-json-insert-char-features
315              cell readable nil nil 'for-sub-node))
316           )
317         )
318        ((characterp cell)
319         (setq sources
320               (get-char-attribute
321                char (intern (format "%s*sources" name))))
322         (setq required-features nil)
323         (dolist (source sources)
324           (cond
325            ((memq source '(JP
326                            JP/Jouyou
327                            shinjigen shinjigen@1ed shinjigen@rev))
328             (setq required-features
329                   (union required-features
330                          '(=jis-x0208
331                            =jis-x0208@1990
332                            =jis-x0213-1@2000
333                            =jis-x0213-1@2004
334                            =jis-x0213-2
335                            =jis-x0212
336                            =jis-x0208@1983
337                            =jis-x0208@1978
338                            =shinjigen
339                            =shinjigen@1ed
340                            =shinjigen@rev
341                            =shinjigen/+p@rev))))
342            ((eq source 'CN)
343             (setq required-features
344                   (union required-features
345                          '(=gb2312
346                            =gb12345
347                            =iso-ir165)))))
348           (cond
349            ((find-charset (setq ret (intern (format "=%s" source))))
350             (setq required-features
351                   (cons ret required-features)))
352            (t (setq required-features
353                     (cons source required-features)))))
354         (cond ((string-match "@JP" (symbol-name name))
355                (setq required-features
356                      (union required-features
357                             '(=jis-x0208
358                               =jis-x0208@1990
359                               =jis-x0213-1-2000
360                               =jis-x0213-2-2000
361                               =jis-x0212
362                               =jis-x0208@1983
363                               =jis-x0208@1978))))
364               ((string-match "@CN" (symbol-name name))
365                (setq required-features
366                      (union required-features
367                             '(=gb2312
368                               =gb12345
369                               =iso-ir165)))))
370         (if separator
371             (insert separator)
372           (setq separator (format ",%s" lbs)))
373         ;; (if readable
374         ;;     (insert (format "%S" cell))
375         ;;   (char-db-json-insert-char-spec cell readable
376         ;;                                  nil
377         ;;                                  required-features))
378         (char-db-json-insert-char-spec cell readable
379                                        nil
380                                        required-features)
381         )
382        ((consp cell)
383         (if separator
384             (insert separator)
385           (setq separator (format ",%s" lbs)))
386         (if (consp (car cell))
387             (char-db-json-insert-char-spec cell readable)
388           (char-db-json-insert-char-reference cell readable))
389         )
390        (t
391         (if separator
392             (insert separator)
393           )
394         (insert (prin1-to-string cell))
395         (setq separator " ")))
396       (setq value (cdr value)))
397     (insert " ]")))
398
399 (defun char-db-json-insert-char-features (char
400                                           &optional readable attributes column
401                                           for-sub-node)
402   (unless column
403     (setq column (current-column)))
404   (let ((est-view-url-prefix "http://chise.org/est/view")
405         id obj-id type
406         name value ; has-long-ccs-name
407         rest
408         radical strokes
409         (line-breaking
410          (concat "\n" (make-string (1+ column) ?\ )))
411         line-separator
412         lbs cell separator ret
413         key al cal
414         dest-ccss ; sources required-features
415         ccss)
416     (let (atr-d)
417       (setq attributes
418             (sort (if attributes
419                       (if (consp attributes)
420                           (progn
421                             (dolist (name attributes)
422                               (unless (memq name char-db-ignored-attributes)
423                                 (if (find-charset name)
424                                     (push name ccss))
425                                 (push name atr-d)))
426                             atr-d))
427                     (dolist (name (char-attribute-list))
428                       (unless (memq name char-db-ignored-attributes)
429                         (if (find-charset name)
430                             (push name ccss))
431                         (push name atr-d)))
432                     atr-d)
433                   #'char-attribute-name<)))
434     (insert
435      (format "{ \"@context\": \"%s/genre/character/context.json\""
436              est-view-url-prefix))
437     (setq line-separator (format ",%s" line-breaking))
438     (setq id (www-uri-make-object-url char))
439     (insert (format "%s \"@id\": \"%s\"" line-separator id))
440     (setq obj-id (file-name-nondirectory id))
441     (setq type
442           (cond
443            ((string-match "^a2\\." obj-id)
444             "chise:super-abstract-character")
445            ((string-match "^a\\." obj-id)
446             "chise:abstract-character")
447            ((string-match "^o\\." obj-id)
448             "chise:unified-glyph")
449            ((string-match "^rep\\." obj-id)
450             "chise:abstract-glyph")
451            ((string-match "^g\\." obj-id)
452             "chise:detailed-glyph")
453            ((string-match "^g2\\." obj-id)
454             "chise:abstract-glyph-form")
455            ((string-match "^gi\\." obj-id)
456             "chise:abstract-glyph-form")
457            ((string-match "^repi\\." obj-id)
458             "chise:glyph-image")
459            (t
460             "chise:character")
461            ))
462     (insert (format "%s \"@type\": \"%s\"" line-separator type))
463     (when (memq '<-subsumptive attributes)
464       (when (or readable (not for-sub-node))
465         (when (setq value (get-char-attribute char '<-subsumptive))
466           (insert line-separator)
467           (char-db-json-insert-relation-feature char '<-subsumptive value
468                                                 line-breaking
469                                                 ccss readable)
470           ))
471       (setq attributes (delq '<-subsumptive attributes))
472       )
473     (when (and (memq '<-denotational attributes)
474                (setq value (get-char-attribute char '<-denotational)))
475       (insert line-separator)
476       (char-db-json-insert-relation-feature char '<-denotational value
477                                             line-breaking
478                                             ccss readable)
479       (setq attributes (delq '<-denotational attributes)))
480     (when (and (memq '<-denotational@component attributes)
481                (setq value
482                      (get-char-attribute char '<-denotational@component)))
483       (insert line-separator)
484       (char-db-json-insert-relation-feature
485        char '<-denotational@component value
486        line-breaking
487        ccss readable)
488       (setq attributes (delq '<-denotational@component attributes)))
489     (when (and (memq 'name attributes)
490                (setq value (get-char-attribute char 'name)))
491       (insert line-separator)
492       (insert (format
493                (if (> (+ (current-column) (length value)) 48)
494                    " \"name\": %S"
495                  " \"name\":                 %S")
496                value))
497       (setq attributes (delq 'name attributes))
498       )
499     (when (and (memq 'name* attributes)
500                (setq value (get-char-attribute char 'name*)))
501       (insert line-separator)
502       (insert (format
503                (if (> (+ (current-column) (length value)) 48)
504                    " \"name*\": %S"
505                  " \"name*\":                 %S")
506                value))
507       (setq attributes (delq 'name* attributes))
508       )
509     (when (and (memq 'script attributes)
510                (setq value (get-char-attribute char 'script)))
511       (insert line-separator)
512       (insert (format " \"script\":\t\t  %s"
513                       (mapconcat (function prin1-to-string)
514                                  value " ")))
515       (setq attributes (delq 'script attributes))
516       )
517     (dolist (name '(=>ucs =>ucs*))
518       (when (and (memq name attributes)
519                  (setq value (get-char-attribute char name)))
520         (insert line-separator)
521         (insert (format " \"%-20s\":  #x%04X,\t\"_comment\": \"%c\""
522                         name value (decode-char '=ucs value)))
523         (setq attributes (delq name attributes))))
524     (dolist (name '(=>ucs@gb =>ucs@big5))
525       (when (and (memq name attributes)
526                  (setq value (get-char-attribute char name)))
527         (insert line-separator)
528         (insert (format " \"%-20s\":  #x%04X,\t\"_comment\": \"%c\"%s"
529                         name value
530                         (decode-char (intern
531                                       (concat "="
532                                               (substring
533                                                (symbol-name name) 2)))
534                                      value)
535                         line-breaking))
536         (setq attributes (delq name attributes))
537         ))
538     (when (and (memq 'general-category attributes)
539                (setq value (get-char-attribute char 'general-category)))
540       (insert line-separator)
541       (insert (format
542                " \"general-category\":\t [ %s ], \"_comment\": \"%s\""
543                (mapconcat (lambda (cell)
544                             (format "%S" cell))
545                           value " ")
546                (cond ((rassoc value unidata-normative-category-alist)
547                       "Normative Category")
548                      ((rassoc value unidata-informative-category-alist)
549                       "Informative Category")
550                      (t
551                       "Unknown Category"))
552                ))
553       (setq attributes (delq 'general-category attributes))
554       )
555     (when (and (memq 'bidi-category attributes)
556                (setq value (get-char-attribute char 'bidi-category)))
557       (insert line-separator)
558       (insert (format " \"bidi-category\":\t  %S"
559                       value))
560       (setq attributes (delq 'bidi-category attributes))
561       )
562     (unless (or (not (memq 'mirrored attributes))
563                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
564                     'empty))
565       (insert line-separator)
566       (insert (format " \"mirrored\":\t\t  %S"
567                       value))
568       (setq attributes (delq 'mirrored attributes))
569       )
570     (cond
571      ((and (memq 'decimal-digit-value attributes)
572            (setq value (get-char-attribute char 'decimal-digit-value)))
573       (insert line-separator)
574       (insert (format " \"decimal-digit-value\":  %S"
575                       value))
576       (setq attributes (delq 'decimal-digit-value attributes))
577       (when (and (memq 'digit-value attributes)
578                  (setq value (get-char-attribute char 'digit-value)))
579         (insert line-separator)
580         (insert (format " \"digit-value\":\t    %S"
581                         value))
582         (setq attributes (delq 'digit-value attributes))
583         )
584       (when (and (memq 'numeric-value attributes)
585                  (setq value (get-char-attribute char 'numeric-value)))
586         (insert line-separator)
587         (insert (format " \"numeric-value\":\t    %S"
588                         value))
589         (setq attributes (delq 'numeric-value attributes))
590         )
591       )
592      (t
593       (when (and (memq 'digit-value attributes)
594                  (setq value (get-char-attribute char 'digit-value)))
595         (insert line-separator)
596         (insert (format " \"digit-value\":\t %S"
597                         value))
598         (setq attributes (delq 'digit-value attributes))
599         )
600       (when (and (memq 'numeric-value attributes)
601                  (setq value (get-char-attribute char 'numeric-value)))
602         (insert line-separator)
603         (insert (format " \"numeric-value\":\t %S"
604                         value))
605         (setq attributes (delq 'numeric-value attributes))
606         )))
607     (when (and (memq 'iso-10646-comment attributes)
608                (setq value (get-char-attribute char 'iso-10646-comment)))
609       (insert line-separator)
610       (insert (format "{\"iso-10646-comment\":\t %S}%s"
611                       value
612                       line-breaking))
613       (setq attributes (delq 'iso-10646-comment attributes))
614       )
615     (when (and (memq 'morohashi-daikanwa attributes)
616                (setq value (get-char-attribute char 'morohashi-daikanwa)))
617       (insert line-separator)
618       (insert (format "{\"morohashi-daikanwa\":\t%s}%s"
619                       (mapconcat (function prin1-to-string) value " ")
620                       line-breaking))
621       (setq attributes (delq 'morohashi-daikanwa attributes))
622       )
623     (setq radical nil
624           strokes nil)
625     (when (and (memq 'ideographic-radical attributes)
626                (setq value (get-char-attribute char 'ideographic-radical)))
627       (setq radical value)
628       (insert line-separator)
629       (insert (format " \"ideographic-radical\": %S,\t\"_comment\": \"%c\""
630                       radical
631                       (ideographic-radical radical)
632                       ))
633       (setq attributes (delq 'ideographic-radical attributes))
634       )
635     (when (and (memq 'shuowen-radical attributes)
636                (setq value (get-char-attribute char 'shuowen-radical)))
637       (insert line-separator)
638       (insert (format " \"shuowen-radical\":\t %S,\t\"_comment\": \"%c\""
639                       value
640                       (shuowen-radical value)))
641       (setq attributes (delq 'shuowen-radical attributes))
642       )
643     (let (key)
644       (dolist (domain
645                (append
646                 char-db-feature-domains
647                 (let (dest domain)
648                   (dolist (feature (char-attribute-list))
649                     (setq feature (symbol-name feature))
650                     (when (string-match
651                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
652                            feature)
653                       (setq domain (intern (match-string 2 feature)))
654                      (unless (memq domain dest)
655                        (setq dest (cons domain dest)))))
656                   (sort dest #'string<))))
657         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
658         (when (and (memq key attributes)
659                    (setq value (get-char-attribute char key)))
660           (setq radical value)
661           (insert line-separator)
662           (insert (format "{\"%s\": %S},\t\"_comment\": \"%c\"%s"
663                           key
664                           radical
665                           (ideographic-radical radical)
666                           line-breaking))
667           (setq attributes (delq key attributes))
668           )
669         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
670         (when (and (memq key attributes)
671                    (setq value (get-char-attribute char key)))
672           (setq strokes value)
673           (insert line-separator)
674           (insert (format " \"%s\": %S"
675                           key
676                           strokes))
677           (setq attributes (delq key attributes))
678           )
679         (setq key (intern (format "%s@%s" 'total-strokes domain)))
680         (when (and (memq key attributes)
681                    (setq value (get-char-attribute char key)))
682           (insert line-separator)
683           (insert (format " \"%s\":       %S"
684                           key
685                           value
686                           ))
687           (setq attributes (delq key attributes))
688           )
689         (dolist (feature '(ideographic-radical
690                            ideographic-strokes
691                            total-strokes))
692           (setq key (intern (format "%s@%s*sources" feature domain)))
693           (when (and (memq key attributes)
694                      (setq value (get-char-attribute char key)))
695             (insert line-separator)
696             (insert (format " \"%s\":%s" key line-breaking))
697             (dolist (cell value)
698               (insert (format " %s" cell)))
699             (setq attributes (delq key attributes))
700             ))
701         ))
702     (when (and (memq 'ideographic-strokes attributes)
703                (setq value (get-char-attribute char 'ideographic-strokes)))
704       (setq strokes value)
705       (insert line-separator)
706       (insert (format " \"ideographic-strokes\": %S"
707                       strokes
708                       ))
709       (setq attributes (delq 'ideographic-strokes attributes))
710       )
711     (when (and (memq 'kangxi-radical attributes)
712                (setq value (get-char-attribute char 'kangxi-radical)))
713       (unless (eq value radical)
714         (insert line-separator)
715         (insert (format "{\"kangxi-radical\":\t%S},\t\"_comment\": \"%c\"%s"
716                         value
717                         (ideographic-radical value)
718                         line-breaking))
719         (or radical
720             (setq radical value)))
721       (setq attributes (delq 'kangxi-radical attributes))
722       )
723     (when (and (memq 'kangxi-strokes attributes)
724                (setq value (get-char-attribute char 'kangxi-strokes)))
725       (unless (eq value strokes)
726         (insert line-separator)
727         (insert (format "{\"kangxi-strokes\":\t%S}%s"
728                         value
729                         line-breaking))
730         (or strokes
731             (setq strokes value)))
732       (setq attributes (delq 'kangxi-strokes attributes))
733       )
734     (when (and (memq 'japanese-strokes attributes)
735                (setq value (get-char-attribute char 'japanese-strokes)))
736       (unless (eq value strokes)
737         (insert line-separator)
738         (insert (format "{\"japanese-strokes\":\t%S}%s"
739                         value
740                         line-breaking))
741         (or strokes
742             (setq strokes value)))
743       (setq attributes (delq 'japanese-strokes attributes))
744       )
745     (when (and (memq 'cns-radical attributes)
746                (setq value (get-char-attribute char 'cns-radical)))
747       (insert line-separator)
748       (insert (format "{\"cns-radical\":\t%S},\t\"_comment\": \"%c\"%s"
749                       value
750                       (ideographic-radical value)
751                       line-breaking))
752       (setq attributes (delq 'cns-radical attributes))
753       )
754     (when (and (memq 'cns-strokes attributes)
755                (setq value (get-char-attribute char 'cns-strokes)))
756       (unless (eq value strokes)
757         (insert line-separator)
758         (insert (format "{\"cns-strokes\":\t%S}%s"
759                         value
760                         line-breaking))
761         (or strokes
762             (setq strokes value)))
763       (setq attributes (delq 'cns-strokes attributes))
764       )
765     (when (and (memq 'total-strokes attributes)
766                (setq value (get-char-attribute char 'total-strokes)))
767       (insert line-separator)
768       (insert (format " \"total-strokes\":       %S"
769                       value
770                       ))
771       (setq attributes (delq 'total-strokes attributes))
772       )
773     (if (equal (get-char-attribute char '->titlecase)
774                (get-char-attribute char '->uppercase))
775         (setq attributes (delq '->titlecase attributes)))
776     (unless readable
777       (dolist (ignored '(composition
778                          ->denotational <-subsumptive ->ucs-unified
779                          ->ideographic-component-forms))
780         (setq attributes (delq ignored attributes))))
781     (while attributes
782       (setq name (car attributes))
783       (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
784                   'value-is-empty)
785         (cond ((setq ret (find-charset name))
786                (setq name (charset-name ret))
787                (when (not (memq name dest-ccss))
788                  (setq dest-ccss (cons name dest-ccss))
789                  (insert line-separator)
790                  (char-db-json-insert-ccs-feature name value line-breaking))
791                )
792               ((string-match "^=>ucs@" (symbol-name name))
793                (insert line-separator)
794                (insert (format "{\"%-20s\": #x%04X},\t\"_comment\": \"%c\"%s"
795                                name value (decode-char '=ucs value)
796                                line-breaking))
797                )
798               ((eq name 'jisx0208-1978/4X)
799                (insert line-separator)
800                (insert (format "{\"%-20s\": #x%04X}%s"
801                                name value
802                                line-breaking))
803                )
804               ((and
805                 (not readable)
806                 (not (eq name '->subsumptive))
807                 (not (eq name '->uppercase))
808                 (not (eq name '->lowercase))
809                 (not (eq name '->titlecase))
810                 (not (eq name '->canonical))
811                 (not (eq name '->Bopomofo))
812                 (not (eq name '->mistakable))
813                 (not (eq name '->ideographic-variants))
814                 (null (get-char-attribute
815                        char (intern (format "%s*sources" name))))
816                 (not (string-match "\\*sources$" (symbol-name name)))
817                 (null (get-char-attribute
818                        char (intern (format "%s*note" name))))
819                 (not (string-match "\\*note$" (symbol-name name)))
820                 (or (eq name '<-identical)
821                     (eq name '<-uppercase)
822                     (eq name '<-lowercase)
823                     (eq name '<-titlecase)
824                     (eq name '<-canonical)
825                     (eq name '<-ideographic-variants)
826                     ;; (eq name '<-synonyms)
827                     (string-match "^<-synonyms" (symbol-name name))
828                     (eq name '<-mistakable)
829                     (when (string-match "^->" (symbol-name name))
830                       (cond
831                        ((string-match "^->fullwidth" (symbol-name name))
832                         (not (and (consp value)
833                                   (characterp (car value))
834                                   (encode-char
835                                    (car value) '=ucs 'defined-only)))
836                         )
837                        (t)))
838                     ))
839                )
840               ((or (eq name 'ideographic-structure)
841                    (eq name 'ideographic-combination)
842                    ;; (eq name 'ideographic-)
843                    (eq name '=decomposition)
844                    (char-feature-base-name= '=decomposition name)
845                    (char-feature-base-name= '=>decomposition name)
846                    ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
847                    ;;               (symbol-name name))
848                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
849                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
850                                  (symbol-name name))
851                    )
852                (insert line-separator)
853                (char-db-json-insert-relation-feature char name value
854                                                      line-breaking
855                                                      ccss readable))
856               ((consp value)
857                (insert line-separator)
858                (insert (format " %-20s [ "
859                                (format "\"%s\":" name)))
860                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
861                      separator nil)
862                (while (consp value)
863                  (setq cell (car value))
864                  (if (and (consp cell)
865                           (consp (car cell))
866                           (setq ret (condition-case nil
867                                         (find-char cell)
868                                       (error nil))))
869                      (progn
870                        (setq rest cell
871                              al nil
872                              cal nil)
873                        (while rest
874                          (setq key (car (car rest)))
875                          (if (find-charset key)
876                              (setq cal (cons key cal))
877                            (setq al (cons key al)))
878                          (setq rest (cdr rest)))
879                        (if separator
880                            (insert lbs))
881                        (char-db-json-insert-char-features ret
882                                                           readable
883                                                           al
884                                                           nil 'for-sub-node)
885                        (setq separator lbs))
886                    (setq ret (prin1-to-string cell))
887                    (if separator
888                        (if (< (+ (current-column)
889                                  (length ret)
890                                  (length separator))
891                               76)
892                            (insert separator)
893                          (insert lbs)))
894                    (insert ret)
895                    (setq separator " "))
896                  (setq value (cdr value)))
897                (insert " ]")
898                )
899               (t
900                (insert line-separator)
901                (insert (format " %-20s "
902                                (format "\"%s\":" name)))
903                (setq ret (prin1-to-string value))
904                (unless (< (+ (current-column)
905                              (length ret)
906                              3)
907                           76)
908                  (insert line-breaking))
909                (insert ret)
910                )
911               ))
912       (setq attributes (cdr attributes)))
913     (insert "\n" (make-string column ?\ ) "}")))
914
915 (defun char-db-json-char-data (char &optional readable
916                                     attributes column)
917   (unless column
918     (setq column (current-column)))
919   (save-restriction
920     (narrow-to-region (point)(point))
921     (char-db-json-insert-char-features char readable attributes column)
922     (goto-char (point-min))
923     (while (re-search-forward "[ \t]+$" nil t)
924       (replace-match ""))
925     ;; from tabify.
926     (goto-char (point-min))
927     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
928       (let ((column (current-column))
929             (indent-tabs-mode t))
930         (delete-region (match-beginning 0) (point))
931         (indent-to column)))
932     (goto-char (point-max))
933     ;; (tabify (point-min)(point-max))
934     ))
935
936 (defun char-db-json-char-data-with-variant (char &optional printable
937                                            no-ucs-unified
938                                            script excluded-script)
939   (insert "[ ")
940   (char-db-json-char-data char printable)
941   (let ((variants (char-variants char))
942         rest
943         variant vs ret
944         )
945     (setq variants (sort variants #'<))
946     (setq rest variants)
947     (setq variants (cons char variants))
948     (while rest
949       (setq variant (car rest))
950       (unless (get-char-attribute variant '<-subsumptive)
951         (if (and (or (null script)
952                      (null (setq vs (get-char-attribute variant 'script)))
953                      (memq script vs))
954                  (or (null excluded-script)
955                      (null (setq vs (get-char-attribute variant 'script)))
956                      (not (memq excluded-script vs))))
957             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
958               (insert ",\n  ")
959               (char-db-json-char-data variant printable)
960               (if (setq ret (char-variants variant))
961                   (while ret
962                     (or (memq (car ret) variants)
963                         ;; (get-char-attribute (car ret) '<-subsumptive)
964                         (setq rest (nconc rest (list (car ret)))))
965                     (setq ret (cdr ret)))))))
966       (setq rest (cdr rest)))
967     (insert "\n]\n")))
968
969 (defun char-db-json-insert-char-range-data (min max
970                                                 &optional script
971                                                 excluded-script)
972   (let ((code min)
973         char)
974     (while (<= code max)
975       (setq char (decode-char '=ucs code))
976       (if (encode-char char '=ucs 'defined-only)
977           (char-db-json-char-data-with-variant char nil 'no-ucs-unified
978                                          script excluded-script))
979       (setq code (1+ code)))))
980
981 (defun write-char-range-data-to-json-file (min max file
982                                                &optional script
983                                                excluded-script)
984   (let ((coding-system-for-write char-db-file-coding-system))
985     (with-temp-buffer
986       (insert (format "// -*- coding: %s -*-\n"
987                       char-db-file-coding-system))
988       (char-db-json-insert-char-range-data min max script excluded-script)
989       (write-region (point-min)(point-max) file))))
990
991 ;;;###autoload
992 (defun what-char-definition-json (char)
993   (interactive (list (char-after)))
994   (let ((est-hide-cgi-mode t)
995         (buf (get-buffer-create "*Character Description*"))
996         (the-buf (current-buffer))
997         (win-conf (current-window-configuration)))
998     (pop-to-buffer buf)
999     (make-local-variable 'what-character-original-window-configuration)
1000     (setq what-character-original-window-configuration win-conf)
1001     (setq buffer-read-only nil)
1002     (erase-buffer)
1003     (condition-case err
1004         (progn
1005           (char-db-json-char-data-with-variant char 'printable)
1006           (unless (char-attribute-alist char)
1007             (insert (format "// = %c\n"
1008                             (let* ((rest (split-char char))
1009                                    (ccs (pop rest))
1010                                    (code (pop rest)))
1011                               (while rest
1012                                 (setq code (logior (lsh code 8)
1013                                                    (pop rest))))
1014                               (decode-char ccs code)))))
1015           ;; (char-db-update-comment)
1016           (set-buffer-modified-p nil)
1017           (view-mode the-buf (lambda (buf)
1018                                (set-window-configuration
1019                                 what-character-original-window-configuration)
1020                                ))
1021           (goto-char (point-min)))
1022       (error (progn
1023                (set-window-configuration
1024                 what-character-original-window-configuration)
1025                (signal (car err) (cdr err)))))))
1026
1027 (defun char-db-json-batch-view ()
1028   (setq terminal-coding-system 'binary)
1029   (condition-case err
1030       (let* ((target (pop command-line-args-left))
1031              ret genre
1032              object)
1033         (princ "Content-Type: application/json; charset=UTF-8
1034
1035 ")
1036         (cond
1037          ((stringp target)
1038           (when (string-match "^char=\\(&[^&;]+;\\)" target)
1039             (setq ret (match-end 0))
1040             (setq target
1041                   (concat "char="
1042                           (www-uri-encode-object
1043                            (www-uri-decode-object
1044                             'character (match-string 1 target)))
1045                           (substring target ret))))
1046           (setq target
1047                 (mapcar (lambda (cell)
1048                           (if (string-match "=" cell)
1049                               (progn
1050                                 (setq genre (substring cell 0 (match-beginning 0))
1051                                       ret (substring cell (match-end 0)))
1052                                 (cons
1053                                  (intern
1054                                   (decode-uri-string genre 'utf-8-mcs-er))
1055                                  ret))
1056                             (list (decode-uri-string cell 'utf-8-mcs-er))))
1057                         (split-string target "&")))
1058           (setq ret (car target))
1059           (cond ((eq (car ret) 'char)
1060                  (setq object (www-uri-decode-object (car ret)(cdr ret)))
1061                  (when (characterp object)
1062                    (with-temp-buffer
1063                      (char-db-json-char-data object)
1064                      (encode-coding-region (point-min)(point-max)
1065                                            char-db-file-coding-system)
1066                      (princ (buffer-string))
1067                      ))
1068                  )
1069                 ((eq (car ret) 'character)
1070                  (setq object (www-uri-decode-object (car ret)(cdr ret)))
1071                  (when (characterp object)
1072                    (with-temp-buffer
1073                      (char-db-json-char-data object)
1074                      (encode-coding-region (point-min)(point-max)
1075                                            char-db-file-coding-system)
1076                      (princ (buffer-string))
1077                      ))
1078                  ))
1079           ))
1080         )
1081     (error nil
1082            (princ (format "%S" err)))
1083     ))
1084
1085
1086 ;;; @ end
1087 ;;;
1088
1089 (provide 'char-db-json)
1090
1091 ;;; char-db-json.el ends here