(char-db-ignored-attributes): Add `composition'.
[chise/est.git] / char-db-json.el
1 ;;; char-db-json.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,
4 ;;   2008,2009,2010,2011,2012,2013,2014,2015,2016 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (require 'char-db-util)
29 (require 'cwiki-format)
30
31 (setq char-db-ignored-attributes
32       '(ideographic-products
33         composition
34         ;; ->HNG
35         *instance@ruimoku/bibliography/title
36         *instance@morpheme-entry/zh-classical))
37
38
39 ;;; @ char-db formatters
40 ;;;
41     
42 (defun char-db-json-insert-char-spec (char &optional readable column
43                                            required-features)
44   (unless column
45     (setq column (current-column)))
46   (let (char-spec temp-char)
47     (setq char-spec (char-db-make-char-spec char))
48     (unless (or (characterp char) ; char
49                 (condition-case nil
50                     (setq char (find-char char-spec))
51                   (error nil)))
52       ;; define temporary character
53       ;;   Current implementation is dirty.
54       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
55                                          char-spec)))
56       (remove-char-attribute temp-char 'ideograph-daikanwa)
57       (setq char temp-char))
58     (char-db-json-insert-char-features char
59                                        readable
60                                        (union (mapcar #'car char-spec)
61                                               required-features)
62                                        nil 'for-sub-node)
63     (when temp-char
64       ;; undefine temporary character
65       ;;   Current implementation is dirty.
66       (setq char-spec (char-attribute-alist temp-char))
67       (while char-spec
68         (remove-char-attribute temp-char (car (car char-spec)))
69         (setq char-spec (cdr char-spec))))))
70
71 (defun char-db-json-insert-alist (alist &optional readable column)
72   (unless column
73     (setq column (current-column)))
74   (let ((line-breaking
75          (concat "\n" (make-string (1+ column) ?\ )))
76         name value
77         ret al ; cal
78         key
79         lbs cell rest separator)
80     (insert "(")
81     (while alist
82       (setq name (car (car alist))
83             value (cdr (car alist)))
84       (cond ((eq name 'char)
85              (insert "(char . ")
86              (if (setq ret (condition-case nil
87                                (find-char value)
88                              (error nil)))
89                  (progn
90                    (setq al nil
91                          ;; cal nil
92                          )
93                    (while value
94                      (setq key (car (car value)))
95                      ;; (if (find-charset key)
96                      ;;     (setq cal (cons key cal))
97                      (setq al (cons key al))
98                      ;; )
99                      (setq value (cdr value)))
100                    (insert-char-attributes ret
101                                            readable
102                                            (or al 'none) ; cal
103                                            nil 'for-sub-node))
104                (insert (prin1-to-string value)))
105              (insert ")")
106              (insert line-breaking))
107             ((consp value)
108              (insert (format "(%-18s " name))
109              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
110              (while (consp value)
111                (setq cell (car value))
112                (if (and (consp cell)
113                         (consp (car cell))
114                         (setq ret (condition-case nil
115                                       (find-char cell)
116                                     (error nil)))
117                         )
118                    (progn
119                      (setq rest cell
120                            al nil
121                            ;; cal nil
122                            )
123                      (while rest
124                        (setq key (car (car rest)))
125                        ;; (if (find-charset key)
126                        ;;     (setq cal (cons key cal))
127                        (setq al (cons key al))
128                        ;; )
129                        (setq rest (cdr rest)))
130                      (if separator
131                          (insert lbs))
132                      (insert-char-attributes ret
133                                              readable
134                                              al ; cal
135                                              nil 'for-sub-node)
136                      (setq separator lbs))
137                  (if separator
138                      (insert separator))
139                  (insert (prin1-to-string cell))
140                  (setq separator " "))
141                (setq value (cdr value)))
142              (insert ")")
143              (insert line-breaking))
144             (t
145              (insert (format "(%-18s . %S)%s"
146                              name value
147                              line-breaking))))
148       (setq alist (cdr alist))))
149   (insert ")"))
150
151 (defun char-db-json-insert-char-reference (plist &optional readable column)
152   (unless column
153     (setq column (current-column)))
154   (let ((line-breaking
155          (concat "\n" (make-string (1+ column) ?\ )))
156         (separator "")
157         name value)
158     (insert "{")
159     (while plist
160       (setq name (pop plist))
161       (setq value (pop plist))
162       (cond ((eq name :char)
163              (insert separator)
164              (insert "\"char\":\t")
165              (cond ((numberp value)
166                     (setq value (decode-char '=ucs value)))
167                    ;; ((consp value)
168                    ;;  (setq value (or (find-char value)
169                    ;;                  value)))
170                    )
171              (char-db-json-insert-char-spec value readable)
172              (insert line-breaking)
173              (setq separator ""))
174             ((eq name :radical)
175              (insert (format "%s%s\t%d, \"_comment\": \"%c\"%s"
176                              separator
177                              name value
178                              (ideographic-radical value)
179                              line-breaking))
180              (setq separator ""))
181             (t
182              (insert (format "%s%s\t%S" separator name value))
183              (setq separator line-breaking)))
184       ))
185   (insert " }"))
186
187 (defun char-db-json-insert-ccs-feature (name value line-breaking)
188   (cond
189    ((integerp value)
190     (insert
191      (format
192       (cond
193        ((memq name '(=>iwds-1
194                      ==shinjigen
195                      =shinjigen
196                      =shinjigen@1ed ==shinjigen@1ed
197                      =shinjigen@rev ==shinjigen@rev
198                      =shinjigen/+p@rev ==shinjigen/+p@rev
199                      ===daikanwa/ho ==daikanwa/ho
200                      =daikanwa/ho =>>daikanwa/ho =>daikanwa/ho))
201         " %-20s  %4d,\t\"_comment\": \"%c")
202        ((eq name '=shinjigen@1ed/24pr)
203         " %-20s  %4d,\t\"_comment\": \"%c")
204        ((or
205          (memq name
206                '(===daikanwa
207                  ==daikanwa =daikanwa =>>daikanwa =>daikanwa
208                  =daikanwa@rev1 =daikanwa@rev2
209                  =daikanwa/+p ==daikanwa/+p ===daikanwa/+p
210                  =>>daikanwa/+p
211                  =daikanwa/+2p =>>daikanwa/+2p
212                  =gt ==gt ===gt
213                  =>>gt =+>gt =>gt
214                  =gt-k ==gt-k ===gt-k
215                  =>>gt-k =>gt-k
216                  =adobe-japan1-0 ==adobe-japan1-0 ===adobe-japan1-0
217                  =adobe-japan1-1 ==adobe-japan1-1 ===adobe-japan1-1
218                  =adobe-japan1-2 ==adobe-japan1-2 ===adobe-japan1-2
219                  =adobe-japan1-3 ==adobe-japan1-3 ===adobe-japan1-3
220                  =adobe-japan1-4 ==adobe-japan1-4 ===adobe-japan1-4
221                  =adobe-japan1-5 ==adobe-japan1-5 ===adobe-japan1-5
222                  =adobe-japan1-6 ==adobe-japan1-6 ===adobe-japan1-6
223                  =>>adobe-japan1-0 =+>adobe-japan1-0
224                  =>>adobe-japan1-1 =+>adobe-japan1-1
225                  =>>adobe-japan1-2 =+>adobe-japan1-2
226                  =>>adobe-japan1-3 =+>adobe-japan1-3
227                  =>>adobe-japan1-4 =+>adobe-japan1-4
228                  =>>adobe-japan1-5 =+>adobe-japan1-5
229                  =>>adobe-japan1-6 =+>adobe-japan1-6
230                  =>cbeta =cbeta =>>cbeta ==cbeta ===cbeta
231                  =zinbun-oracle =>zinbun-oracle
232                  ===hng-jou ===hng-keg ===hng-dng ===hng-mam
233                  ===hng-drt ===hng-kgk ===hng-myz ===hng-kda
234                  ===hng-khi ===hng-khm ===hng-hok ===hng-kyd ===hng-sok
235                  ===hng-yhk ===hng-kak ===hng-kar ===hng-kae
236                  ===hng-sys ===hng-tsu ===hng-tzj
237                  ===hng-hos ===hng-nak ===hng-jhk
238                  ===hng-hod ===hng-gok ===hng-ink ===hng-nto
239                  ===hng-nkm ===hng-k24 ===hng-nkk
240                  ===hng-kcc ===hng-kcj ===hng-kbk ===hng-sik
241                  ===hng-skk ===hng-kyu ===hng-ksk ===hng-wan
242                  ===hng-okd ===hng-wad ===hng-kmi
243                  ===hng-zkd ===hng-doh ===hng-jyu
244                  ===hng-tzs ===hng-kss ===hng-kyo
245                  ===hng-smk))
246          ;; (string-match "^=adobe-" (symbol-name name))
247          )
248         " %-20s %5d,\t\"_comment\": \"%c")
249        ((memq name '(=hanyo-denshi/ks
250                      ==hanyo-denshi/ks ===hanyo-denshi/ks
251                      =>>hanyo-denshi/ks
252                      =koseki ==koseki
253                      =mj ==mj ===mj =>>mj =>mj
254                      =zihai mojikyo))
255         " %-19s %6d,\t\"_comment\": \"%c")
256        ((memq name '(=hanyo-denshi/tk ==hanyo-denshi/tk))
257         " %-20s %8d,\t\"_comment\": \"%c")
258        ((>= (charset-dimension name) 2)
259         " %-20s %5d,\t\"_comment\": \"%c")
260        (t
261         " %-20s   %3d,\t\"_comment\": \"%c"))
262       (format "\"%s\":" name)
263       (if (= (charset-iso-graphic-plane name) 1)
264           (logior value
265                   (cond ((= (charset-dimension name) 1)
266                          #x80)
267                         ((= (charset-dimension name) 2)
268                          #x8080)
269                         ((= (charset-dimension name) 3)
270                          #x808080)
271                         (t 0)))
272         value)
273       (char-db-decode-isolated-char name value)))
274     (if (and (= (charset-chars name) 94)
275              (= (charset-dimension name) 2))
276         (insert (format " [%02d-%02d]\""
277                         (- (lsh value -8) 32)
278                         (- (logand value 255) 32)))
279       (insert "\""))
280     )
281    (t
282     (insert (format " %-20s %s"
283                     (format "\"%s\":" name) value))
284     ))
285   )
286
287 (defun char-db-json-insert-relation-feature (char name value line-breaking
288                                              ccss readable)
289   (insert (format " %-20s [%s    "
290                   (format "\"%s\":" name) line-breaking))
291   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
292         separator cell sources required-features
293         ret)
294     (while (consp value)
295       (setq cell (car value))
296       (if (integerp cell)
297           (setq cell (decode-char '=ucs cell)))
298       (cond
299        ((eq name '->subsumptive)
300         (when (or (not (some (lambda (atr)
301                                (get-char-attribute cell atr))
302                              char-db-ignored-attributes))
303                   (some (lambda (ccs)
304                           (encode-char cell ccs 'defined-only))
305                         ccss))
306           (if separator
307               (insert separator)
308             (setq separator (format ",%s" lbs)))
309           (let ((char-db-ignored-attributes
310                  (cons '<-subsumptive
311                        char-db-ignored-attributes)))
312             (char-db-json-insert-char-features
313              cell readable nil nil 'for-sub-node))
314           )
315         )
316        ((characterp cell)
317         (setq sources
318               (get-char-attribute
319                char (intern (format "%s*sources" name))))
320         (setq required-features nil)
321         (dolist (source sources)
322           (cond
323            ((memq source '(JP
324                            JP/Jouyou
325                            shinjigen shinjigen@1ed shinjigen@rev))
326             (setq required-features
327                   (union required-features
328                          '(=jis-x0208
329                            =jis-x0208@1990
330                            =jis-x0213-1@2000
331                            =jis-x0213-1@2004
332                            =jis-x0213-2
333                            =jis-x0212
334                            =jis-x0208@1983
335                            =jis-x0208@1978
336                            =shinjigen
337                            =shinjigen@1ed
338                            =shinjigen@rev
339                            =shinjigen/+p@rev))))
340            ((eq source 'CN)
341             (setq required-features
342                   (union required-features
343                          '(=gb2312
344                            =gb12345
345                            =iso-ir165)))))
346           (cond
347            ((find-charset (setq ret (intern (format "=%s" source))))
348             (setq required-features
349                   (cons ret required-features)))
350            (t (setq required-features
351                     (cons source required-features)))))
352         (cond ((string-match "@JP" (symbol-name name))
353                (setq required-features
354                      (union required-features
355                             '(=jis-x0208
356                               =jis-x0208@1990
357                               =jis-x0213-1-2000
358                               =jis-x0213-2-2000
359                               =jis-x0212
360                               =jis-x0208@1983
361                               =jis-x0208@1978))))
362               ((string-match "@CN" (symbol-name name))
363                (setq required-features
364                      (union required-features
365                             '(=gb2312
366                               =gb12345
367                               =iso-ir165)))))
368         (if separator
369             (insert separator)
370           (setq separator (format ",%s" lbs)))
371         ;; (if readable
372         ;;     (insert (format "%S" cell))
373         ;;   (char-db-json-insert-char-spec cell readable
374         ;;                                  nil
375         ;;                                  required-features))
376         (char-db-json-insert-char-spec cell readable
377                                        nil
378                                        required-features)
379         )
380        ((consp cell)
381         (if separator
382             (insert separator)
383           (setq separator (format ",%s" lbs)))
384         (if (consp (car cell))
385             (char-db-json-insert-char-spec cell readable)
386           (char-db-json-insert-char-reference cell readable))
387         )
388        (t
389         (if separator
390             (insert separator)
391           )
392         (insert (prin1-to-string cell))
393         (setq separator " ")))
394       (setq value (cdr value)))
395     (insert " ]")))
396
397 (defun char-db-json-insert-char-features (char
398                                           &optional readable attributes column
399                                           for-sub-node)
400   (unless column
401     (setq column (current-column)))
402   (let ((est-view-url-prefix "http://chise.org/est/view")
403         id obj-id type
404         name value ; has-long-ccs-name
405         rest
406         radical strokes
407         (line-breaking
408          (concat "\n" (make-string (1+ column) ?\ )))
409         line-separator
410         lbs cell separator ret
411         key al cal
412         dest-ccss ; sources required-features
413         ccss)
414     (let (atr-d)
415       (setq attributes
416             (sort (if attributes
417                       (if (consp attributes)
418                           (progn
419                             (dolist (name attributes)
420                               (unless (memq name char-db-ignored-attributes)
421                                 (if (find-charset name)
422                                     (push name ccss))
423                                 (push name atr-d)))
424                             atr-d))
425                     (dolist (name (char-attribute-list))
426                       (unless (memq name char-db-ignored-attributes)
427                         (if (find-charset name)
428                             (push name ccss))
429                         (push name atr-d)))
430                     atr-d)
431                   #'char-attribute-name<)))
432     (insert
433      (format "{ \"@context\": \"%s/genre/character/context.json\""
434              est-view-url-prefix))
435     (setq line-separator (format ",%s" line-breaking))
436     (setq id (www-uri-make-object-url char))
437     (insert (format "%s \"@id\": \"%s\"" line-separator id))
438     (setq obj-id (file-name-nondirectory id))
439     (setq type
440           (cond
441            ((string-match "^a2\\." obj-id)
442             "chise:super-abstract-character")
443            ((string-match "^a\\." obj-id)
444             "chise:abstract-character")
445            ((string-match "^o\\." obj-id)
446             "chise:unified-glyph")
447            ((string-match "^rep\\." obj-id)
448             "chise:abstract-glyph")
449            ((string-match "^g\\." obj-id)
450             "chise:detailed-glyph")
451            ((string-match "^g2\\." obj-id)
452             "chise:abstract-glyph-form")
453            ((string-match "^gi\\." obj-id)
454             "chise:abstract-glyph-form")
455            ((string-match "^repi\\." obj-id)
456             "chise:glyph-image")
457            (t
458             "chise:character")
459            ))
460     (insert (format "%s \"@type\": \"%s\"" line-separator type))
461     (when (memq '<-subsumptive attributes)
462       (when (or readable (not for-sub-node))
463         (when (setq value (get-char-attribute char '<-subsumptive))
464           (insert line-separator)
465           (char-db-json-insert-relation-feature char '<-subsumptive value
466                                                 line-breaking
467                                                 ccss readable)
468           ))
469       (setq attributes (delq '<-subsumptive attributes))
470       )
471     (when (and (memq '<-denotational attributes)
472                (setq value (get-char-attribute char '<-denotational)))
473       (insert line-separator)
474       (char-db-json-insert-relation-feature char '<-denotational value
475                                             line-breaking
476                                             ccss readable)
477       (setq attributes (delq '<-denotational attributes)))
478     (when (and (memq '<-denotational@component attributes)
479                (setq value
480                      (get-char-attribute char '<-denotational@component)))
481       (insert line-separator)
482       (char-db-json-insert-relation-feature
483        char '<-denotational@component value
484        line-breaking
485        ccss readable)
486       (setq attributes (delq '<-denotational@component attributes)))
487     (when (and (memq 'name attributes)
488                (setq value (get-char-attribute char 'name)))
489       (insert line-separator)
490       (insert (format
491                (if (> (+ (current-column) (length value)) 48)
492                    " \"name\": %S"
493                  " \"name\":                 %S")
494                value))
495       (setq attributes (delq 'name attributes))
496       )
497     (when (and (memq 'name* attributes)
498                (setq value (get-char-attribute char 'name*)))
499       (insert line-separator)
500       (insert (format
501                (if (> (+ (current-column) (length value)) 48)
502                    " \"name*\": %S"
503                  " \"name*\":                 %S")
504                value))
505       (setq attributes (delq 'name* attributes))
506       )
507     (when (and (memq 'script attributes)
508                (setq value (get-char-attribute char 'script)))
509       (insert line-separator)
510       (insert (format " \"script\":\t\t  %s"
511                       (mapconcat (function prin1-to-string)
512                                  value " ")))
513       (setq attributes (delq 'script attributes))
514       )
515     (dolist (name '(=>ucs =>ucs*))
516       (when (and (memq name attributes)
517                  (setq value (get-char-attribute char name)))
518         (insert line-separator)
519         (insert (format " \"%-20s\":  #x%04X,\t\"_comment\": \"%c\""
520                         name value (decode-char '=ucs value)))
521         (setq attributes (delq name attributes))))
522     (dolist (name '(=>ucs@gb =>ucs@big5))
523       (when (and (memq name attributes)
524                  (setq value (get-char-attribute char name)))
525         (insert line-separator)
526         (insert (format " \"%-20s\":  #x%04X,\t\"_comment\": \"%c\"%s"
527                         name value
528                         (decode-char (intern
529                                       (concat "="
530                                               (substring
531                                                (symbol-name name) 2)))
532                                      value)
533                         line-breaking))
534         (setq attributes (delq name attributes))
535         ))
536     (when (and (memq 'general-category attributes)
537                (setq value (get-char-attribute char 'general-category)))
538       (insert line-separator)
539       (insert (format
540                " \"general-category\":\t [ %s ], \"_comment\": \"%s\""
541                (mapconcat (lambda (cell)
542                             (format "%S" cell))
543                           value " ")
544                (cond ((rassoc value unidata-normative-category-alist)
545                       "Normative Category")
546                      ((rassoc value unidata-informative-category-alist)
547                       "Informative Category")
548                      (t
549                       "Unknown Category"))
550                ))
551       (setq attributes (delq 'general-category attributes))
552       )
553     (when (and (memq 'bidi-category attributes)
554                (setq value (get-char-attribute char 'bidi-category)))
555       (insert line-separator)
556       (insert (format " \"bidi-category\":\t  %S"
557                       value))
558       (setq attributes (delq 'bidi-category attributes))
559       )
560     (unless (or (not (memq 'mirrored attributes))
561                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
562                     'empty))
563       (insert line-separator)
564       (insert (format " \"mirrored\":\t\t  %S"
565                       value))
566       (setq attributes (delq 'mirrored attributes))
567       )
568     (cond
569      ((and (memq 'decimal-digit-value attributes)
570            (setq value (get-char-attribute char 'decimal-digit-value)))
571       (insert line-separator)
572       (insert (format " \"decimal-digit-value\":  %S"
573                       value))
574       (setq attributes (delq 'decimal-digit-value attributes))
575       (when (and (memq 'digit-value attributes)
576                  (setq value (get-char-attribute char 'digit-value)))
577         (insert line-separator)
578         (insert (format " \"digit-value\":\t    %S"
579                         value))
580         (setq attributes (delq 'digit-value attributes))
581         )
582       (when (and (memq 'numeric-value attributes)
583                  (setq value (get-char-attribute char 'numeric-value)))
584         (insert line-separator)
585         (insert (format " \"numeric-value\":\t    %S"
586                         value))
587         (setq attributes (delq 'numeric-value attributes))
588         )
589       )
590      (t
591       (when (and (memq 'digit-value attributes)
592                  (setq value (get-char-attribute char 'digit-value)))
593         (insert line-separator)
594         (insert (format " \"digit-value\":\t %S"
595                         value))
596         (setq attributes (delq 'digit-value attributes))
597         )
598       (when (and (memq 'numeric-value attributes)
599                  (setq value (get-char-attribute char 'numeric-value)))
600         (insert line-separator)
601         (insert (format " \"numeric-value\":\t %S"
602                         value))
603         (setq attributes (delq 'numeric-value attributes))
604         )))
605     (when (and (memq 'iso-10646-comment attributes)
606                (setq value (get-char-attribute char 'iso-10646-comment)))
607       (insert line-separator)
608       (insert (format "{\"iso-10646-comment\":\t %S}%s"
609                       value
610                       line-breaking))
611       (setq attributes (delq 'iso-10646-comment attributes))
612       )
613     (when (and (memq 'morohashi-daikanwa attributes)
614                (setq value (get-char-attribute char 'morohashi-daikanwa)))
615       (insert line-separator)
616       (insert (format "{\"morohashi-daikanwa\":\t%s}%s"
617                       (mapconcat (function prin1-to-string) value " ")
618                       line-breaking))
619       (setq attributes (delq 'morohashi-daikanwa attributes))
620       )
621     (setq radical nil
622           strokes nil)
623     (when (and (memq 'ideographic-radical attributes)
624                (setq value (get-char-attribute char 'ideographic-radical)))
625       (setq radical value)
626       (insert line-separator)
627       (insert (format " \"ideographic-radical\": %S,\t\"_comment\": \"%c\""
628                       radical
629                       (ideographic-radical radical)
630                       ))
631       (setq attributes (delq 'ideographic-radical attributes))
632       )
633     (when (and (memq 'shuowen-radical attributes)
634                (setq value (get-char-attribute char 'shuowen-radical)))
635       (insert line-separator)
636       (insert (format " \"shuowen-radical\":\t %S,\t\"_comment\": \"%c\""
637                       value
638                       (shuowen-radical value)))
639       (setq attributes (delq 'shuowen-radical attributes))
640       )
641     (let (key)
642       (dolist (domain
643                (append
644                 char-db-feature-domains
645                 (let (dest domain)
646                   (dolist (feature (char-attribute-list))
647                     (setq feature (symbol-name feature))
648                     (when (string-match
649                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
650                            feature)
651                       (setq domain (intern (match-string 2 feature)))
652                      (unless (memq domain dest)
653                        (setq dest (cons domain dest)))))
654                   (sort dest #'string<))))
655         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
656         (when (and (memq key attributes)
657                    (setq value (get-char-attribute char key)))
658           (setq radical value)
659           (insert line-separator)
660           (insert (format "{\"%s\": %S},\t\"_comment\": \"%c\"%s"
661                           key
662                           radical
663                           (ideographic-radical radical)
664                           line-breaking))
665           (setq attributes (delq key attributes))
666           )
667         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
668         (when (and (memq key attributes)
669                    (setq value (get-char-attribute char key)))
670           (setq strokes value)
671           (insert line-separator)
672           (insert (format " \"%s\": %S"
673                           key
674                           strokes))
675           (setq attributes (delq key attributes))
676           )
677         (setq key (intern (format "%s@%s" 'total-strokes domain)))
678         (when (and (memq key attributes)
679                    (setq value (get-char-attribute char key)))
680           (insert line-separator)
681           (insert (format " \"%s\":       %S"
682                           key
683                           value
684                           ))
685           (setq attributes (delq key attributes))
686           )
687         (dolist (feature '(ideographic-radical
688                            ideographic-strokes
689                            total-strokes))
690           (setq key (intern (format "%s@%s*sources" feature domain)))
691           (when (and (memq key attributes)
692                      (setq value (get-char-attribute char key)))
693             (insert line-separator)
694             (insert (format " \"%s\":%s" key line-breaking))
695             (dolist (cell value)
696               (insert (format " %s" cell)))
697             (setq attributes (delq key attributes))
698             ))
699         ))
700     (when (and (memq 'ideographic-strokes attributes)
701                (setq value (get-char-attribute char 'ideographic-strokes)))
702       (setq strokes value)
703       (insert line-separator)
704       (insert (format " \"ideographic-strokes\": %S"
705                       strokes
706                       ))
707       (setq attributes (delq 'ideographic-strokes attributes))
708       )
709     (when (and (memq 'kangxi-radical attributes)
710                (setq value (get-char-attribute char 'kangxi-radical)))
711       (unless (eq value radical)
712         (insert line-separator)
713         (insert (format "{\"kangxi-radical\":\t%S},\t\"_comment\": \"%c\"%s"
714                         value
715                         (ideographic-radical value)
716                         line-breaking))
717         (or radical
718             (setq radical value)))
719       (setq attributes (delq 'kangxi-radical attributes))
720       )
721     (when (and (memq 'kangxi-strokes attributes)
722                (setq value (get-char-attribute char 'kangxi-strokes)))
723       (unless (eq value strokes)
724         (insert line-separator)
725         (insert (format "{\"kangxi-strokes\":\t%S}%s"
726                         value
727                         line-breaking))
728         (or strokes
729             (setq strokes value)))
730       (setq attributes (delq 'kangxi-strokes attributes))
731       )
732     (when (and (memq 'japanese-strokes attributes)
733                (setq value (get-char-attribute char 'japanese-strokes)))
734       (unless (eq value strokes)
735         (insert line-separator)
736         (insert (format "{\"japanese-strokes\":\t%S}%s"
737                         value
738                         line-breaking))
739         (or strokes
740             (setq strokes value)))
741       (setq attributes (delq 'japanese-strokes attributes))
742       )
743     (when (and (memq 'cns-radical attributes)
744                (setq value (get-char-attribute char 'cns-radical)))
745       (insert line-separator)
746       (insert (format "{\"cns-radical\":\t%S},\t\"_comment\": \"%c\"%s"
747                       value
748                       (ideographic-radical value)
749                       line-breaking))
750       (setq attributes (delq 'cns-radical attributes))
751       )
752     (when (and (memq 'cns-strokes attributes)
753                (setq value (get-char-attribute char 'cns-strokes)))
754       (unless (eq value strokes)
755         (insert line-separator)
756         (insert (format "{\"cns-strokes\":\t%S}%s"
757                         value
758                         line-breaking))
759         (or strokes
760             (setq strokes value)))
761       (setq attributes (delq 'cns-strokes attributes))
762       )
763     (when (and (memq 'total-strokes attributes)
764                (setq value (get-char-attribute char 'total-strokes)))
765       (insert line-separator)
766       (insert (format " \"total-strokes\":       %S"
767                       value
768                       ))
769       (setq attributes (delq 'total-strokes attributes))
770       )
771     (if (equal (get-char-attribute char '->titlecase)
772                (get-char-attribute char '->uppercase))
773         (setq attributes (delq '->titlecase attributes)))
774     (unless readable
775       (dolist (ignored '(composition
776                          ->denotational <-subsumptive ->ucs-unified
777                          ->ideographic-component-forms))
778         (setq attributes (delq ignored attributes))))
779     (while attributes
780       (setq name (car attributes))
781       (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
782                   'value-is-empty)
783         (cond ((setq ret (find-charset name))
784                (setq name (charset-name ret))
785                (when (not (memq name dest-ccss))
786                  (setq dest-ccss (cons name dest-ccss))
787                  (insert line-separator)
788                  (char-db-json-insert-ccs-feature name value line-breaking))
789                )
790               ((string-match "^=>ucs@" (symbol-name name))
791                (insert line-separator)
792                (insert (format "{\"%-20s\": #x%04X},\t\"_comment\": \"%c\"%s"
793                                name value (decode-char '=ucs value)
794                                line-breaking))
795                )
796               ((eq name 'jisx0208-1978/4X)
797                (insert line-separator)
798                (insert (format "{\"%-20s\": #x%04X}%s"
799                                name value
800                                line-breaking))
801                )
802               ((and
803                 (not readable)
804                 (not (eq name '->subsumptive))
805                 (not (eq name '->uppercase))
806                 (not (eq name '->lowercase))
807                 (not (eq name '->titlecase))
808                 (not (eq name '->canonical))
809                 (not (eq name '->Bopomofo))
810                 (not (eq name '->mistakable))
811                 (not (eq name '->ideographic-variants))
812                 (null (get-char-attribute
813                        char (intern (format "%s*sources" name))))
814                 (not (string-match "\\*sources$" (symbol-name name)))
815                 (null (get-char-attribute
816                        char (intern (format "%s*note" name))))
817                 (not (string-match "\\*note$" (symbol-name name)))
818                 (or (eq name '<-identical)
819                     (eq name '<-uppercase)
820                     (eq name '<-lowercase)
821                     (eq name '<-titlecase)
822                     (eq name '<-canonical)
823                     (eq name '<-ideographic-variants)
824                     ;; (eq name '<-synonyms)
825                     (string-match "^<-synonyms" (symbol-name name))
826                     (eq name '<-mistakable)
827                     (when (string-match "^->" (symbol-name name))
828                       (cond
829                        ((string-match "^->fullwidth" (symbol-name name))
830                         (not (and (consp value)
831                                   (characterp (car value))
832                                   (encode-char
833                                    (car value) '=ucs 'defined-only)))
834                         )
835                        (t)))
836                     ))
837                )
838               ((or (eq name 'ideographic-structure)
839                    (eq name 'ideographic-combination)
840                    ;; (eq name 'ideographic-)
841                    (eq name '=decomposition)
842                    (char-feature-base-name= '=decomposition name)
843                    (char-feature-base-name= '=>decomposition name)
844                    ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
845                    ;;               (symbol-name name))
846                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
847                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
848                                  (symbol-name name))
849                    )
850                (insert line-separator)
851                (char-db-json-insert-relation-feature char name value
852                                                      line-breaking
853                                                      ccss readable))
854               ((consp value)
855                (insert line-separator)
856                (insert (format " %-20s [ "
857                                (format "\"%s\":" name)))
858                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
859                      separator nil)
860                (while (consp value)
861                  (setq cell (car value))
862                  (if (and (consp cell)
863                           (consp (car cell))
864                           (setq ret (condition-case nil
865                                         (find-char cell)
866                                       (error nil))))
867                      (progn
868                        (setq rest cell
869                              al nil
870                              cal nil)
871                        (while rest
872                          (setq key (car (car rest)))
873                          (if (find-charset key)
874                              (setq cal (cons key cal))
875                            (setq al (cons key al)))
876                          (setq rest (cdr rest)))
877                        (if separator
878                            (insert lbs))
879                        (char-db-json-insert-char-features ret
880                                                           readable
881                                                           al
882                                                           nil 'for-sub-node)
883                        (setq separator lbs))
884                    (setq ret (prin1-to-string cell))
885                    (if separator
886                        (if (< (+ (current-column)
887                                  (length ret)
888                                  (length separator))
889                               76)
890                            (insert separator)
891                          (insert lbs)))
892                    (insert ret)
893                    (setq separator " "))
894                  (setq value (cdr value)))
895                (insert " ]")
896                )
897               (t
898                (insert line-separator)
899                (insert (format " %-20s "
900                                (format "\"%s\":" name)))
901                (setq ret (prin1-to-string value))
902                (unless (< (+ (current-column)
903                              (length ret)
904                              3)
905                           76)
906                  (insert line-breaking))
907                (insert ret)
908                )
909               ))
910       (setq attributes (cdr attributes)))
911     (insert "\n" (make-string column ?\ ) "}")))
912
913 (defun char-db-json-char-data (char &optional readable
914                                     attributes column)
915   (unless column
916     (setq column (current-column)))
917   (save-restriction
918     (narrow-to-region (point)(point))
919     (char-db-json-insert-char-features char readable attributes column)
920     (goto-char (point-min))
921     (while (re-search-forward "[ \t]+$" nil t)
922       (replace-match ""))
923     ;; from tabify.
924     (goto-char (point-min))
925     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
926       (let ((column (current-column))
927             (indent-tabs-mode t))
928         (delete-region (match-beginning 0) (point))
929         (indent-to column)))
930     (goto-char (point-max))
931     ;; (tabify (point-min)(point-max))
932     ))
933
934 (defun char-db-json-char-data-with-variant (char &optional printable
935                                            no-ucs-unified
936                                            script excluded-script)
937   (insert "[ ")
938   (char-db-json-char-data char printable)
939   (let ((variants (char-variants char))
940         rest
941         variant vs ret
942         )
943     (setq variants (sort variants #'<))
944     (setq rest variants)
945     (setq variants (cons char variants))
946     (while rest
947       (setq variant (car rest))
948       (unless (get-char-attribute variant '<-subsumptive)
949         (if (and (or (null script)
950                      (null (setq vs (get-char-attribute variant 'script)))
951                      (memq script vs))
952                  (or (null excluded-script)
953                      (null (setq vs (get-char-attribute variant 'script)))
954                      (not (memq excluded-script vs))))
955             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
956               (insert ",\n  ")
957               (char-db-json-char-data variant printable)
958               (if (setq ret (char-variants variant))
959                   (while ret
960                     (or (memq (car ret) variants)
961                         ;; (get-char-attribute (car ret) '<-subsumptive)
962                         (setq rest (nconc rest (list (car ret)))))
963                     (setq ret (cdr ret)))))))
964       (setq rest (cdr rest)))
965     (insert "\n]\n")))
966
967 (defun char-db-json-insert-char-range-data (min max
968                                                 &optional script
969                                                 excluded-script)
970   (let ((code min)
971         char)
972     (while (<= code max)
973       (setq char (decode-char '=ucs code))
974       (if (encode-char char '=ucs 'defined-only)
975           (char-db-json-char-data-with-variant char nil 'no-ucs-unified
976                                          script excluded-script))
977       (setq code (1+ code)))))
978
979 (defun write-char-range-data-to-json-file (min max file
980                                                &optional script
981                                                excluded-script)
982   (let ((coding-system-for-write char-db-file-coding-system))
983     (with-temp-buffer
984       (insert (format "// -*- coding: %s -*-\n"
985                       char-db-file-coding-system))
986       (char-db-json-insert-char-range-data min max script excluded-script)
987       (write-region (point-min)(point-max) file))))
988
989 ;;;###autoload
990 (defun what-char-definition-json (char)
991   (interactive (list (char-after)))
992   (let ((est-hide-cgi-mode t)
993         (buf (get-buffer-create "*Character Description*"))
994         (the-buf (current-buffer))
995         (win-conf (current-window-configuration)))
996     (pop-to-buffer buf)
997     (make-local-variable 'what-character-original-window-configuration)
998     (setq what-character-original-window-configuration win-conf)
999     (setq buffer-read-only nil)
1000     (erase-buffer)
1001     (condition-case err
1002         (progn
1003           (char-db-json-char-data-with-variant char 'printable)
1004           (unless (char-attribute-alist char)
1005             (insert (format "// = %c\n"
1006                             (let* ((rest (split-char char))
1007                                    (ccs (pop rest))
1008                                    (code (pop rest)))
1009                               (while rest
1010                                 (setq code (logior (lsh code 8)
1011                                                    (pop rest))))
1012                               (decode-char ccs code)))))
1013           ;; (char-db-update-comment)
1014           (set-buffer-modified-p nil)
1015           (view-mode the-buf (lambda (buf)
1016                                (set-window-configuration
1017                                 what-character-original-window-configuration)
1018                                ))
1019           (goto-char (point-min)))
1020       (error (progn
1021                (set-window-configuration
1022                 what-character-original-window-configuration)
1023                (signal (car err) (cdr err)))))))
1024
1025 (defun char-db-json-batch-view ()
1026   (setq terminal-coding-system 'binary)
1027   (condition-case err
1028       (let* ((target (pop command-line-args-left))
1029              ret genre
1030              object)
1031         (princ "Content-Type: application/json; charset=UTF-8
1032
1033 ")
1034         (cond
1035          ((stringp target)
1036           (when (string-match "^char=\\(&[^&;]+;\\)" target)
1037             (setq ret (match-end 0))
1038             (setq target
1039                   (concat "char="
1040                           (www-uri-encode-object
1041                            (www-uri-decode-object
1042                             'character (match-string 1 target)))
1043                           (substring target ret))))
1044           (setq target
1045                 (mapcar (lambda (cell)
1046                           (if (string-match "=" cell)
1047                               (progn
1048                                 (setq genre (substring cell 0 (match-beginning 0))
1049                                       ret (substring cell (match-end 0)))
1050                                 (cons
1051                                  (intern
1052                                   (decode-uri-string genre 'utf-8-mcs-er))
1053                                  ret))
1054                             (list (decode-uri-string cell 'utf-8-mcs-er))))
1055                         (split-string target "&")))
1056           (setq ret (car target))
1057           (cond ((eq (car ret) 'char)
1058                  (setq object (www-uri-decode-object (car ret)(cdr ret)))
1059                  (when (characterp object)
1060                    (with-temp-buffer
1061                      (char-db-json-char-data object)
1062                      (encode-coding-region (point-min)(point-max)
1063                                            char-db-file-coding-system)
1064                      (princ (buffer-string))
1065                      ))
1066                  )
1067                 ((eq (car ret) 'character)
1068                  (setq object (www-uri-decode-object (car ret)(cdr ret)))
1069                  (when (characterp object)
1070                    (with-temp-buffer
1071                      (char-db-json-char-data object)
1072                      (encode-coding-region (point-min)(point-max)
1073                                            char-db-file-coding-system)
1074                      (princ (buffer-string))
1075                      ))
1076                  ))
1077           ))
1078         )
1079     (error nil
1080            (princ (format "%S" err)))
1081     ))
1082
1083
1084 ;;; @ end
1085 ;;;
1086
1087 (provide 'char-db-json)
1088
1089 ;;; char-db-json.el ends here