update.
[chise/est.git] / char-db-json.el
1 ;;; char-db-json.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,
4 ;;   2008,2009,2010,2011,2012,2013,2014,2015,2016 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (require 'char-db-util)
29 (require 'cwiki-format)
30
31 (setq char-db-ignored-attributes
32       '(ideographic-products
33         ;; ->HNG
34         *instance@ruimoku/bibliography/title
35         *instance@morpheme-entry/zh-classical))
36
37
38 ;;; @ char-db formatters
39 ;;;
40     
41 (defun char-db-json-insert-char-spec (char &optional readable column
42                                            required-features)
43   (unless column
44     (setq column (current-column)))
45   (let (char-spec temp-char)
46     (setq char-spec (char-db-make-char-spec char))
47     (unless (or (characterp char) ; char
48                 (condition-case nil
49                     (setq char (find-char char-spec))
50                   (error nil)))
51       ;; define temporary character
52       ;;   Current implementation is dirty.
53       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
54                                          char-spec)))
55       (remove-char-attribute temp-char 'ideograph-daikanwa)
56       (setq char temp-char))
57     (char-db-json-insert-char-features char
58                                        readable
59                                        (union (mapcar #'car char-spec)
60                                               required-features)
61                                        nil 'for-sub-node)
62     (when temp-char
63       ;; undefine temporary character
64       ;;   Current implementation is dirty.
65       (setq char-spec (char-attribute-alist temp-char))
66       (while char-spec
67         (remove-char-attribute temp-char (car (car char-spec)))
68         (setq char-spec (cdr char-spec))))))
69
70 (defun char-db-json-insert-alist (alist &optional readable column)
71   (unless column
72     (setq column (current-column)))
73   (let ((line-breaking
74          (concat "\n" (make-string (1+ column) ?\ )))
75         name value
76         ret al ; cal
77         key
78         lbs cell rest separator)
79     (insert "(")
80     (while alist
81       (setq name (car (car alist))
82             value (cdr (car alist)))
83       (cond ((eq name 'char)
84              (insert "(char . ")
85              (if (setq ret (condition-case nil
86                                (find-char value)
87                              (error nil)))
88                  (progn
89                    (setq al nil
90                          ;; cal nil
91                          )
92                    (while value
93                      (setq key (car (car value)))
94                      ;; (if (find-charset key)
95                      ;;     (setq cal (cons key cal))
96                      (setq al (cons key al))
97                      ;; )
98                      (setq value (cdr value)))
99                    (insert-char-attributes ret
100                                            readable
101                                            (or al 'none) ; cal
102                                            nil 'for-sub-node))
103                (insert (prin1-to-string value)))
104              (insert ")")
105              (insert line-breaking))
106             ((consp value)
107              (insert (format "(%-18s " name))
108              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
109              (while (consp value)
110                (setq cell (car value))
111                (if (and (consp cell)
112                         (consp (car cell))
113                         (setq ret (condition-case nil
114                                       (find-char cell)
115                                     (error nil)))
116                         )
117                    (progn
118                      (setq rest cell
119                            al nil
120                            ;; cal nil
121                            )
122                      (while rest
123                        (setq key (car (car rest)))
124                        ;; (if (find-charset key)
125                        ;;     (setq cal (cons key cal))
126                        (setq al (cons key al))
127                        ;; )
128                        (setq rest (cdr rest)))
129                      (if separator
130                          (insert lbs))
131                      (insert-char-attributes ret
132                                              readable
133                                              al ; cal
134                                              nil 'for-sub-node)
135                      (setq separator lbs))
136                  (if separator
137                      (insert separator))
138                  (insert (prin1-to-string cell))
139                  (setq separator " "))
140                (setq value (cdr value)))
141              (insert ")")
142              (insert line-breaking))
143             (t
144              (insert (format "(%-18s . %S)%s"
145                              name value
146                              line-breaking))))
147       (setq alist (cdr alist))))
148   (insert ")"))
149
150 (defun char-db-json-insert-char-reference (plist &optional readable column)
151   (unless column
152     (setq column (current-column)))
153   (let ((line-breaking
154          (concat "\n" (make-string (1+ column) ?\ )))
155         (separator "")
156         name value)
157     (insert "{")
158     (while plist
159       (setq name (pop plist))
160       (setq value (pop plist))
161       (cond ((eq name :char)
162              (insert separator)
163              (insert "\"char\":\t")
164              (cond ((numberp value)
165                     (setq value (decode-char '=ucs value)))
166                    ;; ((consp value)
167                    ;;  (setq value (or (find-char value)
168                    ;;                  value)))
169                    )
170              (char-db-json-insert-char-spec value readable)
171              (insert line-breaking)
172              (setq separator ""))
173             ((eq name :radical)
174              (insert (format "%s%s\t%d, \"_comment\": \"%c\"%s"
175                              separator
176                              name value
177                              (ideographic-radical value)
178                              line-breaking))
179              (setq separator ""))
180             (t
181              (insert (format "%s%s\t%S" separator name value))
182              (setq separator line-breaking)))
183       ))
184   (insert " }"))
185
186 (defun char-db-json-insert-ccs-feature (name value line-breaking)
187   (cond
188    ((integerp value)
189     (insert
190      (format
191       (cond
192        ((memq name '(=>iwds-1
193                      ==shinjigen
194                      =shinjigen
195                      =shinjigen@1ed ==shinjigen@1ed
196                      =shinjigen@rev ==shinjigen@rev
197                      =shinjigen/+p@rev ==shinjigen/+p@rev
198                      ===daikanwa/ho ==daikanwa/ho
199                      =daikanwa/ho =>>daikanwa/ho =>daikanwa/ho))
200         " %-20s  %4d,\t\"_comment\": \"%c")
201        ((eq name '=shinjigen@1ed/24pr)
202         " %-20s  %4d,\t\"_comment\": \"%c")
203        ((or
204          (memq name
205                '(===daikanwa
206                  ==daikanwa =daikanwa =>>daikanwa =>daikanwa
207                  =daikanwa@rev1 =daikanwa@rev2
208                  =daikanwa/+p ==daikanwa/+p ===daikanwa/+p
209                  =>>daikanwa/+p
210                  =daikanwa/+2p =>>daikanwa/+2p
211                  =gt ==gt ===gt
212                  =>>gt =+>gt =>gt
213                  =gt-k ==gt-k ===gt-k
214                  =>>gt-k =>gt-k
215                  =adobe-japan1-0 ==adobe-japan1-0 ===adobe-japan1-0
216                  =adobe-japan1-1 ==adobe-japan1-1 ===adobe-japan1-1
217                  =adobe-japan1-2 ==adobe-japan1-2 ===adobe-japan1-2
218                  =adobe-japan1-3 ==adobe-japan1-3 ===adobe-japan1-3
219                  =adobe-japan1-4 ==adobe-japan1-4 ===adobe-japan1-4
220                  =adobe-japan1-5 ==adobe-japan1-5 ===adobe-japan1-5
221                  =adobe-japan1-6 ==adobe-japan1-6 ===adobe-japan1-6
222                  =>>adobe-japan1-0 =+>adobe-japan1-0
223                  =>>adobe-japan1-1 =+>adobe-japan1-1
224                  =>>adobe-japan1-2 =+>adobe-japan1-2
225                  =>>adobe-japan1-3 =+>adobe-japan1-3
226                  =>>adobe-japan1-4 =+>adobe-japan1-4
227                  =>>adobe-japan1-5 =+>adobe-japan1-5
228                  =>>adobe-japan1-6 =+>adobe-japan1-6
229                  =>cbeta =cbeta =>>cbeta ==cbeta ===cbeta
230                  =zinbun-oracle =>zinbun-oracle
231                  ===hng-jou ===hng-keg ===hng-dng ===hng-mam
232                  ===hng-drt ===hng-kgk ===hng-myz ===hng-kda
233                  ===hng-khi ===hng-khm ===hng-hok ===hng-kyd ===hng-sok
234                  ===hng-yhk ===hng-kak ===hng-kar ===hng-kae
235                  ===hng-sys ===hng-tsu ===hng-tzj
236                  ===hng-hos ===hng-nak ===hng-jhk
237                  ===hng-hod ===hng-gok ===hng-ink ===hng-nto
238                  ===hng-nkm ===hng-k24 ===hng-nkk
239                  ===hng-kcc ===hng-kcj ===hng-kbk ===hng-sik
240                  ===hng-skk ===hng-kyu ===hng-ksk ===hng-wan
241                  ===hng-okd ===hng-wad ===hng-kmi
242                  ===hng-zkd ===hng-doh ===hng-jyu
243                  ===hng-tzs ===hng-kss ===hng-kyo
244                  ===hng-smk))
245          ;; (string-match "^=adobe-" (symbol-name name))
246          )
247         " %-20s %5d,\t\"_comment\": \"%c")
248        ((memq name '(=hanyo-denshi/ks
249                      ==hanyo-denshi/ks ===hanyo-denshi/ks
250                      =>>hanyo-denshi/ks
251                      =koseki ==koseki
252                      =mj ==mj ===mj =>>mj =>mj
253                      =zihai mojikyo))
254         " %-19s %6d,\t\"_comment\": \"%c")
255        ((memq name '(=hanyo-denshi/tk ==hanyo-denshi/tk))
256         " %-20s %8d,\t\"_comment\": \"%c")
257        ((>= (charset-dimension name) 2)
258         " %-20s %5d,\t\"_comment\": \"%c")
259        (t
260         " %-20s   %3d,\t\"_comment\": \"%c"))
261       (format "\"%s\":" name)
262       (if (= (charset-iso-graphic-plane name) 1)
263           (logior value
264                   (cond ((= (charset-dimension name) 1)
265                          #x80)
266                         ((= (charset-dimension name) 2)
267                          #x8080)
268                         ((= (charset-dimension name) 3)
269                          #x808080)
270                         (t 0)))
271         value)
272       (char-db-decode-isolated-char name value)))
273     (if (and (= (charset-chars name) 94)
274              (= (charset-dimension name) 2))
275         (insert (format " [%02d-%02d]\""
276                         (- (lsh value -8) 32)
277                         (- (logand value 255) 32)))
278       (insert "\""))
279     )
280    (t
281     (insert (format " %-20s %s"
282                     (format "\"%s\":" name) value))
283     ))
284   )
285
286 (defun char-db-json-insert-relation-feature (char name value line-breaking
287                                              ccss readable)
288   (insert (format " %-20s [%s    "
289                   (format "\"%s\":" name) line-breaking))
290   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
291         separator cell sources required-features
292         ret)
293     (while (consp value)
294       (setq cell (car value))
295       (if (integerp cell)
296           (setq cell (decode-char '=ucs cell)))
297       (cond
298        ((eq name '->subsumptive)
299         (when (or (not (some (lambda (atr)
300                                (get-char-attribute cell atr))
301                              char-db-ignored-attributes))
302                   (some (lambda (ccs)
303                           (encode-char cell ccs 'defined-only))
304                         ccss))
305           (if separator
306               (insert separator)
307             (setq separator (format ",%s" lbs)))
308           (let ((char-db-ignored-attributes
309                  (cons '<-subsumptive
310                        char-db-ignored-attributes)))
311             (char-db-json-insert-char-features
312              cell readable nil nil 'for-sub-node))
313           )
314         )
315        ((characterp cell)
316         (setq sources
317               (get-char-attribute
318                char (intern (format "%s*sources" name))))
319         (setq required-features nil)
320         (dolist (source sources)
321           (cond
322            ((memq source '(JP
323                            JP/Jouyou
324                            shinjigen shinjigen@1ed shinjigen@rev))
325             (setq required-features
326                   (union required-features
327                          '(=jis-x0208
328                            =jis-x0208@1990
329                            =jis-x0213-1@2000
330                            =jis-x0213-1@2004
331                            =jis-x0213-2
332                            =jis-x0212
333                            =jis-x0208@1983
334                            =jis-x0208@1978
335                            =shinjigen
336                            =shinjigen@1ed
337                            =shinjigen@rev
338                            =shinjigen/+p@rev))))
339            ((eq source 'CN)
340             (setq required-features
341                   (union required-features
342                          '(=gb2312
343                            =gb12345
344                            =iso-ir165)))))
345           (cond
346            ((find-charset (setq ret (intern (format "=%s" source))))
347             (setq required-features
348                   (cons ret required-features)))
349            (t (setq required-features
350                     (cons source required-features)))))
351         (cond ((string-match "@JP" (symbol-name name))
352                (setq required-features
353                      (union required-features
354                             '(=jis-x0208
355                               =jis-x0208@1990
356                               =jis-x0213-1-2000
357                               =jis-x0213-2-2000
358                               =jis-x0212
359                               =jis-x0208@1983
360                               =jis-x0208@1978))))
361               ((string-match "@CN" (symbol-name name))
362                (setq required-features
363                      (union required-features
364                             '(=gb2312
365                               =gb12345
366                               =iso-ir165)))))
367         (if separator
368             (insert separator)
369           (setq separator (format ",%s" lbs)))
370         (if readable
371             (insert (format "%S" cell))
372           (char-db-json-insert-char-spec cell readable
373                                          nil
374                                          required-features))
375         )
376        ((consp cell)
377         (if separator
378             (insert separator)
379           (setq separator (format ",%s" lbs)))
380         (if (consp (car cell))
381             (char-db-json-insert-char-spec cell readable)
382           (char-db-json-insert-char-reference cell readable))
383         )
384        (t
385         (if separator
386             (insert separator)
387           )
388         (insert (prin1-to-string cell))
389         (setq separator " ")))
390       (setq value (cdr value)))
391     (insert " ]")))
392
393 (defun char-db-json-insert-char-features (char
394                                           &optional readable attributes column
395                                           for-sub-node)
396   (unless column
397     (setq column (current-column)))
398   (let ((est-view-url-prefix "http://chise.org/est/view")
399         name value ; has-long-ccs-name
400         rest
401         radical strokes
402         (line-breaking
403          (concat "\n" (make-string (1+ column) ?\ )))
404         line-separator
405         lbs cell separator ret
406         key al cal
407         dest-ccss ; sources required-features
408         ccss)
409     (let (atr-d)
410       (setq attributes
411             (sort (if attributes
412                       (if (consp attributes)
413                           (progn
414                             (dolist (name attributes)
415                               (unless (memq name char-db-ignored-attributes)
416                                 (if (find-charset name)
417                                     (push name ccss))
418                                 (push name atr-d)))
419                             atr-d))
420                     (dolist (name (char-attribute-list))
421                       (unless (memq name char-db-ignored-attributes)
422                         (if (find-charset name)
423                             (push name ccss))
424                         (push name atr-d)))
425                     atr-d)
426                   #'char-attribute-name<)))
427     (insert
428      (format "{ \"@context\": \"%s/genre/character/context.json\""
429              est-view-url-prefix))
430     (setq line-separator (format ",%s" line-breaking))
431     (insert (format "%s \"@id\": \"%s\""
432                     line-separator
433                     (www-uri-make-object-url char)))
434     (when (memq '<-subsumptive attributes)
435       (when (or readable (not for-sub-node))
436         (when (setq value (get-char-attribute char '<-subsumptive))
437           (insert line-separator)
438           (char-db-json-insert-relation-feature char '<-subsumptive value
439                                                 line-breaking
440                                                 ccss readable)
441           ))
442       (setq attributes (delq '<-subsumptive attributes))
443       )
444     (when (and (memq '<-denotational attributes)
445                (setq value (get-char-attribute char '<-denotational)))
446       (insert line-separator)
447       (char-db-json-insert-relation-feature char '<-denotational value
448                                             line-breaking
449                                             ccss readable)
450       (setq attributes (delq '<-denotational attributes)))
451     (when (and (memq '<-denotational@component attributes)
452                (setq value
453                      (get-char-attribute char '<-denotational@component)))
454       (insert line-separator)
455       (char-db-json-insert-relation-feature
456        char '<-denotational@component value
457        line-breaking
458        ccss readable)
459       (setq attributes (delq '<-denotational@component attributes)))
460     (when (and (memq 'name attributes)
461                (setq value (get-char-attribute char 'name)))
462       (insert line-separator)
463       (insert (format
464                (if (> (+ (current-column) (length value)) 48)
465                    " \"name\": %S"
466                  " \"name\":                 %S")
467                value))
468       (setq attributes (delq 'name attributes))
469       )
470     (when (and (memq 'name* attributes)
471                (setq value (get-char-attribute char 'name*)))
472       (insert line-separator)
473       (insert (format
474                (if (> (+ (current-column) (length value)) 48)
475                    " \"name*\": %S"
476                  " \"name*\":                 %S")
477                value))
478       (setq attributes (delq 'name* attributes))
479       )
480     (when (and (memq 'script attributes)
481                (setq value (get-char-attribute char 'script)))
482       (insert line-separator)
483       (insert (format " \"script\":\t\t  %s"
484                       (mapconcat (function prin1-to-string)
485                                  value " ")))
486       (setq attributes (delq 'script attributes))
487       )
488     (dolist (name '(=>ucs =>ucs*))
489       (when (and (memq name attributes)
490                  (setq value (get-char-attribute char name)))
491         (insert line-separator)
492         (insert (format " \"%-20s\":  #x%04X,\t\"_comment\": \"%c\"%s"
493                         name value (decode-char '=ucs value)
494                         line-breaking))
495         (setq attributes (delq name attributes))))
496     (dolist (name '(=>ucs@gb =>ucs@big5))
497       (when (and (memq name attributes)
498                  (setq value (get-char-attribute char name)))
499         (insert line-separator)
500         (insert (format " \"%-20s\":  #x%04X,\t\"_comment\": \"%c\"%s"
501                         name value
502                         (decode-char (intern
503                                       (concat "="
504                                               (substring
505                                                (symbol-name name) 2)))
506                                      value)
507                         line-breaking))
508         (setq attributes (delq name attributes))
509         ))
510     (when (and (memq 'general-category attributes)
511                (setq value (get-char-attribute char 'general-category)))
512       (insert line-separator)
513       (insert (format
514                " \"general-category\":\t [ %s ], \"_comment\": \"%s\""
515                (mapconcat (lambda (cell)
516                             (format "%S" cell))
517                           value " ")
518                (cond ((rassoc value unidata-normative-category-alist)
519                       "Normative Category")
520                      ((rassoc value unidata-informative-category-alist)
521                       "Informative Category")
522                      (t
523                       "Unknown Category"))
524                ))
525       (setq attributes (delq 'general-category attributes))
526       )
527     (when (and (memq 'bidi-category attributes)
528                (setq value (get-char-attribute char 'bidi-category)))
529       (insert line-separator)
530       (insert (format " \"bidi-category\":\t  %S"
531                       value))
532       (setq attributes (delq 'bidi-category attributes))
533       )
534     (unless (or (not (memq 'mirrored attributes))
535                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
536                     'empty))
537       (insert line-separator)
538       (insert (format " \"mirrored\":\t\t  %S"
539                       value))
540       (setq attributes (delq 'mirrored attributes))
541       )
542     (cond
543      ((and (memq 'decimal-digit-value attributes)
544            (setq value (get-char-attribute char 'decimal-digit-value)))
545       (insert line-separator)
546       (insert (format " \"decimal-digit-value\":  %S"
547                       value))
548       (setq attributes (delq 'decimal-digit-value attributes))
549       (when (and (memq 'digit-value attributes)
550                  (setq value (get-char-attribute char 'digit-value)))
551         (insert line-separator)
552         (insert (format " \"digit-value\":\t    %S"
553                         value))
554         (setq attributes (delq 'digit-value attributes))
555         )
556       (when (and (memq 'numeric-value attributes)
557                  (setq value (get-char-attribute char 'numeric-value)))
558         (insert line-separator)
559         (insert (format " \"numeric-value\":\t    %S"
560                         value))
561         (setq attributes (delq 'numeric-value attributes))
562         )
563       )
564      (t
565       (when (and (memq 'digit-value attributes)
566                  (setq value (get-char-attribute char 'digit-value)))
567         (insert line-separator)
568         (insert (format " \"digit-value\":\t %S"
569                         value))
570         (setq attributes (delq 'digit-value attributes))
571         )
572       (when (and (memq 'numeric-value attributes)
573                  (setq value (get-char-attribute char 'numeric-value)))
574         (insert line-separator)
575         (insert (format " \"numeric-value\":\t %S"
576                         value))
577         (setq attributes (delq 'numeric-value attributes))
578         )))
579     (when (and (memq 'iso-10646-comment attributes)
580                (setq value (get-char-attribute char 'iso-10646-comment)))
581       (insert line-separator)
582       (insert (format "{\"iso-10646-comment\":\t %S}%s"
583                       value
584                       line-breaking))
585       (setq attributes (delq 'iso-10646-comment attributes))
586       )
587     (when (and (memq 'morohashi-daikanwa attributes)
588                (setq value (get-char-attribute char 'morohashi-daikanwa)))
589       (insert line-separator)
590       (insert (format "{\"morohashi-daikanwa\":\t%s}%s"
591                       (mapconcat (function prin1-to-string) value " ")
592                       line-breaking))
593       (setq attributes (delq 'morohashi-daikanwa attributes))
594       )
595     (setq radical nil
596           strokes nil)
597     (when (and (memq 'ideographic-radical attributes)
598                (setq value (get-char-attribute char 'ideographic-radical)))
599       (setq radical value)
600       (insert line-separator)
601       (insert (format " \"ideographic-radical\": %S,\t\"_comment\": \"%c\""
602                       radical
603                       (ideographic-radical radical)
604                       ))
605       (setq attributes (delq 'ideographic-radical attributes))
606       )
607     (when (and (memq 'shuowen-radical attributes)
608                (setq value (get-char-attribute char 'shuowen-radical)))
609       (insert line-separator)
610       (insert (format " \"shuowen-radical\":\t %S,\t\"_comment\": \"%c\""
611                       value
612                       (shuowen-radical value)))
613       (setq attributes (delq 'shuowen-radical attributes))
614       )
615     (let (key)
616       (dolist (domain
617                (append
618                 char-db-feature-domains
619                 (let (dest domain)
620                   (dolist (feature (char-attribute-list))
621                     (setq feature (symbol-name feature))
622                     (when (string-match
623                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
624                            feature)
625                       (setq domain (intern (match-string 2 feature)))
626                      (unless (memq domain dest)
627                        (setq dest (cons domain dest)))))
628                   (sort dest #'string<))))
629         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
630         (when (and (memq key attributes)
631                    (setq value (get-char-attribute char key)))
632           (setq radical value)
633           (insert line-separator)
634           (insert (format "{\"%s\": %S},\t\"_comment\": \"%c\"%s"
635                           key
636                           radical
637                           (ideographic-radical radical)
638                           line-breaking))
639           (setq attributes (delq key attributes))
640           )
641         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
642         (when (and (memq key attributes)
643                    (setq value (get-char-attribute char key)))
644           (setq strokes value)
645           (insert line-separator)
646           (insert (format " \"%s\": %S"
647                           key
648                           strokes))
649           (setq attributes (delq key attributes))
650           )
651         (setq key (intern (format "%s@%s" 'total-strokes domain)))
652         (when (and (memq key attributes)
653                    (setq value (get-char-attribute char key)))
654           (insert line-separator)
655           (insert (format " \"%s\":       %S"
656                           key
657                           value
658                           ))
659           (setq attributes (delq key attributes))
660           )
661         (dolist (feature '(ideographic-radical
662                            ideographic-strokes
663                            total-strokes))
664           (setq key (intern (format "%s@%s*sources" feature domain)))
665           (when (and (memq key attributes)
666                      (setq value (get-char-attribute char key)))
667             (insert line-separator)
668             (insert (format " \"%s\":%s" key line-breaking))
669             (dolist (cell value)
670               (insert (format " %s" cell)))
671             (setq attributes (delq key attributes))
672             ))
673         ))
674     (when (and (memq 'ideographic-strokes attributes)
675                (setq value (get-char-attribute char 'ideographic-strokes)))
676       (setq strokes value)
677       (insert line-separator)
678       (insert (format " \"ideographic-strokes\": %S"
679                       strokes
680                       ))
681       (setq attributes (delq 'ideographic-strokes attributes))
682       )
683     (when (and (memq 'kangxi-radical attributes)
684                (setq value (get-char-attribute char 'kangxi-radical)))
685       (unless (eq value radical)
686         (insert line-separator)
687         (insert (format "{\"kangxi-radical\":\t%S},\t\"_comment\": \"%c\"%s"
688                         value
689                         (ideographic-radical value)
690                         line-breaking))
691         (or radical
692             (setq radical value)))
693       (setq attributes (delq 'kangxi-radical attributes))
694       )
695     (when (and (memq 'kangxi-strokes attributes)
696                (setq value (get-char-attribute char 'kangxi-strokes)))
697       (unless (eq value strokes)
698         (insert line-separator)
699         (insert (format "{\"kangxi-strokes\":\t%S}%s"
700                         value
701                         line-breaking))
702         (or strokes
703             (setq strokes value)))
704       (setq attributes (delq 'kangxi-strokes attributes))
705       )
706     (when (and (memq 'japanese-radical attributes)
707                (setq value (get-char-attribute char 'japanese-radical)))
708       (unless (eq value radical)
709         (insert line-separator)
710         (insert (format "{\"japanese-radical\":\t%S},\t\"_comment\": \"%c\"%s"
711                         value
712                         (ideographic-radical value)
713                         line-breaking))
714         (or radical
715             (setq radical value)))
716       (setq attributes (delq 'japanese-radical attributes))
717       )
718     (when (and (memq 'japanese-strokes attributes)
719                (setq value (get-char-attribute char 'japanese-strokes)))
720       (unless (eq value strokes)
721         (insert line-separator)
722         (insert (format "{\"japanese-strokes\":\t%S}%s"
723                         value
724                         line-breaking))
725         (or strokes
726             (setq strokes value)))
727       (setq attributes (delq 'japanese-strokes attributes))
728       )
729     (when (and (memq 'cns-radical attributes)
730                (setq value (get-char-attribute char 'cns-radical)))
731       (insert line-separator)
732       (insert (format "{\"cns-radical\":\t%S},\t\"_comment\": \"%c\"%s"
733                       value
734                       (ideographic-radical value)
735                       line-breaking))
736       (setq attributes (delq 'cns-radical attributes))
737       )
738     (when (and (memq 'cns-strokes attributes)
739                (setq value (get-char-attribute char 'cns-strokes)))
740       (unless (eq value strokes)
741         (insert line-separator)
742         (insert (format "{\"cns-strokes\":\t%S}%s"
743                         value
744                         line-breaking))
745         (or strokes
746             (setq strokes value)))
747       (setq attributes (delq 'cns-strokes attributes))
748       )
749     (when (and (memq 'ideographic- attributes)
750                (setq value (get-char-attribute char 'ideographic-)))
751       (insert line-separator)
752       (insert "{\"ideographic-\":       ")
753       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
754             separator nil)
755       (while (consp value)
756         (setq cell (car value))
757         (if (integerp cell)
758             (setq cell (decode-char '=ucs cell)))
759         (cond ((characterp cell)
760                (if separator
761                    (insert lbs))
762                (if readable
763                    (insert (format "%S" cell))
764                  (char-db-json-insert-char-spec cell readable))
765                (setq separator lbs))
766               ((consp cell)
767                (if separator
768                    (insert lbs))
769                (if (consp (car cell))
770                    (char-db-json-insert-char-spec cell readable)
771                  (char-db-json-insert-char-reference cell readable))
772                (setq separator lbs))
773               (t
774                (if separator
775                    (insert separator))
776                (insert (prin1-to-string cell))
777                (setq separator " ")))
778         (setq value (cdr value)))
779       (insert " }")
780       (insert line-breaking)
781       (setq attributes (delq 'ideographic- attributes)))
782     (when (and (memq 'total-strokes attributes)
783                (setq value (get-char-attribute char 'total-strokes)))
784       (insert line-separator)
785       (insert (format " \"total-strokes\":       %S"
786                       value
787                       ))
788       (setq attributes (delq 'total-strokes attributes))
789       )
790     (when (and (memq '->ideograph attributes)
791                (setq value (get-char-attribute char '->ideograph)))
792       (insert line-separator)
793       (insert (format "{\"->ideograph\":\t%s}%s"
794                       (mapconcat (lambda (code)
795                                    (cond ((symbolp code)
796                                           (symbol-name code))
797                                          ((integerp code)
798                                           (format "#x%04X" code))
799                                          (t
800                                           (format "%s %S"
801                                                   line-breaking code))))
802                                  value " ")
803                       line-breaking))
804       (setq attributes (delq '->ideograph attributes))
805       )
806     (if (equal (get-char-attribute char '->titlecase)
807                (get-char-attribute char '->uppercase))
808         (setq attributes (delq '->titlecase attributes)))
809     (unless readable
810       (dolist (ignored '(composition
811                          ->denotational <-subsumptive ->ucs-unified
812                          ->ideographic-component-forms))
813         (setq attributes (delq ignored attributes))))
814     (while attributes
815       (setq name (car attributes))
816       (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
817                   'value-is-empty)
818         (cond ((setq ret (find-charset name))
819                (setq name (charset-name ret))
820                (when (not (memq name dest-ccss))
821                  (setq dest-ccss (cons name dest-ccss))
822                  (insert line-separator)
823                  (char-db-json-insert-ccs-feature name value line-breaking))
824                )
825               ((string-match "^=>ucs@" (symbol-name name))
826                (insert line-separator)
827                (insert (format "{\"%-20s\": #x%04X},\t\"_comment\": \"%c\"%s"
828                                name value (decode-char '=ucs value)
829                                line-breaking))
830                )
831               ((eq name 'jisx0208-1978/4X)
832                (insert line-separator)
833                (insert (format "{\"%-20s\": #x%04X}%s"
834                                name value
835                                line-breaking))
836                )
837               ((and
838                 (not readable)
839                 (not (eq name '->subsumptive))
840                 (not (eq name '->uppercase))
841                 (not (eq name '->lowercase))
842                 (not (eq name '->titlecase))
843                 (not (eq name '->canonical))
844                 (not (eq name '->Bopomofo))
845                 (not (eq name '->mistakable))
846                 (not (eq name '->ideographic-variants))
847                 (null (get-char-attribute
848                        char (intern (format "%s*sources" name))))
849                 (not (string-match "\\*sources$" (symbol-name name)))
850                 (null (get-char-attribute
851                        char (intern (format "%s*note" name))))
852                 (not (string-match "\\*note$" (symbol-name name)))
853                 (or (eq name '<-identical)
854                     (eq name '<-uppercase)
855                     (eq name '<-lowercase)
856                     (eq name '<-titlecase)
857                     (eq name '<-canonical)
858                     (eq name '<-ideographic-variants)
859                     ;; (eq name '<-synonyms)
860                     (string-match "^<-synonyms" (symbol-name name))
861                     (eq name '<-mistakable)
862                     (when (string-match "^->" (symbol-name name))
863                       (cond
864                        ((string-match "^->fullwidth" (symbol-name name))
865                         (not (and (consp value)
866                                   (characterp (car value))
867                                   (encode-char
868                                    (car value) '=ucs 'defined-only)))
869                         )
870                        (t)))
871                     ))
872                )
873               ((or (eq name 'ideographic-structure)
874                    (eq name 'ideographic-combination)
875                    (eq name 'ideographic-)
876                    (eq name '=decomposition)
877                    (char-feature-base-name= '=decomposition name)
878                    (char-feature-base-name= '=>decomposition name)
879                    ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
880                    ;;               (symbol-name name))
881                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
882                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
883                                  (symbol-name name))
884                    )
885                (insert line-separator)
886                (char-db-json-insert-relation-feature char name value
887                                                      line-breaking
888                                                      ccss readable))
889               ((memq name '(ideograph=
890                             original-ideograph-of
891                             ancient-ideograph-of
892                             vulgar-ideograph-of
893                             wrong-ideograph-of
894                             ;; simplified-ideograph-of
895                             ideographic-variants
896                             ;; ideographic-different-form-of
897                             ))
898                (insert line-separator)
899                (insert (format "{\"%-20s\":%s " name line-breaking))
900                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
901                      separator nil)
902                (while (consp value)
903                  (setq cell (car value))
904                  (if (and (consp cell)
905                           (consp (car cell)))
906                      (progn
907                        (if separator
908                            (insert lbs))
909                        (char-db-json-insert-alist cell readable)
910                        (setq separator lbs))
911                    (if separator
912                        (insert separator))
913                    (insert (prin1-to-string cell))
914                    (setq separator " "))
915                  (setq value (cdr value)))
916                (insert " }")
917                (insert line-breaking))
918               ((consp value)
919                (insert line-separator)
920                (insert (format " %-20s [ "
921                                (format "\"%s\":" name)))
922                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
923                      separator nil)
924                (while (consp value)
925                  (setq cell (car value))
926                  (if (and (consp cell)
927                           (consp (car cell))
928                           (setq ret (condition-case nil
929                                         (find-char cell)
930                                       (error nil))))
931                      (progn
932                        (setq rest cell
933                              al nil
934                              cal nil)
935                        (while rest
936                          (setq key (car (car rest)))
937                          (if (find-charset key)
938                              (setq cal (cons key cal))
939                            (setq al (cons key al)))
940                          (setq rest (cdr rest)))
941                        (if separator
942                            (insert lbs))
943                        (char-db-json-insert-char-features ret
944                                                           readable
945                                                           al
946                                                           nil 'for-sub-node)
947                        (setq separator lbs))
948                    (setq ret (prin1-to-string cell))
949                    (if separator
950                        (if (< (+ (current-column)
951                                  (length ret)
952                                  (length separator))
953                               76)
954                            (insert separator)
955                          (insert lbs)))
956                    (insert ret)
957                    (setq separator " "))
958                  (setq value (cdr value)))
959                (insert " ]")
960                )
961               (t
962                (insert line-separator)
963                (insert (format " %-20s "
964                                (format "\"%s\":" name)))
965                (setq ret (prin1-to-string value))
966                (unless (< (+ (current-column)
967                              (length ret)
968                              3)
969                           76)
970                  (insert line-breaking))
971                (insert ret)
972                )
973               ))
974       (setq attributes (cdr attributes)))
975     (insert "\n" (make-string column ?\ ) "}")))
976
977 (defun char-db-json-char-data (char &optional readable
978                                     attributes column)
979   (unless column
980     (setq column (current-column)))
981   (save-restriction
982     (narrow-to-region (point)(point))
983     (char-db-json-insert-char-features char readable attributes column)
984     (goto-char (point-min))
985     (while (re-search-forward "[ \t]+$" nil t)
986       (replace-match ""))
987     ;; from tabify.
988     (goto-char (point-min))
989     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
990       (let ((column (current-column))
991             (indent-tabs-mode t))
992         (delete-region (match-beginning 0) (point))
993         (indent-to column)))
994     (goto-char (point-max))
995     ;; (tabify (point-min)(point-max))
996     ))
997
998 (defun char-db-json-char-data-with-variant (char &optional printable
999                                            no-ucs-unified
1000                                            script excluded-script)
1001   (insert "[ ")
1002   (char-db-json-char-data char printable)
1003   (let ((variants (char-variants char))
1004         rest
1005         variant vs ret
1006         )
1007     (setq variants (sort variants #'<))
1008     (setq rest variants)
1009     (setq variants (cons char variants))
1010     (while rest
1011       (setq variant (car rest))
1012       (unless (get-char-attribute variant '<-subsumptive)
1013         (if (and (or (null script)
1014                      (null (setq vs (get-char-attribute variant 'script)))
1015                      (memq script vs))
1016                  (or (null excluded-script)
1017                      (null (setq vs (get-char-attribute variant 'script)))
1018                      (not (memq excluded-script vs))))
1019             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1020               (insert ",\n  ")
1021               (char-db-json-char-data variant printable)
1022               (if (setq ret (char-variants variant))
1023                   (while ret
1024                     (or (memq (car ret) variants)
1025                         ;; (get-char-attribute (car ret) '<-subsumptive)
1026                         (setq rest (nconc rest (list (car ret)))))
1027                     (setq ret (cdr ret)))))))
1028       (setq rest (cdr rest)))
1029     (insert "\n]\n")))
1030
1031 (defun char-db-json-insert-char-range-data (min max
1032                                                 &optional script
1033                                                 excluded-script)
1034   (let ((code min)
1035         char)
1036     (while (<= code max)
1037       (setq char (decode-char '=ucs code))
1038       (if (encode-char char '=ucs 'defined-only)
1039           (char-db-json-char-data-with-variant char nil 'no-ucs-unified
1040                                          script excluded-script))
1041       (setq code (1+ code)))))
1042
1043 (defun write-char-range-data-to-json-file (min max file
1044                                                &optional script
1045                                                excluded-script)
1046   (let ((coding-system-for-write char-db-file-coding-system))
1047     (with-temp-buffer
1048       (insert (format "// -*- coding: %s -*-\n"
1049                       char-db-file-coding-system))
1050       (char-db-json-insert-char-range-data min max script excluded-script)
1051       (write-region (point-min)(point-max) file))))
1052
1053 ;;;###autoload
1054 (defun what-char-definition-json (char)
1055   (interactive (list (char-after)))
1056   (let ((buf (get-buffer-create "*Character Description*"))
1057         (the-buf (current-buffer))
1058         (win-conf (current-window-configuration)))
1059     (pop-to-buffer buf)
1060     (make-local-variable 'what-character-original-window-configuration)
1061     (setq what-character-original-window-configuration win-conf)
1062     (setq buffer-read-only nil)
1063     (erase-buffer)
1064     (condition-case err
1065         (progn
1066           (char-db-json-char-data-with-variant char nil)
1067           (unless (char-attribute-alist char)
1068             (insert (format "// = %c\n"
1069                             (let* ((rest (split-char char))
1070                                    (ccs (pop rest))
1071                                    (code (pop rest)))
1072                               (while rest
1073                                 (setq code (logior (lsh code 8)
1074                                                    (pop rest))))
1075                               (decode-char ccs code)))))
1076           ;; (char-db-update-comment)
1077           (set-buffer-modified-p nil)
1078           (view-mode the-buf (lambda (buf)
1079                                (set-window-configuration
1080                                 what-character-original-window-configuration)
1081                                ))
1082           (goto-char (point-min)))
1083       (error (progn
1084                (set-window-configuration
1085                 what-character-original-window-configuration)
1086                (signal (car err) (cdr err)))))))
1087
1088 (defun char-db-json-batch-view ()
1089   (setq terminal-coding-system 'binary)
1090   (condition-case err
1091       (let* ((target (pop command-line-args-left))
1092              ret genre
1093              object)
1094         (princ "Content-Type: application/json; charset=UTF-8
1095
1096 ")
1097         (cond
1098          ((stringp target)
1099           (when (string-match "^char=\\(&[^&;]+;\\)" target)
1100             (setq ret (match-end 0))
1101             (setq target
1102                   (concat "char="
1103                           (www-uri-encode-object
1104                            (www-uri-decode-object
1105                             'character (match-string 1 target)))
1106                           (substring target ret))))
1107           (setq target
1108                 (mapcar (lambda (cell)
1109                           (if (string-match "=" cell)
1110                               (progn
1111                                 (setq genre (substring cell 0 (match-beginning 0))
1112                                       ret (substring cell (match-end 0)))
1113                                 (cons
1114                                  (intern
1115                                   (decode-uri-string genre 'utf-8-mcs-er))
1116                                  ret))
1117                             (list (decode-uri-string cell 'utf-8-mcs-er))))
1118                         (split-string target "&")))
1119           (setq ret (car target))
1120           (cond ((eq (car ret) 'char)
1121                  (setq object (www-uri-decode-object (car ret)(cdr ret)))
1122                  (when (characterp object)
1123                    (with-temp-buffer
1124                      (char-db-json-char-data object)
1125                      (encode-coding-region (point-min)(point-max)
1126                                            char-db-file-coding-system)
1127                      (princ (buffer-string))
1128                      ))
1129                  )
1130                 ((eq (car ret) 'character)
1131                  (setq object (www-uri-decode-object (car ret)(cdr ret)))
1132                  (when (characterp object)
1133                    (with-temp-buffer
1134                      (char-db-json-char-data object)
1135                      (encode-coding-region (point-min)(point-max)
1136                                            char-db-file-coding-system)
1137                      (princ (buffer-string))
1138                      ))
1139                  ))
1140           ))
1141         )
1142     (error nil
1143            (princ (format "%S" err)))
1144     ))
1145
1146
1147 ;;; @ end
1148 ;;;
1149
1150 (provide 'char-db-json)
1151
1152 ;;; char-db-json.el ends here