(char-db-json-insert-char-features): Add "@type" based on glyph
[chise/est.git] / char-db-json.el
1 ;;; char-db-json.el --- Character Database utility -*- coding: utf-8-er; -*-
2
3 ;; Copyright (C) 1998,1999,2000,2001,2002,2003,2004,2005,2006,2007,
4 ;;   2008,2009,2010,2011,2012,2013,2014,2015,2016 MORIOKA Tomohiko.
5
6 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
7 ;; Keywords: CHISE, Character Database, ISO/IEC 10646, UCS, Unicode, MULE.
8
9 ;; This file is part of XEmacs CHISE.
10
11 ;; XEmacs CHISE is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
15
16 ;; XEmacs CHISE is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 ;; General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with XEmacs CHISE; see the file COPYING.  If not, write to
23 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Code:
27
28 (require 'char-db-util)
29 (require 'cwiki-format)
30
31 (setq char-db-ignored-attributes
32       '(ideographic-products
33         ;; ->HNG
34         *instance@ruimoku/bibliography/title
35         *instance@morpheme-entry/zh-classical))
36
37
38 ;;; @ char-db formatters
39 ;;;
40     
41 (defun char-db-json-insert-char-spec (char &optional readable column
42                                            required-features)
43   (unless column
44     (setq column (current-column)))
45   (let (char-spec temp-char)
46     (setq char-spec (char-db-make-char-spec char))
47     (unless (or (characterp char) ; char
48                 (condition-case nil
49                     (setq char (find-char char-spec))
50                   (error nil)))
51       ;; define temporary character
52       ;;   Current implementation is dirty.
53       (setq temp-char (define-char (cons '(ideograph-daikanwa . 0)
54                                          char-spec)))
55       (remove-char-attribute temp-char 'ideograph-daikanwa)
56       (setq char temp-char))
57     (char-db-json-insert-char-features char
58                                        readable
59                                        (union (mapcar #'car char-spec)
60                                               required-features)
61                                        nil 'for-sub-node)
62     (when temp-char
63       ;; undefine temporary character
64       ;;   Current implementation is dirty.
65       (setq char-spec (char-attribute-alist temp-char))
66       (while char-spec
67         (remove-char-attribute temp-char (car (car char-spec)))
68         (setq char-spec (cdr char-spec))))))
69
70 (defun char-db-json-insert-alist (alist &optional readable column)
71   (unless column
72     (setq column (current-column)))
73   (let ((line-breaking
74          (concat "\n" (make-string (1+ column) ?\ )))
75         name value
76         ret al ; cal
77         key
78         lbs cell rest separator)
79     (insert "(")
80     (while alist
81       (setq name (car (car alist))
82             value (cdr (car alist)))
83       (cond ((eq name 'char)
84              (insert "(char . ")
85              (if (setq ret (condition-case nil
86                                (find-char value)
87                              (error nil)))
88                  (progn
89                    (setq al nil
90                          ;; cal nil
91                          )
92                    (while value
93                      (setq key (car (car value)))
94                      ;; (if (find-charset key)
95                      ;;     (setq cal (cons key cal))
96                      (setq al (cons key al))
97                      ;; )
98                      (setq value (cdr value)))
99                    (insert-char-attributes ret
100                                            readable
101                                            (or al 'none) ; cal
102                                            nil 'for-sub-node))
103                (insert (prin1-to-string value)))
104              (insert ")")
105              (insert line-breaking))
106             ((consp value)
107              (insert (format "(%-18s " name))
108              (setq lbs (concat "\n" (make-string (current-column) ?\ )))
109              (while (consp value)
110                (setq cell (car value))
111                (if (and (consp cell)
112                         (consp (car cell))
113                         (setq ret (condition-case nil
114                                       (find-char cell)
115                                     (error nil)))
116                         )
117                    (progn
118                      (setq rest cell
119                            al nil
120                            ;; cal nil
121                            )
122                      (while rest
123                        (setq key (car (car rest)))
124                        ;; (if (find-charset key)
125                        ;;     (setq cal (cons key cal))
126                        (setq al (cons key al))
127                        ;; )
128                        (setq rest (cdr rest)))
129                      (if separator
130                          (insert lbs))
131                      (insert-char-attributes ret
132                                              readable
133                                              al ; cal
134                                              nil 'for-sub-node)
135                      (setq separator lbs))
136                  (if separator
137                      (insert separator))
138                  (insert (prin1-to-string cell))
139                  (setq separator " "))
140                (setq value (cdr value)))
141              (insert ")")
142              (insert line-breaking))
143             (t
144              (insert (format "(%-18s . %S)%s"
145                              name value
146                              line-breaking))))
147       (setq alist (cdr alist))))
148   (insert ")"))
149
150 (defun char-db-json-insert-char-reference (plist &optional readable column)
151   (unless column
152     (setq column (current-column)))
153   (let ((line-breaking
154          (concat "\n" (make-string (1+ column) ?\ )))
155         (separator "")
156         name value)
157     (insert "{")
158     (while plist
159       (setq name (pop plist))
160       (setq value (pop plist))
161       (cond ((eq name :char)
162              (insert separator)
163              (insert "\"char\":\t")
164              (cond ((numberp value)
165                     (setq value (decode-char '=ucs value)))
166                    ;; ((consp value)
167                    ;;  (setq value (or (find-char value)
168                    ;;                  value)))
169                    )
170              (char-db-json-insert-char-spec value readable)
171              (insert line-breaking)
172              (setq separator ""))
173             ((eq name :radical)
174              (insert (format "%s%s\t%d, \"_comment\": \"%c\"%s"
175                              separator
176                              name value
177                              (ideographic-radical value)
178                              line-breaking))
179              (setq separator ""))
180             (t
181              (insert (format "%s%s\t%S" separator name value))
182              (setq separator line-breaking)))
183       ))
184   (insert " }"))
185
186 (defun char-db-json-insert-ccs-feature (name value line-breaking)
187   (cond
188    ((integerp value)
189     (insert
190      (format
191       (cond
192        ((memq name '(=>iwds-1
193                      ==shinjigen
194                      =shinjigen
195                      =shinjigen@1ed ==shinjigen@1ed
196                      =shinjigen@rev ==shinjigen@rev
197                      =shinjigen/+p@rev ==shinjigen/+p@rev
198                      ===daikanwa/ho ==daikanwa/ho
199                      =daikanwa/ho =>>daikanwa/ho =>daikanwa/ho))
200         " %-20s  %4d,\t\"_comment\": \"%c")
201        ((eq name '=shinjigen@1ed/24pr)
202         " %-20s  %4d,\t\"_comment\": \"%c")
203        ((or
204          (memq name
205                '(===daikanwa
206                  ==daikanwa =daikanwa =>>daikanwa =>daikanwa
207                  =daikanwa@rev1 =daikanwa@rev2
208                  =daikanwa/+p ==daikanwa/+p ===daikanwa/+p
209                  =>>daikanwa/+p
210                  =daikanwa/+2p =>>daikanwa/+2p
211                  =gt ==gt ===gt
212                  =>>gt =+>gt =>gt
213                  =gt-k ==gt-k ===gt-k
214                  =>>gt-k =>gt-k
215                  =adobe-japan1-0 ==adobe-japan1-0 ===adobe-japan1-0
216                  =adobe-japan1-1 ==adobe-japan1-1 ===adobe-japan1-1
217                  =adobe-japan1-2 ==adobe-japan1-2 ===adobe-japan1-2
218                  =adobe-japan1-3 ==adobe-japan1-3 ===adobe-japan1-3
219                  =adobe-japan1-4 ==adobe-japan1-4 ===adobe-japan1-4
220                  =adobe-japan1-5 ==adobe-japan1-5 ===adobe-japan1-5
221                  =adobe-japan1-6 ==adobe-japan1-6 ===adobe-japan1-6
222                  =>>adobe-japan1-0 =+>adobe-japan1-0
223                  =>>adobe-japan1-1 =+>adobe-japan1-1
224                  =>>adobe-japan1-2 =+>adobe-japan1-2
225                  =>>adobe-japan1-3 =+>adobe-japan1-3
226                  =>>adobe-japan1-4 =+>adobe-japan1-4
227                  =>>adobe-japan1-5 =+>adobe-japan1-5
228                  =>>adobe-japan1-6 =+>adobe-japan1-6
229                  =>cbeta =cbeta =>>cbeta ==cbeta ===cbeta
230                  =zinbun-oracle =>zinbun-oracle
231                  ===hng-jou ===hng-keg ===hng-dng ===hng-mam
232                  ===hng-drt ===hng-kgk ===hng-myz ===hng-kda
233                  ===hng-khi ===hng-khm ===hng-hok ===hng-kyd ===hng-sok
234                  ===hng-yhk ===hng-kak ===hng-kar ===hng-kae
235                  ===hng-sys ===hng-tsu ===hng-tzj
236                  ===hng-hos ===hng-nak ===hng-jhk
237                  ===hng-hod ===hng-gok ===hng-ink ===hng-nto
238                  ===hng-nkm ===hng-k24 ===hng-nkk
239                  ===hng-kcc ===hng-kcj ===hng-kbk ===hng-sik
240                  ===hng-skk ===hng-kyu ===hng-ksk ===hng-wan
241                  ===hng-okd ===hng-wad ===hng-kmi
242                  ===hng-zkd ===hng-doh ===hng-jyu
243                  ===hng-tzs ===hng-kss ===hng-kyo
244                  ===hng-smk))
245          ;; (string-match "^=adobe-" (symbol-name name))
246          )
247         " %-20s %5d,\t\"_comment\": \"%c")
248        ((memq name '(=hanyo-denshi/ks
249                      ==hanyo-denshi/ks ===hanyo-denshi/ks
250                      =>>hanyo-denshi/ks
251                      =koseki ==koseki
252                      =mj ==mj ===mj =>>mj =>mj
253                      =zihai mojikyo))
254         " %-19s %6d,\t\"_comment\": \"%c")
255        ((memq name '(=hanyo-denshi/tk ==hanyo-denshi/tk))
256         " %-20s %8d,\t\"_comment\": \"%c")
257        ((>= (charset-dimension name) 2)
258         " %-20s %5d,\t\"_comment\": \"%c")
259        (t
260         " %-20s   %3d,\t\"_comment\": \"%c"))
261       (format "\"%s\":" name)
262       (if (= (charset-iso-graphic-plane name) 1)
263           (logior value
264                   (cond ((= (charset-dimension name) 1)
265                          #x80)
266                         ((= (charset-dimension name) 2)
267                          #x8080)
268                         ((= (charset-dimension name) 3)
269                          #x808080)
270                         (t 0)))
271         value)
272       (char-db-decode-isolated-char name value)))
273     (if (and (= (charset-chars name) 94)
274              (= (charset-dimension name) 2))
275         (insert (format " [%02d-%02d]\""
276                         (- (lsh value -8) 32)
277                         (- (logand value 255) 32)))
278       (insert "\""))
279     )
280    (t
281     (insert (format " %-20s %s"
282                     (format "\"%s\":" name) value))
283     ))
284   )
285
286 (defun char-db-json-insert-relation-feature (char name value line-breaking
287                                              ccss readable)
288   (insert (format " %-20s [%s    "
289                   (format "\"%s\":" name) line-breaking))
290   (let ((lbs (concat "\n" (make-string (current-column) ?\ )))
291         separator cell sources required-features
292         ret)
293     (while (consp value)
294       (setq cell (car value))
295       (if (integerp cell)
296           (setq cell (decode-char '=ucs cell)))
297       (cond
298        ((eq name '->subsumptive)
299         (when (or (not (some (lambda (atr)
300                                (get-char-attribute cell atr))
301                              char-db-ignored-attributes))
302                   (some (lambda (ccs)
303                           (encode-char cell ccs 'defined-only))
304                         ccss))
305           (if separator
306               (insert separator)
307             (setq separator (format ",%s" lbs)))
308           (let ((char-db-ignored-attributes
309                  (cons '<-subsumptive
310                        char-db-ignored-attributes)))
311             (char-db-json-insert-char-features
312              cell readable nil nil 'for-sub-node))
313           )
314         )
315        ((characterp cell)
316         (setq sources
317               (get-char-attribute
318                char (intern (format "%s*sources" name))))
319         (setq required-features nil)
320         (dolist (source sources)
321           (cond
322            ((memq source '(JP
323                            JP/Jouyou
324                            shinjigen shinjigen@1ed shinjigen@rev))
325             (setq required-features
326                   (union required-features
327                          '(=jis-x0208
328                            =jis-x0208@1990
329                            =jis-x0213-1@2000
330                            =jis-x0213-1@2004
331                            =jis-x0213-2
332                            =jis-x0212
333                            =jis-x0208@1983
334                            =jis-x0208@1978
335                            =shinjigen
336                            =shinjigen@1ed
337                            =shinjigen@rev
338                            =shinjigen/+p@rev))))
339            ((eq source 'CN)
340             (setq required-features
341                   (union required-features
342                          '(=gb2312
343                            =gb12345
344                            =iso-ir165)))))
345           (cond
346            ((find-charset (setq ret (intern (format "=%s" source))))
347             (setq required-features
348                   (cons ret required-features)))
349            (t (setq required-features
350                     (cons source required-features)))))
351         (cond ((string-match "@JP" (symbol-name name))
352                (setq required-features
353                      (union required-features
354                             '(=jis-x0208
355                               =jis-x0208@1990
356                               =jis-x0213-1-2000
357                               =jis-x0213-2-2000
358                               =jis-x0212
359                               =jis-x0208@1983
360                               =jis-x0208@1978))))
361               ((string-match "@CN" (symbol-name name))
362                (setq required-features
363                      (union required-features
364                             '(=gb2312
365                               =gb12345
366                               =iso-ir165)))))
367         (if separator
368             (insert separator)
369           (setq separator (format ",%s" lbs)))
370         (if readable
371             (insert (format "%S" cell))
372           (char-db-json-insert-char-spec cell readable
373                                          nil
374                                          required-features))
375         )
376        ((consp cell)
377         (if separator
378             (insert separator)
379           (setq separator (format ",%s" lbs)))
380         (if (consp (car cell))
381             (char-db-json-insert-char-spec cell readable)
382           (char-db-json-insert-char-reference cell readable))
383         )
384        (t
385         (if separator
386             (insert separator)
387           )
388         (insert (prin1-to-string cell))
389         (setq separator " ")))
390       (setq value (cdr value)))
391     (insert " ]")))
392
393 (defun char-db-json-insert-char-features (char
394                                           &optional readable attributes column
395                                           for-sub-node)
396   (unless column
397     (setq column (current-column)))
398   (let ((est-view-url-prefix "http://chise.org/est/view")
399         id obj-id type
400         name value ; has-long-ccs-name
401         rest
402         radical strokes
403         (line-breaking
404          (concat "\n" (make-string (1+ column) ?\ )))
405         line-separator
406         lbs cell separator ret
407         key al cal
408         dest-ccss ; sources required-features
409         ccss)
410     (let (atr-d)
411       (setq attributes
412             (sort (if attributes
413                       (if (consp attributes)
414                           (progn
415                             (dolist (name attributes)
416                               (unless (memq name char-db-ignored-attributes)
417                                 (if (find-charset name)
418                                     (push name ccss))
419                                 (push name atr-d)))
420                             atr-d))
421                     (dolist (name (char-attribute-list))
422                       (unless (memq name char-db-ignored-attributes)
423                         (if (find-charset name)
424                             (push name ccss))
425                         (push name atr-d)))
426                     atr-d)
427                   #'char-attribute-name<)))
428     (insert
429      (format "{ \"@context\": \"%s/genre/character/context.json\""
430              est-view-url-prefix))
431     (setq line-separator (format ",%s" line-breaking))
432     (setq id (www-uri-make-object-url char))
433     (insert (format "%s \"@id\": \"%s\"" line-separator id))
434     (setq obj-id (file-name-nondirectory id))
435     (setq type
436           (cond
437            ((string-match "^a2\\." obj-id)
438             "chise:super-abstract-character")
439            ((string-match "^a\\." obj-id)
440             "chise:abstract-character")
441            ((string-match "^o\\." obj-id)
442             "chise:unified-glyph")
443            ((string-match "^rep\\." obj-id)
444             "chise:abstract-glyph")
445            ((string-match "^g\\." obj-id)
446             "chise:detailed-glyph")
447            ((string-match "^g2\\." obj-id)
448             "chise:abstract-glyph-form")
449            ((string-match "^gi\\." obj-id)
450             "chise:abstract-glyph-form")
451            ((string-match "^repi\\." obj-id)
452             "chise:glyph-image")
453            (t
454             "chise:character")
455            ))
456     (insert (format "%s \"@type\": \"%s\"" line-separator type))
457     (when (memq '<-subsumptive attributes)
458       (when (or readable (not for-sub-node))
459         (when (setq value (get-char-attribute char '<-subsumptive))
460           (insert line-separator)
461           (char-db-json-insert-relation-feature char '<-subsumptive value
462                                                 line-breaking
463                                                 ccss readable)
464           ))
465       (setq attributes (delq '<-subsumptive attributes))
466       )
467     (when (and (memq '<-denotational attributes)
468                (setq value (get-char-attribute char '<-denotational)))
469       (insert line-separator)
470       (char-db-json-insert-relation-feature char '<-denotational value
471                                             line-breaking
472                                             ccss readable)
473       (setq attributes (delq '<-denotational attributes)))
474     (when (and (memq '<-denotational@component attributes)
475                (setq value
476                      (get-char-attribute char '<-denotational@component)))
477       (insert line-separator)
478       (char-db-json-insert-relation-feature
479        char '<-denotational@component value
480        line-breaking
481        ccss readable)
482       (setq attributes (delq '<-denotational@component attributes)))
483     (when (and (memq 'name attributes)
484                (setq value (get-char-attribute char 'name)))
485       (insert line-separator)
486       (insert (format
487                (if (> (+ (current-column) (length value)) 48)
488                    " \"name\": %S"
489                  " \"name\":                 %S")
490                value))
491       (setq attributes (delq 'name attributes))
492       )
493     (when (and (memq 'name* attributes)
494                (setq value (get-char-attribute char 'name*)))
495       (insert line-separator)
496       (insert (format
497                (if (> (+ (current-column) (length value)) 48)
498                    " \"name*\": %S"
499                  " \"name*\":                 %S")
500                value))
501       (setq attributes (delq 'name* attributes))
502       )
503     (when (and (memq 'script attributes)
504                (setq value (get-char-attribute char 'script)))
505       (insert line-separator)
506       (insert (format " \"script\":\t\t  %s"
507                       (mapconcat (function prin1-to-string)
508                                  value " ")))
509       (setq attributes (delq 'script attributes))
510       )
511     (dolist (name '(=>ucs =>ucs*))
512       (when (and (memq name attributes)
513                  (setq value (get-char-attribute char name)))
514         (insert line-separator)
515         (insert (format " \"%-20s\":  #x%04X,\t\"_comment\": \"%c\"%s"
516                         name value (decode-char '=ucs value)
517                         line-breaking))
518         (setq attributes (delq name attributes))))
519     (dolist (name '(=>ucs@gb =>ucs@big5))
520       (when (and (memq name attributes)
521                  (setq value (get-char-attribute char name)))
522         (insert line-separator)
523         (insert (format " \"%-20s\":  #x%04X,\t\"_comment\": \"%c\"%s"
524                         name value
525                         (decode-char (intern
526                                       (concat "="
527                                               (substring
528                                                (symbol-name name) 2)))
529                                      value)
530                         line-breaking))
531         (setq attributes (delq name attributes))
532         ))
533     (when (and (memq 'general-category attributes)
534                (setq value (get-char-attribute char 'general-category)))
535       (insert line-separator)
536       (insert (format
537                " \"general-category\":\t [ %s ], \"_comment\": \"%s\""
538                (mapconcat (lambda (cell)
539                             (format "%S" cell))
540                           value " ")
541                (cond ((rassoc value unidata-normative-category-alist)
542                       "Normative Category")
543                      ((rassoc value unidata-informative-category-alist)
544                       "Informative Category")
545                      (t
546                       "Unknown Category"))
547                ))
548       (setq attributes (delq 'general-category attributes))
549       )
550     (when (and (memq 'bidi-category attributes)
551                (setq value (get-char-attribute char 'bidi-category)))
552       (insert line-separator)
553       (insert (format " \"bidi-category\":\t  %S"
554                       value))
555       (setq attributes (delq 'bidi-category attributes))
556       )
557     (unless (or (not (memq 'mirrored attributes))
558                 (eq (setq value (get-char-attribute char 'mirrored 'empty))
559                     'empty))
560       (insert line-separator)
561       (insert (format " \"mirrored\":\t\t  %S"
562                       value))
563       (setq attributes (delq 'mirrored attributes))
564       )
565     (cond
566      ((and (memq 'decimal-digit-value attributes)
567            (setq value (get-char-attribute char 'decimal-digit-value)))
568       (insert line-separator)
569       (insert (format " \"decimal-digit-value\":  %S"
570                       value))
571       (setq attributes (delq 'decimal-digit-value attributes))
572       (when (and (memq 'digit-value attributes)
573                  (setq value (get-char-attribute char 'digit-value)))
574         (insert line-separator)
575         (insert (format " \"digit-value\":\t    %S"
576                         value))
577         (setq attributes (delq 'digit-value attributes))
578         )
579       (when (and (memq 'numeric-value attributes)
580                  (setq value (get-char-attribute char 'numeric-value)))
581         (insert line-separator)
582         (insert (format " \"numeric-value\":\t    %S"
583                         value))
584         (setq attributes (delq 'numeric-value attributes))
585         )
586       )
587      (t
588       (when (and (memq 'digit-value attributes)
589                  (setq value (get-char-attribute char 'digit-value)))
590         (insert line-separator)
591         (insert (format " \"digit-value\":\t %S"
592                         value))
593         (setq attributes (delq 'digit-value attributes))
594         )
595       (when (and (memq 'numeric-value attributes)
596                  (setq value (get-char-attribute char 'numeric-value)))
597         (insert line-separator)
598         (insert (format " \"numeric-value\":\t %S"
599                         value))
600         (setq attributes (delq 'numeric-value attributes))
601         )))
602     (when (and (memq 'iso-10646-comment attributes)
603                (setq value (get-char-attribute char 'iso-10646-comment)))
604       (insert line-separator)
605       (insert (format "{\"iso-10646-comment\":\t %S}%s"
606                       value
607                       line-breaking))
608       (setq attributes (delq 'iso-10646-comment attributes))
609       )
610     (when (and (memq 'morohashi-daikanwa attributes)
611                (setq value (get-char-attribute char 'morohashi-daikanwa)))
612       (insert line-separator)
613       (insert (format "{\"morohashi-daikanwa\":\t%s}%s"
614                       (mapconcat (function prin1-to-string) value " ")
615                       line-breaking))
616       (setq attributes (delq 'morohashi-daikanwa attributes))
617       )
618     (setq radical nil
619           strokes nil)
620     (when (and (memq 'ideographic-radical attributes)
621                (setq value (get-char-attribute char 'ideographic-radical)))
622       (setq radical value)
623       (insert line-separator)
624       (insert (format " \"ideographic-radical\": %S,\t\"_comment\": \"%c\""
625                       radical
626                       (ideographic-radical radical)
627                       ))
628       (setq attributes (delq 'ideographic-radical attributes))
629       )
630     (when (and (memq 'shuowen-radical attributes)
631                (setq value (get-char-attribute char 'shuowen-radical)))
632       (insert line-separator)
633       (insert (format " \"shuowen-radical\":\t %S,\t\"_comment\": \"%c\""
634                       value
635                       (shuowen-radical value)))
636       (setq attributes (delq 'shuowen-radical attributes))
637       )
638     (let (key)
639       (dolist (domain
640                (append
641                 char-db-feature-domains
642                 (let (dest domain)
643                   (dolist (feature (char-attribute-list))
644                     (setq feature (symbol-name feature))
645                     (when (string-match
646                            "\\(radical\\|strokes\\)@\\([^@*]+\\)\\(\\*\\|$\\)"
647                            feature)
648                       (setq domain (intern (match-string 2 feature)))
649                      (unless (memq domain dest)
650                        (setq dest (cons domain dest)))))
651                   (sort dest #'string<))))
652         (setq key (intern (format "%s@%s" 'ideographic-radical domain)))
653         (when (and (memq key attributes)
654                    (setq value (get-char-attribute char key)))
655           (setq radical value)
656           (insert line-separator)
657           (insert (format "{\"%s\": %S},\t\"_comment\": \"%c\"%s"
658                           key
659                           radical
660                           (ideographic-radical radical)
661                           line-breaking))
662           (setq attributes (delq key attributes))
663           )
664         (setq key (intern (format "%s@%s" 'ideographic-strokes domain)))
665         (when (and (memq key attributes)
666                    (setq value (get-char-attribute char key)))
667           (setq strokes value)
668           (insert line-separator)
669           (insert (format " \"%s\": %S"
670                           key
671                           strokes))
672           (setq attributes (delq key attributes))
673           )
674         (setq key (intern (format "%s@%s" 'total-strokes domain)))
675         (when (and (memq key attributes)
676                    (setq value (get-char-attribute char key)))
677           (insert line-separator)
678           (insert (format " \"%s\":       %S"
679                           key
680                           value
681                           ))
682           (setq attributes (delq key attributes))
683           )
684         (dolist (feature '(ideographic-radical
685                            ideographic-strokes
686                            total-strokes))
687           (setq key (intern (format "%s@%s*sources" feature domain)))
688           (when (and (memq key attributes)
689                      (setq value (get-char-attribute char key)))
690             (insert line-separator)
691             (insert (format " \"%s\":%s" key line-breaking))
692             (dolist (cell value)
693               (insert (format " %s" cell)))
694             (setq attributes (delq key attributes))
695             ))
696         ))
697     (when (and (memq 'ideographic-strokes attributes)
698                (setq value (get-char-attribute char 'ideographic-strokes)))
699       (setq strokes value)
700       (insert line-separator)
701       (insert (format " \"ideographic-strokes\": %S"
702                       strokes
703                       ))
704       (setq attributes (delq 'ideographic-strokes attributes))
705       )
706     (when (and (memq 'kangxi-radical attributes)
707                (setq value (get-char-attribute char 'kangxi-radical)))
708       (unless (eq value radical)
709         (insert line-separator)
710         (insert (format "{\"kangxi-radical\":\t%S},\t\"_comment\": \"%c\"%s"
711                         value
712                         (ideographic-radical value)
713                         line-breaking))
714         (or radical
715             (setq radical value)))
716       (setq attributes (delq 'kangxi-radical attributes))
717       )
718     (when (and (memq 'kangxi-strokes attributes)
719                (setq value (get-char-attribute char 'kangxi-strokes)))
720       (unless (eq value strokes)
721         (insert line-separator)
722         (insert (format "{\"kangxi-strokes\":\t%S}%s"
723                         value
724                         line-breaking))
725         (or strokes
726             (setq strokes value)))
727       (setq attributes (delq 'kangxi-strokes attributes))
728       )
729     (when (and (memq 'japanese-radical attributes)
730                (setq value (get-char-attribute char 'japanese-radical)))
731       (unless (eq value radical)
732         (insert line-separator)
733         (insert (format "{\"japanese-radical\":\t%S},\t\"_comment\": \"%c\"%s"
734                         value
735                         (ideographic-radical value)
736                         line-breaking))
737         (or radical
738             (setq radical value)))
739       (setq attributes (delq 'japanese-radical attributes))
740       )
741     (when (and (memq 'japanese-strokes attributes)
742                (setq value (get-char-attribute char 'japanese-strokes)))
743       (unless (eq value strokes)
744         (insert line-separator)
745         (insert (format "{\"japanese-strokes\":\t%S}%s"
746                         value
747                         line-breaking))
748         (or strokes
749             (setq strokes value)))
750       (setq attributes (delq 'japanese-strokes attributes))
751       )
752     (when (and (memq 'cns-radical attributes)
753                (setq value (get-char-attribute char 'cns-radical)))
754       (insert line-separator)
755       (insert (format "{\"cns-radical\":\t%S},\t\"_comment\": \"%c\"%s"
756                       value
757                       (ideographic-radical value)
758                       line-breaking))
759       (setq attributes (delq 'cns-radical attributes))
760       )
761     (when (and (memq 'cns-strokes attributes)
762                (setq value (get-char-attribute char 'cns-strokes)))
763       (unless (eq value strokes)
764         (insert line-separator)
765         (insert (format "{\"cns-strokes\":\t%S}%s"
766                         value
767                         line-breaking))
768         (or strokes
769             (setq strokes value)))
770       (setq attributes (delq 'cns-strokes attributes))
771       )
772     (when (and (memq 'ideographic- attributes)
773                (setq value (get-char-attribute char 'ideographic-)))
774       (insert line-separator)
775       (insert "{\"ideographic-\":       ")
776       (setq lbs (concat "\n" (make-string (current-column) ?\ ))
777             separator nil)
778       (while (consp value)
779         (setq cell (car value))
780         (if (integerp cell)
781             (setq cell (decode-char '=ucs cell)))
782         (cond ((characterp cell)
783                (if separator
784                    (insert lbs))
785                (if readable
786                    (insert (format "%S" cell))
787                  (char-db-json-insert-char-spec cell readable))
788                (setq separator lbs))
789               ((consp cell)
790                (if separator
791                    (insert lbs))
792                (if (consp (car cell))
793                    (char-db-json-insert-char-spec cell readable)
794                  (char-db-json-insert-char-reference cell readable))
795                (setq separator lbs))
796               (t
797                (if separator
798                    (insert separator))
799                (insert (prin1-to-string cell))
800                (setq separator " ")))
801         (setq value (cdr value)))
802       (insert " }")
803       (insert line-breaking)
804       (setq attributes (delq 'ideographic- attributes)))
805     (when (and (memq 'total-strokes attributes)
806                (setq value (get-char-attribute char 'total-strokes)))
807       (insert line-separator)
808       (insert (format " \"total-strokes\":       %S"
809                       value
810                       ))
811       (setq attributes (delq 'total-strokes attributes))
812       )
813     (when (and (memq '->ideograph attributes)
814                (setq value (get-char-attribute char '->ideograph)))
815       (insert line-separator)
816       (insert (format "{\"->ideograph\":\t%s}%s"
817                       (mapconcat (lambda (code)
818                                    (cond ((symbolp code)
819                                           (symbol-name code))
820                                          ((integerp code)
821                                           (format "#x%04X" code))
822                                          (t
823                                           (format "%s %S"
824                                                   line-breaking code))))
825                                  value " ")
826                       line-breaking))
827       (setq attributes (delq '->ideograph attributes))
828       )
829     (if (equal (get-char-attribute char '->titlecase)
830                (get-char-attribute char '->uppercase))
831         (setq attributes (delq '->titlecase attributes)))
832     (unless readable
833       (dolist (ignored '(composition
834                          ->denotational <-subsumptive ->ucs-unified
835                          ->ideographic-component-forms))
836         (setq attributes (delq ignored attributes))))
837     (while attributes
838       (setq name (car attributes))
839       (unless (eq (setq value (get-char-attribute char name 'value-is-empty))
840                   'value-is-empty)
841         (cond ((setq ret (find-charset name))
842                (setq name (charset-name ret))
843                (when (not (memq name dest-ccss))
844                  (setq dest-ccss (cons name dest-ccss))
845                  (insert line-separator)
846                  (char-db-json-insert-ccs-feature name value line-breaking))
847                )
848               ((string-match "^=>ucs@" (symbol-name name))
849                (insert line-separator)
850                (insert (format "{\"%-20s\": #x%04X},\t\"_comment\": \"%c\"%s"
851                                name value (decode-char '=ucs value)
852                                line-breaking))
853                )
854               ((eq name 'jisx0208-1978/4X)
855                (insert line-separator)
856                (insert (format "{\"%-20s\": #x%04X}%s"
857                                name value
858                                line-breaking))
859                )
860               ((and
861                 (not readable)
862                 (not (eq name '->subsumptive))
863                 (not (eq name '->uppercase))
864                 (not (eq name '->lowercase))
865                 (not (eq name '->titlecase))
866                 (not (eq name '->canonical))
867                 (not (eq name '->Bopomofo))
868                 (not (eq name '->mistakable))
869                 (not (eq name '->ideographic-variants))
870                 (null (get-char-attribute
871                        char (intern (format "%s*sources" name))))
872                 (not (string-match "\\*sources$" (symbol-name name)))
873                 (null (get-char-attribute
874                        char (intern (format "%s*note" name))))
875                 (not (string-match "\\*note$" (symbol-name name)))
876                 (or (eq name '<-identical)
877                     (eq name '<-uppercase)
878                     (eq name '<-lowercase)
879                     (eq name '<-titlecase)
880                     (eq name '<-canonical)
881                     (eq name '<-ideographic-variants)
882                     ;; (eq name '<-synonyms)
883                     (string-match "^<-synonyms" (symbol-name name))
884                     (eq name '<-mistakable)
885                     (when (string-match "^->" (symbol-name name))
886                       (cond
887                        ((string-match "^->fullwidth" (symbol-name name))
888                         (not (and (consp value)
889                                   (characterp (car value))
890                                   (encode-char
891                                    (car value) '=ucs 'defined-only)))
892                         )
893                        (t)))
894                     ))
895                )
896               ((or (eq name 'ideographic-structure)
897                    (eq name 'ideographic-combination)
898                    (eq name 'ideographic-)
899                    (eq name '=decomposition)
900                    (char-feature-base-name= '=decomposition name)
901                    (char-feature-base-name= '=>decomposition name)
902                    ;; (string-match "^=>*decomposition\\(@[^*]+\\)?$"
903                    ;;               (symbol-name name))
904                    (string-match "^\\(->\\|<-\\)[^*]*$" (symbol-name name))
905                    (string-match "^\\(->\\|<-\\)[^*]*\\*sources$"
906                                  (symbol-name name))
907                    )
908                (insert line-separator)
909                (char-db-json-insert-relation-feature char name value
910                                                      line-breaking
911                                                      ccss readable))
912               ((memq name '(ideograph=
913                             original-ideograph-of
914                             ancient-ideograph-of
915                             vulgar-ideograph-of
916                             wrong-ideograph-of
917                             ;; simplified-ideograph-of
918                             ideographic-variants
919                             ;; ideographic-different-form-of
920                             ))
921                (insert line-separator)
922                (insert (format "{\"%-20s\":%s " name line-breaking))
923                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
924                      separator nil)
925                (while (consp value)
926                  (setq cell (car value))
927                  (if (and (consp cell)
928                           (consp (car cell)))
929                      (progn
930                        (if separator
931                            (insert lbs))
932                        (char-db-json-insert-alist cell readable)
933                        (setq separator lbs))
934                    (if separator
935                        (insert separator))
936                    (insert (prin1-to-string cell))
937                    (setq separator " "))
938                  (setq value (cdr value)))
939                (insert " }")
940                (insert line-breaking))
941               ((consp value)
942                (insert line-separator)
943                (insert (format " %-20s [ "
944                                (format "\"%s\":" name)))
945                (setq lbs (concat "\n" (make-string (current-column) ?\ ))
946                      separator nil)
947                (while (consp value)
948                  (setq cell (car value))
949                  (if (and (consp cell)
950                           (consp (car cell))
951                           (setq ret (condition-case nil
952                                         (find-char cell)
953                                       (error nil))))
954                      (progn
955                        (setq rest cell
956                              al nil
957                              cal nil)
958                        (while rest
959                          (setq key (car (car rest)))
960                          (if (find-charset key)
961                              (setq cal (cons key cal))
962                            (setq al (cons key al)))
963                          (setq rest (cdr rest)))
964                        (if separator
965                            (insert lbs))
966                        (char-db-json-insert-char-features ret
967                                                           readable
968                                                           al
969                                                           nil 'for-sub-node)
970                        (setq separator lbs))
971                    (setq ret (prin1-to-string cell))
972                    (if separator
973                        (if (< (+ (current-column)
974                                  (length ret)
975                                  (length separator))
976                               76)
977                            (insert separator)
978                          (insert lbs)))
979                    (insert ret)
980                    (setq separator " "))
981                  (setq value (cdr value)))
982                (insert " ]")
983                )
984               (t
985                (insert line-separator)
986                (insert (format " %-20s "
987                                (format "\"%s\":" name)))
988                (setq ret (prin1-to-string value))
989                (unless (< (+ (current-column)
990                              (length ret)
991                              3)
992                           76)
993                  (insert line-breaking))
994                (insert ret)
995                )
996               ))
997       (setq attributes (cdr attributes)))
998     (insert "\n" (make-string column ?\ ) "}")))
999
1000 (defun char-db-json-char-data (char &optional readable
1001                                     attributes column)
1002   (unless column
1003     (setq column (current-column)))
1004   (save-restriction
1005     (narrow-to-region (point)(point))
1006     (char-db-json-insert-char-features char readable attributes column)
1007     (goto-char (point-min))
1008     (while (re-search-forward "[ \t]+$" nil t)
1009       (replace-match ""))
1010     ;; from tabify.
1011     (goto-char (point-min))
1012     (while (re-search-forward "[ \t][ \t][ \t]*" nil t)
1013       (let ((column (current-column))
1014             (indent-tabs-mode t))
1015         (delete-region (match-beginning 0) (point))
1016         (indent-to column)))
1017     (goto-char (point-max))
1018     ;; (tabify (point-min)(point-max))
1019     ))
1020
1021 (defun char-db-json-char-data-with-variant (char &optional printable
1022                                            no-ucs-unified
1023                                            script excluded-script)
1024   (insert "[ ")
1025   (char-db-json-char-data char printable)
1026   (let ((variants (char-variants char))
1027         rest
1028         variant vs ret
1029         )
1030     (setq variants (sort variants #'<))
1031     (setq rest variants)
1032     (setq variants (cons char variants))
1033     (while rest
1034       (setq variant (car rest))
1035       (unless (get-char-attribute variant '<-subsumptive)
1036         (if (and (or (null script)
1037                      (null (setq vs (get-char-attribute variant 'script)))
1038                      (memq script vs))
1039                  (or (null excluded-script)
1040                      (null (setq vs (get-char-attribute variant 'script)))
1041                      (not (memq excluded-script vs))))
1042             (unless (and no-ucs-unified (get-char-attribute variant '=ucs))
1043               (insert ",\n  ")
1044               (char-db-json-char-data variant printable)
1045               (if (setq ret (char-variants variant))
1046                   (while ret
1047                     (or (memq (car ret) variants)
1048                         ;; (get-char-attribute (car ret) '<-subsumptive)
1049                         (setq rest (nconc rest (list (car ret)))))
1050                     (setq ret (cdr ret)))))))
1051       (setq rest (cdr rest)))
1052     (insert "\n]\n")))
1053
1054 (defun char-db-json-insert-char-range-data (min max
1055                                                 &optional script
1056                                                 excluded-script)
1057   (let ((code min)
1058         char)
1059     (while (<= code max)
1060       (setq char (decode-char '=ucs code))
1061       (if (encode-char char '=ucs 'defined-only)
1062           (char-db-json-char-data-with-variant char nil 'no-ucs-unified
1063                                          script excluded-script))
1064       (setq code (1+ code)))))
1065
1066 (defun write-char-range-data-to-json-file (min max file
1067                                                &optional script
1068                                                excluded-script)
1069   (let ((coding-system-for-write char-db-file-coding-system))
1070     (with-temp-buffer
1071       (insert (format "// -*- coding: %s -*-\n"
1072                       char-db-file-coding-system))
1073       (char-db-json-insert-char-range-data min max script excluded-script)
1074       (write-region (point-min)(point-max) file))))
1075
1076 ;;;###autoload
1077 (defun what-char-definition-json (char)
1078   (interactive (list (char-after)))
1079   (let ((buf (get-buffer-create "*Character Description*"))
1080         (the-buf (current-buffer))
1081         (win-conf (current-window-configuration)))
1082     (pop-to-buffer buf)
1083     (make-local-variable 'what-character-original-window-configuration)
1084     (setq what-character-original-window-configuration win-conf)
1085     (setq buffer-read-only nil)
1086     (erase-buffer)
1087     (condition-case err
1088         (progn
1089           (char-db-json-char-data-with-variant char nil)
1090           (unless (char-attribute-alist char)
1091             (insert (format "// = %c\n"
1092                             (let* ((rest (split-char char))
1093                                    (ccs (pop rest))
1094                                    (code (pop rest)))
1095                               (while rest
1096                                 (setq code (logior (lsh code 8)
1097                                                    (pop rest))))
1098                               (decode-char ccs code)))))
1099           ;; (char-db-update-comment)
1100           (set-buffer-modified-p nil)
1101           (view-mode the-buf (lambda (buf)
1102                                (set-window-configuration
1103                                 what-character-original-window-configuration)
1104                                ))
1105           (goto-char (point-min)))
1106       (error (progn
1107                (set-window-configuration
1108                 what-character-original-window-configuration)
1109                (signal (car err) (cdr err)))))))
1110
1111 (defun char-db-json-batch-view ()
1112   (setq terminal-coding-system 'binary)
1113   (condition-case err
1114       (let* ((target (pop command-line-args-left))
1115              ret genre
1116              object)
1117         (princ "Content-Type: application/json; charset=UTF-8
1118
1119 ")
1120         (cond
1121          ((stringp target)
1122           (when (string-match "^char=\\(&[^&;]+;\\)" target)
1123             (setq ret (match-end 0))
1124             (setq target
1125                   (concat "char="
1126                           (www-uri-encode-object
1127                            (www-uri-decode-object
1128                             'character (match-string 1 target)))
1129                           (substring target ret))))
1130           (setq target
1131                 (mapcar (lambda (cell)
1132                           (if (string-match "=" cell)
1133                               (progn
1134                                 (setq genre (substring cell 0 (match-beginning 0))
1135                                       ret (substring cell (match-end 0)))
1136                                 (cons
1137                                  (intern
1138                                   (decode-uri-string genre 'utf-8-mcs-er))
1139                                  ret))
1140                             (list (decode-uri-string cell 'utf-8-mcs-er))))
1141                         (split-string target "&")))
1142           (setq ret (car target))
1143           (cond ((eq (car ret) 'char)
1144                  (setq object (www-uri-decode-object (car ret)(cdr ret)))
1145                  (when (characterp object)
1146                    (with-temp-buffer
1147                      (char-db-json-char-data object)
1148                      (encode-coding-region (point-min)(point-max)
1149                                            char-db-file-coding-system)
1150                      (princ (buffer-string))
1151                      ))
1152                  )
1153                 ((eq (car ret) 'character)
1154                  (setq object (www-uri-decode-object (car ret)(cdr ret)))
1155                  (when (characterp object)
1156                    (with-temp-buffer
1157                      (char-db-json-char-data object)
1158                      (encode-coding-region (point-min)(point-max)
1159                                            char-db-file-coding-system)
1160                      (princ (buffer-string))
1161                      ))
1162                  ))
1163           ))
1164         )
1165     (error nil
1166            (princ (format "%S" err)))
1167     ))
1168
1169
1170 ;;; @ end
1171 ;;;
1172
1173 (provide 'char-db-json)
1174
1175 ;;; char-db-json.el ends here