092b39bf0eb0f33d099e7c65913a5a8a4f35e925
[chise/omega.git] / chise2otf / elisp / chise-tex.el
1 ;;; chise-tex.el --- Coding-system based chise2otf like tool
2
3 ;; Copyright (C) 2004,2005,2006,2007,2008 MORIOKA Tomohiko
4
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: OTF package, pTeX, CHISE, Multiscript, Multilingual
7
8 ;; This file is a part of Omega/CHISE.
9
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
14
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING.  If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Code:
26
27 (defvar chise-tex-coded-charset-expression-alist
28   '((=ucs-bmp@gb        "UCSgb"         4 X)
29     (=ucs-bmp@jis       "UCSjis"        4 X)
30     (=ucs-bmp@ks        "UCSks"         4 X)
31     (=gt-pj-1           "GTpjA"         4 X)
32     (=gt-pj-2           "GTpjB"         4 X)
33     (=gt-pj-3           "GTpjC"         4 X)
34     (=gt-pj-4           "GTpjD"         4 X)
35     (=gt-pj-5           "GTpjE"         4 X)
36     (=gt-pj-6           "GTpjF"         4 X)
37     (=gt-pj-7           "GTpjG"         4 X)
38     (=gt-pj-8           "GTpjH"         4 X)
39     (=gt-pj-9           "GTpjI"         4 X)
40     (=gt-pj-10          "GTpjJ"         4 X)
41     (=gt-pj-11          "GTpjK"         4 X)
42     (=ruimoku-v6        "Ruimoku"       4 X)
43     (=hanziku-1         "HanzikuA"      4 X)
44     (=hanziku-2         "HanzikuB"      4 X)
45     (=hanziku-3         "HanzikuC"      4 X)
46     (=hanziku-4         "HanzikuD"      4 X)
47     (=hanziku-5         "HanzikuE"      4 X)
48     (=hanziku-6         "HanzikuF"      4 X)
49     (=hanziku-7         "HanzikuG"      4 X)
50     (=hanziku-8         "HanzikuH"      4 X)
51     (=hanziku-9         "HanzikuI"      4 X)
52     (=hanziku-10        "HanzikuJ"      4 X)
53     (=hanziku-11        "HanzikuK"      4 X)
54     (=hanziku-12        "HanzikuL"      4 X)
55     (=ucs-bmp@cns       "UCScns"        4 X)
56     ))
57
58 (defun chise-tex-encode-region-for-gb (start end)
59   (interactive "r")
60   (save-excursion
61     (save-restriction
62       (narrow-to-region start end)
63       (goto-char start)
64       (let (chr ret rest spec)
65         (while (and (skip-chars-forward "\x00-\xFF")
66                     (not (eobp)))
67           (setq chr (char-after))
68           (cond ((memq chr '(?\e$(O#@\e(B))
69                  (delete-char)
70                  (insert (format "\\UCSjis{%04X}"
71                                  (encode-char chr '=ucs@jis)))
72                  )
73                 ((and (setq ret (encode-char chr '=jis-x0208-1983))
74                       (< ret #x3021))
75                  (forward-char))
76                 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
77                 ;;  (delete-char)
78                 ;;  (insert (decode-char '=jis-x0208-1983 ret)))
79                 ((and (encode-char chr '=ks-x1001)
80                       (setq ret (or (encode-char chr '=ucs@ks)
81                                     (char-ucs chr))))
82                  (delete-char)
83                  ;; (if (eq (char-before) ?\e$B!T\e(B)
84                  ;;     (insert " "))
85                  (insert (format "\\UCSks{%04X}" ret)))
86                 ((catch 'tag
87                    (setq rest chise-tex-coded-charset-expression-alist)
88                    (while (setq spec (car rest))
89                      (if (setq ret (encode-char chr (car spec)))
90                          (throw 'tag ret))
91                      (setq rest (cdr rest))))
92                  (delete-char)
93                  ;; (if (eq (char-before) ?\e$B!T\e(B)
94                  ;;     (insert " "))
95                  (insert (format (format "\\%s{%%0%d%s}"
96                                          (nth 1 spec)
97                                          (nth 2 spec)
98                                          (nth 3 spec))
99                                  ret)))
100                 (t
101                  (forward-char))))))))
102
103 (defun chise-tex-encode-region-for-jis (start end)
104   (interactive "r")
105   (save-excursion
106     (save-restriction
107       (narrow-to-region start end)
108       (goto-char start)
109       (let (chr ret rest spec modifier base modifier-1)
110         (while (and (skip-chars-forward "\x00-\x7F")
111                     (not (eobp)))
112           (setq chr (char-after))
113           (cond ((encode-char chr '=jis-x0208-1983)
114                  (forward-char))
115                 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
116                 ;;  (delete-char)
117                 ;;  (insert (decode-char '=jis-x0208-1983 ret)))
118                 ((and (not (eq (char-ucs chr) #x0439))
119                       (not (eq (char-ucs chr) #x0451))
120                       (setq ret (char-feature chr '=decomposition))
121                       (setq modifier (assq (nth 1 ret)
122                                            '((?\u0300 . "`")
123                                              (?\u0301 . "'")
124                                              (?\u0302 . "^")
125                                              (?\u0303 . "~")
126                                              (?\u0304 . "=")
127                                              (?\u0306 . "u")
128                                              (?\u0307 . ".")
129                                              (?\u0308 . "\"")
130                                              (?\u0309 . "Hook")
131                                              (?\u030C . "v")
132                                              (?\u0323 . "d")
133                                              (?\u0327 . "c")
134                                              ))))
135                  (delete-char)
136                  (setq base (car ret))
137                  (if (and (setq ret (char-feature base '=decomposition))
138                           (setq modifier-1
139                                 (assq (car modifier)
140                                       (cdr
141                                        (assq (nth 1 ret)
142                                              '((?\u0302
143                                                 (?\u0300 . "CircGrave")
144                                                 (?\u0301 . "CircAcute")
145                                                 (?\u0303 . "CircTilde")
146                                                 (?\u0309 . "CircHook")
147                                                 )
148                                                (?\u031B
149                                                 (?\u0301 . "HornAcute")
150                                                 )
151                                                (?\u0323
152                                                 (?\u0302 . "Circudot")
153                                                 )))))))
154                      (insert (format "\\%s{%c}" (cdr modifier-1) (car ret)))
155                    (insert (format "\\%s{%c}" (cdr modifier) base))))
156                 ((and (or (encode-char chr '=jis-x0213-1-2000)
157                           (encode-char chr '=jis-x0213-2-2000))
158                       (setq ret (or (encode-char chr '=ucs@jis/2000)
159                                     (encode-char chr '=ucs@jis/fw)))
160                       (<= ret #xFFFF))
161                  (delete-char)
162                  ;; (if (eq (char-before) ?\e$B!T\e(B)
163                  ;;     (insert " "))
164                  (insert (format "\\UCSjis{%04X}" ret)))
165                 ((and (encode-char chr '=ks-x1001)
166                       (setq ret (or (encode-char chr '=ucs@ks)
167                                     (char-ucs chr))))
168                  (delete-char)
169                  ;; (if (eq (char-before) ?\e$B!T\e(B)
170                  ;;     (insert " "))
171                  (insert (format "\\UCSks{%04X}" ret)))
172                 ((setq ret (encode-char chr '=ucs-hangul))
173                  (delete-char)
174                  ;; (if (eq (char-before) ?\e$B!T\e(B)
175                  ;;     (insert " "))
176                  (insert (format "\\UCSks{%04X}" ret)))
177                 ((eq chr ?\u00B2)
178                  (delete-char)
179                  (insert "$^2$"))
180                 ((eq chr ?\u00B3)
181                  (delete-char)
182                  (insert "$^3$"))
183                 ((eq chr ?\u0111)
184                  (delete-char)
185                  (insert "{\\usefont{T1}{pxr}{m}{n}\\dj}"))
186                 ((eq chr ?\u014B)
187                  (delete-char)
188                  (insert "{\\usefont{T1}{pxr}{m}{n}\\ng}"))
189                 ((eq chr ?\u0282)
190                  (delete-char)
191                  (insert "{\\usefont{T1}{pxr}{m}{n}\\k{s}}"))
192                 ((eq chr ?\u2022)
193                  (delete-char)
194                  (insert "\\textbullet{}"))
195                 ((eq chr ?\u2081)
196                  (delete-char)
197                  (insert "$_1$"))
198                 ((eq chr ?\u2082)
199                  (delete-char)
200                  (insert "$_2$"))
201                 ((eq chr ?\u2083)
202                  (delete-char)
203                  (insert "$_3$"))
204                 ((eq chr ?\u2085)
205                  (delete-char)
206                  (insert "$_5$"))
207                 ((eq chr ?\u0294)
208                  (delete-char)
209                  (insert "\\UCSjis{0294}"))
210                 ((and (encode-char chr '=ucs@jp)
211                       (setq ret (char-representative-of-domain chr 'gb))
212                       (setq ret (encode-char ret '=ucs@gb))
213                       (<= ret #xFFFF))
214                  (delete-char)
215                  ;; (if (eq (char-before) ?\e$B!T\e(B)
216                  ;;     (insert " "))
217                  (insert (format "\\UCSgb{%04X}" ret)))
218                 ((catch 'tag
219                    (setq rest chise-tex-coded-charset-expression-alist)
220                    (while (setq spec (car rest))
221                      (if (setq ret (encode-char chr (car spec)))
222                          (throw 'tag ret))
223                      (setq rest (cdr rest))))
224                  (delete-char)
225                  ;; (if (eq (char-before) ?\e$B!T\e(B)
226                  ;;     (insert " "))
227                  (insert (format (format "\\%s{%%0%d%s}"
228                                          (nth 1 spec)
229                                          (nth 2 spec)
230                                          (nth 3 spec))
231                                  ret)))
232                 (t
233                  (forward-char))))))))
234
235 (defun chise-tex-encode-region-for-utf-8-jis (start end)
236   (interactive "r")
237   (save-excursion
238     (save-restriction
239       (narrow-to-region start end)
240       (goto-char start)
241       (let ((font-encoding 'T1)
242             chr ret rest spec modifier base modifier-1 pos)
243         (while (and (skip-chars-forward "\x00-\x7F")
244                     (not (eobp)))
245           (setq chr (char-after))
246           (cond
247            ((and (setq ret (encode-char chr '=ucs))
248                  (and (<= #x0400 ret)(<= ret #x04F9)))
249             (if (eq font-encoding 'T2A)
250                 (forward-char)
251               (setq pos (point))
252               (unless (and (prog1
253                                (search-backward
254                                 "\\fontencoding{T2A}\\selectfont{}" nil t)
255                              (goto-char pos))
256                            (eq pos (match-end 0)))
257                 (insert "\\fontencoding{T2A}\\selectfont{}")
258                 )
259               (forward-char)
260               (setq font-encoding 'T2A))
261             )
262            (t
263             (unless (eq font-encoding 'T1)
264               (setq pos (point))
265               (unless (and (prog1
266                                (search-backward
267                                 "\\fontencoding{T1}\\selectfont{}" nil t)
268                              (goto-char pos))
269                            (eq pos (match-end 0)))
270                 (insert "\\fontencoding{T1}\\selectfont{}")
271                 )
272               (setq font-encoding 'T1))
273             (cond ((eq (char-ucs chr) #x00D7)
274                    (delete-char)
275                    (insert "\\UCSjis{00D7}"))
276                   ((encode-char chr '=jis-x0208-1983)
277                    (forward-char))
278                   ((and (setq ret (encode-char chr '=ucs))
279                         (or (and (<= #x0374 ret)(<= ret #x03F3))
280                             (eq ret #x1E2B)))
281                    (forward-char))
282                   ;; ((setq ret (encode-char chr '=jis-x0208-1990))
283                   ;;  (delete-char)
284                   ;;  (insert (decode-char '=jis-x0208-1983 ret)))
285                   ((eq (char-ucs chr) #x012B)
286                    (delete-char)
287                    (insert "\\={\\i}"))
288                   ((and (not (eq (char-ucs chr) #x0439))
289                         (not (eq (char-ucs chr) #x0451))
290                         (setq ret (char-feature chr '=decomposition))
291                         (setq modifier (assq (nth 1 ret)
292                                              '((?\u0300 . "`")
293                                                (?\u0301 . "'")
294                                                (?\u0302 . "^")
295                                                (?\u0303 . "~")
296                                                (?\u0304 . "=")
297                                                (?\u0306 . "u")
298                                                (?\u0307 . ".")
299                                                (?\u0308 . "\"")
300                                                (?\u0309 . "Hook")
301                                                (?\u030C . "v")
302                                                (?\u0323 . "d")
303                                                (?\u0327 . "c")
304                                                (?\u032E . "ubreve")
305                                                (?\u0331 . "umacron")
306                                                ))))
307                    (delete-char)
308                    (setq base (car ret))
309                    (if (and (setq ret (char-feature base '=decomposition))
310                             (setq modifier-1
311                                   (assq (car modifier)
312                                         (cdr
313                                          (assq (nth 1 ret)
314                                                '((?\u0302
315                                                   (?\u0300 . "CircGrave")
316                                                   (?\u0301 . "CircAcute")
317                                                   (?\u0303 . "CircTilde")
318                                                   (?\u0309 . "CircHook")
319                                                   )
320                                                  (?\u031B
321                                                   (?\u0301 . "HornAcute")
322                                                   )
323                                                  (?\u0323
324                                                   (?\u0302 . "Circudot")
325                                                   )))))))
326                        (insert (format "\\%s{%c}" (cdr modifier-1) (car ret)))
327                      (insert (format "\\%s{%c}" (cdr modifier) base))))
328                   ((and (or (encode-char chr '=jis-x0213-1-2000)
329                             (encode-char chr '=jis-x0213-2-2000))
330                         (setq ret (or (encode-char chr '=ucs@jis/2000)
331                                       (encode-char chr '=ucs@jis/fw)))
332                         (<= ret #xFFFF))
333                    ;; (delete-char)
334                    ;; (if (eq (char-before) ?\e$B!T\e(B)
335                    ;;     (insert " "))
336                    ;; (insert (format "\\UCSjis{%04X}" ret))
337                    (forward-char))
338                   ((and (encode-char chr '=ks-x1001)
339                         (setq ret (or (encode-char chr '=ucs@ks)
340                                       (char-ucs chr))))
341                    (delete-char)
342                    ;; (if (eq (char-before) ?\e$B!T\e(B)
343                    ;;     (insert " "))
344                    (insert (format "\\UCSks{%04X}" ret)))
345                   ((setq ret (encode-char chr '=ucs-hangul))
346                    (delete-char)
347                    ;; (if (eq (char-before) ?\e$B!T\e(B)
348                    ;;     (insert " "))
349                    (insert (format "\\UCSks{%04X}" ret)))
350                   ((eq chr ?\u00B2)
351                    (delete-char)
352                    (insert "$^2$"))
353                   ((eq chr ?\u00B3)
354                    (delete-char)
355                    (insert "$^3$"))
356                   ((eq chr ?\u0111)
357                    (delete-char)
358                    (insert "{\\usefont{T1}{pxr}{m}{n}\\dj}"))
359                   ((eq chr ?\u014B)
360                    (delete-char)
361                    (insert "{\\usefont{T1}{pxr}{m}{n}\\ng}"))
362                   ((eq chr ?\u0282)
363                    (delete-char)
364                    (insert "{\\usefont{T1}{pxr}{m}{n}\\k{s}}"))
365                   ((eq chr ?\u0294)
366                    (delete-char)
367                    (insert "\\UCSjis{0294}"))
368                   ((eq chr ?\u2022)
369                    (delete-char)
370                    (insert "\\textbullet{}"))
371                   ((eq chr ?\u2081)
372                    (delete-char)
373                    (insert "$_1$"))
374                   ((eq chr ?\u2082)
375                    (delete-char)
376                    (insert "$_2$"))
377                   ((eq chr ?\u2083)
378                    (delete-char)
379                    (insert "$_3$"))
380                   ((eq chr ?\u2085)
381                    (delete-char)
382                    (insert "$_5$"))
383                   ((eq chr ?\u2637)
384                    (delete-char)
385                    (insert "\\UCSgb{2637}"))
386                   ((and (encode-char chr '=ucs@jp)
387                         (setq ret (char-representative-of-domain chr 'gb))
388                         (setq ret (encode-char ret '=ucs@gb))
389                         (<= ret #xFFFF))
390                    (delete-char)
391                    ;; (if (eq (char-before) ?\e$B!T\e(B)
392                    ;;     (insert " "))
393                    (insert (format "\\UCSgb{%04X}" ret)))
394                   ((catch 'tag
395                      (setq rest chise-tex-coded-charset-expression-alist)
396                      (while (setq spec (car rest))
397                        (if (setq ret (encode-char chr (car spec)))
398                            (throw 'tag ret))
399                        (setq rest (cdr rest))))
400                    (delete-char)
401                    ;; (if (eq (char-before) ?\e$B!T\e(B)
402                    ;;     (insert " "))
403                    (insert (format (format "\\%s{%%0%d%s}"
404                                            (nth 1 spec)
405                                            (nth 2 spec)
406                                            (nth 3 spec))
407                                    ret)))
408                   (t
409                    (forward-char))))))))))
410
411 (defun chise-tex-decode-region (start end)
412   (interactive "r")
413   (save-excursion
414     (save-restriction
415       (narrow-to-region start end)
416       (goto-char start)
417       (let (macro code ret me rest spec)
418         (while (re-search-forward "\\\\\\(.\\){\\(.\\)}" nil t)
419           (when (and
420                  (setq macro
421                        (assq
422                         (aref (match-string 1) 0)
423                         '((?\` . ?\u0300) ; <COMBINING GRAVE ACCENT>
424                           (?\' . ?\u0301) ; <COMBINING ACUTE ACCENT>
425                           (?^  . ?\u0302) ; <COMBINING CIRCUMFLEX ACCENT>
426                           (?~  . ?\u0303) ; <COMBINING TILDE>
427                           (?=  . ?\u0304) ; <COMBINING MACRON>
428                           (?u  . ?\u0306) ; <COMBINING BREVE>
429                           (?\. . ?\u0307) ; <COMBINING DOT ABOVE>
430                           (?\" . ?\u0308) ; <COMBINING DIAERESIS>
431                           (?v  . ?\u030C) ; <COMBINING CARON>
432                           (?d  . ?\u0323) ; <COMBINING DOT BELOW>
433                           (?c  . ?\u0327) ; <COMBINING CEDILLA>
434                           )))
435                  (setq ret
436                        (cdr (assq (cdr macro)
437                                   (char-feature (aref (match-string 2) 0)
438                                                 'composition)))))
439             (delete-region (match-beginning 0)(match-end 0))
440             (insert ret)))
441         (goto-char start)
442         (while (re-search-forward "\\\\\\([a-zA-Z0-9]+\\){\\([0-9A-Fa-f]+\\)}"
443                                   nil t)
444           (setq macro (match-string 1)
445                 code (match-string 2)
446                 me (match-end 0))
447           (if (and (catch 'tag
448                      (setq rest chise-tex-coded-charset-expression-alist)
449                      (while (setq spec (car rest))
450                        (if (string= (nth 1 spec) macro)
451                            (throw 'tag spec))
452                        (setq rest (cdr rest))))
453                    (setq ret (decode-char (car spec)
454                                           (string-to-int
455                                            code
456                                            (if (eq (nth 3 spec) 'X)
457                                                16)))))
458               (progn
459                 (delete-region (match-beginning 0)(match-end 0))
460                 (insert ret))
461             (goto-char me)))))))
462
463 (make-coding-system
464  'iso-2022-jp-tex-gb 'iso2022
465  "ISO-2022-JP with TeX representation for GB fonts."
466  '(charset-g0 ascii
467    short t
468    seven t
469    ;; input-charset-conversion ((latin-jisx0201 ascii)
470    ;;                        (japanese-jisx0208-1978 japanese-jisx0208))
471    pre-write-conversion chise-tex-encode-region-for-gb
472    post-read-conversion chise-tex-decode-region
473    mnemonic "pTeX(GB)/7bit"
474    ))
475
476 (make-coding-system
477  'iso-2022-jp-tex-jis 'iso2022
478  "ISO-2022-JP with TeX representation for JIS fonts."
479  '(charset-g0 ascii
480    short t
481    seven t
482    ccs-priority-list (ascii
483                       =jis-x0208@1983 =jis-x0208@1978
484                       latin-jisx0201)
485    ;; output-charset-conversion ((=jis-x0208@1990 =jis-x0208@1983))
486    pre-write-conversion chise-tex-encode-region-for-jis
487    post-read-conversion chise-tex-decode-region
488    mnemonic "pTeX(JIS)/7bit"
489    ))
490
491 (make-coding-system
492  'utf-8-jp-tex 'utf-8
493  "Coding-system of UTF-8 for common glyphs used in Japan."
494  '(pre-write-conversion chise-tex-encode-region-for-utf-8-jis
495    post-read-conversion chise-tex-decode-region
496    charset-g0 =ucs@jp
497    charset-g1 =>ucs-jis
498    charset-g2 =>ucs
499    mnemonic "upTeX(JP)/UTF8"))
500
501
502 ;;; @ End.
503 ;;;
504
505 (provide 'chise-tex)
506
507 ;;; chise-tex.el ends here