3eb194823197f635f43f4ffe3046175c34da91f4
[chise/omega.git] / chise2otf / elisp / chise-tex.el
1 ;;; chise-tex.el --- Coding-system based chise2otf like tool
2
3 ;; Copyright (C) 2004,2005,2006,2007,2008 MORIOKA Tomohiko
4
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: OTF package, pTeX, CHISE, Multiscript, Multilingual
7
8 ;; This file is a part of Omega/CHISE.
9
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
14
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING.  If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Code:
26
27 (defvar chise-tex-coded-charset-expression-alist
28   '((=ucs-bmp@gb        "UCSgb"         4 X)
29     (=ucs-bmp@jis       "UCSjis"        4 X)
30     (=ucs-bmp@ks        "UCSks"         4 X)
31     (=gt-pj-1           "GTpjA"         4 X)
32     (=gt-pj-2           "GTpjB"         4 X)
33     (=gt-pj-3           "GTpjC"         4 X)
34     (=gt-pj-4           "GTpjD"         4 X)
35     (=gt-pj-5           "GTpjE"         4 X)
36     (=gt-pj-6           "GTpjF"         4 X)
37     (=gt-pj-7           "GTpjG"         4 X)
38     (=gt-pj-8           "GTpjH"         4 X)
39     (=gt-pj-9           "GTpjI"         4 X)
40     (=gt-pj-10          "GTpjJ"         4 X)
41     (=gt-pj-11          "GTpjK"         4 X)
42     (=ruimoku-v6        "Ruimoku"       4 X)
43     (=hanziku-1         "HanzikuA"      4 X)
44     (=hanziku-2         "HanzikuB"      4 X)
45     (=hanziku-3         "HanzikuC"      4 X)
46     (=hanziku-4         "HanzikuD"      4 X)
47     (=hanziku-5         "HanzikuE"      4 X)
48     (=hanziku-6         "HanzikuF"      4 X)
49     (=hanziku-7         "HanzikuG"      4 X)
50     (=hanziku-8         "HanzikuH"      4 X)
51     (=hanziku-9         "HanzikuI"      4 X)
52     (=hanziku-10        "HanzikuJ"      4 X)
53     (=hanziku-11        "HanzikuK"      4 X)
54     (=hanziku-12        "HanzikuL"      4 X)
55     (=ucs-bmp@cns       "UCScns"        4 X)
56     ))
57
58 (defvar chise-tex-accent-macro-alist
59   '((?\u0300 . "`") ; <COMBINING GRAVE ACCENT>
60     (?\u0301 . "'") ; <COMBINING ACUTE ACCENT>
61     (?\u0302 . "^") ; <COMBINING CIRCUMFLEX ACCENT>
62     (?\u0303 . "~") ; <COMBINING TILDE>
63     (?\u0304 . "=") ; <COMBINING MACRON>
64     (?\u0306 . "u") ; <COMBINING BREVE>
65     (?\u0307 . ".") ; <COMBINING DOT ABOVE>
66     (?\u0308 . "\"") ; <COMBINING DIAERESIS>
67     (?\u0309 . "Hook")
68     (?\u030C . "v") ; <COMBINING CARON>
69     (?\u0323 . "d") ; <COMBINING DOT BELOW>
70     (?\u0327 . "c") ; <COMBINING CEDILLA>
71     (?\u032E . "ubreve")
72     (?\u0331 . "umacron")
73     ))
74
75 (defun chise-tex-encode-region-for-gb (start end)
76   (interactive "r")
77   (save-excursion
78     (save-restriction
79       (narrow-to-region start end)
80       (goto-char start)
81       (let (chr ret rest spec)
82         (while (and (skip-chars-forward "\x00-\xFF")
83                     (not (eobp)))
84           (setq chr (char-after))
85           (cond ((memq chr '(?\e$(O#@\e(B))
86                  (delete-char)
87                  (insert (format "\\UCSjis{%04X}"
88                                  (encode-char chr '=ucs@jis)))
89                  )
90                 ((and (setq ret (encode-char chr '=jis-x0208-1983))
91                       (< ret #x3021))
92                  (forward-char))
93                 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
94                 ;;  (delete-char)
95                 ;;  (insert (decode-char '=jis-x0208-1983 ret)))
96                 ((and (encode-char chr '=ks-x1001)
97                       (setq ret (or (encode-char chr '=ucs@ks)
98                                     (char-ucs chr))))
99                  (delete-char)
100                  ;; (if (eq (char-before) ?\e$B!T\e(B)
101                  ;;     (insert " "))
102                  (insert (format "\\UCSks{%04X}" ret)))
103                 ((catch 'tag
104                    (setq rest chise-tex-coded-charset-expression-alist)
105                    (while (setq spec (car rest))
106                      (if (setq ret (encode-char chr (car spec)))
107                          (throw 'tag ret))
108                      (setq rest (cdr rest))))
109                  (delete-char)
110                  ;; (if (eq (char-before) ?\e$B!T\e(B)
111                  ;;     (insert " "))
112                  (insert (format (format "\\%s{%%0%d%s}"
113                                          (nth 1 spec)
114                                          (nth 2 spec)
115                                          (nth 3 spec))
116                                  ret)))
117                 (t
118                  (forward-char))))))))
119
120 (defun chise-tex-encode-region-for-jis (start end)
121   (interactive "r")
122   (save-excursion
123     (save-restriction
124       (narrow-to-region start end)
125       (goto-char start)
126       (let (chr ret rest spec modifier base modifier-1)
127         (while (and (skip-chars-forward "\x00-\x7F")
128                     (not (eobp)))
129           (setq chr (char-after))
130           (cond ((encode-char chr '=jis-x0208-1983)
131                  (forward-char))
132                 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
133                 ;;  (delete-char)
134                 ;;  (insert (decode-char '=jis-x0208-1983 ret)))
135                 ((and (not (eq (char-ucs chr) #x0439))
136                       (not (eq (char-ucs chr) #x0451))
137                       (setq ret (char-feature chr '=decomposition))
138                       (setq modifier (assq (nth 1 ret)
139                                            '((?\u0300 . "`")
140                                              (?\u0301 . "'")
141                                              (?\u0302 . "^")
142                                              (?\u0303 . "~")
143                                              (?\u0304 . "=")
144                                              (?\u0306 . "u")
145                                              (?\u0307 . ".")
146                                              (?\u0308 . "\"")
147                                              (?\u0309 . "Hook")
148                                              (?\u030C . "v")
149                                              (?\u0323 . "d")
150                                              (?\u0327 . "c")
151                                              ))))
152                  (delete-char)
153                  (setq base (car ret))
154                  (if (and (setq ret (char-feature base '=decomposition))
155                           (setq modifier-1
156                                 (assq (car modifier)
157                                       (cdr
158                                        (assq (nth 1 ret)
159                                              '((?\u0302
160                                                 (?\u0300 . "CircGrave")
161                                                 (?\u0301 . "CircAcute")
162                                                 (?\u0303 . "CircTilde")
163                                                 (?\u0309 . "CircHook")
164                                                 )
165                                                (?\u031B
166                                                 (?\u0301 . "HornAcute")
167                                                 )
168                                                (?\u0323
169                                                 (?\u0302 . "Circudot")
170                                                 )))))))
171                      (insert (format "\\%s{%c}" (cdr modifier-1) (car ret)))
172                    (insert (format "\\%s{%c}" (cdr modifier) base))))
173                 ((and (or (encode-char chr '=jis-x0213-1-2000)
174                           (encode-char chr '=jis-x0213-2-2000))
175                       (setq ret (or (encode-char chr '=ucs@jis/2000)
176                                     (encode-char chr '=ucs@jis/fw)))
177                       (<= ret #xFFFF))
178                  (delete-char)
179                  ;; (if (eq (char-before) ?\e$B!T\e(B)
180                  ;;     (insert " "))
181                  (insert (format "\\UCSjis{%04X}" ret)))
182                 ((and (encode-char chr '=ks-x1001)
183                       (setq ret (or (encode-char chr '=ucs@ks)
184                                     (char-ucs chr))))
185                  (delete-char)
186                  ;; (if (eq (char-before) ?\e$B!T\e(B)
187                  ;;     (insert " "))
188                  (insert (format "\\UCSks{%04X}" ret)))
189                 ((setq ret (encode-char chr '=ucs-hangul))
190                  (delete-char)
191                  ;; (if (eq (char-before) ?\e$B!T\e(B)
192                  ;;     (insert " "))
193                  (insert (format "\\UCSks{%04X}" ret)))
194                 ((eq chr ?\u00B2)
195                  (delete-char)
196                  (insert "$^2$"))
197                 ((eq chr ?\u00B3)
198                  (delete-char)
199                  (insert "$^3$"))
200                 ((eq chr ?\u0111)
201                  (delete-char)
202                  (insert "{\\usefont{T1}{pxr}{m}{n}\\dj}"))
203                 ((eq chr ?\u014B)
204                  (delete-char)
205                  (insert "{\\usefont{T1}{pxr}{m}{n}\\ng}"))
206                 ((eq chr ?\u0282)
207                  (delete-char)
208                  (insert "{\\usefont{T1}{pxr}{m}{n}\\k{s}}"))
209                 ((eq chr ?\u2022)
210                  (delete-char)
211                  (insert "\\textbullet{}"))
212                 ((eq chr ?\u2081)
213                  (delete-char)
214                  (insert "$_1$"))
215                 ((eq chr ?\u2082)
216                  (delete-char)
217                  (insert "$_2$"))
218                 ((eq chr ?\u2083)
219                  (delete-char)
220                  (insert "$_3$"))
221                 ((eq chr ?\u2085)
222                  (delete-char)
223                  (insert "$_5$"))
224                 ((eq chr ?\u0294)
225                  (delete-char)
226                  (insert "\\UCSjis{0294}"))
227                 ((and (encode-char chr '=ucs@jp)
228                       (setq ret (char-representative-of-domain chr 'gb))
229                       (setq ret (encode-char ret '=ucs@gb))
230                       (<= ret #xFFFF))
231                  (delete-char)
232                  ;; (if (eq (char-before) ?\e$B!T\e(B)
233                  ;;     (insert " "))
234                  (insert (format "\\UCSgb{%04X}" ret)))
235                 ((catch 'tag
236                    (setq rest chise-tex-coded-charset-expression-alist)
237                    (while (setq spec (car rest))
238                      (if (setq ret (encode-char chr (car spec)))
239                          (throw 'tag ret))
240                      (setq rest (cdr rest))))
241                  (delete-char)
242                  ;; (if (eq (char-before) ?\e$B!T\e(B)
243                  ;;     (insert " "))
244                  (insert (format (format "\\%s{%%0%d%s}"
245                                          (nth 1 spec)
246                                          (nth 2 spec)
247                                          (nth 3 spec))
248                                  ret)))
249                 (t
250                  (forward-char))))))))
251
252 (defun chise-tex-encode-region-for-utf-8-jis (start end)
253   (interactive "r")
254   (save-excursion
255     (save-restriction
256       (narrow-to-region start end)
257       (goto-char start)
258       (let ((font-encoding 'T1)
259             chr ret rest spec modifier base modifier-1 pos)
260         (while (and (skip-chars-forward "\x00-\x7F")
261                     (not (eobp)))
262           (setq chr (char-after))
263           (cond
264            ((and (setq ret (encode-char chr '=ucs))
265                  (and (<= #x0400 ret)(<= ret #x04F9)))
266             (if (eq font-encoding 'T2A)
267                 (forward-char)
268               (setq pos (point))
269               (unless (and (prog1
270                                (search-backward
271                                 "\\fontencoding{T2A}\\selectfont{}" nil t)
272                              (goto-char pos))
273                            (eq pos (match-end 0)))
274                 (insert "\\fontencoding{T2A}\\selectfont{}")
275                 )
276               (forward-char)
277               (setq font-encoding 'T2A))
278             )
279            ((and (setq ret (encode-char chr '=ucs))
280                  (and (<= #x0374 ret)(<= ret #x03F3)))
281             (if (eq font-encoding 'LGR)
282                 (forward-char)
283               (setq pos (point))
284               (unless (and (prog1
285                                (search-backward
286                                 "\\fontencoding{LGR}\\selectfont{}" nil t)
287                              (goto-char pos))
288                            (eq pos (match-end 0)))
289                 (insert "\\fontencoding{LGR}\\selectfont{}")
290                 )
291               (forward-char)
292               (setq font-encoding 'LGR))
293             (skip-chars-forward "\u0374-\u03F3")
294             (unless (eq font-encoding 'T1)
295               (unless (looking-at
296                        "\\\\fontencoding{T1}\\\\selectfont{}")
297                 (insert "\\fontencoding{T1}\\selectfont{}")
298                 )
299               (setq font-encoding 'T1))
300             )
301            (t
302             (unless (eq font-encoding 'T1)
303               (setq pos (point))
304               (unless (and (prog1
305                                (search-backward
306                                 "\\fontencoding{T1}\\selectfont{}" nil t)
307                              (goto-char pos))
308                            (eq pos (match-end 0)))
309                 (insert "\\fontencoding{T1}\\selectfont{}")
310                 )
311               (setq font-encoding 'T1))
312             (cond ((eq (char-ucs chr) #x00D7)
313                    (delete-char)
314                    (insert "\\UCSjis{00D7}"))
315                   ((encode-char chr '=jis-x0208-1983)
316                    (forward-char))
317                   ((and (setq ret (char-ucs chr))
318                         (or (eq ret #x00C5)
319                             (eq ret #x00E5)
320                             (eq ret #x1E2B)
321                             ))
322                    (forward-char))
323                   ;; ((setq ret (encode-char chr '=jis-x0208-1990))
324                   ;;  (delete-char)
325                   ;;  (insert (decode-char '=jis-x0208-1983 ret)))
326                   ((eq (char-ucs chr) #x012B)
327                    (delete-char)
328                    (insert "\\={\\i}"))
329                   ((and (not (eq (char-ucs chr) #x0439))
330                         (not (eq (char-ucs chr) #x0451))
331                         (setq ret (char-feature chr '=decomposition))
332                         (setq modifier (assq (nth 1 ret)
333                                              chise-tex-accent-macro-alist)))
334                    (delete-char)
335                    (setq base (car ret))
336                    (if (and (setq ret (char-feature base '=decomposition))
337                             (setq modifier-1
338                                   (assq (car modifier)
339                                         (cdr
340                                          (assq (nth 1 ret)
341                                                '((?\u0302
342                                                   (?\u0300 . "\\CircGrave{%c}")
343                                                   (?\u0301 . "\\'{\\^%c}")
344                                                   (?\u0303 . "\\~{\\^%c}")
345                                                   (?\u0309 . "\\CircHook{%c}")
346                                                   )
347                                                  (?\u031B
348                                                   (?\u0301 . "\\HornAcute{%c}")
349                                                   (?\u0303 . "\\HornTilde{%c}")
350                                                   )
351                                                  (?\u0323
352                                                   (?\u0302 . "\\Circudot{%c}")
353                                                   )))))))
354                        (insert (format (cdr modifier-1) (car ret)))
355                      (insert (format "\\%s{%c}" (cdr modifier) base))))
356                   ((and (or (encode-char chr '=jis-x0213-1-2000)
357                             (encode-char chr '=jis-x0213-2-2000))
358                         (setq ret (or (encode-char chr '=ucs@jis/2000)
359                                       (encode-char chr '=ucs@jis/fw)))
360                         (<= ret #xFFFF))
361                    ;; (delete-char)
362                    ;; (if (eq (char-before) ?\e$B!T\e(B)
363                    ;;     (insert " "))
364                    ;; (insert (format "\\UCSjis{%04X}" ret))
365                    (forward-char))
366                   ((and (encode-char chr '=ks-x1001)
367                         (setq ret (or (encode-char chr '=ucs@ks)
368                                       (char-ucs chr))))
369                    (delete-char)
370                    ;; (if (eq (char-before) ?\e$B!T\e(B)
371                    ;;     (insert " "))
372                    (insert (format "\\UCSks{%04X}" ret)))
373                   ((setq ret (encode-char chr '=ucs-hangul))
374                    (delete-char)
375                    ;; (if (eq (char-before) ?\e$B!T\e(B)
376                    ;;     (insert " "))
377                    (insert (format "\\UCSks{%04X}" ret)))
378                   ((eq chr ?\u00B2)
379                    (delete-char)
380                    (insert "$^2$"))
381                   ((eq chr ?\u00B3)
382                    (delete-char)
383                    (insert "$^3$"))
384                   ((eq chr ?\u0111)
385                    (delete-char)
386                    (insert "{\\usefont{T1}{pxr}{m}{n}\\dj}"))
387                   ((eq chr ?\u014B)
388                    (delete-char)
389                    (insert "{\\usefont{T1}{pxr}{m}{n}\\ng}"))
390                   ((eq chr ?\u0282)
391                    (delete-char)
392                    (insert "{\\usefont{T1}{pxr}{m}{n}\\k{s}}"))
393                   ((eq chr ?\u0294)
394                    (delete-char)
395                    (insert "\\UCSjis{0294}"))
396                   ((eq chr ?\u2022)
397                    (delete-char)
398                    (insert "\\textbullet{}"))
399                   ((eq chr ?\u2081)
400                    (delete-char)
401                    (insert "$_1$"))
402                   ((eq chr ?\u2082)
403                    (delete-char)
404                    (insert "$_2$"))
405                   ((eq chr ?\u2083)
406                    (delete-char)
407                    (insert "$_3$"))
408                   ((eq chr ?\u2085)
409                    (delete-char)
410                    (insert "$_5$"))
411                   ((eq chr ?\u2637)
412                    (delete-char)
413                    (insert "\\UCSgb{2637}"))
414                   ((and (encode-char chr '=ucs@jp)
415                         (setq ret (char-representative-of-domain chr 'gb))
416                         (setq ret (encode-char ret '=ucs@gb))
417                         (<= ret #xFFFF))
418                    (delete-char)
419                    ;; (if (eq (char-before) ?\e$B!T\e(B)
420                    ;;     (insert " "))
421                    (insert (format "\\UCSgb{%04X}" ret)))
422                   ((catch 'tag
423                      (setq rest chise-tex-coded-charset-expression-alist)
424                      (while (setq spec (car rest))
425                        (if (setq ret (encode-char chr (car spec)))
426                            (throw 'tag ret))
427                        (setq rest (cdr rest))))
428                    (delete-char)
429                    ;; (if (eq (char-before) ?\e$B!T\e(B)
430                    ;;     (insert " "))
431                    (insert (format (format "\\%s{%%0%d%s}"
432                                            (nth 1 spec)
433                                            (nth 2 spec)
434                                            (nth 3 spec))
435                                    ret)))
436                   (t
437                    (forward-char))))))))))
438
439 (defun chise-tex-decode-region (start end)
440   (interactive "r")
441   (save-excursion
442     (save-restriction
443       (narrow-to-region start end)
444       (goto-char start)
445       (let (macro code ret me rest spec)
446         (while (search-forward "\\={\\i}" nil t)
447           (replace-match "\e.D\eNo" t t))
448         (goto-char start)
449         (while (re-search-forward "\\\\\\(.\\){\\(.\\)}" nil t)
450           (when (and
451                  (setq macro
452                        (assq
453                         (aref (match-string 1) 0)
454                         '((?\` . ?\u0300) ; <COMBINING GRAVE ACCENT>
455                           (?\' . ?\u0301) ; <COMBINING ACUTE ACCENT>
456                           (?^  . ?\u0302) ; <COMBINING CIRCUMFLEX ACCENT>
457                           (?~  . ?\u0303) ; <COMBINING TILDE>
458                           (?=  . ?\u0304) ; <COMBINING MACRON>
459                           (?u  . ?\u0306) ; <COMBINING BREVE>
460                           (?\. . ?\u0307) ; <COMBINING DOT ABOVE>
461                           (?\" . ?\u0308) ; <COMBINING DIAERESIS>
462                           (?v  . ?\u030C) ; <COMBINING CARON>
463                           (?d  . ?\u0323) ; <COMBINING DOT BELOW>
464                           (?c  . ?\u0327) ; <COMBINING CEDILLA>
465                           )))
466                  (setq ret
467                        (cdr (assq (cdr macro)
468                                   (char-feature (aref (match-string 2) 0)
469                                                 'composition)))))
470             (delete-region (match-beginning 0)(match-end 0))
471             (insert ret)))
472         (goto-char start)
473         (while (re-search-forward "\\\\\\([a-zA-Z0-9]+\\){\\([0-9A-Fa-f]+\\)}"
474                                   nil t)
475           (setq macro (match-string 1)
476                 code (match-string 2)
477                 me (match-end 0))
478           (if (and (catch 'tag
479                      (setq rest chise-tex-coded-charset-expression-alist)
480                      (while (setq spec (car rest))
481                        (if (string= (nth 1 spec) macro)
482                            (throw 'tag spec))
483                        (setq rest (cdr rest))))
484                    (setq ret (decode-char (car spec)
485                                           (string-to-int
486                                            code
487                                            (if (eq (nth 3 spec) 'X)
488                                                16)))))
489               (progn
490                 (delete-region (match-beginning 0)(match-end 0))
491                 (insert ret))
492             (goto-char me)))))))
493
494 (make-coding-system
495  'iso-2022-jp-tex-gb 'iso2022
496  "ISO-2022-JP with TeX representation for GB fonts."
497  '(charset-g0 ascii
498    short t
499    seven t
500    ;; input-charset-conversion ((latin-jisx0201 ascii)
501    ;;                        (japanese-jisx0208-1978 japanese-jisx0208))
502    pre-write-conversion chise-tex-encode-region-for-gb
503    post-read-conversion chise-tex-decode-region
504    mnemonic "pTeX(GB)/7bit"
505    ))
506
507 (make-coding-system
508  'iso-2022-jp-tex-jis 'iso2022
509  "ISO-2022-JP with TeX representation for JIS fonts."
510  '(charset-g0 ascii
511    short t
512    seven t
513    ccs-priority-list (ascii
514                       =jis-x0208@1983 =jis-x0208@1978
515                       latin-jisx0201)
516    ;; output-charset-conversion ((=jis-x0208@1990 =jis-x0208@1983))
517    pre-write-conversion chise-tex-encode-region-for-jis
518    post-read-conversion chise-tex-decode-region
519    mnemonic "pTeX(JIS)/7bit"
520    ))
521
522 (make-coding-system
523  'utf-8-jp-tex 'utf-8
524  "Coding-system of UTF-8 for common glyphs used in Japan."
525  '(pre-write-conversion chise-tex-encode-region-for-utf-8-jis
526    post-read-conversion chise-tex-decode-region
527    charset-g0 =ucs@jp
528    charset-g1 =>ucs-jis
529    charset-g2 =>ucs
530    mnemonic "upTeX(JP)/UTF8"))
531
532
533 ;;; @ End.
534 ;;;
535
536 (provide 'chise-tex)
537
538 ;;; chise-tex.el ends here