(chise-tex-encode-region-for-jis): Don't decompose <CYRILLIC SMALL
[chise/omega.git] / chise2otf / elisp / chise-tex.el
1 ;;; chise-tex.el --- Coding-system based chise2otf like tool
2
3 ;; Copyright (C) 2004,2005,2006 MORIOKA Tomohiko
4
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: OTF package, pTeX, CHISE, Multiscript, Multilingual
7
8 ;; This file is a part of Omega/CHISE.
9
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
14
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 ;; General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING.  If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
24
25 ;;; Code:
26
27 (defvar chise-tex-coded-charset-expression-alist
28   '((=ucs-bmp@gb        "UCSgb"         4 X)
29     (=ucs-bmp@jis       "UCSjis"        4 X)
30     (=ucs-bmp@ks        "UCSks"         4 X)
31     (=gt-pj-1           "GTpjA"         4 X)
32     (=gt-pj-2           "GTpjB"         4 X)
33     (=gt-pj-3           "GTpjC"         4 X)
34     (=gt-pj-4           "GTpjD"         4 X)
35     (=gt-pj-5           "GTpjE"         4 X)
36     (=gt-pj-6           "GTpjF"         4 X)
37     (=gt-pj-7           "GTpjG"         4 X)
38     (=gt-pj-8           "GTpjH"         4 X)
39     (=gt-pj-9           "GTpjI"         4 X)
40     (=gt-pj-10          "GTpjJ"         4 X)
41     (=gt-pj-11          "GTpjK"         4 X)
42     (=ucs-bmp@cns       "UCScns"        4 X)))
43
44 (defun chise-tex-encode-region-for-gb (start end)
45   (interactive "r")
46   (save-excursion
47     (save-restriction
48       (narrow-to-region start end)
49       (goto-char start)
50       (let (chr ret rest spec)
51         (while (and (skip-chars-forward "\x00-\xFF")
52                     (not (eobp)))
53           (setq chr (char-after))
54           (cond ((memq chr '(?\e$(O#@\e(B))
55                  (delete-char)
56                  (insert (format "\\UCSjis{%04X}"
57                                  (encode-char chr '=ucs@jis)))
58                  )
59                 ((and (setq ret (encode-char chr '=jis-x0208-1983))
60                       (< ret #x3021))
61                  (forward-char))
62                 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
63                 ;;  (delete-char)
64                 ;;  (insert (decode-char '=jis-x0208-1983 ret)))
65                 ((and (encode-char chr '=ks-x1001)
66                       (setq ret (or (encode-char chr '=ucs@ks)
67                                     (char-ucs chr))))
68                  (delete-char)
69                  ;; (if (eq (char-before) ?\e$B!T\e(B)
70                  ;;     (insert " "))
71                  (insert (format "\\UCSks{%04X}" ret)))
72                 ((catch 'tag
73                    (setq rest chise-tex-coded-charset-expression-alist)
74                    (while (setq spec (car rest))
75                      (if (setq ret (encode-char chr (car spec)))
76                          (throw 'tag ret))
77                      (setq rest (cdr rest))))
78                  (delete-char)
79                  ;; (if (eq (char-before) ?\e$B!T\e(B)
80                  ;;     (insert " "))
81                  (insert (format (format "\\%s{%%0%d%s}"
82                                          (nth 1 spec)
83                                          (nth 2 spec)
84                                          (nth 3 spec))
85                                  ret)))
86                 (t
87                  (forward-char))))))))
88
89 (defun chise-tex-encode-region-for-jis (start end)
90   (interactive "r")
91   (save-excursion
92     (save-restriction
93       (narrow-to-region start end)
94       (goto-char start)
95       (let (chr ret rest spec modifier)
96         (while (and (skip-chars-forward "\x00-\x7F")
97                     (not (eobp)))
98           (setq chr (char-after))
99           (cond ((encode-char chr '=jis-x0208-1983)
100                  (forward-char))
101                 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
102                 ;;  (delete-char)
103                 ;;  (insert (decode-char '=jis-x0208-1983 ret)))
104                 ((and (not (eq (char-ucs chr) #x0451))
105                       (setq ret (char-feature chr '=decomposition))
106                       (setq modifier (assq (nth 1 ret)
107                                            '((?\u0300 . "`")
108                                              (?\u0301 . "'")
109                                              (?\u0302 . "^")
110                                              (?\u0303 . "~")
111                                              (?\u0304 . "=")
112                                              (?\u0307 . ".")
113                                              (?\u0308 . "\"")
114                                              (?\u030C . "v")
115                                              (?\u0323 . "d")
116                                              (?\u0327 . "c")
117                                              ))))
118                  (delete-char)
119                  (insert (format "\\%s{%c}" (cdr modifier) (car ret))))
120                 ((and (or (encode-char chr '=jis-x0213-1-2000)
121                           (encode-char chr '=jis-x0213-2-2000))
122                       (setq ret (or (encode-char chr '=ucs@jis/2000)
123                                     (encode-char chr '=ucs@jis/fw))))
124                  (delete-char)
125                  ;; (if (eq (char-before) ?\e$B!T\e(B)
126                  ;;     (insert " "))
127                  (insert (format "\\UCSjis{%04X}" ret)))
128                 ((and (encode-char chr '=ks-x1001)
129                       (setq ret (or (encode-char chr '=ucs@ks)
130                                     (char-ucs chr))))
131                  (delete-char)
132                  ;; (if (eq (char-before) ?\e$B!T\e(B)
133                  ;;     (insert " "))
134                  (insert (format "\\UCSks{%04X}" ret)))
135                 ((setq ret (encode-char chr '=ucs-hangul))
136                  (delete-char)
137                  ;; (if (eq (char-before) ?\e$B!T\e(B)
138                  ;;     (insert " "))
139                  (insert (format "\\UCSks{%04X}" ret)))
140                 ((eq chr ?\u2022)
141                  (delete-char)
142                  (insert "\\textbullet{}"))
143                 ((eq chr ?\u0294)
144                  (delete-char)
145                  (insert "\\UCSjis{0294}"))
146                 ((and (encode-char chr '=ucs@jp)
147                       (setq ret (char-representative-of-domain chr 'gb))
148                       (setq ret (encode-char ret '=ucs@gb)))
149                  (delete-char)
150                  ;; (if (eq (char-before) ?\e$B!T\e(B)
151                  ;;     (insert " "))
152                  (insert (format "\\UCSgb{%04X}" ret)))
153                 ((catch 'tag
154                    (setq rest chise-tex-coded-charset-expression-alist)
155                    (while (setq spec (car rest))
156                      (if (setq ret (encode-char chr (car spec)))
157                          (throw 'tag ret))
158                      (setq rest (cdr rest))))
159                  (delete-char)
160                  ;; (if (eq (char-before) ?\e$B!T\e(B)
161                  ;;     (insert " "))
162                  (insert (format (format "\\%s{%%0%d%s}"
163                                          (nth 1 spec)
164                                          (nth 2 spec)
165                                          (nth 3 spec))
166                                  ret)))
167                 (t
168                  (forward-char))))))))
169
170 (defun chise-tex-decode-region (start end)
171   (interactive "r")
172   (save-excursion
173     (save-restriction
174       (narrow-to-region start end)
175       (goto-char start)
176       (let (macro code ret me rest spec)
177         (while (re-search-forward "\\\\\\([a-zA-Z0-9]+\\){\\([0-9A-Fa-f]+\\)}"
178                                   nil t)
179           (setq macro (match-string 1)
180                 code (match-string 2)
181                 me (match-end 0))
182           (if (and (catch 'tag
183                      (setq rest chise-tex-coded-charset-expression-alist)
184                      (while (setq spec (car rest))
185                        (if (string= (nth 1 spec) macro)
186                            (throw 'tag spec))
187                        (setq rest (cdr rest))))
188                    (setq ret (decode-char (car spec)
189                                           (string-to-int
190                                            code
191                                            (if (eq (nth 3 spec) 'X)
192                                                16)))))
193               (progn
194                 (delete-region (match-beginning 0)(match-end 0))
195                 (insert ret))
196             (goto-char me)))))))
197
198 (make-coding-system
199  'iso-2022-jp-tex-gb 'iso2022
200  "ISO-2022-JP with TeX representation for GB fonts."
201  '(charset-g0 ascii
202    short t
203    seven t
204    input-charset-conversion ((latin-jisx0201 ascii)
205                              (japanese-jisx0208-1978 japanese-jisx0208))
206    pre-write-conversion chise-tex-encode-region-for-gb
207    post-read-conversion chise-tex-decode-region
208    mnemonic "pTeX(GB)/7bit"
209    ))
210
211 (make-coding-system
212  'iso-2022-jp-tex-jis 'iso2022
213  "ISO-2022-JP with TeX representation for JIS fonts."
214  '(charset-g0 ascii
215    short t
216    seven t
217    ccs-priority-list (ascii
218                       =jis-x0208@1983 =jis-x0208@1978
219                       latin-jisx0201)
220    pre-write-conversion chise-tex-encode-region-for-jis
221    post-read-conversion chise-tex-decode-region
222    mnemonic "pTeX(JIS)/7bit"
223    ))
224
225
226 ;;; @ End.
227 ;;;
228
229 (provide 'chise-tex)
230
231 ;;; chise-tex.el ends here