1 ;;; chise-tex.el --- Coding-system-based chise2otf-like tool
3 ;; Copyright (C) 2004,2005,2006,2007,2008 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: OTF package, pTeX, CHISE, Multiscript, Multilingual
8 ;; This file is a part of Omega/CHISE.
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (defvar chise-tex-coded-charset-expression-alist
28 '((=ucs-bmp@gb "UCSgb" 4 X)
29 (=ucs-bmp@jis "UCSjis" 4 X)
30 (=ucs-bmp@ks "UCSks" 4 X)
31 (=gt-pj-1 "GTpjA" 4 X)
32 (=gt-pj-2 "GTpjB" 4 X)
33 (=gt-pj-3 "GTpjC" 4 X)
34 (=gt-pj-4 "GTpjD" 4 X)
35 (=gt-pj-5 "GTpjE" 4 X)
36 (=gt-pj-6 "GTpjF" 4 X)
37 (=gt-pj-7 "GTpjG" 4 X)
38 (=gt-pj-8 "GTpjH" 4 X)
39 (=gt-pj-9 "GTpjI" 4 X)
40 (=gt-pj-10 "GTpjJ" 4 X)
41 (=gt-pj-11 "GTpjK" 4 X)
42 (=ruimoku-v6 "Ruimoku" 4 X)
43 (=hanziku-1 "HanzikuA" 4 X)
44 (=hanziku-2 "HanzikuB" 4 X)
45 (=hanziku-3 "HanzikuC" 4 X)
46 (=hanziku-4 "HanzikuD" 4 X)
47 (=hanziku-5 "HanzikuE" 4 X)
48 (=hanziku-6 "HanzikuF" 4 X)
49 (=hanziku-7 "HanzikuG" 4 X)
50 (=hanziku-8 "HanzikuH" 4 X)
51 (=hanziku-9 "HanzikuI" 4 X)
52 (=hanziku-10 "HanzikuJ" 4 X)
53 (=hanziku-11 "HanzikuK" 4 X)
54 (=hanziku-12 "HanzikuL" 4 X)
55 (=ucs-bmp@cns "UCScns" 4 X)
58 (defun chise-tex-encode-region-for-gb (start end)
62 (narrow-to-region start end)
64 (let (chr ret rest spec)
65 (while (and (skip-chars-forward "\x00-\xFF")
67 (setq chr (char-after))
68 (cond ((memq chr '(?
\e$(O#@
\e(B))
70 (insert (format "\\UCSjis{%04X}"
71 (encode-char chr '=ucs@jis)))
73 ((and (setq ret (encode-char chr '=jis-x0208-1983))
76 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
78 ;; (insert (decode-char '=jis-x0208-1983 ret)))
79 ((and (encode-char chr '=ks-x1001)
80 (setq ret (or (encode-char chr '=ucs@ks)
83 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
85 (insert (format "\\UCSks{%04X}" ret)))
87 (setq rest chise-tex-coded-charset-expression-alist)
88 (while (setq spec (car rest))
89 (if (setq ret (encode-char chr (car spec)))
91 (setq rest (cdr rest))))
93 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
95 (insert (format (format "\\%s{%%0%d%s}"
101 (forward-char))))))))
103 (defun chise-tex-encode-region-for-jis (start end)
107 (narrow-to-region start end)
109 (let (chr ret rest spec modifier base modifier-1)
110 (while (and (skip-chars-forward "\x00-\x7F")
112 (setq chr (char-after))
113 (cond ((encode-char chr '=jis-x0208-1983)
115 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
117 ;; (insert (decode-char '=jis-x0208-1983 ret)))
118 ((and (not (eq (char-ucs chr) #x0439))
119 (not (eq (char-ucs chr) #x0451))
120 (setq ret (char-feature chr '=decomposition))
121 (setq modifier (assq (nth 1 ret)
136 (setq base (car ret))
137 (if (and (setq ret (char-feature base '=decomposition))
143 (?\u0300 . "CircGrave")
144 (?\u0301 . "CircAcute")
145 (?\u0303 . "CircTilde")
146 (?\u0309 . "CircHook")
149 (?\u0301 . "HornAcute")
152 (?\u0302 . "Circudot")
154 (insert (format "\\%s{%c}" (cdr modifier-1) (car ret)))
155 (insert (format "\\%s{%c}" (cdr modifier) base))))
156 ((and (or (encode-char chr '=jis-x0213-1-2000)
157 (encode-char chr '=jis-x0213-2-2000))
158 (setq ret (or (encode-char chr '=ucs@jis/2000)
159 (encode-char chr '=ucs@jis/fw)))
162 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
164 (insert (format "\\UCSjis{%04X}" ret)))
165 ((and (encode-char chr '=ks-x1001)
166 (setq ret (or (encode-char chr '=ucs@ks)
169 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
171 (insert (format "\\UCSks{%04X}" ret)))
172 ((setq ret (encode-char chr '=ucs-hangul))
174 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
176 (insert (format "\\UCSks{%04X}" ret)))
185 (insert "{\\usefont{T1}{pxr}{m}{n}\\dj}"))
188 (insert "{\\usefont{T1}{pxr}{m}{n}\\ng}"))
191 (insert "{\\usefont{T1}{pxr}{m}{n}\\k{s}}"))
194 (insert "\\textbullet{}"))
209 (insert "\\UCSjis{0294}"))
210 ((and (encode-char chr '=ucs@jp)
211 (setq ret (char-representative-of-domain chr 'gb))
212 (setq ret (encode-char ret '=ucs@gb))
215 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
217 (insert (format "\\UCSgb{%04X}" ret)))
219 (setq rest chise-tex-coded-charset-expression-alist)
220 (while (setq spec (car rest))
221 (if (setq ret (encode-char chr (car spec)))
223 (setq rest (cdr rest))))
225 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
227 (insert (format (format "\\%s{%%0%d%s}"
233 (forward-char))))))))
235 (defun chise-tex-encode-region-for-utf-8-jis (start end)
239 (narrow-to-region start end)
241 (let ((font-encoding 'T1)
242 chr ret rest spec modifier base modifier-1 pos)
243 (while (and (skip-chars-forward "\x00-\x7F")
245 (setq chr (char-after))
247 ((and (setq ret (encode-char chr '=ucs))
248 (and (<= #x0400 ret)(<= ret #x04F9)))
249 (if (eq font-encoding 'T2A)
254 "\\fontencoding{T2A}\\selectfont{}" nil t)
256 (eq pos (match-end 0)))
257 (insert "\\fontencoding{T2A}\\selectfont{}")
260 (setq font-encoding 'T2A))
263 (unless (eq font-encoding 'T1)
267 "\\fontencoding{T1}\\selectfont{}" nil t)
269 (eq pos (match-end 0)))
270 (insert "\\fontencoding{T1}\\selectfont{}")
272 (setq font-encoding 'T1))
273 (cond ((eq (char-ucs chr) #x00D7)
275 (insert "\\UCSjis{00D7}"))
276 ((encode-char chr '=jis-x0208-1983)
278 ((and (setq ret (encode-char chr '=ucs))
279 (or (and (<= #x0374 ret)(<= ret #x03F3))
282 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
284 ;; (insert (decode-char '=jis-x0208-1983 ret)))
285 ((eq (char-ucs chr) #x012B)
288 ((and (not (eq (char-ucs chr) #x0439))
289 (not (eq (char-ucs chr) #x0451))
290 (setq ret (char-feature chr '=decomposition))
291 (setq modifier (assq (nth 1 ret)
305 (?\u0331 . "umacron")
308 (setq base (car ret))
309 (if (and (setq ret (char-feature base '=decomposition))
315 (?\u0300 . "CircGrave")
316 (?\u0301 . "CircAcute")
317 (?\u0303 . "CircTilde")
318 (?\u0309 . "CircHook")
321 (?\u0301 . "HornAcute")
324 (?\u0302 . "Circudot")
326 (insert (format "\\%s{%c}" (cdr modifier-1) (car ret)))
327 (insert (format "\\%s{%c}" (cdr modifier) base))))
328 ((and (or (encode-char chr '=jis-x0213-1-2000)
329 (encode-char chr '=jis-x0213-2-2000))
330 (setq ret (or (encode-char chr '=ucs@jis/2000)
331 (encode-char chr '=ucs@jis/fw)))
334 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
336 ;; (insert (format "\\UCSjis{%04X}" ret))
338 ((and (encode-char chr '=ks-x1001)
339 (setq ret (or (encode-char chr '=ucs@ks)
342 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
344 (insert (format "\\UCSks{%04X}" ret)))
345 ((setq ret (encode-char chr '=ucs-hangul))
347 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
349 (insert (format "\\UCSks{%04X}" ret)))
358 (insert "{\\usefont{T1}{pxr}{m}{n}\\dj}"))
361 (insert "{\\usefont{T1}{pxr}{m}{n}\\ng}"))
364 (insert "{\\usefont{T1}{pxr}{m}{n}\\k{s}}"))
367 (insert "\\UCSjis{0294}"))
370 (insert "\\textbullet{}"))
385 (insert "\\UCSgb{2637}"))
386 ((and (encode-char chr '=ucs@jp)
387 (setq ret (char-representative-of-domain chr 'gb))
388 (setq ret (encode-char ret '=ucs@gb))
391 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
393 (insert (format "\\UCSgb{%04X}" ret)))
395 (setq rest chise-tex-coded-charset-expression-alist)
396 (while (setq spec (car rest))
397 (if (setq ret (encode-char chr (car spec)))
399 (setq rest (cdr rest))))
401 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
403 (insert (format (format "\\%s{%%0%d%s}"
409 (forward-char))))))))))
411 (defun chise-tex-decode-region (start end)
415 (narrow-to-region start end)
417 (let (macro code ret me rest spec)
418 (while (re-search-forward "\\\\\\(.\\){\\(.\\)}" nil t)
422 (aref (match-string 1) 0)
423 '((?\` . ?\u0300) ; <COMBINING GRAVE ACCENT>
424 (?\' . ?\u0301) ; <COMBINING ACUTE ACCENT>
425 (?^ . ?\u0302) ; <COMBINING CIRCUMFLEX ACCENT>
426 (?~ . ?\u0303) ; <COMBINING TILDE>
427 (?= . ?\u0304) ; <COMBINING MACRON>
428 (?u . ?\u0306) ; <COMBINING BREVE>
429 (?\. . ?\u0307) ; <COMBINING DOT ABOVE>
430 (?\" . ?\u0308) ; <COMBINING DIAERESIS>
431 (?v . ?\u030C) ; <COMBINING CARON>
432 (?d . ?\u0323) ; <COMBINING DOT BELOW>
433 (?c . ?\u0327) ; <COMBINING CEDILLA>
436 (cdr (assq (cdr macro)
437 (char-feature (aref (match-string 2) 0)
439 (delete-region (match-beginning 0)(match-end 0))
442 (while (re-search-forward "\\\\\\([a-zA-Z0-9]+\\){\\([0-9A-Fa-f]+\\)}"
444 (setq macro (match-string 1)
445 code (match-string 2)
448 (setq rest chise-tex-coded-charset-expression-alist)
449 (while (setq spec (car rest))
450 (if (string= (nth 1 spec) macro)
452 (setq rest (cdr rest))))
453 (setq ret (decode-char (car spec)
456 (if (eq (nth 3 spec) 'X)
459 (delete-region (match-beginning 0)(match-end 0))
464 'iso-2022-jp-tex-gb 'iso2022
465 "ISO-2022-JP with TeX representation for GB fonts."
469 ;; input-charset-conversion ((latin-jisx0201 ascii)
470 ;; (japanese-jisx0208-1978 japanese-jisx0208))
471 pre-write-conversion chise-tex-encode-region-for-gb
472 post-read-conversion chise-tex-decode-region
473 mnemonic "pTeX(GB)/7bit"
477 'iso-2022-jp-tex-jis 'iso2022
478 "ISO-2022-JP with TeX representation for JIS fonts."
482 ccs-priority-list (ascii
483 =jis-x0208@1983 =jis-x0208@1978
485 ;; output-charset-conversion ((=jis-x0208@1990 =jis-x0208@1983))
486 pre-write-conversion chise-tex-encode-region-for-jis
487 post-read-conversion chise-tex-decode-region
488 mnemonic "pTeX(JIS)/7bit"
493 "Coding-system of UTF-8 for common glyphs used in Japan."
494 '(pre-write-conversion chise-tex-encode-region-for-utf-8-jis
495 post-read-conversion chise-tex-decode-region
499 mnemonic "upTeX(JP)/UTF8"))
507 ;;; chise-tex.el ends here