1 ;;; chise-tex.el --- Coding-system based chise2otf like tool
3 ;; Copyright (C) 2004,2005,2006,2007,2008,2009 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: OTF package, pTeX, CHISE, Multiscript, Multilingual
8 ;; This file is a part of Omega/CHISE.
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (defvar chise-tex-coded-charset-expression-alist
28 '((=ucs-bmp@gb "UCSgb" 4 X)
29 (=ucs-bmp@jis "UCSjis" 4 X)
30 (=ucs-bmp@ks "UCSks" 4 X)
31 (=gt-pj-1 "GTpjA" 4 X)
32 (=gt-pj-2 "GTpjB" 4 X)
33 (=gt-pj-3 "GTpjC" 4 X)
34 (=gt-pj-4 "GTpjD" 4 X)
35 (=gt-pj-5 "GTpjE" 4 X)
36 (=gt-pj-6 "GTpjF" 4 X)
37 (=gt-pj-7 "GTpjG" 4 X)
38 (=gt-pj-8 "GTpjH" 4 X)
39 (=gt-pj-9 "GTpjI" 4 X)
40 (=gt-pj-10 "GTpjJ" 4 X)
41 (=gt-pj-11 "GTpjK" 4 X)
42 (=ruimoku-v6 "Ruimoku" 4 X)
43 (=hanziku-1 "HanzikuA" 4 X)
44 (=hanziku-2 "HanzikuB" 4 X)
45 (=hanziku-3 "HanzikuC" 4 X)
46 (=hanziku-4 "HanzikuD" 4 X)
47 (=hanziku-5 "HanzikuE" 4 X)
48 (=hanziku-6 "HanzikuF" 4 X)
49 (=hanziku-7 "HanzikuG" 4 X)
50 (=hanziku-8 "HanzikuH" 4 X)
51 (=hanziku-9 "HanzikuI" 4 X)
52 (=hanziku-10 "HanzikuJ" 4 X)
53 (=hanziku-11 "HanzikuK" 4 X)
54 (=hanziku-12 "HanzikuL" 4 X)
55 (=ucs-bmp@cns "UCScns" 4 X)
56 (thai-tis620 "ThaiTIS" 2 X)
59 (defvar chise-tex-accent-macro-alist
60 '((?\u0300 . "`") ; <COMBINING GRAVE ACCENT>
61 (?\u0301 . "'") ; <COMBINING ACUTE ACCENT>
62 (?\u0302 . "^") ; <COMBINING CIRCUMFLEX ACCENT>
63 (?\u0303 . "~") ; <COMBINING TILDE>
64 (?\u0304 . "=") ; <COMBINING MACRON>
65 (?\u0306 . "u") ; <COMBINING BREVE>
66 (?\u0307 . ".") ; <COMBINING DOT ABOVE>
67 (?\u0308 . "\"") ; <COMBINING DIAERESIS>
69 (?\u030B . "H") ; <COMBINING DOUBLE ACUTE ACCENT>
70 (?\u030C . "v") ; <COMBINING CARON>
71 (?\u0323 . "d") ; <COMBINING DOT BELOW>
72 (?\u0327 . "c") ; <COMBINING CEDILLA>
73 (?\u0328 . "k") ; <COMBINING OGONEK>
78 (defun chise-tex-encode-region-for-gb (start end)
82 (narrow-to-region start end)
84 (let (chr ret rest spec)
85 (while (and (skip-chars-forward "\x00-\xFF")
87 (setq chr (char-after))
88 (cond ((memq chr '(?
\e$(O#@
\e(B))
90 (insert (format "\\UCSjis{%04X}"
91 (encode-char chr '=ucs@jis)))
93 ((and (setq ret (encode-char chr '=jis-x0208-1983))
96 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
98 ;; (insert (decode-char '=jis-x0208-1983 ret)))
99 ((and (encode-char chr '=ks-x1001)
100 (setq ret (or (encode-char chr '=ucs@ks)
103 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
105 (insert (format "\\UCSks{%04X}" ret)))
107 (setq rest chise-tex-coded-charset-expression-alist)
108 (while (setq spec (car rest))
109 (if (setq ret (encode-char chr (car spec)))
111 (setq rest (cdr rest))))
113 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
115 (insert (format (format "\\%s{%%0%d%s}"
121 (forward-char))))))))
123 (defun chise-tex-encode-ucs-char-at-point (&optional chr)
125 (setq chr (char-after)))
128 ((and (encode-char chr '=ks-x1001)
129 (setq ret (or (encode-char chr '=ucs@ks)
132 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
134 (insert (format "\\UCSks{%04X}" ret))
136 ((setq ret (encode-char chr '=ucs-hangul))
138 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
140 (insert (format "\\UCSks{%04X}" ret))
156 (insert "{\\usefont{T1}{pxr}{m}{n}\\dj}")
164 (insert "{\\usefont{T1}{pxr}{m}{n}\\ng}")
168 (insert "\\textturna{}")
172 (insert "\\textopeno{}")
176 (insert "\\textramshorns{}")
180 (insert "\\textturnm{}")
184 (insert "{\\usefont{T1}{pxr}{m}{n}\\k{s}}")
188 ;; (insert "\\UCSjis{0294}")
189 (insert "\\textglotstop{}")
193 (insert "\\textbullet{}")
257 (insert "\\UCSgb{2637}")
259 ((eq (encode-char chr '=ucs@jis) #x8DBC)
261 (insert "\\GTpjG{4933}")
263 ((and (encode-char chr '=ucs@jp)
264 (setq ret (char-representative-of-domain chr 'gb))
265 (setq ret (encode-char ret '=ucs@gb))
268 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
270 (insert (format "\\UCSgb{%04X}" ret))
273 (setq rest chise-tex-coded-charset-expression-alist)
274 (while (setq spec (car rest))
275 (if (setq ret (encode-char chr (car spec)))
277 (setq rest (cdr rest))))
279 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
281 (insert (format (format "\\%s{%%0%d%s}"
289 (defun chise-tex-encode-region-for-jis (start end)
293 (narrow-to-region start end)
297 modifier base modifier-1)
298 (while (and (skip-chars-forward "\x00-\x7F")
300 (setq chr (char-after))
301 (cond ((encode-char chr '=jis-x0208-1983)
303 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
305 ;; (insert (decode-char '=jis-x0208-1983 ret)))
306 ((and (not (eq (char-ucs chr) #x0439))
307 (not (eq (char-ucs chr) #x0451))
308 (setq ret (char-feature chr '=decomposition))
309 (setq modifier (assq (nth 1 ret)
324 (setq base (car ret))
325 (if (and (setq ret (char-feature base '=decomposition))
331 (?\u0300 . "CircGrave")
332 (?\u0301 . "CircAcute")
333 (?\u0303 . "CircTilde")
334 (?\u0309 . "CircHook")
337 (?\u0301 . "HornAcute")
340 (?\u0302 . "Circudot")
342 (insert (format "\\%s{%c}" (cdr modifier-1) (car ret)))
343 (insert (format "\\%s{%c}" (cdr modifier) base))))
344 ((and (or (encode-char chr '=jis-x0213-1-2000)
345 (encode-char chr '=jis-x0213-2-2000))
346 (setq ret (or (encode-char chr '=ucs@jis/2000)
347 (encode-char chr '=ucs@jis/fw)))
350 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
352 (insert (format "\\UCSjis{%04X}" ret)))
353 ((chise-tex-encode-ucs-char-at-point chr))
355 (forward-char))))))))
357 (defun chise-tex-encode-region-for-utf-8-jis (start end)
361 (narrow-to-region start end)
363 (let ((font-encoding 'T1)
366 modifier base modifier-1 pos)
367 (while (and (skip-chars-forward "\x00-\x7F")
369 (setq chr (char-after))
371 ((and (setq ret (encode-char chr '=ucs))
372 (and (<= #x0400 ret)(<= ret #x04F9)))
373 (if (eq font-encoding 'T2A)
376 ;; (unless (and (prog1
378 ;; "\\fontencoding{T2A}\\selectfont{}" nil t)
380 ;; (eq pos (match-end 0)))
381 ;; (insert "\\fontencoding{T2A}\\selectfont{}")
383 (if (search-backward "\\CyrillicScript{"
384 (- pos (eval-when-compile
385 (length "\\CyrillicScript{")))
387 (search-forward "}" nil t)
388 (insert "\\CyrillicScript{")
390 (skip-chars-forward "---\u0400-\u04F9 ")
392 ;; (setq font-encoding 'T2A)
395 ((and (setq ret (encode-char chr '=ucs))
396 (and (<= #x0374 ret)(<= ret #x03F3)))
397 (if (eq font-encoding 'LGR)
400 ;; (unless (and (prog1
402 ;; "\\fontencoding{LGR}\\selectfont{}" nil t)
404 ;; (eq pos (match-end 0)))
405 ;; (insert "\\fontencoding{LGR}\\selectfont{}")
407 (if (search-backward "\\GreekScript{"
408 (- pos (eval-when-compile
409 (length "\\GreekScript{")))
411 (search-forward "}" nil t)
412 (insert "\\GreekScript{")
414 (skip-chars-forward "\u0374-\u03F3 ")
416 ;; (setq font-encoding 'LGR)
418 ;; (unless (eq font-encoding 'T1)
419 ;; (unless (looking-at
420 ;; "\\\\fontencoding{T1}\\\\selectfont{}")
421 ;; (insert "\\fontencoding{T1}\\selectfont{}")
423 ;; (setq font-encoding 'T1))
426 (unless (eq font-encoding 'T1)
430 "\\fontencoding{T1}\\selectfont{}" nil t)
432 (eq pos (match-end 0)))
433 (insert "\\fontencoding{T1}\\selectfont{}")
435 (setq font-encoding 'T1))
436 (cond ((eq (char-ucs chr) #x00D7)
438 (insert "\\UCSjis{00D7}"))
439 ((encode-char chr '=jis-x0208-1983)
441 ((and (setq ret (char-ucs chr))
447 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
449 ;; (insert (decode-char '=jis-x0208-1983 ret)))
450 ((eq (char-ucs chr) #x012B)
454 ((setq ret (encode-char chr 'thai-tis620))
456 (insert (format "\\ThaiTIS{%X}" (logior ret #x80)))
458 ((and (not (eq (char-ucs chr) #x0439))
459 (not (eq (char-ucs chr) #x0451))
460 (setq ret (char-feature chr '=decomposition))
461 (setq modifier (assq (nth 1 ret)
462 chise-tex-accent-macro-alist)))
464 (setq base (car ret))
465 (if (and (setq ret (char-feature base '=decomposition))
473 (?\u0300 . "\\CircGrave{%c}")
474 (?\u0301 . "\\'{\\^%c}")
475 (?\u0303 . "\\~{\\^%c}")
476 (?\u0309 . "\\CircHook{%c}")
478 (?\u0304 ; <COMBINING MACRON>
479 (?\u0301 ; <COMBINING ACUTE ACCENT>
480 . "\\textacutemacron{%c}")
483 (?\u0301 . "\\HornAcute{%c}")
484 (?\u0303 . "\\HornTilde{%c}")
487 (?\u0302 . "\\Circudot{%c}")
489 (insert (format (cdr modifier-1) (car ret)))
490 (insert (format "\\%s{%c}" (cdr modifier) base))))
491 ((and (or (encode-char chr '=jis-x0213-1-2000)
492 (encode-char chr '=jis-x0213-2-2000))
493 (setq ret (or (encode-char chr '=ucs@jis/2000)
494 (encode-char chr '=ucs@jis/fw)))
497 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
499 ;; (insert (format "\\UCSjis{%04X}" ret))
501 ((chise-tex-encode-ucs-char-at-point chr))
503 (forward-char))))))))))
505 (defun chise-tex-decode-region (start end)
509 (narrow-to-region start end)
511 (let (macro code ret me rest spec)
512 (while (search-forward "\\={\\i}" nil t)
513 (replace-match "
\e.D
\eNo" t t))
515 (while (re-search-forward "\\\\\\(.\\){\\(.\\)}" nil t)
519 (aref (match-string 1) 0)
520 '((?\` . ?\u0300) ; <COMBINING GRAVE ACCENT>
521 (?\' . ?\u0301) ; <COMBINING ACUTE ACCENT>
522 (?^ . ?\u0302) ; <COMBINING CIRCUMFLEX ACCENT>
523 (?~ . ?\u0303) ; <COMBINING TILDE>
524 (?= . ?\u0304) ; <COMBINING MACRON>
525 (?u . ?\u0306) ; <COMBINING BREVE>
526 (?\. . ?\u0307) ; <COMBINING DOT ABOVE>
527 (?\" . ?\u0308) ; <COMBINING DIAERESIS>
528 (?v . ?\u030C) ; <COMBINING CARON>
529 (?d . ?\u0323) ; <COMBINING DOT BELOW>
530 (?c . ?\u0327) ; <COMBINING CEDILLA>
533 (cdr (assq (cdr macro)
534 (char-feature (aref (match-string 2) 0)
536 (delete-region (match-beginning 0)(match-end 0))
539 (while (re-search-forward "\\\\\\([a-zA-Z0-9]+\\){\\([0-9A-Fa-f]+\\)}"
541 (setq macro (match-string 1)
542 code (match-string 2)
545 (setq rest chise-tex-coded-charset-expression-alist)
546 (while (setq spec (car rest))
547 (if (string= (nth 1 spec) macro)
549 (setq rest (cdr rest))))
550 (setq ret (decode-char (car spec)
553 (if (eq (nth 3 spec) 'X)
556 (delete-region (match-beginning 0)(match-end 0))
561 'iso-2022-jp-tex-gb 'iso2022
562 "ISO-2022-JP with TeX representation for GB fonts."
566 ;; input-charset-conversion ((latin-jisx0201 ascii)
567 ;; (japanese-jisx0208-1978 japanese-jisx0208))
568 pre-write-conversion chise-tex-encode-region-for-gb
569 post-read-conversion chise-tex-decode-region
570 mnemonic "pTeX(GB)/7bit"
574 'iso-2022-jp-tex-jis 'iso2022
575 "ISO-2022-JP with TeX representation for JIS fonts."
579 ccs-priority-list (ascii
580 =jis-x0208@1983 =jis-x0208@1978
582 ;; output-charset-conversion ((=jis-x0208@1990 =jis-x0208@1983))
583 pre-write-conversion chise-tex-encode-region-for-jis
584 post-read-conversion chise-tex-decode-region
585 mnemonic "pTeX(JIS)/7bit"
590 "Coding-system of UTF-8 for common glyphs used in Japan."
591 '(pre-write-conversion chise-tex-encode-region-for-utf-8-jis
592 post-read-conversion chise-tex-decode-region
596 mnemonic "upTeX(JP)/UTF8"))
604 ;;; chise-tex.el ends here