1 ;;; chise-tex.el --- Coding-system based chise2otf like tool
3 ;; Copyright (C) 2004,2005,2006,2007,2008,2009,2010 MORIOKA Tomohiko
5 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
6 ;; Keywords: OTF package, pTeX, CHISE, Multiscript, Multilingual
8 ;; This file is a part of Omega/CHISE.
10 ;; This program is free software; you can redistribute it and/or
11 ;; modify it under the terms of the GNU General Public License as
12 ;; published by the Free Software Foundation; either version 2, or (at
13 ;; your option) any later version.
15 ;; This program is distributed in the hope that it will be useful, but
16 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 ;; General Public License for more details.
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with this program; see the file COPYING. If not, write to
22 ;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA.
27 (defun decompose-char (char)
29 (while (setq ret (char-feature char '=decomposition))
32 (append (cdr ret) dest)
33 (cons (nth 1 ret) dest))
39 (defvar chise-tex-coded-charset-expression-alist
40 '((=adobe-japan1-6 "AdobeJP" 5 d)
41 (=ucs-bmp@gb "UCSgb" 4 X)
42 (=ucs-bmp@jis "UCSjis" 4 X)
43 (=ucs-bmp@ks "UCSks" 4 X)
44 ;; (=ucs-sip-ext-b "ucsSIP" 4 X)
45 ;; (=ucs-sip-ext-b@iso "ucsSIP" 4 X)
46 (=gt-pj-1 "GTpjA" 4 X)
47 (=gt-pj-2 "GTpjB" 4 X)
48 (=gt-pj-3 "GTpjC" 4 X)
49 (=gt-pj-4 "GTpjD" 4 X)
50 (=gt-pj-5 "GTpjE" 4 X)
51 (=gt-pj-6 "GTpjF" 4 X)
52 (=gt-pj-7 "GTpjG" 4 X)
53 (=gt-pj-8 "GTpjH" 4 X)
54 (=gt-pj-9 "GTpjI" 4 X)
55 (=gt-pj-10 "GTpjJ" 4 X)
56 (=gt-pj-11 "GTpjK" 4 X)
57 (=ruimoku-v6 "Ruimoku" 4 X)
58 (=hanziku-1 "HanzikuA" 4 X)
59 (=hanziku-2 "HanzikuB" 4 X)
60 (=hanziku-3 "HanzikuC" 4 X)
61 (=hanziku-4 "HanzikuD" 4 X)
62 (=hanziku-5 "HanzikuE" 4 X)
63 (=hanziku-6 "HanzikuF" 4 X)
64 (=hanziku-7 "HanzikuG" 4 X)
65 (=hanziku-8 "HanzikuH" 4 X)
66 (=hanziku-9 "HanzikuI" 4 X)
67 (=hanziku-10 "HanzikuJ" 4 X)
68 (=hanziku-11 "HanzikuK" 4 X)
69 (=hanziku-12 "HanzikuL" 4 X)
70 (=ucs-bmp@cns "UCScns" 4 X)
71 (thai-tis620 "ThaiTIS" 2 X)
74 (defvar chise-tex-accents-macro-alist
75 '((?\u0300 . "`") ; <COMBINING GRAVE ACCENT>
76 (?\u0301 . "'") ; <COMBINING ACUTE ACCENT>
77 (?\u0302 . "^") ; <COMBINING CIRCUMFLEX ACCENT>
78 ((?\u0302 ?\u0300) . "CircGrave")
79 ((?\u0302 ?\u0301) . ("\\'{\\^" . "}"))
80 ((?\u0302 ?\u0303) . ("\\~{\\^" . "}"))
81 ((?\u0302 ?\u0309) . "CircHook")
82 (?\u0303 . "~") ; <COMBINING TILDE>
83 (?\u0304 . "=") ; <COMBINING MACRON>
84 ((?\u0304 ?\u0301) . "textacutemacron")
85 (?\u0306 . "u") ; <COMBINING BREVE>
86 (?\u0307 . ".") ; <COMBINING DOT ABOVE>
87 (?\u0308 . "\"") ; <COMBINING DIAERESIS>
89 (?\u030B . "H") ; <COMBINING DOUBLE ACUTE ACCENT>
90 (?\u030C . "v") ; <COMBINING CARON>
91 (?\u031B . "Horn") ; <COMBINING HORN>
92 ((?\u031B ?\u0301) . "HornAcute")
93 ((?\u031B ?\u0303) . "HornTilde")
94 (?\u0323 . "d") ; <COMBINING DOT BELOW>
95 ((?\u0323 ?\u0302) . "Circudot")
96 (?\u0327 . "c") ; <COMBINING CEDILLA>
97 (?\u0328 . "k") ; <COMBINING OGONEK>
102 (defun chise-tex-encode-region-for-gb (start end)
106 (narrow-to-region start end)
108 (let (chr ret rest spec)
109 (while (and (skip-chars-forward "\x00-\xFF")
111 (setq chr (char-after))
112 (cond ((memq chr '(?
\e$(O#@
\e(B))
114 (insert (format "\\UCSjis{%04X}"
115 (encode-char chr '=ucs@jis)))
117 ((and (setq ret (encode-char chr '=jis-x0208-1983))
120 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
122 ;; (insert (decode-char '=jis-x0208-1983 ret)))
123 ((and (encode-char chr '=ks-x1001)
124 (setq ret (or (encode-char chr '=ucs@ks)
127 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
129 (insert (format "\\UCSks{%04X}" ret)))
131 (setq rest chise-tex-coded-charset-expression-alist)
132 (while (setq spec (car rest))
133 (if (setq ret (encode-char chr (car spec)))
135 (setq rest (cdr rest))))
137 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
139 (insert (format (format "\\%s{%%0%d%s}"
145 (forward-char))))))))
147 (defun chise-tex-encode-ucs-char-at-point (&optional chr)
149 (setq chr (char-after)))
152 ((setq ret (encode-char chr '=adobe-japan1-6))
154 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
156 (insert (format "\\AdobeJP{%05d}" ret))
158 ((and (encode-char chr '=ks-x1001)
159 (setq ret (or (encode-char chr '=ucs@ks)
162 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
164 (insert (format "\\UCSks{%04X}" ret))
166 ((setq ret (encode-char chr '=ucs-hangul))
168 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
170 (insert (format "\\UCSks{%04X}" ret))
186 (insert "{\\usefont{T1}{pxr}{m}{n}\\dj}")
198 ;; (insert "{\\usefont{T1}{pxr}{m}{n}\\ng}")
199 (insert "\\LATINeng{}")
203 (insert "\\textturna{}")
207 (insert "\\textscripta{}")
211 (insert "\\textopeno{}")
215 (insert "\\IPAepsilon{}")
219 (insert "\\textramshorns{}")
223 (insert "\\IPAiota{}")
227 (insert "\\textturnm{}")
231 (insert "\\IPArevfishhookr{}")
235 (insert "{\\usefont{T1}{pxr}{m}{n}\\k{s}}")
239 ;; (insert "\\UCSjis{0294}")
240 (insert "\\textglotstop{}")
244 (insert "\\textrevapostrophe{}")
248 (insert "\\textbullet{}")
320 (insert "\\UCSgb{2637}")
322 ((eq (encode-char chr '=ucs@jis) #x8DBC)
324 (insert "\\GTpjG{4933}")
326 ((and (encode-char chr '=ucs@jp)
327 (setq ret (char-representative-of-domain chr 'gb))
328 (setq ret (encode-char ret '=ucs@gb))
331 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
333 (insert (format "\\UCSgb{%04X}" ret))
336 (setq rest chise-tex-coded-charset-expression-alist)
337 (while (setq spec (car rest))
338 (if (setq ret (encode-char chr (car spec)))
340 (setq rest (cdr rest))))
342 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
344 (insert (format (format "\\%s{%%0%d%s}"
352 (defun chise-tex-encode-region-for-jis (start end)
356 (narrow-to-region start end)
360 modifier base modifier-1)
361 (while (and (skip-chars-forward "\x00-\x7F")
363 (setq chr (char-after))
364 (cond ((encode-char chr '=jis-x0208-1983)
366 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
368 ;; (insert (decode-char '=jis-x0208-1983 ret)))
369 ((encode-char chr '=jis-x0208-1983)
371 ((and (not (eq (char-ucs chr) #x0439))
372 (not (eq (char-ucs chr) #x0451))
373 (setq ret (char-feature chr '=decomposition))
374 (setq modifier (assq (nth 1 ret)
389 (setq base (car ret))
390 (if (and (setq ret (char-feature base '=decomposition))
396 (?\u0300 . "CircGrave")
397 (?\u0301 . "CircAcute")
398 (?\u0303 . "CircTilde")
399 (?\u0309 . "CircHook")
402 (?\u0301 . "HornAcute")
405 (?\u0302 . "Circudot")
407 (insert (format "\\%s{%c}" (cdr modifier-1) (car ret)))
408 (insert (format "\\%s{%c}" (cdr modifier) base))))
409 ((and (or (encode-char chr '=jis-x0213-1-2000)
410 (encode-char chr '=jis-x0213-2-2000))
411 (setq ret (or (encode-char chr '=ucs@jis/2000)
412 (encode-char chr '=ucs@jis/fw)))
415 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
417 (insert (format "\\UCSjis{%04X}" ret)))
418 ((chise-tex-encode-ucs-char-at-point chr))
420 (forward-char))))))))
422 (defun chise-tex-encode-region-for-utf-8-jis (start end &optional ptex-mode)
426 (narrow-to-region start end)
428 (let ((font-encoding 'T1)
434 (while (and (skip-chars-forward "\x00-\x7F")
436 (setq chr (char-after))
438 ((and (setq ret (encode-char chr '=ucs))
439 (and (<= #x0400 ret)(<= ret #x04F9)))
440 (if (eq font-encoding 'T2A)
443 ;; (unless (and (prog1
445 ;; "\\fontencoding{T2A}\\selectfont{}" nil t)
447 ;; (eq pos (match-end 0)))
448 ;; (insert "\\fontencoding{T2A}\\selectfont{}")
450 (if (search-backward "\\CyrillicScript{"
451 (- pos (eval-when-compile
452 (length "\\CyrillicScript{")))
454 (search-forward "}" nil t)
455 (insert "\\CyrillicScript{")
457 (skip-chars-forward "---\u0400-\u04F9 ")
459 ;; (setq font-encoding 'T2A)
462 ((and (setq ret (encode-char chr '=ucs))
463 (and (<= #x0374 ret)(<= ret #x03F3)))
464 (if (eq font-encoding 'LGR)
467 ;; (unless (and (prog1
469 ;; "\\fontencoding{LGR}\\selectfont{}" nil t)
471 ;; (eq pos (match-end 0)))
472 ;; (insert "\\fontencoding{LGR}\\selectfont{}")
474 (if (search-backward "\\GreekScript{"
475 (- pos (eval-when-compile
476 (length "\\GreekScript{")))
478 (search-forward "}" nil t)
479 (insert "\\GreekScript{")
481 (skip-chars-forward "\u0374-\u03F3 ")
483 ;; (setq font-encoding 'LGR)
485 ;; (unless (eq font-encoding 'T1)
486 ;; (unless (looking-at
487 ;; "\\\\fontencoding{T1}\\\\selectfont{}")
488 ;; (insert "\\fontencoding{T1}\\selectfont{}")
490 ;; (setq font-encoding 'T1))
493 (unless (eq font-encoding 'T1)
497 "\\fontencoding{T1}\\selectfont{}" nil t)
499 (eq pos (match-end 0)))
500 (insert "\\fontencoding{T1}\\selectfont{}")
502 (setq font-encoding 'T1))
503 (cond ((eq (char-ucs chr) #x00D7)
505 (insert "\\UCSjis{00D7}"))
506 ((encode-char chr '=jis-x0208@1983)
508 ((encode-char chr '=jis-x0208@1990)
510 ((and (setq ret (char-ucs chr))
517 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
519 ;; (insert (decode-char '=jis-x0208-1983 ret)))
520 ((eq (char-ucs chr) #x012B)
524 ((setq ret (encode-char chr 'thai-tis620))
526 (insert (format "\\ThaiTIS{%X}" (logior ret #x80)))
528 ((and (not (eq (char-ucs chr) #x0439))
529 (not (eq (char-ucs chr) #x0451))
530 (listp (setq ret (decompose-char chr)))
531 ;; (setq ret (char-feature chr '=decomposition))
532 (setq modifier (cdr ret))
533 ;; (setq modifier (assq (nth 1 ret)
534 ;; chise-tex-accent-macro-alist))
537 (setq base (car ret))
541 chise-tex-accents-macro-alist)
543 chise-tex-accents-macro-alist)))
548 (insert (format "%s%c%s"
549 (car ret) base (cdr ret)))
550 (insert (format "\\%s{%c}" ret base))))
553 ((eq (encode-char chr '=ucs@jis) #x0153)
557 ((and (not ptex-mode)
558 (setq ret (encode-char chr '=ucs@JP))
561 (insert (format "\\UCSsip{%X}" ret))
563 ((and (not ptex-mode)
564 (or (encode-char chr '=jis-x0213-1-2000)
565 (encode-char chr '=jis-x0213-2-2000))
566 (setq ret (or (encode-char chr '=ucs@jis/2000)
567 (encode-char chr '=ucs@jis/fw)))
570 ;; (if (eq (char-before) ?
\e$B!T
\e(B)
572 ;; (insert (format "\\UCSjis{%04X}" ret))
574 ((chise-tex-encode-ucs-char-at-point chr))
576 (forward-char))))))))))
578 (defun chise-ptex-encode-region-for-utf-8-jis (start end)
580 (chise-tex-encode-region-for-utf-8-jis start end 'ptex-mode))
582 (defun chise-xetex-encode-region-for-utf-8-jis (start end)
586 (narrow-to-region start end)
588 (let (chr ret rest spec)
589 (while (and (skip-chars-forward "\x00-\x7F")
591 (setq chr (char-after))
592 (cond ((encode-char chr '=jis-x0208@1983)
594 ((encode-char chr '=jis-x0208@1990)
596 ;; ((encode-char chr '=jis-x0212)
598 ((and (setq ret (encode-char chr '=ucs@JP))
600 (insert "\\SIPChars{")
603 ((encode-char chr '=jis-x0213-1@2000)
605 ((encode-char chr '=jis-x0213-1@2004)
607 ((encode-char chr '=jis-x0213-2)
609 ((setq ret (encode-char chr 'thai-tis620))
611 (insert (format "\\ThaiTIS{%X}" (logior ret #x80)))
613 ((or (encode-char chr '=ks-x1001)
614 (encode-char chr '=ucs-hangul))
615 (insert "\\KoreanChars{")
618 ((encode-char chr '=ucs@gb)
619 (insert "\\GBChars{")
622 ((encode-char chr '=ucs@cns)
623 (insert "\\CNSChars{")
626 ((and (encode-char chr '=ucs@JP)
627 (setq ret (char-representative-of-domain chr 'gb))
628 (setq ret (encode-char ret '=ucs@gb)))
629 (insert "\\GBChars{")
632 ((setq ret (char-feature chr '=decomposition))
637 (setq rest chise-tex-coded-charset-expression-alist)
638 (while (setq spec (car rest))
639 (if (setq ret (encode-char chr (car spec)))
641 (setq rest (cdr rest))))
643 (insert (format (format "\\%s{%%0%d%s}"
650 (forward-char))))))))
652 (defun chise-tex-decode-region (start end)
656 (narrow-to-region start end)
658 (let (macro code ret me rest spec)
659 (while (search-forward "\\={\\i}" nil t)
660 (replace-match "
\e.D
\eNo" t t))
662 (while (re-search-forward "\\\\\\(.\\){\\(.\\)}" nil t)
666 (aref (match-string 1) 0)
667 '((?\` . ?\u0300) ; <COMBINING GRAVE ACCENT>
668 (?\' . ?\u0301) ; <COMBINING ACUTE ACCENT>
669 (?^ . ?\u0302) ; <COMBINING CIRCUMFLEX ACCENT>
670 (?~ . ?\u0303) ; <COMBINING TILDE>
671 (?= . ?\u0304) ; <COMBINING MACRON>
672 (?u . ?\u0306) ; <COMBINING BREVE>
673 (?\. . ?\u0307) ; <COMBINING DOT ABOVE>
674 (?\" . ?\u0308) ; <COMBINING DIAERESIS>
675 (?v . ?\u030C) ; <COMBINING CARON>
676 (?d . ?\u0323) ; <COMBINING DOT BELOW>
677 (?c . ?\u0327) ; <COMBINING CEDILLA>
680 (cdr (assq (cdr macro)
681 (char-feature (aref (match-string 2) 0)
683 (delete-region (match-beginning 0)(match-end 0))
686 (while (re-search-forward "\\\\\\([a-zA-Z0-9]+\\){\\([0-9A-Fa-f]+\\)}"
688 (setq macro (match-string 1)
689 code (match-string 2)
692 (setq rest chise-tex-coded-charset-expression-alist)
693 (while (setq spec (car rest))
694 (if (string= (nth 1 spec) macro)
696 (setq rest (cdr rest))))
697 (setq ret (decode-char (car spec)
700 (if (eq (nth 3 spec) 'X)
703 (delete-region (match-beginning 0)(match-end 0))
708 'iso-2022-jp-tex-gb 'iso2022
709 "ISO-2022-JP with TeX representation for GB fonts."
713 ;; input-charset-conversion ((latin-jisx0201 ascii)
714 ;; (japanese-jisx0208-1978 japanese-jisx0208))
715 pre-write-conversion chise-tex-encode-region-for-gb
716 post-read-conversion chise-tex-decode-region
717 mnemonic "pTeX(GB)/7bit"
721 'iso-2022-jp-tex-jis 'iso2022
722 "ISO-2022-JP with TeX representation for JIS fonts."
726 ccs-priority-list (ascii
727 =jis-x0208@1983 =jis-x0208@1978
729 ;; output-charset-conversion ((=jis-x0208@1990 =jis-x0208@1983))
730 pre-write-conversion chise-tex-encode-region-for-jis
731 post-read-conversion chise-tex-decode-region
732 mnemonic "pTeX(JIS)/7bit"
736 'utf-8-jp-ptex 'utf-8
737 "Coding-system of UTF-8 for pLaTeX with common glyphs used in Japan."
738 '(pre-write-conversion chise-ptex-encode-region-for-utf-8-jis
739 post-read-conversion chise-tex-decode-region
743 mnemonic "pTeX(JP)/UTF8"))
747 "Coding-system of UTF-8 for upLaTeX with common glyphs used in Japan."
748 '(pre-write-conversion chise-tex-encode-region-for-utf-8-jis
749 post-read-conversion chise-tex-decode-region
753 mnemonic "upTeX(JP)/UTF8"))
756 'utf-8-jp-xetex 'utf-8
757 "Coding-system of UTF-8 for XeLaTeX with common glyphs used in Japan."
758 '(pre-write-conversion chise-xetex-encode-region-for-utf-8-jis
759 post-read-conversion chise-tex-decode-region
763 mnemonic "XeTeX(JP)/UTF8"))
771 ;;; chise-tex.el ends here