(chise-tex-encode-region-for-utf-8-jis): New optional argument
[chise/uptex-chise.git] / elisp / chise-tex.el
index 5527325..e0256f5 100644 (file)
@@ -1,6 +1,6 @@
 ;;; chise-tex.el --- Coding-system based chise2otf like tool
 
-;; Copyright (C) 2004,2005,2006,2007,2008,2009 MORIOKA Tomohiko
+;; Copyright (C) 2004,2005,2006,2007,2008,2009,2010 MORIOKA Tomohiko
 
 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
 ;; Keywords: OTF package, pTeX, CHISE, Multiscript, Multilingual
 
 ;;; Code:
 
+(defun decompose-char (char)
+  (let (ret dest)
+    (while (setq ret (char-feature char '=decomposition))
+      (setq dest
+           (if (cddr ret)
+               (append (cdr ret) dest)
+             (cons (nth 1 ret) dest))
+           char (car ret)))
+    (if dest
+       (cons char dest)
+      char)))
+
 (defvar chise-tex-coded-charset-expression-alist
-  '((=ucs-bmp@gb       "UCSgb"         4 X)
+  '((=adobe-japan1-5   "AdobeJP"       5 d)
+    (=ucs-bmp@gb       "UCSgb"         4 X)
     (=ucs-bmp@jis      "UCSjis"        4 X)
     (=ucs-bmp@ks       "UCSks"         4 X)
+    ;; (=ucs-sip-ext-b     "ucsSIP"        4 X)
+    ;; (=ucs-sip-ext-b@iso "ucsSIP"        4 X)
     (=gt-pj-1          "GTpjA"         4 X)
     (=gt-pj-2          "GTpjB"         4 X)
     (=gt-pj-3          "GTpjC"         4 X)
     (thai-tis620       "ThaiTIS"       2 X)
     ))
 
-(defvar chise-tex-accent-macro-alist
+(defvar chise-tex-accents-macro-alist
   '((?\u0300 . "`") ; <COMBINING GRAVE ACCENT>
     (?\u0301 . "'") ; <COMBINING ACUTE ACCENT>
     (?\u0302 . "^") ; <COMBINING CIRCUMFLEX ACCENT>
+    ((?\u0302 ?\u0300) . "CircGrave")
+    ((?\u0302 ?\u0301) . ("\\'{\\^" . "}"))
+    ((?\u0302 ?\u0303) . ("\\~{\\^" . "}"))
+    ((?\u0302 ?\u0309) . "CircHook")
     (?\u0303 . "~") ; <COMBINING TILDE>
     (?\u0304 . "=") ; <COMBINING MACRON>
+    ((?\u0304 ?\u0301) . "textacutemacron")
     (?\u0306 . "u") ; <COMBINING BREVE>
     (?\u0307 . ".") ; <COMBINING DOT ABOVE>
     (?\u0308 . "\"") ; <COMBINING DIAERESIS>
     (?\u0309 . "Hook")
     (?\u030B . "H") ; <COMBINING DOUBLE ACUTE ACCENT>
     (?\u030C . "v") ; <COMBINING CARON>
+    (?\u031B . "Horn") ; <COMBINING HORN>
+    ((?\u031B ?\u0301) . "HornAcute")
+    ((?\u031B ?\u0303) . "HornTilde")
     (?\u0323 . "d") ; <COMBINING DOT BELOW>
+    ((?\u0323 ?\u0302) . "Circudot")
     (?\u0327 . "c") ; <COMBINING CEDILLA>
     (?\u0328 . "k") ; <COMBINING OGONEK>
     (?\u032E . "ubreve")
       (delete-char)
       (insert "{\\usefont{T1}{pxr}{m}{n}\\dj}")
       t)
+     ((eq chr ?\u0131)
+      (delete-char)
+      (insert "\\i{}")
+      t)
      ((eq chr ?\u0142)
       (delete-char)
       (insert "\\l{}")
       t)
      ((eq chr ?\u014B)
       (delete-char)
-      (insert "{\\usefont{T1}{pxr}{m}{n}\\ng}")
+      ;; (insert "{\\usefont{T1}{pxr}{m}{n}\\ng}")
+      (insert "\\LATINeng{}")
       t)
      ((eq chr ?\u0250)
       (delete-char)
       (insert "\\textturna{}")
       t)
+     ((eq chr ?\u0251)
+      (delete-char)
+      (insert "\\textscripta{}")
+      t)
      ((eq chr ?\u0254)
       (delete-char)
       (insert "\\textopeno{}")
       t)
+     ((eq chr ?\u025B)
+      (delete-char)
+      (insert "\\IPAepsilon{}")
+      t)
      ((eq chr ?\u0264)
       (delete-char)
       (insert "\\textramshorns{}")
       t)
+     ((eq chr ?\u0269)
+      (delete-char)
+      (insert "\\IPAiota{}")
+      t)
      ((eq chr ?\u026F)
       (delete-char)
       (insert "\\textturnm{}")
       t)
+     ((eq chr ?\u027F)
+      (delete-char)
+      (insert "\\IPArevfishhookr{}")
+      t)
      ((eq chr ?\u0282)
       (delete-char)
       (insert "{\\usefont{T1}{pxr}{m}{n}\\k{s}}")
       ;; (insert "\\UCSjis{0294}")
       (insert "\\textglotstop{}")
       t)
+     ((eq chr ?\u02BF)
+      (delete-char)
+      (insert "\\textrevapostrophe{}")
+      t)
      ((eq chr ?\u2022)
       (delete-char)
       (insert "\\textbullet{}")
       t)
+     ((eq chr ?\u2074)
+      (delete-char)
+      (insert "$^4$")
+      t)
      ((eq chr ?\u2075)
       (delete-char)
       (insert "$^5$")
                 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
                 ;;  (delete-char)
                 ;;  (insert (decode-char '=jis-x0208-1983 ret)))
-               ((and (not (eq (char-ucs chr) #x0439))
+               ((encode-char chr '=jis-x0208-1983)
+                (forward-char))
+                ((and (not (eq (char-ucs chr) #x0439))
                      (not (eq (char-ucs chr) #x0451))
                      (setq ret (char-feature chr '=decomposition))
                      (setq modifier (assq (nth 1 ret)
                 (t
                 (forward-char))))))))
 
-(defun chise-tex-encode-region-for-utf-8-jis (start end)
+(defun chise-tex-encode-region-for-utf-8-jis (start end &optional ptex-mode)
   (interactive "r")
   (save-excursion
     (save-restriction
       (let ((font-encoding 'T1)
            chr ret
            ;; rest spec
-           modifier base modifier-1 pos)
+           modifier base
+            ;; modifier-1
+           pos)
        (while (and (skip-chars-forward "\x00-\x7F")
                    (not (eobp)))
          (setq chr (char-after))
            (cond ((eq (char-ucs chr) #x00D7)
                   (delete-char)
                   (insert "\\UCSjis{00D7}"))
-                 ((encode-char chr '=jis-x0208-1983)
+                 ((encode-char chr '=jis-x0208@1983)
+                  (forward-char))
+                 ((encode-char chr '=jis-x0208@1990)
                   (forward-char))
                  ((and (setq ret (char-ucs chr))
                        (or (eq ret #x00C5)
                            (eq ret #x00E5)
+                           (eq ret #x015B)
                            (eq ret #x1E2B)
                            ))
                   (forward-char))
                   )
                  ((and (not (eq (char-ucs chr) #x0439))
                        (not (eq (char-ucs chr) #x0451))
-                       (setq ret (char-feature chr '=decomposition))
-                       (setq modifier (assq (nth 1 ret)
-                                            chise-tex-accent-macro-alist)))
-                  (delete-char)
+                       (listp (setq ret (decompose-char chr)))
+                       ;; (setq ret (char-feature chr '=decomposition))
+                       (setq modifier (cdr ret))
+                        ;; (setq modifier (assq (nth 1 ret)
+                        ;;                      chise-tex-accent-macro-alist))
+                       )
+                   ;; (delete-char)
                   (setq base (car ret))
-                  (if (and (setq ret (char-feature base '=decomposition))
-                           (setq modifier-1
-                                 (assq
-                                  (car modifier)
-                                  (cdr
-                                   (assq
-                                    (nth 1 ret)
-                                    '((?\u0302
-                                       (?\u0300 . "\\CircGrave{%c}")
-                                       (?\u0301 . "\\'{\\^%c}")
-                                       (?\u0303 . "\\~{\\^%c}")
-                                       (?\u0309 . "\\CircHook{%c}")
-                                       )
-                                      (?\u0304 ; <COMBINING MACRON>
-                                       (?\u0301 ; <COMBINING ACUTE ACCENT>
-                                        . "\\textacutemacron{%c}")
-                                       )
-                                      (?\u031B
-                                       (?\u0301 . "\\HornAcute{%c}")
-                                       (?\u0303 . "\\HornTilde{%c}")
-                                       )
-                                      (?\u0323
-                                       (?\u0302 . "\\Circudot{%c}")
-                                       )))))))
-                      (insert (format (cdr modifier-1) (car ret)))
-                    (insert (format "\\%s{%c}" (cdr modifier) base))))
-                 ((and (or (encode-char chr '=jis-x0213-1-2000)
+                  (if (setq ret
+                            (if (cdr modifier)
+                                (assoc modifier
+                                       chise-tex-accents-macro-alist)
+                              (assq (car modifier)
+                                    chise-tex-accents-macro-alist)))
+                      (progn
+                        (delete-char)
+                        (setq ret (cdr ret))
+                        (if (consp ret)
+                            (insert (format "%s%c%s"
+                                            (car ret) base (cdr ret)))
+                          (insert (format "\\%s{%c}" ret base))))
+                    (forward-char))
+                   )
+                 ((eq (encode-char chr '=ucs@jis) #x0153)
+                  (delete-char)
+                  (insert "\\oe{}")
+                  t)
+                 ((and (not ptex-mode)
+                       (setq ret (encode-char chr '=ucs@JP))
+                       (>= ret #x20000))
+                  (delete-char)
+                  (insert (format "\\UCSsip{%X}" ret))
+                  t)
+                 ((and (not ptex-mode)
+                       (or (encode-char chr '=jis-x0213-1-2000)
                            (encode-char chr '=jis-x0213-2-2000))
                        (setq ret (or (encode-char chr '=ucs@jis/2000)
                                      (encode-char chr '=ucs@jis/fw)))
                   (t
                   (forward-char))))))))))
 
+(defun chise-ptex-encode-region-for-utf-8-jis (start end)
+  (interactive "r")
+  (chise-tex-encode-region-for-utf-8-jis start end 'ptex-mode))
+
+(defun chise-xetex-encode-region-for-utf-8-jis (start end)
+  (interactive "r")
+  (save-excursion
+    (save-restriction
+      (narrow-to-region start end)
+      (goto-char start)
+      (let (chr ret rest spec)
+       (while (and (skip-chars-forward "\x00-\x7F")
+                   (not (eobp)))
+         (setq chr (char-after))
+         (cond ((encode-char chr '=jis-x0208@1983)
+                (forward-char))
+               ((encode-char chr '=jis-x0208@1990)
+                (forward-char))
+                ;; ((encode-char chr '=jis-x0212)
+                ;;  (forward-char))
+               ((and (setq ret (encode-char chr '=ucs@JP))
+                     (>= ret #x20000))
+                (insert "\\SIPChars{")
+                (forward-char)
+                (insert "}"))
+               ((encode-char chr '=jis-x0213-1@2000)
+                (forward-char))
+               ((encode-char chr '=jis-x0213-1@2004)
+                (forward-char))
+               ((encode-char chr '=jis-x0213-2)
+                (forward-char))
+               ((setq ret (encode-char chr 'thai-tis620))
+                (delete-char)
+                (insert (format "\\ThaiTIS{%X}" (logior ret #x80)))
+                )
+               ((or (encode-char chr '=ks-x1001)
+                    (encode-char chr '=ucs-hangul))
+                (insert "\\KoreanChars{")
+                (forward-char)
+                (insert "}"))
+               ((encode-char chr '=ucs@gb)
+                (insert "\\GBChars{")
+                (forward-char)
+                (insert "}"))
+               ((encode-char chr '=ucs@cns)
+                (insert "\\CNSChars{")
+                (forward-char)
+                (insert "}"))
+               ((and (encode-char chr '=ucs@JP)
+                     (setq ret (char-representative-of-domain chr 'gb))
+                     (setq ret (encode-char ret '=ucs@gb)))
+                (insert "\\GBChars{")
+                (forward-char)
+                (insert "}"))
+               ((setq ret (char-feature chr '=decomposition))
+                (delete-char)
+                (dolist (c ret)
+                  (insert c)))
+               ((catch 'tag
+                  (setq rest chise-tex-coded-charset-expression-alist)
+                  (while (setq spec (car rest))
+                    (if (setq ret (encode-char chr (car spec)))
+                        (throw 'tag ret))
+                    (setq rest (cdr rest))))
+                (delete-char)
+                (insert (format (format "\\%s{%%0%d%s}"
+                                        (nth 1 spec)
+                                        (nth 2 spec)
+                                        (nth 3 spec))
+                                ret))
+                )
+               (t
+                (forward-char))))))))
+
 (defun chise-tex-decode-region (start end)
   (interactive "r")
   (save-excursion
    ))
 
 (make-coding-system
+ 'utf-8-jp-ptex 'utf-8
+ "Coding-system of UTF-8 for pLaTeX with common glyphs used in Japan."
+ '(pre-write-conversion chise-ptex-encode-region-for-utf-8-jis
+   post-read-conversion chise-tex-decode-region
+   charset-g0 =ucs@jp
+   charset-g1 =>ucs-jis
+   charset-g2 =>ucs
+   mnemonic "pTeX(JP)/UTF8"))
+
+(make-coding-system
  'utf-8-jp-tex 'utf-8
- "Coding-system of UTF-8 for common glyphs used in Japan."
+ "Coding-system of UTF-8 for upLaTeX with common glyphs used in Japan."
  '(pre-write-conversion chise-tex-encode-region-for-utf-8-jis
    post-read-conversion chise-tex-decode-region
    charset-g0 =ucs@jp
    charset-g2 =>ucs
    mnemonic "upTeX(JP)/UTF8"))
 
+(make-coding-system
+ 'utf-8-jp-xetex 'utf-8
+ "Coding-system of UTF-8 for XeLaTeX with common glyphs used in Japan."
+ '(pre-write-conversion chise-xetex-encode-region-for-utf-8-jis
+   post-read-conversion chise-tex-decode-region
+   charset-g0 =ucs@jp
+   charset-g1 =>ucs-jis
+   charset-g2 =>ucs
+   mnemonic "XeTeX(JP)/UTF8"))
+
 
 ;;; @ End.
 ;;;