(chise-tex-encode-region-for-jis): \UCSjis{XXXX} or \UCSgb{XXXX} can
[chise/omega.git] / chise2otf / elisp / chise-tex.el
index b580985..0f5e29c 100644 (file)
@@ -1,6 +1,6 @@
 ;;; chise-tex.el --- Coding-system based chise2otf like tool
 
-;; Copyright (C) 2004 MORIOKA Tomohiko
+;; Copyright (C) 2004,2005,2006 MORIOKA Tomohiko
 
 ;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
 ;; Keywords: OTF package, pTeX, CHISE, Multiscript, Multilingual
 ;;; Code:
 
 (defvar chise-tex-coded-charset-expression-alist
-  '((=ucs@gb   "UCSgb"         4 X)
-    (=ucs@jis  "UCSjis"        4 X)
-    (=gt-pj-1  "GTpj1"         4 X)
-    (=gt-pj-2  "GTpj2"         4 X)
-    (=gt-pj-3  "GTpj3"         4 X)
-    (=gt-pj-4  "GTpj4"         4 X)
-    (=gt-pj-5  "GTpj5"         4 X)
-    (=gt-pj-6  "GTpj6"         4 X)
-    (=gt-pj-7  "GTpj7"         4 X)
-    (=gt-pj-8  "GTpj8"         4 X)
-    (=gt-pj-9  "GTpj9"         4 X)
-    (=gt-pj-10 "GTpj10"        4 X)
-    (=gt-pj-11 "GTpj11"        4 X)
-    (=ucs@ks   "UCSks"         4 X)
-    (=ucs@cns  "UCScns"        4 X)))
+  '((=ucs-bmp@gb       "UCSgb"         4 X)
+    (=ucs-bmp@jis      "UCSjis"        4 X)
+    (=ucs-bmp@ks       "UCSks"         4 X)
+    (=gt-pj-1          "GTpjA"         4 X)
+    (=gt-pj-2          "GTpjB"         4 X)
+    (=gt-pj-3          "GTpjC"         4 X)
+    (=gt-pj-4          "GTpjD"         4 X)
+    (=gt-pj-5          "GTpjE"         4 X)
+    (=gt-pj-6          "GTpjF"         4 X)
+    (=gt-pj-7          "GTpjG"         4 X)
+    (=gt-pj-8          "GTpjH"         4 X)
+    (=gt-pj-9          "GTpjI"         4 X)
+    (=gt-pj-10         "GTpjJ"         4 X)
+    (=gt-pj-11         "GTpjK"         4 X)
+    (=ruimoku-v6       "Ruimoku"       4 X)
+    (=ucs-bmp@cns      "UCScns"        4 X)))
 
 (defun chise-tex-encode-region-for-gb (start end)
   (interactive "r")
                 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
                 ;;  (delete-char)
                 ;;  (insert (decode-char '=jis-x0208-1983 ret)))
+               ((and (encode-char chr '=ks-x1001)
+                     (setq ret (or (encode-char chr '=ucs@ks)
+                                   (char-ucs chr))))
+                (delete-char)
+                ;; (if (eq (char-before) ?\e$B!T\e(B)
+                ;;     (insert " "))
+                (insert (format "\\UCSks{%04X}" ret)))
                ((catch 'tag
                   (setq rest chise-tex-coded-charset-expression-alist)
                   (while (setq spec (car rest))
@@ -85,8 +93,8 @@
     (save-restriction
       (narrow-to-region start end)
       (goto-char start)
-      (let (chr ret rest spec)
-       (while (and (skip-chars-forward "\x00-\xFF")
+      (let (chr ret rest spec modifier base modifier-1)
+       (while (and (skip-chars-forward "\x00-\x7F")
                    (not (eobp)))
          (setq chr (char-after))
           (cond ((encode-char chr '=jis-x0208-1983)
                 ;; ((setq ret (encode-char chr '=jis-x0208-1990))
                 ;;  (delete-char)
                 ;;  (insert (decode-char '=jis-x0208-1983 ret)))
+               ((and (not (eq (char-ucs chr) #x0439))
+                     (not (eq (char-ucs chr) #x0451))
+                     (setq ret (char-feature chr '=decomposition))
+                     (setq modifier (assq (nth 1 ret)
+                                          '((?\u0300 . "`")
+                                            (?\u0301 . "'")
+                                            (?\u0302 . "^")
+                                            (?\u0303 . "~")
+                                            (?\u0304 . "=")
+                                            (?\u0306 . "u")
+                                            (?\u0307 . ".")
+                                            (?\u0308 . "\"")
+                                            (?\u0309 . "Hook")
+                                            (?\u030C . "v")
+                                            (?\u0323 . "d")
+                                            (?\u0327 . "c")
+                                            ))))
+                (delete-char)
+                (setq base (car ret))
+                (if (and (setq ret (char-feature base '=decomposition))
+                         (setq modifier-1
+                               (assq (car modifier)
+                                     (cdr
+                                      (assq (nth 1 ret)
+                                            '((?\u0302
+                                               (?\u0301 . "CircAcute")
+                                               (?\u0303 . "CircTilde")
+                                               (?\u0309 . "CircHook")
+                                               )
+                                              (?\u0323
+                                               (?\u0302 . "Circudot")
+                                               )))))))
+                    (insert (format "\\%s{%c}" (cdr modifier-1) (car ret)))
+                  (insert (format "\\%s{%c}" (cdr modifier) base))))
+               ((and (or (encode-char chr '=jis-x0213-1-2000)
+                         (encode-char chr '=jis-x0213-2-2000))
+                     (setq ret (or (encode-char chr '=ucs@jis/2000)
+                                   (encode-char chr '=ucs@jis/fw)))
+                     (<= ret #xFFFF))
+                (delete-char)
+                ;; (if (eq (char-before) ?\e$B!T\e(B)
+                ;;     (insert " "))
+                (insert (format "\\UCSjis{%04X}" ret)))
+               ((and (encode-char chr '=ks-x1001)
+                     (setq ret (or (encode-char chr '=ucs@ks)
+                                   (char-ucs chr))))
+                (delete-char)
+                ;; (if (eq (char-before) ?\e$B!T\e(B)
+                ;;     (insert " "))
+                (insert (format "\\UCSks{%04X}" ret)))
+               ((setq ret (encode-char chr '=ucs-hangul))
+                (delete-char)
+                ;; (if (eq (char-before) ?\e$B!T\e(B)
+                ;;     (insert " "))
+                (insert (format "\\UCSks{%04X}" ret)))
+               ((eq chr ?\u00B2)
+                (delete-char)
+                (insert "$^2$"))
+               ((eq chr ?\u00B3)
+                (delete-char)
+                (insert "$^3$"))
+               ((eq chr ?\u0111)
+                (delete-char)
+                (insert "{\\usefont{T1}{pxr}{m}{n}\\dj}"))
+               ((eq chr ?\u014B)
+                (delete-char)
+                (insert "{\\usefont{T1}{pxr}{m}{n}\\ng}"))
+               ((eq chr ?\u0282)
+                (delete-char)
+                (insert "{\\usefont{T1}{pxr}{m}{n}\\k{s}}"))
+               ((eq chr ?\u2022)
+                (delete-char)
+                (insert "\\textbullet{}"))
+               ((eq chr ?\u2083)
+                (delete-char)
+                (insert "$_3$"))
+               ((eq chr ?\u2085)
+                (delete-char)
+                (insert "$_5$"))
+               ((eq chr ?\u0294)
+                (delete-char)
+                (insert "\\UCSjis{0294}"))
+               ((and (encode-char chr '=ucs@jp)
+                     (setq ret (char-representative-of-domain chr 'gb))
+                     (setq ret (encode-char ret '=ucs@gb))
+                     (<= ret #xFFFF))
+                (delete-char)
+                ;; (if (eq (char-before) ?\e$B!T\e(B)
+                ;;     (insert " "))
+                (insert (format "\\UCSgb{%04X}" ret)))
                ((catch 'tag
                   (setq rest chise-tex-coded-charset-expression-alist)
                   (while (setq spec (car rest))
     (save-restriction
       (narrow-to-region start end)
       (goto-char start)
-      (let (macro code ret ms me)
+      (let (macro code ret me rest spec)
        (while (re-search-forward "\\\\\\([a-zA-Z0-9]+\\){\\([0-9A-Fa-f]+\\)}"
                                  nil t)
          (setq macro (match-string 1)
                code (match-string 2)
-               ms (match-beginning 0)
                me (match-end 0))
          (if (and (catch 'tag
                     (setq rest chise-tex-coded-charset-expression-alist)
  '(charset-g0 ascii
    short t
    seven t
-   input-charset-conversion ((latin-jisx0201 ascii)
-                            (japanese-jisx0208-1978 japanese-jisx0208))
+   ccs-priority-list (ascii
+                     =jis-x0208@1983 =jis-x0208@1978
+                     latin-jisx0201)
    pre-write-conversion chise-tex-encode-region-for-jis
    post-read-conversion chise-tex-decode-region
    mnemonic "pTeX(JIS)/7bit"