;;; chise-tex.el --- Coding-system based chise2otf like tool
-;; Copyright (C) 2004,2005,2006,2007,2008,2009 MORIOKA Tomohiko
+;; Copyright (C) 2004,2005,2006,2007,2008,2009,2010 MORIOKA Tomohiko
;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
;; Keywords: OTF package, pTeX, CHISE, Multiscript, Multilingual
;;; Code:
+(defun decompose-char (char)
+ (let (ret dest)
+ (while (setq ret (char-feature char '=decomposition))
+ (setq dest
+ (if (cddr ret)
+ (append (cdr ret) dest)
+ (cons (nth 1 ret) dest))
+ char (car ret)))
+ (if dest
+ (cons char dest)
+ char)))
+
(defvar chise-tex-coded-charset-expression-alist
- '((=ucs-bmp@gb "UCSgb" 4 X)
+ '((=adobe-japan1-5 "AdobeJP" 5 d)
+ (=ucs-bmp@gb "UCSgb" 4 X)
(=ucs-bmp@jis "UCSjis" 4 X)
(=ucs-bmp@ks "UCSks" 4 X)
+ ;; (=ucs-sip-ext-b "ucsSIP" 4 X)
+ ;; (=ucs-sip-ext-b@iso "ucsSIP" 4 X)
(=gt-pj-1 "GTpjA" 4 X)
(=gt-pj-2 "GTpjB" 4 X)
(=gt-pj-3 "GTpjC" 4 X)
(thai-tis620 "ThaiTIS" 2 X)
))
-(defvar chise-tex-accent-macro-alist
+(defvar chise-tex-accents-macro-alist
'((?\u0300 . "`") ; <COMBINING GRAVE ACCENT>
(?\u0301 . "'") ; <COMBINING ACUTE ACCENT>
(?\u0302 . "^") ; <COMBINING CIRCUMFLEX ACCENT>
+ ((?\u0302 ?\u0300) . "CircGrave")
+ ((?\u0302 ?\u0301) . ("\\'{\\^" . "}"))
+ ((?\u0302 ?\u0303) . ("\\~{\\^" . "}"))
+ ((?\u0302 ?\u0309) . "CircHook")
(?\u0303 . "~") ; <COMBINING TILDE>
(?\u0304 . "=") ; <COMBINING MACRON>
+ ((?\u0304 ?\u0301) . "textacutemacron")
(?\u0306 . "u") ; <COMBINING BREVE>
(?\u0307 . ".") ; <COMBINING DOT ABOVE>
(?\u0308 . "\"") ; <COMBINING DIAERESIS>
(?\u0309 . "Hook")
(?\u030B . "H") ; <COMBINING DOUBLE ACUTE ACCENT>
(?\u030C . "v") ; <COMBINING CARON>
+ (?\u031B . "Horn") ; <COMBINING HORN>
+ ((?\u031B ?\u0301) . "HornAcute")
+ ((?\u031B ?\u0303) . "HornTilde")
(?\u0323 . "d") ; <COMBINING DOT BELOW>
+ ((?\u0323 ?\u0302) . "Circudot")
(?\u0327 . "c") ; <COMBINING CEDILLA>
(?\u0328 . "k") ; <COMBINING OGONEK>
(?\u032E . "ubreve")
(delete-char)
(insert "{\\usefont{T1}{pxr}{m}{n}\\dj}")
t)
+ ((eq chr ?\u0131)
+ (delete-char)
+ (insert "\\i{}")
+ t)
((eq chr ?\u0142)
(delete-char)
(insert "\\l{}")
t)
((eq chr ?\u014B)
(delete-char)
- (insert "{\\usefont{T1}{pxr}{m}{n}\\ng}")
+ ;; (insert "{\\usefont{T1}{pxr}{m}{n}\\ng}")
+ (insert "\\LATINeng{}")
t)
((eq chr ?\u0250)
(delete-char)
(insert "\\textturna{}")
t)
+ ((eq chr ?\u0251)
+ (delete-char)
+ (insert "\\textscripta{}")
+ t)
((eq chr ?\u0254)
(delete-char)
(insert "\\textopeno{}")
t)
+ ((eq chr ?\u025B)
+ (delete-char)
+ (insert "\\IPAepsilon{}")
+ t)
((eq chr ?\u0264)
(delete-char)
(insert "\\textramshorns{}")
t)
+ ((eq chr ?\u0269)
+ (delete-char)
+ (insert "\\IPAiota{}")
+ t)
((eq chr ?\u026F)
(delete-char)
(insert "\\textturnm{}")
t)
+ ((eq chr ?\u027F)
+ (delete-char)
+ (insert "\\IPArevfishhookr{}")
+ t)
((eq chr ?\u0282)
(delete-char)
(insert "{\\usefont{T1}{pxr}{m}{n}\\k{s}}")
;; (insert "\\UCSjis{0294}")
(insert "\\textglotstop{}")
t)
+ ((eq chr ?\u02BF)
+ (delete-char)
+ (insert "\\textrevapostrophe{}")
+ t)
((eq chr ?\u2022)
(delete-char)
(insert "\\textbullet{}")
t)
+ ((eq chr ?\u2074)
+ (delete-char)
+ (insert "$^4$")
+ t)
((eq chr ?\u2075)
(delete-char)
(insert "$^5$")
;; ((setq ret (encode-char chr '=jis-x0208-1990))
;; (delete-char)
;; (insert (decode-char '=jis-x0208-1983 ret)))
- ((and (not (eq (char-ucs chr) #x0439))
+ ((encode-char chr '=jis-x0208-1983)
+ (forward-char))
+ ((and (not (eq (char-ucs chr) #x0439))
(not (eq (char-ucs chr) #x0451))
(setq ret (char-feature chr '=decomposition))
(setq modifier (assq (nth 1 ret)
(t
(forward-char))))))))
-(defun chise-tex-encode-region-for-utf-8-jis (start end)
+(defun chise-tex-encode-region-for-utf-8-jis (start end &optional ptex-mode)
(interactive "r")
(save-excursion
(save-restriction
(let ((font-encoding 'T1)
chr ret
;; rest spec
- modifier base modifier-1 pos)
+ modifier base
+ ;; modifier-1
+ pos)
(while (and (skip-chars-forward "\x00-\x7F")
(not (eobp)))
(setq chr (char-after))
(cond ((eq (char-ucs chr) #x00D7)
(delete-char)
(insert "\\UCSjis{00D7}"))
- ((encode-char chr '=jis-x0208-1983)
+ ((encode-char chr '=jis-x0208@1983)
+ (forward-char))
+ ((encode-char chr '=jis-x0208@1990)
(forward-char))
((and (setq ret (char-ucs chr))
(or (eq ret #x00C5)
(eq ret #x00E5)
+ (eq ret #x015B)
(eq ret #x1E2B)
))
(forward-char))
)
((and (not (eq (char-ucs chr) #x0439))
(not (eq (char-ucs chr) #x0451))
- (setq ret (char-feature chr '=decomposition))
- (setq modifier (assq (nth 1 ret)
- chise-tex-accent-macro-alist)))
- (delete-char)
+ (listp (setq ret (decompose-char chr)))
+ ;; (setq ret (char-feature chr '=decomposition))
+ (setq modifier (cdr ret))
+ ;; (setq modifier (assq (nth 1 ret)
+ ;; chise-tex-accent-macro-alist))
+ )
+ ;; (delete-char)
(setq base (car ret))
- (if (and (setq ret (char-feature base '=decomposition))
- (setq modifier-1
- (assq
- (car modifier)
- (cdr
- (assq
- (nth 1 ret)
- '((?\u0302
- (?\u0300 . "\\CircGrave{%c}")
- (?\u0301 . "\\'{\\^%c}")
- (?\u0303 . "\\~{\\^%c}")
- (?\u0309 . "\\CircHook{%c}")
- )
- (?\u0304 ; <COMBINING MACRON>
- (?\u0301 ; <COMBINING ACUTE ACCENT>
- . "\\textacutemacron{%c}")
- )
- (?\u031B
- (?\u0301 . "\\HornAcute{%c}")
- (?\u0303 . "\\HornTilde{%c}")
- )
- (?\u0323
- (?\u0302 . "\\Circudot{%c}")
- )))))))
- (insert (format (cdr modifier-1) (car ret)))
- (insert (format "\\%s{%c}" (cdr modifier) base))))
- ((and (or (encode-char chr '=jis-x0213-1-2000)
+ (if (setq ret
+ (if (cdr modifier)
+ (assoc modifier
+ chise-tex-accents-macro-alist)
+ (assq (car modifier)
+ chise-tex-accents-macro-alist)))
+ (progn
+ (delete-char)
+ (setq ret (cdr ret))
+ (if (consp ret)
+ (insert (format "%s%c%s"
+ (car ret) base (cdr ret)))
+ (insert (format "\\%s{%c}" ret base))))
+ (forward-char))
+ )
+ ((eq (encode-char chr '=ucs@jis) #x0153)
+ (delete-char)
+ (insert "\\oe{}")
+ t)
+ ((and (not ptex-mode)
+ (setq ret (encode-char chr '=ucs@JP))
+ (>= ret #x20000))
+ (delete-char)
+ (insert (format "\\UCSsip{%X}" ret))
+ t)
+ ((and (not ptex-mode)
+ (or (encode-char chr '=jis-x0213-1-2000)
(encode-char chr '=jis-x0213-2-2000))
(setq ret (or (encode-char chr '=ucs@jis/2000)
(encode-char chr '=ucs@jis/fw)))
(t
(forward-char))))))))))
+(defun chise-ptex-encode-region-for-utf-8-jis (start end)
+ (interactive "r")
+ (chise-tex-encode-region-for-utf-8-jis start end 'ptex-mode))
+
+(defun chise-xetex-encode-region-for-utf-8-jis (start end)
+ (interactive "r")
+ (save-excursion
+ (save-restriction
+ (narrow-to-region start end)
+ (goto-char start)
+ (let (chr ret rest spec)
+ (while (and (skip-chars-forward "\x00-\x7F")
+ (not (eobp)))
+ (setq chr (char-after))
+ (cond ((encode-char chr '=jis-x0208@1983)
+ (forward-char))
+ ((encode-char chr '=jis-x0208@1990)
+ (forward-char))
+ ;; ((encode-char chr '=jis-x0212)
+ ;; (forward-char))
+ ((and (setq ret (encode-char chr '=ucs@JP))
+ (>= ret #x20000))
+ (insert "\\SIPChars{")
+ (forward-char)
+ (insert "}"))
+ ((encode-char chr '=jis-x0213-1@2000)
+ (forward-char))
+ ((encode-char chr '=jis-x0213-1@2004)
+ (forward-char))
+ ((encode-char chr '=jis-x0213-2)
+ (forward-char))
+ ((setq ret (encode-char chr 'thai-tis620))
+ (delete-char)
+ (insert (format "\\ThaiTIS{%X}" (logior ret #x80)))
+ )
+ ((or (encode-char chr '=ks-x1001)
+ (encode-char chr '=ucs-hangul))
+ (insert "\\KoreanChars{")
+ (forward-char)
+ (insert "}"))
+ ((encode-char chr '=ucs@gb)
+ (insert "\\GBChars{")
+ (forward-char)
+ (insert "}"))
+ ((encode-char chr '=ucs@cns)
+ (insert "\\CNSChars{")
+ (forward-char)
+ (insert "}"))
+ ((and (encode-char chr '=ucs@JP)
+ (setq ret (char-representative-of-domain chr 'gb))
+ (setq ret (encode-char ret '=ucs@gb)))
+ (insert "\\GBChars{")
+ (forward-char)
+ (insert "}"))
+ ((setq ret (char-feature chr '=decomposition))
+ (delete-char)
+ (dolist (c ret)
+ (insert c)))
+ ((catch 'tag
+ (setq rest chise-tex-coded-charset-expression-alist)
+ (while (setq spec (car rest))
+ (if (setq ret (encode-char chr (car spec)))
+ (throw 'tag ret))
+ (setq rest (cdr rest))))
+ (delete-char)
+ (insert (format (format "\\%s{%%0%d%s}"
+ (nth 1 spec)
+ (nth 2 spec)
+ (nth 3 spec))
+ ret))
+ )
+ (t
+ (forward-char))))))))
+
(defun chise-tex-decode-region (start end)
(interactive "r")
(save-excursion
))
(make-coding-system
+ 'utf-8-jp-ptex 'utf-8
+ "Coding-system of UTF-8 for pLaTeX with common glyphs used in Japan."
+ '(pre-write-conversion chise-ptex-encode-region-for-utf-8-jis
+ post-read-conversion chise-tex-decode-region
+ charset-g0 =ucs@jp
+ charset-g1 =>ucs-jis
+ charset-g2 =>ucs
+ mnemonic "pTeX(JP)/UTF8"))
+
+(make-coding-system
'utf-8-jp-tex 'utf-8
- "Coding-system of UTF-8 for common glyphs used in Japan."
+ "Coding-system of UTF-8 for upLaTeX with common glyphs used in Japan."
'(pre-write-conversion chise-tex-encode-region-for-utf-8-jis
post-read-conversion chise-tex-decode-region
charset-g0 =ucs@jp
charset-g2 =>ucs
mnemonic "upTeX(JP)/UTF8"))
+(make-coding-system
+ 'utf-8-jp-xetex 'utf-8
+ "Coding-system of UTF-8 for XeLaTeX with common glyphs used in Japan."
+ '(pre-write-conversion chise-xetex-encode-region-for-utf-8-jis
+ post-read-conversion chise-tex-decode-region
+ charset-g0 =ucs@jp
+ charset-g1 =>ucs-jis
+ charset-g2 =>ucs
+ mnemonic "XeTeX(JP)/UTF8"))
+
;;; @ End.
;;;