From e1bcdb49d227e28d2fd11ab4cacc1bd737b193ee Mon Sep 17 00:00:00 2001 From: MORIOKA Tomohiko Date: Thu, 29 Jan 2009 03:11:01 +0000 Subject: [PATCH] Copied from /chise2otf/elisp/. --- elisp/ChangeLog | 126 ++++++++++++ elisp/chise-tex.el | 554 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 680 insertions(+) create mode 100644 elisp/ChangeLog create mode 100644 elisp/chise-tex.el diff --git a/elisp/ChangeLog b/elisp/ChangeLog new file mode 100644 index 0000000..a840a86 --- /dev/null +++ b/elisp/ChangeLog @@ -0,0 +1,126 @@ +2009-01-14 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-coded-charset-expression-alist): Add + setting for `thai-tis620' to decode \ThaiTIS{XX}. + (chise-tex-encode-ucs-char-at-point): New function. + (chise-tex-encode-region-for-jis): Use + `chise-tex-encode-ucs-char-at-point'. + (chise-tex-encode-region-for-utf-8-jis): Likewise. + +2008-01-24 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-encode-region-for-utf-8-jis): Support + Thai script. + +2008-01-24 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-encode-region-for-utf-8-jis): Use + \GTpjG{4933} for JU+8DBC. + +2008-01-23 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-accent-macro-alist): New variable. + (chise-tex-encode-region-for-utf-8-jis): Support Greek characters; + don't encode U+00C5 and U+00E5; refer + `chise-tex-accent-macro-alist'; use "\'{\^X}" instead of + "\CircAcute{X}"; use "\~{\^X}" instead of "\CircTilde{X}"; add + setting for `HornTilde'. + (chise-tex-decode-region): Decode "\={\i}". + +2008-01-22 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-encode-region-for-utf-8-jis): New + function. + (chise-tex-decode-region): Decode \u, \v and \c. + (utf-8-jp-tex): New coding-system. + +2008-01-21 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-encode-region-for-jis): Add settings for + $_1$ and $_2$. + +2008-01-18 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-decode-region): Decode \`, \', \^, \~, + \=, \., \" and \d. + +2007-02-06 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-coded-charset-expression-alist): Add + settings for `=hanziku-{1..12}'. + +2007-01-24 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-encode-region-for-jis): Add settings for + `CircGrave' and `HornAcute'. + +2006-02-27 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-encode-region-for-jis): \UCSjis{XXXX} or + \UCSgb{XXXX} can support only BMP. + +2006-01-30 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-coded-charset-expression-alist): Add + setting for `=ruimoku-v6'. + (chise-tex-encode-region-for-jis): Don't decompose ; add settings for and ; support some double accents; support , , , + , , + and . + +2006-01-23 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-encode-region-for-jis): Don't decompose + . + +2006-01-05 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-encode-region-for-jis): Don't skip + Latin-1 characters; try to convert composite characters to LaTeX + macros. + +2005-06-30 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-coded-charset-expression-alist): Use + `=ucs-bmp@{gb|jis|ks|cns}' instead of `=ucs@{gb|jis|ks|cns}'; + don't use UCSjis for `=ucs@jis/2000; use `GTpj{A|B|C|...|K}' + instead of `GTpj{1|2|3|...|11}' for `=gt-pj-{1|2|3|...|11}'. + (iso-2022-jp-tex-jis): Specify `ccs-priority-list'; don't setup + `input-charset-conversion'. + +2005-01-12 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-encode-region-for-gb): Add setting for + KS character. + +2005-01-12 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-encode-region-for-jis): Add setting for + KS character. + +2004-12-15 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-coded-charset-expression-alist): Add + settings for `=ucs-hangul' and `=ucs@jis/2000'; prefer `=ucs@ks' + than `=gt-pj-*'. + (chise-tex-encode-region-for-jis): Add settings for JIS X 0213 + characters, Hangul characters, U+2022 and U+0294; use GB domain + characters as a fallback. + +2004-08-22 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-encode-region-for-jis): New function. + (iso-2022-jp-tex-jis): New coding-system. + +2004-06-24 MORIOKA Tomohiko + + * chise-tex.el (chise-tex-encode-region-for-gb): Renamed from + `chise-tex-encode-region'. + (iso-2022-jp-tex-gb): Renamed from `iso-2022-jp-tex'. + +2004-06-17 MORIOKA Tomohiko + + * chise-tex.el: New file. + diff --git a/elisp/chise-tex.el b/elisp/chise-tex.el new file mode 100644 index 0000000..3760ef7 --- /dev/null +++ b/elisp/chise-tex.el @@ -0,0 +1,554 @@ +;;; chise-tex.el --- Coding-system based chise2otf like tool + +;; Copyright (C) 2004,2005,2006,2007,2008 MORIOKA Tomohiko + +;; Author: MORIOKA Tomohiko +;; Keywords: OTF package, pTeX, CHISE, Multiscript, Multilingual + +;; This file is a part of Omega/CHISE. + +;; This program is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2, or (at +;; your option) any later version. + +;; This program is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program; see the file COPYING. If not, write to +;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Code: + +(defvar chise-tex-coded-charset-expression-alist + '((=ucs-bmp@gb "UCSgb" 4 X) + (=ucs-bmp@jis "UCSjis" 4 X) + (=ucs-bmp@ks "UCSks" 4 X) + (=gt-pj-1 "GTpjA" 4 X) + (=gt-pj-2 "GTpjB" 4 X) + (=gt-pj-3 "GTpjC" 4 X) + (=gt-pj-4 "GTpjD" 4 X) + (=gt-pj-5 "GTpjE" 4 X) + (=gt-pj-6 "GTpjF" 4 X) + (=gt-pj-7 "GTpjG" 4 X) + (=gt-pj-8 "GTpjH" 4 X) + (=gt-pj-9 "GTpjI" 4 X) + (=gt-pj-10 "GTpjJ" 4 X) + (=gt-pj-11 "GTpjK" 4 X) + (=ruimoku-v6 "Ruimoku" 4 X) + (=hanziku-1 "HanzikuA" 4 X) + (=hanziku-2 "HanzikuB" 4 X) + (=hanziku-3 "HanzikuC" 4 X) + (=hanziku-4 "HanzikuD" 4 X) + (=hanziku-5 "HanzikuE" 4 X) + (=hanziku-6 "HanzikuF" 4 X) + (=hanziku-7 "HanzikuG" 4 X) + (=hanziku-8 "HanzikuH" 4 X) + (=hanziku-9 "HanzikuI" 4 X) + (=hanziku-10 "HanzikuJ" 4 X) + (=hanziku-11 "HanzikuK" 4 X) + (=hanziku-12 "HanzikuL" 4 X) + (=ucs-bmp@cns "UCScns" 4 X) + (thai-tis620 "ThaiTIS" 2 X) + )) + +(defvar chise-tex-accent-macro-alist + '((?\u0300 . "`") ; + (?\u0301 . "'") ; + (?\u0302 . "^") ; + (?\u0303 . "~") ; + (?\u0304 . "=") ; + (?\u0306 . "u") ; + (?\u0307 . ".") ; + (?\u0308 . "\"") ; + (?\u0309 . "Hook") + (?\u030C . "v") ; + (?\u0323 . "d") ; + (?\u0327 . "c") ; + (?\u032E . "ubreve") + (?\u0331 . "umacron") + )) + +(defun chise-tex-encode-region-for-gb (start end) + (interactive "r") + (save-excursion + (save-restriction + (narrow-to-region start end) + (goto-char start) + (let (chr ret rest spec) + (while (and (skip-chars-forward "\x00-\xFF") + (not (eobp))) + (setq chr (char-after)) + (cond ((memq chr '(?$(O#@(B)) + (delete-char) + (insert (format "\\UCSjis{%04X}" + (encode-char chr '=ucs@jis))) + ) + ((and (setq ret (encode-char chr '=jis-x0208-1983)) + (< ret #x3021)) + (forward-char)) + ;; ((setq ret (encode-char chr '=jis-x0208-1990)) + ;; (delete-char) + ;; (insert (decode-char '=jis-x0208-1983 ret))) + ((and (encode-char chr '=ks-x1001) + (setq ret (or (encode-char chr '=ucs@ks) + (char-ucs chr)))) + (delete-char) + ;; (if (eq (char-before) ?$B!T(B) + ;; (insert " ")) + (insert (format "\\UCSks{%04X}" ret))) + ((catch 'tag + (setq rest chise-tex-coded-charset-expression-alist) + (while (setq spec (car rest)) + (if (setq ret (encode-char chr (car spec))) + (throw 'tag ret)) + (setq rest (cdr rest)))) + (delete-char) + ;; (if (eq (char-before) ?$B!T(B) + ;; (insert " ")) + (insert (format (format "\\%s{%%0%d%s}" + (nth 1 spec) + (nth 2 spec) + (nth 3 spec)) + ret))) + (t + (forward-char)))))))) + +(defun chise-tex-encode-ucs-char-at-point (&optional chr) + (unless chr + (setq chr (char-after))) + (let (ret rest spec) + (cond + ((and (encode-char chr '=ks-x1001) + (setq ret (or (encode-char chr '=ucs@ks) + (char-ucs chr)))) + (delete-char) + ;; (if (eq (char-before) ?$B!T(B) + ;; (insert " ")) + (insert (format "\\UCSks{%04X}" ret)) + t) + ((setq ret (encode-char chr '=ucs-hangul)) + (delete-char) + ;; (if (eq (char-before) ?$B!T(B) + ;; (insert " ")) + (insert (format "\\UCSks{%04X}" ret)) + t) + ((eq chr ?\u00B2) + (delete-char) + (insert "$^2$") + t) + ((eq chr ?\u00B3) + (delete-char) + (insert "$^3$") + t) + ((eq chr ?\u0111) + (delete-char) + (insert "{\\usefont{T1}{pxr}{m}{n}\\dj}") + t) + ((eq chr ?\u014B) + (delete-char) + (insert "{\\usefont{T1}{pxr}{m}{n}\\ng}") + t) + ((eq chr ?\u0282) + (delete-char) + (insert "{\\usefont{T1}{pxr}{m}{n}\\k{s}}") + t) + ((eq chr ?\u0294) + (delete-char) + (insert "\\UCSjis{0294}") + t) + ((eq chr ?\u2022) + (delete-char) + (insert "\\textbullet{}") + t) + ((eq chr ?\u2080) + (delete-char) + (insert "$_0$") + t) + ((eq chr ?\u2081) + (delete-char) + (insert "$_1$") + t) + ((eq chr ?\u2082) + (delete-char) + (insert "$_2$") + t) + ((eq chr ?\u2083) + (delete-char) + (insert "$_3$") + t) + ((eq chr ?\u2084) + (delete-char) + (insert "$_4$") + t) + ((eq chr ?\u2085) + (delete-char) + (insert "$_5$") + t) + ((eq chr ?\u2086) + (delete-char) + (insert "$_6$") + t) + ((eq chr ?\u2087) + (delete-char) + (insert "$_7$") + t) + ((eq chr ?\u2088) + (delete-char) + (insert "$_8$") + t) + ((eq chr ?\u2089) + (delete-char) + (insert "$_9$") + t) + ((eq chr ?\u208A) + (delete-char) + (insert "$_+$") + t) + ((eq chr ?\u208B) + (delete-char) + (insert "$_-$") + t) + ((eq chr ?\u208C) + (delete-char) + (insert "$_=$") + t) + ((eq chr ?\u208D) + (delete-char) + (insert "$_($") + t) + ((eq chr ?\u208E) + (delete-char) + (insert "$_)$") + t) + ((eq chr ?\u2637) + (delete-char) + (insert "\\UCSgb{2637}") + t) + ((eq (encode-char chr '=ucs@jis) #x8DBC) + (delete-char) + (insert "\\GTpjG{4933}") + t) + ((and (encode-char chr '=ucs@jp) + (setq ret (char-representative-of-domain chr 'gb)) + (setq ret (encode-char ret '=ucs@gb)) + (<= ret #xFFFF)) + (delete-char) + ;; (if (eq (char-before) ?$B!T(B) + ;; (insert " ")) + (insert (format "\\UCSgb{%04X}" ret)) + t) + ((catch 'tag + (setq rest chise-tex-coded-charset-expression-alist) + (while (setq spec (car rest)) + (if (setq ret (encode-char chr (car spec))) + (throw 'tag ret)) + (setq rest (cdr rest)))) + (delete-char) + ;; (if (eq (char-before) ?$B!T(B) + ;; (insert " ")) + (insert (format (format "\\%s{%%0%d%s}" + (nth 1 spec) + (nth 2 spec) + (nth 3 spec)) + ret)) + t) + ))) + +(defun chise-tex-encode-region-for-jis (start end) + (interactive "r") + (save-excursion + (save-restriction + (narrow-to-region start end) + (goto-char start) + (let (chr ret + ;; rest spec + modifier base modifier-1) + (while (and (skip-chars-forward "\x00-\x7F") + (not (eobp))) + (setq chr (char-after)) + (cond ((encode-char chr '=jis-x0208-1983) + (forward-char)) + ;; ((setq ret (encode-char chr '=jis-x0208-1990)) + ;; (delete-char) + ;; (insert (decode-char '=jis-x0208-1983 ret))) + ((and (not (eq (char-ucs chr) #x0439)) + (not (eq (char-ucs chr) #x0451)) + (setq ret (char-feature chr '=decomposition)) + (setq modifier (assq (nth 1 ret) + '((?\u0300 . "`") + (?\u0301 . "'") + (?\u0302 . "^") + (?\u0303 . "~") + (?\u0304 . "=") + (?\u0306 . "u") + (?\u0307 . ".") + (?\u0308 . "\"") + (?\u0309 . "Hook") + (?\u030C . "v") + (?\u0323 . "d") + (?\u0327 . "c") + )))) + (delete-char) + (setq base (car ret)) + (if (and (setq ret (char-feature base '=decomposition)) + (setq modifier-1 + (assq (car modifier) + (cdr + (assq (nth 1 ret) + '((?\u0302 + (?\u0300 . "CircGrave") + (?\u0301 . "CircAcute") + (?\u0303 . "CircTilde") + (?\u0309 . "CircHook") + ) + (?\u031B + (?\u0301 . "HornAcute") + ) + (?\u0323 + (?\u0302 . "Circudot") + ))))))) + (insert (format "\\%s{%c}" (cdr modifier-1) (car ret))) + (insert (format "\\%s{%c}" (cdr modifier) base)))) + ((and (or (encode-char chr '=jis-x0213-1-2000) + (encode-char chr '=jis-x0213-2-2000)) + (setq ret (or (encode-char chr '=ucs@jis/2000) + (encode-char chr '=ucs@jis/fw))) + (<= ret #xFFFF)) + (delete-char) + ;; (if (eq (char-before) ?$B!T(B) + ;; (insert " ")) + (insert (format "\\UCSjis{%04X}" ret))) + ((chise-tex-encode-ucs-char-at-point chr)) + (t + (forward-char)))))))) + +(defun chise-tex-encode-region-for-utf-8-jis (start end) + (interactive "r") + (save-excursion + (save-restriction + (narrow-to-region start end) + (goto-char start) + (let ((font-encoding 'T1) + chr ret + ;; rest spec + modifier base modifier-1 pos) + (while (and (skip-chars-forward "\x00-\x7F") + (not (eobp))) + (setq chr (char-after)) + (cond + ((and (setq ret (encode-char chr '=ucs)) + (and (<= #x0400 ret)(<= ret #x04F9))) + (if (eq font-encoding 'T2A) + (forward-char) + (setq pos (point)) + (unless (and (prog1 + (search-backward + "\\fontencoding{T2A}\\selectfont{}" nil t) + (goto-char pos)) + (eq pos (match-end 0))) + (insert "\\fontencoding{T2A}\\selectfont{}") + ) + (forward-char) + (setq font-encoding 'T2A)) + ) + ((and (setq ret (encode-char chr '=ucs)) + (and (<= #x0374 ret)(<= ret #x03F3))) + (if (eq font-encoding 'LGR) + (forward-char) + (setq pos (point)) + (unless (and (prog1 + (search-backward + "\\fontencoding{LGR}\\selectfont{}" nil t) + (goto-char pos)) + (eq pos (match-end 0))) + (insert "\\fontencoding{LGR}\\selectfont{}") + ) + (forward-char) + (setq font-encoding 'LGR)) + (skip-chars-forward "\u0374-\u03F3") + (unless (eq font-encoding 'T1) + (unless (looking-at + "\\\\fontencoding{T1}\\\\selectfont{}") + (insert "\\fontencoding{T1}\\selectfont{}") + ) + (setq font-encoding 'T1)) + ) + (t + (unless (eq font-encoding 'T1) + (setq pos (point)) + (unless (and (prog1 + (search-backward + "\\fontencoding{T1}\\selectfont{}" nil t) + (goto-char pos)) + (eq pos (match-end 0))) + (insert "\\fontencoding{T1}\\selectfont{}") + ) + (setq font-encoding 'T1)) + (cond ((eq (char-ucs chr) #x00D7) + (delete-char) + (insert "\\UCSjis{00D7}")) + ((encode-char chr '=jis-x0208-1983) + (forward-char)) + ((and (setq ret (char-ucs chr)) + (or (eq ret #x00C5) + (eq ret #x00E5) + (eq ret #x1E2B) + )) + (forward-char)) + ;; ((setq ret (encode-char chr '=jis-x0208-1990)) + ;; (delete-char) + ;; (insert (decode-char '=jis-x0208-1983 ret))) + ((eq (char-ucs chr) #x012B) + (delete-char) + (insert "\\={\\i}") + ) + ((setq ret (encode-char chr 'thai-tis620)) + (delete-char) + (insert (format "\\ThaiTIS{%X}" (logior ret #x80))) + ) + ((and (not (eq (char-ucs chr) #x0439)) + (not (eq (char-ucs chr) #x0451)) + (setq ret (char-feature chr '=decomposition)) + (setq modifier (assq (nth 1 ret) + chise-tex-accent-macro-alist))) + (delete-char) + (setq base (car ret)) + (if (and (setq ret (char-feature base '=decomposition)) + (setq modifier-1 + (assq (car modifier) + (cdr + (assq (nth 1 ret) + '((?\u0302 + (?\u0300 . "\\CircGrave{%c}") + (?\u0301 . "\\'{\\^%c}") + (?\u0303 . "\\~{\\^%c}") + (?\u0309 . "\\CircHook{%c}") + ) + (?\u031B + (?\u0301 . "\\HornAcute{%c}") + (?\u0303 . "\\HornTilde{%c}") + ) + (?\u0323 + (?\u0302 . "\\Circudot{%c}") + ))))))) + (insert (format (cdr modifier-1) (car ret))) + (insert (format "\\%s{%c}" (cdr modifier) base)))) + ((and (or (encode-char chr '=jis-x0213-1-2000) + (encode-char chr '=jis-x0213-2-2000)) + (setq ret (or (encode-char chr '=ucs@jis/2000) + (encode-char chr '=ucs@jis/fw))) + (<= ret #xFFFF)) + ;; (delete-char) + ;; (if (eq (char-before) ?$B!T(B) + ;; (insert " ")) + ;; (insert (format "\\UCSjis{%04X}" ret)) + (forward-char)) + ((chise-tex-encode-ucs-char-at-point chr)) + (t + (forward-char)))))))))) + +(defun chise-tex-decode-region (start end) + (interactive "r") + (save-excursion + (save-restriction + (narrow-to-region start end) + (goto-char start) + (let (macro code ret me rest spec) + (while (search-forward "\\={\\i}" nil t) + (replace-match ".DNo" t t)) + (goto-char start) + (while (re-search-forward "\\\\\\(.\\){\\(.\\)}" nil t) + (when (and + (setq macro + (assq + (aref (match-string 1) 0) + '((?\` . ?\u0300) ; + (?\' . ?\u0301) ; + (?^ . ?\u0302) ; + (?~ . ?\u0303) ; + (?= . ?\u0304) ; + (?u . ?\u0306) ; + (?\. . ?\u0307) ; + (?\" . ?\u0308) ; + (?v . ?\u030C) ; + (?d . ?\u0323) ; + (?c . ?\u0327) ; + ))) + (setq ret + (cdr (assq (cdr macro) + (char-feature (aref (match-string 2) 0) + 'composition))))) + (delete-region (match-beginning 0)(match-end 0)) + (insert ret))) + (goto-char start) + (while (re-search-forward "\\\\\\([a-zA-Z0-9]+\\){\\([0-9A-Fa-f]+\\)}" + nil t) + (setq macro (match-string 1) + code (match-string 2) + me (match-end 0)) + (if (and (catch 'tag + (setq rest chise-tex-coded-charset-expression-alist) + (while (setq spec (car rest)) + (if (string= (nth 1 spec) macro) + (throw 'tag spec)) + (setq rest (cdr rest)))) + (setq ret (decode-char (car spec) + (string-to-int + code + (if (eq (nth 3 spec) 'X) + 16))))) + (progn + (delete-region (match-beginning 0)(match-end 0)) + (insert ret)) + (goto-char me))))))) + +(make-coding-system + 'iso-2022-jp-tex-gb 'iso2022 + "ISO-2022-JP with TeX representation for GB fonts." + '(charset-g0 ascii + short t + seven t + ;; input-charset-conversion ((latin-jisx0201 ascii) + ;; (japanese-jisx0208-1978 japanese-jisx0208)) + pre-write-conversion chise-tex-encode-region-for-gb + post-read-conversion chise-tex-decode-region + mnemonic "pTeX(GB)/7bit" + )) + +(make-coding-system + 'iso-2022-jp-tex-jis 'iso2022 + "ISO-2022-JP with TeX representation for JIS fonts." + '(charset-g0 ascii + short t + seven t + ccs-priority-list (ascii + =jis-x0208@1983 =jis-x0208@1978 + latin-jisx0201) + ;; output-charset-conversion ((=jis-x0208@1990 =jis-x0208@1983)) + pre-write-conversion chise-tex-encode-region-for-jis + post-read-conversion chise-tex-decode-region + mnemonic "pTeX(JIS)/7bit" + )) + +(make-coding-system + 'utf-8-jp-tex 'utf-8 + "Coding-system of UTF-8 for common glyphs used in Japan." + '(pre-write-conversion chise-tex-encode-region-for-utf-8-jis + post-read-conversion chise-tex-decode-region + charset-g0 =ucs@jp + charset-g1 =>ucs-jis + charset-g2 =>ucs + mnemonic "upTeX(JP)/UTF8")) + + +;;; @ End. +;;; + +(provide 'chise-tex) + +;;; chise-tex.el ends here -- 1.7.10.4