;;; ideo-trans.el --- Translation utility for Ideographic Strings
-;; Copyright (C) 2003 MORIOKA Tomohiko
+;; Copyright (C) 2003,2004 MORIOKA Tomohiko
;; Author: MORIOKA Tomohiko <tomo@kanji.zinbun.kyoto-u.ac.jp>
;; Keywords: Ideographs, Character Database, Chaon, CHISE
;;; Code:
+(defun char-cns11643-p (char &optional defined-only)
+  "Return non-nil if `encode-char' succeeds for CHAR in a CNS 11643 plane.
+The coded character sets `=cns11643-1' through `=cns11643-7' are tried
+in that order.  The value is the first non-nil result of `encode-char',
+or nil when every plane yields nil.  DEFINED-ONLY is passed through to
+`encode-char' unchanged."
+  (let ((planes '(1 2 3 4 5 6 7))
+        code)
+    ;; Stop at the first plane for which `encode-char' returns non-nil.
+    (while (and planes
+                (not (setq code
+                           (encode-char char
+                                        (intern (format "=cns11643-%d"
+                                                        (car planes)))
+                                        defined-only))))
+      (setq planes (cdr planes)))
+    code))
+
+(defun char-ks-x1001-p (char &optional defined-only)
+  "Return the result of encoding CHAR in the `korean-ksc5601' charset.
+The value is whatever `encode-char' returns, so it is nil when CHAR
+cannot be encoded.  DEFINED-ONLY is passed through to `encode-char'."
+  (encode-char char 'korean-ksc5601 defined-only))
+
+(defun find-char-variant (char predicate)
+  "Return CHAR or one of its variants that satisfies PREDICATE.
+PREDICATE is called with one character argument.  If CHAR itself
+satisfies it, CHAR is returned.  Otherwise, when `char-ucs' gives a
+code point for CHAR, the list from `char-variants' of the character
+decoded from that code point is searched in order, and the first
+variant satisfying PREDICATE is returned.
+If nothing satisfies PREDICATE, CHAR is returned unchanged."
+  (if (funcall predicate char)
+      char
+    (let* ((ucs (char-ucs char))
+           (variants (and ucs
+                          (char-variants (decode-char 'ucs ucs))))
+           found)
+      ;; Keep the match in its own variable: reusing CHAR as the loop
+      ;; cursor would return the last rejected variant on failure.
+      (while (and variants
+                  (car variants)
+                  (not found))
+        (if (funcall predicate (car variants))
+            (setq found (car variants)))
+        (setq variants (cdr variants)))
+      (or found char))))
+
+;;;###autoload
+(defun char-representative-of-ucs (char)
+  "Convert CHAR into representative character of UCS.
+If `char-ucs' gives a code point for CHAR, return the character
+decoded from it with the `=ucs' coded character set; otherwise
+return CHAR unchanged."
+  ;; Bind the code point locally so a call does not assign to a
+  ;; global variable.
+  (let ((ret (char-ucs char)))
+    (if ret
+        (decode-char '=ucs ret)
+      char)))
+
+;;;###autoload
+(defun char-representative-of-domain (char domain)
+  "Convert CHAR into representative character of DOMAIN.
+DOMAIN is a symbol.  `daikanwa' and `ucs' delegate to
+`char-representative-of-daikanwa' and `char-representative-of-ucs'.
+For `cns' and `ks', the `=>ucs@cns' / `=>ucs@ks' feature of CHAR is
+decoded when present; otherwise `find-char-variant' is asked for a
+variant accepted by `char-cns11643-p' / `char-ks-x1001-p'.
+For any other DOMAIN, the `=>ucs@DOMAIN' feature, or failing that
+`char-ucs', is decoded with `=ucs@DOMAIN'; CHAR itself is returned
+when neither yields a value."
+  (let (ret)
+    (cond
+     ((eq domain 'daikanwa)
+      (char-representative-of-daikanwa char))
+     ((eq domain 'ucs)
+      (char-representative-of-ucs char))
+     ((eq domain 'cns)
+      (setq ret (char-feature char '=>ucs@cns))
+      (if ret
+          (decode-char '=ucs@cns ret)
+        (find-char-variant char 'char-cns11643-p)))
+     ((eq domain 'ks)
+      (setq ret (char-feature char '=>ucs@ks))
+      (if ret
+          (decode-char '=ucs@ks ret)
+        (find-char-variant char 'char-ks-x1001-p)))
+     (t
+      ;; Generic domain: build the feature and CCS names from DOMAIN.
+      (setq ret (or (char-feature char
+                                  (intern (format "=>ucs@%s" domain)))
+                    (char-ucs char)))
+      (if ret
+          (decode-char (intern (format "=ucs@%s" domain)) ret)
+        char)))))
+
+;;;###autoload
+(defun ideo-translate-string-into-ucs (string)
+  "Convert characters in STRING into UCS-representative characters.
+Each character is passed through `char-representative-of-ucs' and the
+results are concatenated into a new string."
+  (let ((to-ucs-string
+         (lambda (ch)
+           (char-to-string (char-representative-of-ucs ch)))))
+    (mapconcat to-ucs-string string "")))
+
;;;###autoload
(defun ideo-translate-string-into-simplified-chinese (string)
"Simplify Chinese traditional characters in STRING."
(mapconcat
(lambda (chr)
(setq uchr
- (if (setq ret (or (char-ucs chr)
- (get-char-attribute chr '=>ucs@gb)))
- (decode-char '=ucs ret)
- chr))
+ (cond ((setq ret (char-feature chr '=>ucs@gb))
+ (setq chr (decode-char '=ucs@gb ret)))
+ ((setq ret (char-ucs chr))
+ (setq chr (decode-char '=ucs@gb ret))
+ (if (setq ret (get-char-attribute chr '=>ucs*))
+ (decode-char '=ucs@gb ret)
+ chr))
+ (t chr)))
(char-to-string
(if (setq ret (encode-char uchr 'chinese-gb12345))
(decode-char 'chinese-gb2312 ret)
'ideo-translate-string-into-simplified-chinese)
;;;###autoload
+(defun ideo-translate-string-into-simplified-japanese (string)
+  "Simplify traditional Kanji characters in STRING.
+Each character is replaced by the first element of its
+`->simplified@JP/Jouyou', `->simplified@JP' or `->simplified' feature,
+tried in that order.  A character with none of these is replaced by
+its `=>ucs@jis' feature decoded with `=ucs@jis', else by its `char-ucs'
+code point decoded with `=ucs@jp', else left unchanged."
+  (let (ret)
+    (mapconcat
+     (lambda (chr)
+       (setq ret (or (char-feature chr '->simplified@JP/Jouyou)
+                     (char-feature chr '->simplified@JP)
+                     (char-feature chr '->simplified)))
+       (char-to-string
+        (cond ((car ret))
+              ;; No simplified form: fall back to a representative.
+              ((setq ret (char-feature chr '=>ucs@jis))
+               (decode-char '=ucs@jis ret))
+              ((setq ret (char-ucs chr))
+               (decode-char '=ucs@jp ret))
+              (t chr))))
+     string "")))
+
+;;;###autoload
+(defun ideo-translate-string-into-traditional (string)
+  "Convert simplified Kanji in STRING into traditional characters.
+For each character the following are tried in order, and the first
+that yields a value is used:
+ 1. the first element of its `<-simplified' feature;
+ 2. the first element of the `<-simplified@JP/Jouyou' or
+    `<-simplified@JP' feature of its JP representative;
+ 3. its GB representative, encoded in `chinese-gb2312' and decoded
+    from the same code point in `chinese-gb12345';
+ 4. its JP representative.
+The JP representative is the `=>ucs@jis' feature decoded with
+`=ucs@jis', else `char-ucs' decoded with `=ucs@jp', else the character
+itself.  The GB representative is the `=>ucs@gb' feature, else
+`char-ucs', decoded with `=ucs@gb', else the character itself."
+  (let (ret)
+    (mapconcat
+     (lambda (chr)
+       (char-to-string
+        (or
+         (car (char-feature chr '<-simplified))
+         ;; Steps 2 and 4 both need the JP representative; compute it once.
+         (let ((jp-char
+                (cond ((setq ret (char-feature chr '=>ucs@jis))
+                       (decode-char '=ucs@jis ret))
+                      ((setq ret (char-ucs chr))
+                       (decode-char '=ucs@jp ret))
+                      (t chr))))
+           (or
+            (car (or (char-feature jp-char '<-simplified@JP/Jouyou)
+                     (char-feature jp-char '<-simplified@JP)))
+            (progn
+              (setq ret
+                    (cond ((setq ret (char-feature chr '=>ucs@gb))
+                           (decode-char '=ucs@gb ret))
+                          ((setq ret (char-ucs chr))
+                           (decode-char '=ucs@gb ret))
+                          (t chr)))
+              ;; Reinterpret the GB 2312 code point in GB 12345.
+              (if (setq ret (encode-char ret 'chinese-gb2312))
+                  (decode-char 'chinese-gb12345 ret)))
+            jp-char)))))
+     string "")))
+
+;;;###autoload
(defun ideo-translate-region-into-traditional (start end)
(interactive "r")
(save-excursion
(while (and (skip-chars-forward "\x00-\xFF")
(not (eobp)))
(setq chr (char-after))
- (if (setq ret (or (get-char-attribute chr '<-simplified@jp-jouyou)
+ (if (setq ret (or (get-char-attribute chr '<-simplified@JP/Jouyou)
+ (get-char-attribute chr '<-simplified@jp-jouyou)
+ (get-char-attribute chr '<-simplified@JP)
(get-char-attribute chr '<-simplified@jp)
- (get-char-attribute chr '<-jp-simplified)))
+ (get-char-attribute chr '<-jp-simplified)
+ (get-char-attribute chr '<-simplified)))
(progn
(if (cdr ret)
(progn