X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=ideo-trans.el;h=63755eb82d1b79f94a59b8f0b3ba7c4c6e2de02c;hb=afb32e0b764bfc083ec3d3d32d6bcec24c884db1;hp=5055791c4c5ebf0e1eeca6a9ab4bfd64f3d4ca74;hpb=d0c81d521e6c5efa6bc852816c56b4820f7bd1c9;p=chise%2Ftomoyo-tools.git diff --git a/ideo-trans.el b/ideo-trans.el index 5055791..63755eb 100644 --- a/ideo-trans.el +++ b/ideo-trans.el @@ -1,6 +1,6 @@ ;;; ideo-trans.el --- Translation utility for Ideographic Strings -;; Copyright (C) 2003 MORIOKA Tomohiko +;; Copyright (C) 2003, 2004, 2005, 2008, 2012, 2013 MORIOKA Tomohiko ;; Author: MORIOKA Tomohiko ;; Keywords: Ideographs, Character Database, Chaon, CHISE @@ -24,17 +24,83 @@ ;;; Code: +(defun char-cns11643-p (char &optional defined-only) + (some (lambda (n) + (encode-char char + (intern (format "=cns11643-%d" n)) + defined-only)) + '(1 2 3 4 5 6 7))) + +(defun char-ks-x1001-p (char &optional defined-only) + (encode-char char 'korean-ksc5601 defined-only)) + +(defun find-char-variant (char predicate) + (if (funcall predicate char) + char + (let ((ucs (char-ucs char)) + variants) + (if (and ucs + (setq variants + (char-variants (decode-char 'ucs ucs)))) + (while (and variants + (setq char (car variants)) + (not (funcall predicate char))) + (setq variants (cdr variants)))) + char))) + +;;;###autoload +(defun char-representative-of-ucs (char) + "Convert CHAR into representative character of UCS." + (let (ret) + (if (setq ret (char-ucs char)) + (decode-char '=ucs ret) + char))) + +;;;###autoload +(defun char-representative-of-domain (char domain) + "Convert CHAR into representative character of DOMAIN." + (let (ret) + (cond ((eq domain 'daikanwa) + (char-representative-of-daikanwa char)) + ((eq domain 'ucs) + (char-representative-of-ucs char)) + ((eq domain 'cns) + (if (setq ret (char-feature char '=>ucs@cns)) + (decode-char '=ucs@cns ret) + (find-char-variant char 'char-cns11643-p))) + ((eq domain 'ks) + (if (setq ret (char-feature char '=>ucs@ks)) + (decode-char '=ucs@ks ret) + (find-char-variant char 'char-ks-x1001-p))) + ((setq ret + (or (char-feature char + (intern (format "=>ucs@%s" domain))) + (char-ucs char))) + (decode-char (intern (format "=ucs@%s" domain)) ret)) + (t char)))) + +;;;###autoload +(defun ideo-translate-string-into-ucs (string) + "Convert characters in STRING into UCS-representative characters." + (mapconcat (lambda (char) + (char-to-string (char-representative-of-ucs char))) + string "")) + ;;;###autoload -(defun ideo-translate-string-into-simplified-chinese (string) +(defun chinese-simplify-string (string) "Simplify Chinese traditional characters in STRING." (let (uchr ret) (mapconcat (lambda (chr) (setq uchr - (if (setq ret (or (char-ucs chr) - (get-char-attribute chr '=>ucs@gb))) - (decode-char '=ucs ret) - chr)) + (cond ((setq ret (char-feature chr '=>ucs@gb)) + (setq chr (decode-char '=ucs@gb ret))) + ((setq ret (char-ucs chr)) + (setq chr (decode-char '=ucs@gb ret)) + (if (setq ret (char-feature chr '=>ucs*)) + (setq chr (decode-char '=ucs@gb ret)) + chr)) + (t chr))) (char-to-string (if (setq ret (encode-char uchr 'chinese-gb12345)) (decode-char 'chinese-gb2312 ret) @@ -42,46 +108,247 @@ string ""))) ;;;###autoload +(define-obsolete-function-alias + 'ideo-translate-string-into-simplified-chinese + 'chinese-simplify-string) + +;;;###autoload (define-obsolete-function-alias 'ideo-trans-simplify-chinese-string - 'ideo-translate-string-into-simplified-chinese) + 'chinese-simplify-string) + +;;;###autoload +(defvar japanese-simplified-relation-features + '(->simplified@JP/Jouyou + ->simplified@jp-jouyou + ;; ->simplified@JP/extra + ;; ->simplified@JP/extra/design + ;; ->simplified@JP/jis + ;; ->simplified@JP/jis/2004 + ;; ->simplified@JP/jis/1978 + ;; ->simplified@JP/misc + ;; ->simplified@JP + ;; ->simplified@jp + ;; ->jp-simplified + ;; ->simplified + ;; ->simplified@JP/old + ;; ->simplified@JP/buddhism + ) + "List of relation features to map traditional Kanji to simplified Kanji used in Japanese.") + +;;;###autoload +(defvar japanese-traditional-relation-features + '(<-simplified@JP/Jouyou + <-simplified@jp-jouyou + <-simplified@JP/extra + <-simplified@JP/extra/design + <-simplified@JP/jis + <-simplified@JP/jis/2004 + <-simplified@JP/jis/1978 + <-simplified@JP/misc + <-simplified@JP + <-simplified@jp + <-jp-simplified + <-simplified + ;; <-simplified@JP/old + ;; <-simplified@JP/buddhism + ) + "List of relation features to map simplified Kanji to traditional Kanji used in Japanese.") + +;;;###autoload +(defun japanese-simplify-string (string) + "Simplify traditional Kanji characters in STRING." + (let (ret rest) + (mapconcat + (lambda (chr) + (setq uchr + (cond ((setq ret (char-feature chr '=>ucs@jis)) + (setq chr (decode-char '=ucs@jis ret))) + ((setq ret (char-ucs chr)) + (setq chr (decode-char '=ucs@jis ret)) + (if (setq ret (char-feature chr '=>ucs*)) + (setq chr (decode-char '=ucs@jis ret)) + chr)) + (t chr))) + (setq rest japanese-simplified-relation-features) + (while (and rest + (null (setq ret (char-feature chr (car rest))))) + (setq rest (cdr rest))) + (char-to-string + (cond ((car ret)) + ((setq ret (char-feature chr '=>ucs@jis)) + (decode-char '=ucs@jis ret)) + ((setq ret (char-ucs chr)) + (decode-char '=ucs@JP ret)) + (t chr)))) + string ""))) + +;;;###autoload +(define-obsolete-function-alias + 'ideo-translate-string-into-simplified-japanese + 'japanese-simplify-string) + + +(defun ideo-trans-select-char (chars &optional prefix) + (let ((i 0) + prompt ret) + (setq prompt + (concat + prefix + (mapconcat (lambda (cell) + (setq i (1+ i)) + (format "%d. %c" i cell)) + chars " ") + " : ")) + (while (and (setq ret (string-to-int (read-string prompt))) + (not (and (< 0 ret) + (<= ret (length chars)))))) + (nth (1- ret) chars))) + +;;;###autoload +(defun chinese-traditionalize-string (string &optional selector) + "Convert simplified Chinese characters in STRING to traditional characters." + (unless selector + (setq selector + (lambda (chars) + (ideo-trans-select-char chars (format "%c => " chr))))) + (let (ret) + (mapconcat + (lambda (chr) + (char-to-string + (cond ((car (setq ret (char-feature chr '<-simplified@CN))) + (if (cdr ret) + (funcall selector ret) + (car ret))) + ((progn + (setq ret + (cond ((setq ret (char-feature chr '=>ucs@gb)) + (decode-char '=ucs@gb ret)) + ((setq ret (char-ucs chr)) + (decode-char '=ucs@gb ret)) + (t chr))) + (if (setq ret (encode-char ret 'chinese-gb2312)) + (setq ret (decode-char 'chinese-gb12345 ret)))) + ret) + (t chr)))) + string ""))) ;;;###autoload -(defun ideo-translate-region-into-traditional (start end) +(define-obsolete-function-alias + 'ideo-translate-chinese-string-into-traditional + 'chinese-traditionalize-string) + +;;;###autoload +(defun japanese-traditionalize-string (string &optional selector) + "Convert simplified Kanji in STRING into traditional characters." + (unless selector + (setq selector + (lambda (chars) + (ideo-trans-select-char chars (format "%c => " chr))))) + (let (ret rest) + (mapconcat + (lambda (chr) + (setq rest japanese-traditional-relation-features) + (while (and rest + (null (setq ret (char-feature chr (car rest))))) + (setq rest (cdr rest))) + (char-to-string + (cond (ret + (if (cdr ret) + (funcall selector ret) + (car ret))) + ((progn + (setq ret + (cond ((setq ret (char-feature chr '=>ucs@jis)) + (decode-char '=ucs@jis ret)) + ((setq ret (char-ucs chr)) + (decode-char '=ucs@JP ret)) + (t chr))) + (setq ret (or (char-feature ret '<-simplified@JP/Jouyou) + (char-feature ret '<-simplified@JP)))) + (if (cdr ret) + (funcall selector ret) + (car ret))) + ((setq ret (char-feature chr '=>ucs@jis)) + (decode-char '=ucs@jis ret)) + ((setq ret (char-ucs chr)) + (decode-char '=ucs@JP ret)) + (t chr)))) + string ""))) + +;;;###autoload +(define-obsolete-function-alias + 'ideo-translate-japanese-string-into-traditional + 'japanese-traditionalize-string) + +;;;###autoload +(defun japanese-traditionalize-region (start end &optional selector) + "Convert Japanese simplified Kanji in the region into traditional characters." (interactive "r") + (unless selector + (setq selector + (lambda (chars) + (ideo-trans-select-char chars (format "%c => " chr))))) (save-excursion (save-restriction (narrow-to-region start end) (goto-char start) - (let (chr ret rret i prompt) + (let (chr ret rret rest) (while (and (skip-chars-forward "\x00-\xFF") (not (eobp))) (setq chr (char-after)) - (if (setq ret (or (get-char-attribute chr '<-simplified@jp-jouyou) - (get-char-attribute chr '<-simplified@jp) - (get-char-attribute chr '<-jp-simplified))) + (setq rest japanese-traditional-relation-features) + (while (and rest + (null (setq ret (char-feature chr (car rest))))) + (setq rest (cdr rest))) + (if ret (progn (if (cdr ret) (progn - (setq i 0) - (setq prompt - (concat - (mapconcat (lambda (cell) - (setq i (1+ i)) - (format "%d. %c" i cell)) - ret " ") - " : ")) - (while (and (setq rret - (string-to-int - (read-string prompt))) - (not (and (< 0 rret) - (<= rret (length ret)))))) + (setq rret (funcall selector ret)) (delete-char) - (insert (nth (1- rret) ret))) + (insert rret)) (delete-char) (insert (car ret)))) (or (eobp) (forward-char)))))))) +;;;###autoload +(defun japanese-simplify-region (start end) + (interactive "r") + (save-excursion + (save-restriction + (narrow-to-region start end) + (goto-char start) + (let (chr ret rret rest) + (while (and (skip-chars-forward "\x00-\xFF") + (not (eobp))) + (setq chr (char-after)) + (setq rest japanese-simplified-relation-features) + (while (and rest + (null (setq ret (char-feature chr (car rest))))) + (setq rest (cdr rest))) + (if ret + (progn + (if (cdr ret) + (progn + (setq rret (ideo-trans-select-char ret)) + (delete-char) + (insert rret)) + (delete-char) + (insert (car ret)))) + (or (eobp) + (forward-char)))))))) + +;;;###autoload +(define-obsolete-function-alias + 'ideo-translate-japanese-region-into-traditional + 'japanese-traditionalize-region) + +;;;###autoload +(define-obsolete-function-alias + 'ideo-translate-region-into-traditional + 'japanese-traditionalize-region) + ;;; @ End. ;;;