X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=egg-com.el;h=72d8045a7978ab53cfe454e4cb66d21045a956d1;hb=9a41be0e7710d1d2238431123fd5e12cd4bcf77e;hp=1a26c8fa96c89b7db1f033c4ac8ca4a141304ec8;hpb=ccc2c3cf2a993db1166638b047bff13ee5100336;p=elisp%2Fegg.git diff --git a/egg-com.el b/egg-com.el index 1a26c8f..72d8045 100644 --- a/egg-com.el +++ b/egg-com.el @@ -31,8 +31,6 @@ ;;; Code: -(require 'egg-edep) - (defvar egg-fixed-euc 'fixed-euc-jp) (make-variable-buffer-local 'egg-fixed-euc) @@ -62,24 +60,30 @@ (r0 = (r1 | ?\x80)) (write-read-repeat r0))))))))) -(define-ccl-program ccl-encode-fixed-euc-jp +(define-ccl-program ccl-encode-fixed-euc `(2 ((read r0) (loop - (if (r0 == ,(charset-id 'latin-jisx0201)) ; Unify +; (if (r0 < ?\x20) +; (write-read-repeat r0)) + (if (r0 == ,(charset-id 'latin-jisx0201)) ; Unify ((read r0) (r0 &= ?\x7f))) - (if (r0 < ?\x80) ;G0 + (if (r0 < ?\x80) ((write 0) (write-read-repeat r0))) (r6 = (r0 == ,(charset-id 'japanese-jisx0208))) (r6 |= (r0 == ,(charset-id 'japanese-jisx0208-1978))) + (r6 |= (r0 == ,(charset-id 'chinese-gb2312))) + (r6 |= (r0 == ,(charset-id 'korean-ksc5601))) (if r6 ;G1 ((read r0) (write r0) (read r0) (write-read-repeat r0))) - (if (r0 == ,(charset-id 'katakana-jisx0201)) ;G2 + (r6 = (r0 == ,(charset-id 'katakana-jisx0201))) + (r6 |= (r0 == ,(charset-id 'chinese-sisheng))) + (if r6 ;G2 ((read r0) (write 0) (write-read-repeat r0))) @@ -94,7 +98,7 @@ ) (make-coding-system 'fixed-euc-jp 4 ?W "Coding System for fixed EUC Japanese" - (cons ccl-decode-fixed-euc-jp ccl-encode-fixed-euc-jp)) + (cons ccl-decode-fixed-euc-jp ccl-encode-fixed-euc)) ;; Korean @@ -115,13 +119,17 @@ `(2 ((read r0) (loop +; (if (r0 < ?\x20) +; (write-read-repeat r0)) (if (r0 < ?\x80) ((write 0) (write-read-repeat r0))) (if (r0 == ,(charset-id 'korean-ksc5601)) ((read r0) + (r0 |= ?\x80) (write r0) (read r0) + (r0 |= ?\x80) (write-read-repeat r0))) (read r0) (repeat))))) @@ -408,23 +416,20 @@ ?\x0000 ]) -(defconst egg-chinese-syllable-max-len - (max (length "Zhu(0!(Bng(0@(B") (length "(0ShdA(B"))) - -(defun egg-chinese-syllable (str pos) - (setq str (substring str pos (min (length str) - (+ pos egg-chinese-syllable-max-len)))) +(defun egg-chinese-syllable (str &optional start) + (if start + (setq str (substring str start))) (or (car (egg-pinyin-syllable str)) (car (egg-zhuyin-syllable str)))) (defsubst egg-make-fixed-euc-china-code (s y) - (cons - (+ (* 2 (nth 1 y)) (logand (nth 2 y) 1) 32) - (+ (* 4 (if (= s 0) 20 s)) (lsh (nth 2 y) -1) 156))) + (concat (list + (+ (* 2 (nth 1 y)) (logand (nth 2 y) 1) 32) + (+ (* 4 (if (= s 0) 20 s)) (lsh (nth 2 y) -1) 156)))) (defun egg-pinyin-syllable (str) (let (s y end) - (if (eq (string-match "^[A-Za-z(0!(B-(0?(B]+(0@(B" str) 0) + (if (string-match "^[A-Za-z(0!(B-(0?(B]+(0@(B" str) (progn (setq end (match-end 0)) (cond @@ -439,8 +444,8 @@ (cons end (egg-make-fixed-euc-china-code s y))))))) (defun egg-zhuyin-syllable (str) - (let (end s y c z (zhuyin-len (egg-charset-bytes 'chinese-sisheng))) - (if (eq (string-match "^[(0E(B-(0i(B@0-4]+[(0@ABCD(B]" str) 0) + (let (end s y c z (zhuyin-len (charset-bytes 'chinese-sisheng))) + (if (string-match "^[(0E(B-(0i(B@0-4]+[(0@ABCD(B]" str) (progn (setq end (match-end 0) c (substring str 0 zhuyin-len) @@ -460,22 +465,21 @@ (defun encode-fixed-euc-china-region (beg end type) "Encode the text in the region to EUC-CN/TW." - (let (s syl c cset) + (let (s syl c cset (maxlen (max (length "Zhu(0!(Bng(0@(B") (length "(0ShdA(B")))) (save-excursion (save-restriction (narrow-to-region beg end) (goto-char (point-min)) (while (< (point) (point-max)) - (setq s (buffer-substring - (point) - (min (point-max) (+ (point) egg-chinese-syllable-max-len)))) + (setq s (buffer-substring (point) + (min (+ (point) maxlen) (point-max)))) (cond ((setq syl (egg-pinyin-syllable s)) (delete-region (point) (+ (point) (car syl))) - (insert (car (cdr syl)) (cdr (cdr syl)))) + (insert (cdr syl))) ((setq syl (egg-zhuyin-syllable s)) (delete-region (point) (+ (point) (car syl))) - (insert (car (cdr syl)) (cdr (cdr syl)))) + (insert (cdr syl))) (t (setq c (split-char (following-char)) cset (car c)) @@ -490,11 +494,9 @@ ((eq cset 'chinese-sisheng) (delete-char 1) (insert 0 (+ (nth 1 c) 128))) - ((eq cset 'ascii) - (delete-char 1) - (insert 0 (nth 1 c))) (t - (delete-char 1)))))) + (delete-region (point) (1+ (point))) + (insert 0 (nth 1 c))))))) (- (point-max) (point-min)))))) (defun pre-write-encode-fixed-euc-china (from to type) @@ -502,11 +504,9 @@ (work (get-buffer-create " *pre-write-encoding-work*"))) (set-buffer work) (erase-buffer) - (if (null (stringp from)) - (save-excursion - (set-buffer buf) - (setq from (buffer-substring from to)))) - (insert (string-as-multibyte from)) + (if (stringp from) + (insert from) + (insert-buffer-substring buf from to)) (encode-fixed-euc-china-region 1 (point-max) type) nil)) @@ -519,56 +519,54 @@ (defun decode-fixed-euc-china-region (beg end type) "Decode EUC-CN/TW encoded text in the region. Return the length of resulting text." + (interactive "r") (prog1 - (let ((str (string-as-unibyte (buffer-substring beg end))) - (i 0) - l c0 c1 s y ss) - (delete-region beg end) - (setq l (1- (length str))) - (while (< i l) - (setq c0 (aref str i) - c1 (aref str (1+ i)) - i (+ i 2)) - (cond - ((eq c0 0) - (if (> c1 ?\xa0) - (insert leading-code-private-11 - (charset-id 'chinese-sisheng) - c1) - (insert c1))) - ((>= c0 ?\x80) + (let (c0 c1 s y ss) + (save-restriction + (narrow-to-region beg end) + (goto-char (point-min)) + (while (< (point) (point-max)) + (setq c1 (buffer-substring (point) (+ (point) 2)) + c0 (aref c1 0) + c1 (aref c1 1)) + (delete-region (point) (+ (point) 2)) (cond - ((eq type 'cn) - (insert (charset-id 'chinese-gb2312) c0 (logior c1 ?\x80))) + ((eq c0 0) + (if (> c1 ?\xa0) + (insert leading-code-private-11 + (charset-id 'chinese-sisheng) + c1) + (insert c1))) ((>= c0 ?\x80) - (insert (charset-id 'chinese-cns11643-1) c0 c1)) + (cond + ((eq type 'cn) + (insert (charset-id 'chinese-gb2312) c0 (logior c1 ?\x80))) + ((>= c0 ?\x80) + (insert (charset-id 'chinese-cns11643-1) c0 c1)) + (t + (insert (charset-id 'chinese-cns11643-2) c0 (+ c1 ?\x80))))) (t - (insert (charset-id 'chinese-cns11643-2) c0 (+ c1 ?\x80))))) - (t - (setq c1 (logand c1 ?\x7f)) - (setq s (- (lsh c1 -2) 7);;(+ (lsh (- c1 32) -2) 1) - y (- (lsh c0 -1) 16);;(lsh (- c0 32) -1) - ss (+ (logand c0 1) (logand c1 3))) - (if egg-zhuyin - (progn - (setq c0 (aref yincode-zhuyin-table (+ (* 41 s) y))) - (if (eq (logand c0 ?\x8080) ?\x80) - (setq s (lsh c0 -8) - y (logand c0 ?\x7f))) - (if (and (eq s 20) - (eq (aref yincode-pinyin-table (+ (* 39 s) y)) 0)) - (setq s 0)) - (setq s (car (nth s yincode-zhuyin-shengmu)) - y (car (nth (+ (* 5 y) ss) yincode-zhuyin-yunmu)))) - (if (and (eq s 20) - (eq (aref yincode-pinyin-table (+ (* 39 s) y)) 0)) - (setq s 0)) - (setq s (car (nth s yincode-pinyin-shengmu)) - y (car (nth (+ (* 5 y) ss) yincode-pinyin-yunmu)))) - (if enable-multibyte-characters - (insert s y) - (insert (string-as-unibyte s) (string-as-unibyte y)))))) - (- (point) beg)) + (setq c1 (logand c1 ?\x7f)) + (setq s (- (lsh c1 -2) 7) ;;(+ (lsh (- c1 32) -2) 1) + y (- (lsh c0 -1) 16) ;;(lsh (- c0 32) -1) + ss (+ (logand c0 1) (logand c1 3))) + (if egg-zhuyin + (progn + (setq c0 (aref yincode-zhuyin-table (+ (* 41 s) y))) + (if (eq (logand c0 ?\x8080) ?\x80) + (setq s (lsh c0 -8) + y (logand c0 ?\x7f))) + (if (and (eq s 20) + (eq (aref yincode-pinyin-table (+ (* 39 s) y)) 0)) + (setq s 0)) + (insert (car (nth s yincode-zhuyin-shengmu)) + (car (nth (+ (* 5 y) ss) yincode-zhuyin-yunmu)))) + (if (and (eq s 20) + (eq (aref yincode-pinyin-table (+ (* 39 s) y)) 0)) + (setq s 0)) + (insert (car (nth s yincode-pinyin-shengmu)) + (car (nth (+ (* 5 y) ss) yincode-pinyin-yunmu))))))) + (- (point-max) (point-min)))) (if (looking-at "\0\0") (forward-char 2)))) (defun post-read-decode-fixed-euc-china (len type) @@ -584,13 +582,13 @@ Return the length of resulting text." (defun post-read-decode-euc-tw (len) (post-read-decode-fixed-euc-china len 'tw)) -(make-coding-system 'fixed-euc-cn 0 ?W "Coding System for fixed EUC Chinese-gb2312") -(coding-system-put 'fixed-euc-cn 'pre-write-conversion 'pre-write-encode-euc-cn) -(coding-system-put 'fixed-euc-cn 'post-read-conversion 'post-read-decode-euc-cn) +(make-coding-system 'fixed-euc-cn 5 ?W "Coding System for fixed EUC Chinese-gb2312") +(put 'fixed-euc-cn 'pre-write-conversion 'pre-write-encode-euc-cn) +(put 'fixed-euc-cn 'post-read-conversion 'post-read-decode-euc-cn) -(make-coding-system 'fixed-euc-tw 0 ?W "Coding System for fixed EUC Chinese-cns11643") -(coding-system-put 'fixed-euc-tw 'pre-write-conversion 'pre-write-encode-euc-tw) -(coding-system-put 'fixed-euc-tw 'post-read-conversion 'post-read-decode-euc-tw) +(make-coding-system 'fixed-euc-tw 5 ?W "Coding System for fixed EUC Chinese-cns11643") +(put 'fixed-euc-tw 'pre-write-conversion 'pre-write-encode-euc-tw) +(put 'fixed-euc-tw 'post-read-conversion 'post-read-decode-euc-tw) (defsubst comm-format-u32c (uint32c) (let ((h0 (car uint32c)) @@ -691,6 +689,10 @@ v means 8-bit vector." 'progn result))) +(if (not (fboundp 'string-as-multibyte)) + (defsubst string-as-multibyte (str) + str)) + ;; Do not move the point, leave it where it was. (defun comm-accept-process-output (proc) (let ((p (point)))