+(defun egg-separate-languages (str &optional last-lang)
+ (let (lang last-chinese
+ (len (length str)) i j l)
+ ;; 1st pass -- mark undefined Chinese part
+ (if (or (eq last-lang 'Chinese-GB) (eq last-lang 'Chinese-CNS))
+ (setq last-chinese last-lang))
+ (setq i 0)
+ (while (< i len)
+ (setq j (egg-next-single-property-change i 'egg-lang str len))
+ (if (get-text-property i 'egg-lang str)
+ nil
+ (setq c (egg-string-to-char-at str i)
+ cset (char-charset c))
+ (cond
+ ((eq cset 'chinese-sisheng)
+ (string-match egg-chinese-sisheng-regexp str i)
+ (setq l (match-end 0)
+ j (min j l)
+ lang 'Chinese))
+ ((setq l (egg-chinese-syllable str i))
+ (setq j (+ i l)
+ lang 'Chinese))
+ ((eq cset 'ascii)
+ (if (eq (string-match "[\0-\177\240-\377]+" str (1+ i)) (1+ i))
+ (setq j (match-end 0))
+ (setq j (1+ i)))
+ (if (and (< j len)
+ (eq (char-charset (egg-string-to-char-at str j))
+ 'chinese-sisheng))
+ (setq j (max (1+ i) (- j 6))))
+ (setq lang nil))
+ ((eq cset 'composition)
+ (setq j (+ i (egg-char-bytes c))
+ lang (egg-charset-to-language
+ (char-charset
+ (car (decompose-composite-char c 'list))))))
+ (t
+ (string-match (concat "[" (list (make-char cset 32 32))
+ "-" (list (make-char cset 127 127))
+ "]+")
+ str i)
+ (setq j (match-end 0)
+ lang (egg-charset-to-language cset))))
+ (if lang
+ (put-text-property i j 'egg-lang lang str)))
+ (setq i j))
+ ;; 2nd pass -- set language property
+ (setq i 0)
+ (while (< i len)
+ (setq lang (get-text-property i 'egg-lang str))
+ (cond
+ ((null lang)
+ (setq lang (or last-lang
+ (egg-next-part-lang str i))))
+ ((equal lang 'Chinese)
+ (setq lang (or last-chinese
+ (egg-next-chinese-lang str i)))))
+ (setq last-lang lang)
+ (if (or (eq lang 'Chinese-GB) (eq lang 'Chinese-CNS))
+ (setq last-chinese lang))
+ (setq j i
+ i (egg-next-single-property-change i 'egg-lang str len))
+ (set-text-properties j i (list 'egg-lang lang) str))))
+
+;;; Should think again the interface to language-info-alist
+(defun egg-charset-to-language (charset)
+ (let ((list language-info-alist))
+ (while (and list
+ (null (memq charset (assq 'charset (car list)))))
+ (setq list (cdr list)))
+ (if list
+ (intern (car (car list))))))
+
+(defun egg-next-part-lang (str pos)
+ (let ((lang (get-text-property
+ (egg-next-single-property-change pos 'egg-lang str (length str))
+ 'egg-lang str)))
+ (if (eq lang 'Chinese)
+ (egg-next-chinese-lang str pos)
+ (or lang
+ its-current-language
+ egg-default-language))))
+
+(defun egg-next-chinese-lang (str pos)
+ (let ((len (length str)) lang)
+ (while (and (< pos len) (null lang))
+ (setq pos (egg-next-single-property-change pos 'egg-lang str len)
+ lang (get-text-property pos 'egg-lang str))
+ (if (null (or (eq lang 'Chinese-GB)
+ (eq lang 'Chinese-CNS)))
+ (setq lang nil)))
+ (cond
+ (lang lang)
+ ((eq its-current-language 'Chinese-GB) 'Chinese-GB)
+ ((eq its-current-language 'Chinese-CNS) 'Chinese-CNS)
+ ((eq egg-default-language 'Chinese-GB) 'Chinese-GB)
+ ((eq egg-default-language 'Chinese-CNS) 'Chinese-CNS)
+ (t 'Chinese-GB))))
+\f