From: yamaoka <yamaoka>
Date: Sun, 25 Dec 2005 10:45:57 +0000 (+0000)
Subject: Update.
X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=0b74ed20f087bead547291ea8df79134a308ac05;p=elisp%2Fflim.git

Update.
---

diff --git a/ChangeLog b/ChangeLog
index 95605a4..669a673 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,23 @@
+2005-12-25  Katsumi Yamaoka  <yamaoka@jpl.org>
+
+	* eword-decode.el: Change the way to decode successive
+	encoded-words: decode B- or Q-encoding in each encoded-word,
+	concatenate them, and decode it as charset.  See the following
+	threads for more information:
+	http://news.gmane.org/group/gmane.emacs.pretest.bugs/thread=9541
+	http://news.gmane.org/group/gmane.emacs.gnus.general/thread=61176
+	(eword-decode-allow-incomplete-encoded-text): New variable.
+	(eword-decode-encoded-words): New function.
+	(eword-decode-string): Use it.
+	(eword-decode-region): Use it.
+	(eword-analyze-encoded-word): Use it.
+	(eword-decode-encoded-word): Abolish.
+	(eword-decode-encoded-text): Abolish.
+	(eword-decode-encoded-word-error-handler): Abolish.
+	(eword-warning-face): Abolish.
+	(eword-decode-encoded-word-default-error-handler): Abolish.
+
+
 2005-12-25  MORIOKA Tomohiko  <tomo@kanji.zinbun.kyoto-u.ac.jp>
 
 	* FLIM: Version 1.14.8 (Shij-Dò) released.-A
diff --git a/eword-decode.el b/eword-decode.el
index ba5b9ac..fdf895d 100644
--- a/eword-decode.el
+++ b/eword-decode.el
@@ -1,6 +1,7 @@
 ;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs
 
-;; Copyright (C) 1995,96,97,98,99,2000,01,03,04 Free Software Foundation, Inc.
+;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
+;;   2005 Free Software Foundation, Inc.
 
 ;; Author: ENAMI Tsugutomo <enami@sys.ptg.sony.co.jp>
 ;;         MORIOKA Tomohiko <tomo@m17n.org>
@@ -88,30 +89,28 @@ If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 if there are in decoded encoded-words (generated by bad manner MUA
 such as a version of Net$cape)."
   (setq string (std11-unfold-string string))
-  (let ((dest "")(ew nil)
-	beg end)
-    (while (and (string-match eword-encoded-word-regexp string)
-		(setq beg (match-beginning 0)
-		      end (match-end 0))
-		)
-      (if (> beg 0)
-	  (if (not
-	       (and (eq ew t)
-		    (string-match "^[ \t]+$" (substring string 0 beg))
-		    ))
-	      (setq dest (concat dest (substring string 0 beg)))
-	    )
-	)
-      (setq dest
-	    (concat dest
-		    (eword-decode-encoded-word
-		     (substring string beg end) must-unfold)
-		    ))
-      (setq string (substring string end))
-      (setq ew t)
-      )
-    (concat dest string)
-    ))
+  (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)"))
+	(next 0)
+	match start words)
+    (while (setq match (string-match regexp string next))
+      (setq start (match-beginning 1)
+	    words nil)
+      (while match
+	(setq next (match-end 0))
+	(push (list (match-string 2 string) ;; charset
+		    (match-string 3 string) ;; language
+		    (match-string 4 string) ;; encoding
+		    (match-string 5 string) ;; encoded-text
+		    (match-string 1 string)) ;; encoded-word
+	      words)
+	(setq match (and (string-match regexp string next)
+			 (= next (match-beginning 0)))))
+      (setq words (eword-decode-encoded-words (nreverse words) must-unfold)
+	    string (concat (substring string 0 start)
+			   words
+			   (substring string next))
+	    next (+ start (length words)))))
+  string)
 
 (defun eword-decode-structured-field-body (string
 					   &optional start-column max-column
@@ -230,24 +229,25 @@ such as a version of Net$cape)."
     (save-restriction
       (narrow-to-region start end)
       (if unfolding
-	  (eword-decode-unfold)
-	)
+	  (eword-decode-unfold))
       (goto-char (point-min))
-      (while (re-search-forward (concat "\\(" eword-encoded-word-regexp "\\)"
-                                        "\\(\n?[ \t]\\)+"
-                                        "\\(" eword-encoded-word-regexp "\\)")
-                                nil t)
-	(replace-match "\\1\\7")
-        (goto-char (point-min))
-	)
-      (while (re-search-forward eword-encoded-word-regexp nil t)
-	(insert (eword-decode-encoded-word
-		 (prog1
-		     (buffer-substring (match-beginning 0) (match-end 0))
-		   (delete-region (match-beginning 0) (match-end 0))
-		   ) must-unfold))
-	)
-      )))
+      (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)"))
+	    match words)
+	(while (setq match (re-search-forward regexp nil t))
+	  (setq start (match-beginning 1)
+		words nil)
+	  (while match
+	    (goto-char (setq end (match-end 0)))
+	    (push (list (match-string 2) ;; charset
+			(match-string 3) ;; language
+			(match-string 4) ;; encoding
+			(match-string 5) ;; encoded-text
+			(match-string 1)) ;; encoded-word
+		  words)
+	    (setq match (looking-at regexp)))
+	  (delete-region start end)
+	  (insert
+	   (eword-decode-encoded-words (nreverse words) must-unfold)))))))
 
 (defun eword-decode-unfold ()
   (goto-char (point-min))
@@ -518,86 +518,82 @@ If SEPARATOR is not nil, it is used as header separator."
 (make-obsolete 'eword-decode-header 'mime-decode-header-in-buffer)
 
 
-;;; @ encoded-word decoder
-;;;
-
-(defvar eword-decode-encoded-word-error-handler
-  'eword-decode-encoded-word-default-error-handler)
-
-(defvar eword-warning-face nil
-  "Face used for invalid encoded-word.")
-
-(defun eword-decode-encoded-word-default-error-handler (word signal)
-  (and (add-text-properties 0 (length word)
-			    (and eword-warning-face
-				 (list 'face eword-warning-face))
-			    word)
-       word))
-
-(defun eword-decode-encoded-word (word &optional must-unfold)
-  "Decode WORD as an encoded-word.
-
-If charset is unknown or unsupported, return WORD.
-If encoding is unknown, or some error occurs while decoding,
-`eword-decode-encoded-word-error-handler' is called with WORD and an
-error condition.
-
-If MUST-UNFOLD is non-nil, unfold decoded WORD."
-  (or (and (string-match eword-encoded-word-regexp word)
-	   (condition-case err
-	       (eword-decode-encoded-text
-		;; charset
-		(substring word (match-beginning 1)(match-end 1))
-		;; language
-		(when (match-beginning 2)
-		  (intern
-		   (downcase
-		    (substring word (1+ (match-beginning 2))(match-end 2)))))
-		;; encoding
-		(upcase
-		 (substring word (match-beginning 3)(match-end 3)))
-		;; encoded-text
-		(substring word (match-beginning 4)(match-end 4))
-		must-unfold)
-	     (error
-	      (funcall eword-decode-encoded-word-error-handler word err))))
-      word))
-
-
-;;; @ encoded-text decoder
+;;; @ encoded-words decoder
 ;;;
 
-(defun eword-decode-encoded-text (charset language encoding string
-					  &optional must-unfold)
-  "Decode STRING as an encoded-text.
-
-If your emacs implementation can not decode CHARSET, it returns nil.
+(defvar eword-decode-allow-incomplete-encoded-text t
+  "*Non-nil means allow incomplete encoded-text in successive encoded-words.
+Dividing of encoded-text in the place other than character boundaries
+violates RFC2047 section 5, while we have a capability to decode it.
+If it is non-nil, the decoder will decode B- or Q-encoding in each
+encoded-word, concatenate them, and decode it by charset.  Otherwise,
+the decoder will fully decode each encoded-word before concatenating
+them.")
 
-If LANGUAGE is non-nil, it is put to `mime-language' text-property.
-If ENCODING is not \"B\" or \"Q\", it occurs error.
-So you should write error-handling code if you don't want break by errors.
+(defun eword-decode-encoded-words (words must-unfold)
+  "Decode successive encoded-words in WORDS and return a decoded string.
+Each element of WORDS looks like (CHARSET LANGUAGE ENCODING ENCODED-TEXT
+ENCODED-WORD).
 
 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-text (generated by bad manner MUA such
-as a version of Net$cape)."
-  (when (mime-charset-to-coding-system charset)
-    (let ((dest (encoded-text-decode-string string encoding)))
-      (when dest
-	(setq dest (decode-mime-charset-string dest charset))
-	(when must-unfold
-	  (setq dest
-		(mapconcat
-		 (function
-		  (lambda (chr)
-		    (cond ((eq chr ?\n) "")
-			  ((eq chr ?\r) "")
-			  ((eq chr ?\t) " ")
-			  (t (char-to-string chr)))))
-		 (std11-unfold-string dest) "")))
-	(when language
-	  (put-text-property 0 (length dest) 'mime-language language dest))
-	dest))))
-
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+  (let (word language charset encoding text rest)
+    (while words
+      (setq word (pop words)
+	    language (nth 1 word))
+      (if (and (or (mime-charset-to-coding-system (setq charset (car word)))
+		   (progn
+		     (message "Unknown charset: %s" charset)
+		     nil))
+	       (cond ((member (setq encoding (nth 2 word)) '("B" "Q"))
+		      t)
+		     ((member encoding '("b" "q"))
+		      (setq encoding (upcase encoding)))
+		     (t
+		      (message "Invalid encoding: %s" encoding)
+		      nil))
+	       (condition-case err
+		   (setq text
+			 (encoded-text-decode-string (nth 3 word) encoding))
+		 (error
+		  (message "%s" (error-message-string err))
+		  nil)))
+	  (if (and eword-decode-allow-incomplete-encoded-text
+		   rest
+		   (caaar rest)
+		   (string-equal (downcase charset) (downcase (caaar rest)))
+		   (equal language (cdaar rest)))
+	      ;; Concatenate text of which the charset is the same.
+	      (setcdr (car rest) (concat (cdar rest) text))
+	    (push (cons (cons charset language) text) rest))
+	;; Don't decode encoded-word.
+	(push (cons (cons nil language) (nth 4 word)) rest)))
+    (while rest
+      (setq word (or (and (setq charset (caaar rest))
+			  (condition-case err
+			      (decode-mime-charset-string (cdar rest) charset)
+			    (error
+			     (message "%s" (error-message-string err))
+			     nil)))
+		     (concat (when (cdr rest) " ")
+			     (cdar rest)
+			     (when (and words
+					(not (eq (string-to-char words) ? )))
+			       " "))))
+      (when must-unfold
+	(setq word (mapconcat (lambda (chr)
+				(cond ((eq chr ?\n) "")
+				      ((eq chr ?\r) "")
+				      ((eq chr ?\t) " ")
+				      (t (char-to-string chr))))
+			      (std11-unfold-string word)
+			      "")))
+      (when (setq language (cdaar rest))
+	(put-text-property 0 (length word) 'mime-language language word))
+      (setq words (concat word words)
+	    rest (cdr rest)))
+    words))
 
 ;;; @ lexical analyze
 ;;;
@@ -717,31 +713,24 @@ be the result.")
   (std11-analyze-special string start))
 
 (defun eword-analyze-encoded-word (string start &optional must-unfold)
-  (if (and (string-match eword-encoded-word-regexp string start)
-	   (= (match-beginning 0) start))
-      (let ((end (match-end 0))
-	    (dest (eword-decode-encoded-word (match-string 0 string)
-					     must-unfold))
-	    )
-	;;(setq string (substring string end))
-	(setq start end)
-	(while (and (string-match (eval-when-compile
-				    (concat "[ \t\n]*\\("
-					    eword-encoded-word-regexp
-					    "\\)"))
-				  string start)
-		    (= (match-beginning 0) start))
-	  (setq end (match-end 0))
-	  (setq dest
-		(concat dest
-			(eword-decode-encoded-word (match-string 1 string)
-						   must-unfold))
-		;;string (substring string end))
-		start end)
-	  )
-	(cons (cons 'atom dest) ;;string)
-	      end)
-	)))
+  (let* ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)"))
+	 (match (and (string-match regexp string start)
+		     (= start (match-beginning 0))))
+	 next words)
+    (while match
+      (setq next (match-end 0))
+      (push (list (match-string 2 string) ;; charset
+		  (match-string 3 string) ;; language
+		  (match-string 4 string) ;; encoding
+		  (match-string 5 string) ;; encoded-text
+		  (match-string 1 string)) ;; encoded-word
+	    words)
+      (setq match (and (string-match regexp string next)
+		       (= next (match-beginning 0)))))
+    (when words
+      (cons (cons 'atom (eword-decode-encoded-words (nreverse words)
+						    must-unfold))
+	    next))))
 
 (defun eword-analyze-atom (string start &optional must-unfold)
   (if (and (string-match std11-atom-regexp string start)