* eword-decode.el: Change the way to decode successive encoded-words:

author yamaoka <yamaoka>

Sun, 25 Dec 2005 10:45:52 +0000 (10:45 +0000)

committer yamaoka <yamaoka>

Sun, 25 Dec 2005 10:45:52 +0000 (10:45 +0000)
author yamaoka <yamaoka>
Sun, 25 Dec 2005 10:45:52 +0000 (10:45 +0000)
committer yamaoka <yamaoka>
Sun, 25 Dec 2005 10:45:52 +0000 (10:45 +0000)
diff --git a/ChangeLog b/ChangeLog

index 7bcbe60..34cbe2e 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,23 @@
+2005-12-25  Katsumi Yamaoka  <yamaoka@jpl.org>
+
+       * eword-decode.el: Change the way to decode successive
+       encoded-words: decode B- or Q-encoding in each encoded-word,
+       concatenate them, and decode it as charset.  See the following
+       threads for more information:
+       http://news.gmane.org/group/gmane.emacs.pretest.bugs/thread=9541
+       http://news.gmane.org/group/gmane.emacs.gnus.general/thread=61176
+       (eword-decode-allow-incomplete-encoded-text): New variable.
+       (eword-decode-encoded-words): New function.
+       (eword-decode-string): Use it.
+       (eword-decode-region): Use it.
+       (eword-analyze-encoded-word): Use it.
+       (eword-decode-encoded-word): Abolish.
+       (eword-decode-encoded-text): Abolish.
+       (eword-decode-encoded-word-error-handler): Abolish.
+       (eword-warning-face): Abolish.
+       (eword-decode-encoded-word-default-error-handler): Abolish.
+
+\f
  2005-12-25  MORIOKA Tomohiko  <tomo@kanji.zinbun.kyoto-u.ac.jp>
  
         * FLIM: Version 1.14.8 (Shij\e-Dò) released.\e-A
diff --git a/eword-decode.el b/eword-decode.el

index fe46018..ff38088 100644 (file)
--- a/eword-decode.el
+++ b/eword-decode.el
@@ -1,6 +1,7 @@
  ;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs
  
-;; Copyright (C) 1995,96,97,98,99,2000,01,03,04 Free Software Foundation, Inc.
+;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
+;;   2005 Free Software Foundation, Inc.
  
  ;; Author: ENAMI Tsugutomo <enami@sys.ptg.sony.co.jp>
  ;;         MORIOKA Tomohiko <tomo@m17n.org>
@@ -88,30 +89,28 @@ If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
  if there are in decoded encoded-words (generated by bad manner MUA
  such as a version of Net$cape)."
    (setq string (std11-unfold-string string))
-  (let ((dest "")(ew nil)
-       beg end)
-    (while (and (string-match eword-encoded-word-regexp string)
-               (setq beg (match-beginning 0)
-                     end (match-end 0))
-               )
-      (if (> beg 0)
-         (if (not
-              (and (eq ew t)
-                   (string-match "^[ \t]+$" (substring string 0 beg))
-                   ))
-             (setq dest (concat dest (substring string 0 beg)))
-           )
-       )
-      (setq dest
-           (concat dest
-                   (eword-decode-encoded-word
-                    (substring string beg end) must-unfold)
-                   ))
-      (setq string (substring string end))
-      (setq ew t)
-      )
-    (concat dest string)
-    ))
+  (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)"))
+       (next 0)
+       match start words)
+    (while (setq match (string-match regexp string next))
+      (setq start (match-beginning 1)
+           words nil)
+      (while match
+       (setq next (match-end 0))
+       (push (list (match-string 2 string) ;; charset
+                   (match-string 3 string) ;; language
+                   (match-string 4 string) ;; encoding
+                   (match-string 5 string) ;; encoded-text
+                   (match-string 1 string)) ;; encoded-word
+             words)
+       (setq match (and (string-match regexp string next)
+                        (= next (match-beginning 0)))))
+      (setq words (eword-decode-encoded-words (nreverse words) must-unfold)
+           string (concat (substring string 0 start)
+                          words
+                          (substring string next))
+           next (+ start (length words)))))
+  string)
  
  (defun eword-decode-structured-field-body (string
                                            &optional start-column max-column
@@ -223,24 +222,25 @@ such as a version of Net$cape)."
      (save-restriction
        (narrow-to-region start end)
        (if unfolding
-         (eword-decode-unfold)
-       )
+         (eword-decode-unfold))
        (goto-char (point-min))
-      (while (re-search-forward (concat "\\(" eword-encoded-word-regexp "\\)"
-                                        "\\(\n?[ \t]\\)+"
-                                        "\\(" eword-encoded-word-regexp "\\)")
-                                nil t)
-       (replace-match "\\1\\7")
-        (goto-char (point-min))
-       )
-      (while (re-search-forward eword-encoded-word-regexp nil t)
-       (insert (eword-decode-encoded-word
-                (prog1
-                    (buffer-substring (match-beginning 0) (match-end 0))
-                  (delete-region (match-beginning 0) (match-end 0))
-                  ) must-unfold))
-       )
-      )))
+      (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)"))
+           match words)
+       (while (setq match (re-search-forward regexp nil t))
+         (setq start (match-beginning 1)
+               words nil)
+         (while match
+           (goto-char (setq end (match-end 0)))
+           (push (list (match-string 2) ;; charset
+                       (match-string 3) ;; language
+                       (match-string 4) ;; encoding
+                       (match-string 5) ;; encoded-text
+                       (match-string 1)) ;; encoded-word
+                 words)
+           (setq match (looking-at regexp)))
+         (delete-region start end)
+         (insert
+          (eword-decode-encoded-words (nreverse words) must-unfold)))))))
  
  (defun eword-decode-unfold ()
    (goto-char (point-min))
@@ -511,86 +511,82 @@ If SEPARATOR is not nil, it is used as header separator."
  (make-obsolete 'eword-decode-header 'mime-decode-header-in-buffer)
  
  
-;;; @ encoded-word decoder
-;;;
-
-(defvar eword-decode-encoded-word-error-handler
-  'eword-decode-encoded-word-default-error-handler)
-
-(defvar eword-warning-face nil
-  "Face used for invalid encoded-word.")
-
-(defun eword-decode-encoded-word-default-error-handler (word signal)
-  (and (add-text-properties 0 (length word)
-                           (and eword-warning-face
-                                (list 'face eword-warning-face))
-                           word)
-       word))
-
-(defun eword-decode-encoded-word (word &optional must-unfold)
-  "Decode WORD as an encoded-word.
-
-If charset is unknown or unsupported, return WORD.
-If encoding is unknown, or some error occurs while decoding,
-`eword-decode-encoded-word-error-handler' is called with WORD and an
-error condition.
-
-If MUST-UNFOLD is non-nil, unfold decoded WORD."
-  (or (and (string-match eword-encoded-word-regexp word)
-          (condition-case err
-              (eword-decode-encoded-text
-               ;; charset
-               (substring word (match-beginning 1)(match-end 1))
-               ;; language
-               (when (match-beginning 2)
-                 (intern
-                  (downcase
-                   (substring word (1+ (match-beginning 2))(match-end 2)))))
-               ;; encoding
-               (upcase
-                (substring word (match-beginning 3)(match-end 3)))
-               ;; encoded-text
-               (substring word (match-beginning 4)(match-end 4))
-               must-unfold)
-            (error
-             (funcall eword-decode-encoded-word-error-handler word err))))
-      word))
-
-
-;;; @ encoded-text decoder
+;;; @ encoded-words decoder
  ;;;
  
-(defun eword-decode-encoded-text (charset language encoding string
-                                         &optional must-unfold)
-  "Decode STRING as an encoded-text.
-
-If your emacs implementation can not decode CHARSET, it returns nil.
+(defvar eword-decode-allow-incomplete-encoded-text t
+  "*Non-nil means allow incomplete encoded-text in successive encoded-words.
+Dividing of encoded-text in the place other than character boundaries
+violates RFC2047 section 5, while we have a capability to decode it.
+If it is non-nil, the decoder will decode B- or Q-encoding in each
+encoded-word, concatenate them, and decode it by charset.  Otherwise,
+the decoder will fully decode each encoded-word before concatenating
+them.")
  
-If LANGUAGE is non-nil, it is put to `mime-language' text-property.
-If ENCODING is not \"B\" or \"Q\", it occurs error.
-So you should write error-handling code if you don't want break by errors.
+(defun eword-decode-encoded-words (words must-unfold)
+  "Decode successive encoded-words in WORDS and return a decoded string.
+Each element of WORDS looks like (CHARSET LANGUAGE ENCODING ENCODED-TEXT
+ENCODED-WORD).
  
  If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-text (generated by bad manner MUA such
-as a version of Net$cape)."
-  (when (mime-charset-to-coding-system charset)
-    (let ((dest (encoded-text-decode-string string encoding)))
-      (when dest
-       (setq dest (decode-mime-charset-string dest charset))
-       (when must-unfold
-         (setq dest
-               (mapconcat
-                (function
-                 (lambda (chr)
-                   (cond ((eq chr ?\n) "")
-                         ((eq chr ?\r) "")
-                         ((eq chr ?\t) " ")
-                         (t (char-to-string chr)))))
-                (std11-unfold-string dest) "")))
-       (when language
-         (put-text-property 0 (length dest) 'mime-language language dest))
-       dest))))
-
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+  (let (word language charset encoding text rest)
+    (while words
+      (setq word (pop words)
+           language (nth 1 word))
+      (if (and (or (mime-charset-to-coding-system (setq charset (car word)))
+                  (progn
+                    (message "Unknown charset: %s" charset)
+                    nil))
+              (cond ((member (setq encoding (nth 2 word)) '("B" "Q"))
+                     t)
+                    ((member encoding '("b" "q"))
+                     (setq encoding (upcase encoding)))
+                    (t
+                     (message "Invalid encoding: %s" encoding)
+                     nil))
+              (condition-case err
+                  (setq text
+                        (encoded-text-decode-string (nth 3 word) encoding))
+                (error
+                 (message "%s" (error-message-string err))
+                 nil)))
+         (if (and eword-decode-allow-incomplete-encoded-text
+                  rest
+                  (caaar rest)
+                  (string-equal (downcase charset) (downcase (caaar rest)))
+                  (equal language (cdaar rest)))
+             ;; Concatenate text of which the charset is the same.
+             (setcdr (car rest) (concat (cdar rest) text))
+           (push (cons (cons charset language) text) rest))
+       ;; Don't decode encoded-word.
+       (push (cons (cons nil language) (nth 4 word)) rest)))
+    (while rest
+      (setq word (or (and (setq charset (caaar rest))
+                         (condition-case err
+                             (decode-mime-charset-string (cdar rest) charset)
+                           (error
+                            (message "%s" (error-message-string err))
+                            nil)))
+                    (concat (when (cdr rest) " ")
+                            (cdar rest)
+                            (when (and words
+                                       (not (eq (string-to-char words) ? )))
+                              " "))))
+      (when must-unfold
+       (setq word (mapconcat (lambda (chr)
+                               (cond ((eq chr ?\n) "")
+                                     ((eq chr ?\r) "")
+                                     ((eq chr ?\t) " ")
+                                     (t (char-to-string chr))))
+                             (std11-unfold-string word)
+                             "")))
+      (when (setq language (cdaar rest))
+       (put-text-property 0 (length word) 'mime-language language word))
+      (setq words (concat word words)
+           rest (cdr rest)))
+    words))
  
  ;;; @ lexical analyze
  ;;;
@@ -710,31 +706,24 @@ be the result.")
    (std11-analyze-special string start))
  
  (defun eword-analyze-encoded-word (string start &optional must-unfold)
-  (if (and (string-match eword-encoded-word-regexp string start)
-          (= (match-beginning 0) start))
-      (let ((end (match-end 0))
-           (dest (eword-decode-encoded-word (match-string 0 string)
-                                            must-unfold))
-           )
-       ;;(setq string (substring string end))
-       (setq start end)
-       (while (and (string-match (eval-when-compile
-                                   (concat "[ \t\n]*\\("
-                                           eword-encoded-word-regexp
-                                           "\\)"))
-                                 string start)
-                   (= (match-beginning 0) start))
-         (setq end (match-end 0))
-         (setq dest
-               (concat dest
-                       (eword-decode-encoded-word (match-string 1 string)
-                                                  must-unfold))
-               ;;string (substring string end))
-               start end)
-         )
-       (cons (cons 'atom dest) ;;string)
-             end)
-       )))
+  (let* ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)"))
+        (match (and (string-match regexp string start)
+                    (= start (match-beginning 0))))
+        next words)
+    (while match
+      (setq next (match-end 0))
+      (push (list (match-string 2 string) ;; charset
+                 (match-string 3 string) ;; language
+                 (match-string 4 string) ;; encoding
+                 (match-string 5 string) ;; encoded-text
+                 (match-string 1 string)) ;; encoded-word
+           words)
+      (setq match (and (string-match regexp string next)
+                      (= next (match-beginning 0)))))
+    (when words
+      (cons (cons 'atom (eword-decode-encoded-words (nreverse words)
+                                                   must-unfold))
+           next))))
  
  (defun eword-analyze-atom (string start &optional must-unfold)
    (if (and (string-match std11-atom-regexp string start)
author	yamaoka <yamaoka>
	Sun, 25 Dec 2005 10:45:52 +0000 (10:45 +0000)
committer	yamaoka <yamaoka>
	Sun, 25 Dec 2005 10:45:52 +0000 (10:45 +0000)
ChangeLog		patch \| blob \| history
eword-decode.el		patch \| blob \| history