;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs
-;; Copyright (C) 1995,1996,1997,1998,1999,2000 Free Software Foundation, Inc.
+;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
+;; 2005 Free Software Foundation, Inc.
;; Author: ENAMI Tsugutomo <enami@sys.ptg.sony.co.jp>
;; MORIOKA Tomohiko <tomo@m17n.org>
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
;;; Code:
(eval-when-compile
(concat (regexp-quote "=?")
"\\("
- mime-charset-regexp
+ mime-charset-regexp ; 1
"\\)"
+ "\\("
+ (regexp-quote "*")
+ mime-language-regexp ; 2
+ "\\)?"
(regexp-quote "?")
- "\\([BbQq]\\)"
+ "\\("
+ mime-encoding-regexp ; 3
+ "\\)"
(regexp-quote "?")
"\\("
- eword-encoded-text-regexp
+ eword-encoded-text-regexp ; 4
"\\)"
(regexp-quote "?="))))
)
If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
if there are in decoded encoded-words (generated by bad manner MUA
-such as a version of Net$cape)."
+such as a version of Net$cape).
+
+The language informations specified in the encoded words, if any, are
+put to the decoded text as the `mime-language' text property."
(setq string (std11-unfold-string string))
- (let ((dest "")(ew nil)
- beg end)
- (while (and (string-match eword-encoded-word-regexp string)
- (setq beg (match-beginning 0)
- end (match-end 0))
- )
- (if (> beg 0)
- (if (not
- (and (eq ew t)
- (string-match "^[ \t]+$" (substring string 0 beg))
- ))
- (setq dest (concat dest (substring string 0 beg)))
- )
- )
- (setq dest
- (concat dest
- (eword-decode-encoded-word
- (substring string beg end) must-unfold)
- ))
- (setq string (substring string end))
- (setq ew t)
- )
- (concat dest string)
- ))
+ (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)"))
+ (next 0)
+ match start words)
+ (while (setq match (string-match regexp string next))
+ (setq start (match-beginning 1)
+ words nil)
+ (while match
+ (setq next (match-end 0))
+ (push (list (match-string 2 string) ;; charset
+ (when (match-beginning 3) ;; language
+ (intern
+ (downcase
+ (substring string
+ (1+ (match-beginning 3)) (match-end 3)))))
+ (match-string 4 string) ;; encoding
+ (match-string 5 string) ;; encoded-text
+ (match-string 1 string)) ;; encoded-word
+ words)
+ (setq match (and (string-match regexp string next)
+ (= next (match-beginning 0)))))
+ (setq words (eword-decode-encoded-words (nreverse words) must-unfold)
+ string (concat (substring string 0 start)
+ words
+ (substring string next))
+ next (+ start (length words)))))
+ string)
(defun eword-decode-structured-field-body (string
&optional start-column max-column
If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
if there are in decoded encoded-words (generated by bad manner MUA
-such as a version of Net$cape)."
+such as a version of Net$cape).
+
+The language informations specified in the encoded words, if any, are
+put to the decoded text as the `mime-language' text property."
(interactive "*r")
(save-excursion
(save-restriction
(narrow-to-region start end)
(if unfolding
- (eword-decode-unfold)
- )
+ (eword-decode-unfold))
(goto-char (point-min))
- (while (re-search-forward (concat "\\(" eword-encoded-word-regexp "\\)"
- "\\(\n?[ \t]\\)+"
- "\\(" eword-encoded-word-regexp "\\)")
- nil t)
- (replace-match "\\1\\6")
- (goto-char (point-min))
- )
- (while (re-search-forward eword-encoded-word-regexp nil t)
- (insert (eword-decode-encoded-word
- (prog1
- (buffer-substring (match-beginning 0) (match-end 0))
- (delete-region (match-beginning 0) (match-end 0))
- ) must-unfold))
- )
- )))
+ (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)"))
+ match words)
+ (while (setq match (re-search-forward regexp nil t))
+ (setq start (match-beginning 1)
+ words nil)
+ (while match
+ (goto-char (setq end (match-end 0)))
+ (push (list (match-string 2) ;; charset
+ (when (match-beginning 3) ;; language
+ (intern
+ (downcase
+ (buffer-substring (1+ (match-beginning 3))
+ (match-end 3)))))
+ (match-string 4) ;; encoding
+ (match-string 5) ;; encoded-text
+ (match-string 1)) ;; encoded-word
+ words)
+ (setq match (looking-at regexp)))
+ (delete-region start end)
+ (insert
+ (eword-decode-encoded-words (nreverse words) must-unfold)))))))
(defun eword-decode-unfold ()
(goto-char (point-min))
))
code-conversion))
-;; (define-obsolete-function-alias 'eword-decode-header
-;; 'mime-decode-header-in-buffer)
-
-
-;;; @ encoded-word decoder
-;;;
-
-(defvar eword-decode-encoded-word-error-handler
- 'eword-decode-encoded-word-default-error-handler)
-
-(defvar eword-warning-face nil
- "Face used for invalid encoded-word.")
-
-(defun eword-decode-encoded-word-default-error-handler (word signal)
- (and (add-text-properties 0 (length word)
- (and eword-warning-face
- (list 'face eword-warning-face))
- word)
- word))
-
-(defun eword-decode-encoded-word (word &optional must-unfold)
- "Decode WORD if it is an encoded-word.
-
-If your emacs implementation can not decode the charset of WORD, it
-returns WORD. Similarly the encoded-word is broken, it returns WORD.
-
-If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-word (generated by bad manner MUA such
-as a version of Net$cape)."
- (or (if (string-match eword-encoded-word-regexp word)
- (let ((charset
- (substring word (match-beginning 1) (match-end 1))
- )
- (encoding
- (upcase
- (substring word (match-beginning 2) (match-end 2))
- ))
- (text
- (substring word (match-beginning 3) (match-end 3))
- ))
- (condition-case err
- (eword-decode-encoded-text charset encoding text must-unfold)
- (error
- (funcall eword-decode-encoded-word-error-handler word err)
- ))
- ))
- word))
+(defalias 'eword-decode-header 'mime-decode-header-in-buffer)
+(make-obsolete 'eword-decode-header 'mime-decode-header-in-buffer)
-;;; @ encoded-text decoder
+;;; @ encoded-words decoder
;;;
-(defun eword-decode-encoded-text (charset encoding string
- &optional must-unfold)
- "Decode STRING as an encoded-text.
+(defvar eword-decode-allow-incomplete-encoded-text t
+ "*Non-nil means allow incomplete encoded-text in successive encoded-words.
+Dividing of encoded-text in the place other than character boundaries
+violates RFC2047 section 5, while we have a capability to decode it.
+If it is non-nil, the decoder will decode B- or Q-encoding in each
+encoded-word, concatenate them, and decode it by charset. Otherwise,
+the decoder will fully decode each encoded-word before concatenating
+them.")
-If your emacs implementation can not decode CHARSET, it returns nil.
-
-If ENCODING is not \"B\" or \"Q\", it occurs error.
-So you should write error-handling code if you don't want break by errors.
+(defun eword-decode-encoded-words (words must-unfold)
+ "Decode successive encoded-words in WORDS and return a decoded string.
+Each element of WORDS looks like (CHARSET LANGUAGE ENCODING ENCODED-TEXT
+ENCODED-WORD).
If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-text (generated by bad manner MUA such
-as a version of Net$cape)."
- (let ((cs (mime-charset-to-coding-system charset)))
- (if cs
- (let ((dest (encoded-text-decode-string string encoding)))
- (when dest
- (setq dest (decode-mime-charset-string dest charset))
- (if must-unfold
- (mapconcat (function
- (lambda (chr)
- (cond ((eq chr ?\n) "")
- ((eq chr ?\t) " ")
- (t (char-to-string chr)))
- ))
- (std11-unfold-string dest)
- "")
- dest))))))
-
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+ (let (word language charset encoding text rest)
+ (while words
+ (setq word (pop words)
+ language (nth 1 word))
+ (if (and (or (mime-charset-to-coding-system (setq charset (car word)))
+ (progn
+ (message "Unknown charset: %s" charset)
+ nil))
+ (cond ((member (setq encoding (nth 2 word)) '("B" "Q"))
+ t)
+ ((member encoding '("b" "q"))
+ (setq encoding (upcase encoding)))
+ (t
+ (message "Invalid encoding: %s" encoding)
+ nil))
+ (condition-case err
+ (setq text
+ (encoded-text-decode-string (nth 3 word) encoding))
+ (error
+ (message "%s" (error-message-string err))
+ nil)))
+ (if (and eword-decode-allow-incomplete-encoded-text
+ rest
+ (caaar rest)
+ (string-equal (downcase charset) (downcase (caaar rest)))
+ (equal language (cdaar rest)))
+ ;; Concatenate text of which the charset is the same.
+ (setcdr (car rest) (concat (cdar rest) text))
+ (push (cons (cons charset language) text) rest))
+ ;; Don't decode encoded-word.
+ (push (cons (cons nil language) (nth 4 word)) rest)))
+ (while rest
+ (setq word (or (and (setq charset (caaar rest))
+ (condition-case err
+ (decode-mime-charset-string (cdar rest) charset)
+ (error
+ (message "%s" (error-message-string err))
+ nil)))
+ (concat (when (cdr rest) " ")
+ (cdar rest)
+ (when (and words
+ (not (eq (string-to-char words) ? )))
+ " "))))
+ (when must-unfold
+ (setq word (mapconcat (lambda (chr)
+ (cond ((eq chr ?\n) "")
+ ((eq chr ?\r) "")
+ ((eq chr ?\t) " ")
+ (t (char-to-string chr))))
+ (std11-unfold-string word)
+ "")))
+ (when (setq language (cdaar rest))
+ (put-text-property 0 (length word) 'mime-language language word))
+ (setq words (concat word words)
+ rest (cdr rest)))
+ words))
;;; @ lexical analyze
;;;
be the result.")
(defun eword-analyze-quoted-string (string start &optional must-unfold)
- (let ((p (std11-check-enclosure string ?\" ?\" nil start)))
- (if p
- (cons (cons 'quoted-string
- (decode-mime-charset-string
- (std11-strip-quoted-pair
- (substring string (1+ start) (1- p)))
- default-mime-charset))
- ;;(substring string p))
- p)
- )))
+ (let ((p (std11-check-enclosure string ?\" ?\" nil start))
+ ret)
+ (when p
+ (setq ret (decode-mime-charset-string
+ (std11-strip-quoted-pair
+ (substring string (1+ start) (1- p)))
+ default-mime-charset))
+ (if mime-header-accept-quoted-encoded-words
+ (setq ret (eword-decode-string ret)))
+ (cons (cons 'quoted-string ret)
+ p))))
(defun eword-analyze-domain-literal (string start &optional must-unfold)
(std11-analyze-domain-literal string start))
(std11-analyze-special string start))
(defun eword-analyze-encoded-word (string start &optional must-unfold)
- (if (and (string-match eword-encoded-word-regexp string start)
- (= (match-beginning 0) start))
- (let ((end (match-end 0))
- (dest (eword-decode-encoded-word (match-string 0 string)
- must-unfold))
- )
- ;;(setq string (substring string end))
- (setq start end)
- (while (and (string-match (eval-when-compile
- (concat "[ \t\n]*\\("
- eword-encoded-word-regexp
- "\\)"))
- string start)
- (= (match-beginning 0) start))
- (setq end (match-end 0))
- (setq dest
- (concat dest
- (eword-decode-encoded-word (match-string 1 string)
- must-unfold))
- ;;string (substring string end))
- start end)
- )
- (cons (cons 'atom dest) ;;string)
- end)
- )))
+ (let* ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)"))
+ (match (and (string-match regexp string start)
+ (= start (match-beginning 0))))
+ next words)
+ (while match
+ (setq next (match-end 0))
+ (push (list (match-string 2 string) ;; charset
+ (when (match-beginning 3) ;; language
+ (intern
+ (downcase
+ (substring string
+ (1+ (match-beginning 3)) (match-end 3)))))
+ (match-string 4 string) ;; encoding
+ (match-string 5 string) ;; encoded-text
+ (match-string 1 string)) ;; encoded-word
+ words)
+ (setq match (and (string-match regexp string next)
+ (= next (match-beginning 0)))))
+ (when words
+ (cons (cons 'atom (eword-decode-encoded-words (nreverse words)
+ must-unfold))
+ next))))
(defun eword-analyze-atom (string start &optional must-unfold)
(if (and (string-match std11-atom-regexp string start)
)
(setq rest (cdr rest)))
(or r
- (list (cons 'error (substring string start)) (1+ len)))
+ (cons (cons 'error (substring string start)) (1+ len)))
))
(setq dest (cons (car ret) dest)
start (cdr ret))