;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs
-;; Copyright (C) 1995,96,97,98,99,2000,01,03,04 Free Software Foundation, Inc.
+;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
+;; 2005 Free Software Foundation, Inc.
;; Author: ENAMI Tsugutomo <enami@sys.ptg.sony.co.jp>
;; MORIOKA Tomohiko <tomo@m17n.org>
if there are in decoded encoded-words (generated by bad manner MUA
such as a version of Net$cape)."
(setq string (std11-unfold-string string))
- (let ((dest "")(ew nil)
- beg end)
- (while (and (string-match eword-encoded-word-regexp string)
- (setq beg (match-beginning 0)
- end (match-end 0))
- )
- (if (> beg 0)
- (if (not
- (and (eq ew t)
- (string-match "^[ \t]+$" (substring string 0 beg))
- ))
- (setq dest (concat dest (substring string 0 beg)))
- )
- )
- (setq dest
- (concat dest
- (eword-decode-encoded-word
- (substring string beg end) must-unfold)
- ))
- (setq string (substring string end))
- (setq ew t)
- )
- (concat dest string)
- ))
+ (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)"))
+ (next 0)
+ match start words)
+ (while (setq match (string-match regexp string next))
+ (setq start (match-beginning 1)
+ words nil)
+ (while match
+ (setq next (match-end 0))
+ (push (list (match-string 2 string) ;; charset
+ (match-string 3 string) ;; language
+ (match-string 4 string) ;; encoding
+ (match-string 5 string) ;; encoded-text
+ (match-string 1 string)) ;; encoded-word
+ words)
+ (setq match (and (string-match regexp string next)
+ (= next (match-beginning 0)))))
+ (setq words (eword-decode-encoded-words (nreverse words) must-unfold)
+ string (concat (substring string 0 start)
+ words
+ (substring string next))
+ next (+ start (length words)))))
+ string)
(defun eword-decode-structured-field-body (string
&optional start-column max-column
(save-restriction
(narrow-to-region start end)
(if unfolding
- (eword-decode-unfold)
- )
+ (eword-decode-unfold))
(goto-char (point-min))
- (while (re-search-forward (concat "\\(" eword-encoded-word-regexp "\\)"
- "\\(\n?[ \t]\\)+"
- "\\(" eword-encoded-word-regexp "\\)")
- nil t)
- (replace-match "\\1\\7")
- (goto-char (point-min))
- )
- (while (re-search-forward eword-encoded-word-regexp nil t)
- (insert (eword-decode-encoded-word
- (prog1
- (buffer-substring (match-beginning 0) (match-end 0))
- (delete-region (match-beginning 0) (match-end 0))
- ) must-unfold))
- )
- )))
+ (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)"))
+ match words)
+ (while (setq match (re-search-forward regexp nil t))
+ (setq start (match-beginning 1)
+ words nil)
+ (while match
+ (goto-char (setq end (match-end 0)))
+ (push (list (match-string 2) ;; charset
+ (match-string 3) ;; language
+ (match-string 4) ;; encoding
+ (match-string 5) ;; encoded-text
+ (match-string 1)) ;; encoded-word
+ words)
+ (setq match (looking-at regexp)))
+ (delete-region start end)
+ (insert
+ (eword-decode-encoded-words (nreverse words) must-unfold)))))))
(defun eword-decode-unfold ()
(goto-char (point-min))
(make-obsolete 'eword-decode-header 'mime-decode-header-in-buffer)
-;;; @ encoded-word decoder
-;;;
-
-(defvar eword-decode-encoded-word-error-handler
- 'eword-decode-encoded-word-default-error-handler)
-
-(defvar eword-warning-face nil
- "Face used for invalid encoded-word.")
-
-(defun eword-decode-encoded-word-default-error-handler (word signal)
- (and (add-text-properties 0 (length word)
- (and eword-warning-face
- (list 'face eword-warning-face))
- word)
- word))
-
-(defun eword-decode-encoded-word (word &optional must-unfold)
- "Decode WORD as an encoded-word.
-
-If charset is unknown or unsupported, return WORD.
-If encoding is unknown, or some error occurs while decoding,
-`eword-decode-encoded-word-error-handler' is called with WORD and an
-error condition.
-
-If MUST-UNFOLD is non-nil, unfold decoded WORD."
- (or (and (string-match eword-encoded-word-regexp word)
- (condition-case err
- (eword-decode-encoded-text
- ;; charset
- (substring word (match-beginning 1)(match-end 1))
- ;; language
- (when (match-beginning 2)
- (intern
- (downcase
- (substring word (1+ (match-beginning 2))(match-end 2)))))
- ;; encoding
- (upcase
- (substring word (match-beginning 3)(match-end 3)))
- ;; encoded-text
- (substring word (match-beginning 4)(match-end 4))
- must-unfold)
- (error
- (funcall eword-decode-encoded-word-error-handler word err))))
- word))
-
-
-;;; @ encoded-text decoder
+;;; @ encoded-words decoder
;;;
-(defun eword-decode-encoded-text (charset language encoding string
- &optional must-unfold)
- "Decode STRING as an encoded-text.
-
-If your emacs implementation can not decode CHARSET, it returns nil.
+(defvar eword-decode-allow-incomplete-encoded-text t
+ "*Non-nil means allow incomplete encoded-text in successive encoded-words.
+Dividing of encoded-text in the place other than character boundaries
+violates RFC2047 section 5, while we have a capability to decode it.
+If it is non-nil, the decoder will decode B- or Q-encoding in each
+encoded-word, concatenate them, and decode it by charset. Otherwise,
+the decoder will fully decode each encoded-word before concatenating
+them.")
-If LANGUAGE is non-nil, it is put to `mime-language' text-property.
-If ENCODING is not \"B\" or \"Q\", it occurs error.
-So you should write error-handling code if you don't want break by errors.
+(defun eword-decode-encoded-words (words must-unfold)
+ "Decode successive encoded-words in WORDS and return a decoded string.
+Each element of WORDS looks like (CHARSET LANGUAGE ENCODING ENCODED-TEXT
+ENCODED-WORD).
If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-text (generated by bad manner MUA such
-as a version of Net$cape)."
- (when (mime-charset-to-coding-system charset)
- (let ((dest (encoded-text-decode-string string encoding)))
- (when dest
- (setq dest (decode-mime-charset-string dest charset))
- (when must-unfold
- (setq dest
- (mapconcat
- (function
- (lambda (chr)
- (cond ((eq chr ?\n) "")
- ((eq chr ?\r) "")
- ((eq chr ?\t) " ")
- (t (char-to-string chr)))))
- (std11-unfold-string dest) "")))
- (when language
- (put-text-property 0 (length dest) 'mime-language language dest))
- dest))))
-
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+ (let (word language charset encoding text rest)
+ (while words
+ (setq word (pop words)
+ language (nth 1 word))
+ (if (and (or (mime-charset-to-coding-system (setq charset (car word)))
+ (progn
+ (message "Unknown charset: %s" charset)
+ nil))
+ (cond ((member (setq encoding (nth 2 word)) '("B" "Q"))
+ t)
+ ((member encoding '("b" "q"))
+ (setq encoding (upcase encoding)))
+ (t
+ (message "Invalid encoding: %s" encoding)
+ nil))
+ (condition-case err
+ (setq text
+ (encoded-text-decode-string (nth 3 word) encoding))
+ (error
+ (message "%s" (error-message-string err))
+ nil)))
+ (if (and eword-decode-allow-incomplete-encoded-text
+ rest
+ (caaar rest)
+ (string-equal (downcase charset) (downcase (caaar rest)))
+ (equal language (cdaar rest)))
+ ;; Concatenate text of which the charset is the same.
+ (setcdr (car rest) (concat (cdar rest) text))
+ (push (cons (cons charset language) text) rest))
+ ;; Don't decode encoded-word.
+ (push (cons (cons nil language) (nth 4 word)) rest)))
+ (while rest
+ (setq word (or (and (setq charset (caaar rest))
+ (condition-case err
+ (decode-mime-charset-string (cdar rest) charset)
+ (error
+ (message "%s" (error-message-string err))
+ nil)))
+ (concat (when (cdr rest) " ")
+ (cdar rest)
+ (when (and words
+ (not (eq (string-to-char words) ? )))
+ " "))))
+ (when must-unfold
+ (setq word (mapconcat (lambda (chr)
+ (cond ((eq chr ?\n) "")
+ ((eq chr ?\r) "")
+ ((eq chr ?\t) " ")
+ (t (char-to-string chr))))
+ (std11-unfold-string word)
+ "")))
+ (when (setq language (cdaar rest))
+ (put-text-property 0 (length word) 'mime-language language word))
+ (setq words (concat word words)
+ rest (cdr rest)))
+ words))
;;; @ lexical analyze
;;;
(std11-analyze-special string start))
(defun eword-analyze-encoded-word (string start &optional must-unfold)
- (if (and (string-match eword-encoded-word-regexp string start)
- (= (match-beginning 0) start))
- (let ((end (match-end 0))
- (dest (eword-decode-encoded-word (match-string 0 string)
- must-unfold))
- )
- ;;(setq string (substring string end))
- (setq start end)
- (while (and (string-match (eval-when-compile
- (concat "[ \t\n]*\\("
- eword-encoded-word-regexp
- "\\)"))
- string start)
- (= (match-beginning 0) start))
- (setq end (match-end 0))
- (setq dest
- (concat dest
- (eword-decode-encoded-word (match-string 1 string)
- must-unfold))
- ;;string (substring string end))
- start end)
- )
- (cons (cons 'atom dest) ;;string)
- end)
- )))
+ (let* ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)"))
+ (match (and (string-match regexp string start)
+ (= start (match-beginning 0))))
+ next words)
+ (while match
+ (setq next (match-end 0))
+ (push (list (match-string 2 string) ;; charset
+ (match-string 3 string) ;; language
+ (match-string 4 string) ;; encoding
+ (match-string 5 string) ;; encoded-text
+ (match-string 1 string)) ;; encoded-word
+ words)
+ (setq match (and (string-match regexp string next)
+ (= next (match-beginning 0)))))
+ (when words
+ (cons (cons 'atom (eword-decode-encoded-words (nreverse words)
+ must-unfold))
+ next))))
(defun eword-analyze-atom (string start &optional must-unfold)
(if (and (string-match std11-atom-regexp string start)