;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs
-;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc.
+;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004,
+;; 2005 Free Software Foundation, Inc.
;; Author: ENAMI Tsugutomo <enami@sys.ptg.sony.co.jp>
-;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
-;; TANAKA Akira <akr@jaist.ac.jp>
+;; MORIOKA Tomohiko <tomo@m17n.org>
+;; TANAKA Akira <akr@m17n.org>
;; Created: 1995/10/03
;; Original: 1992/07/20 ENAMI Tsugutomo's `mime.el'.
;; Renamed: 1993/06/03 to tiny-mime.el by MORIOKA Tomohiko
;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-;; Boston, MA 02111-1307, USA.
+;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+;; Boston, MA 02110-1301, USA.
;;; Code:
(eval-when-compile (require 'cl)) ; list*, pop
-(defgroup eword-decode nil
- "Encoded-word decoding"
- :group 'mime)
-(defcustom eword-max-size-to-decode 1000
- "*Max size to decode header field."
- :group 'eword-decode
- :type '(choice (integer :tag "Limit (bytes)")
- (const :tag "Don't limit" nil)))
+;;; @ Variables
+;;;
+
+;; User options are defined in mime-def.el.
;;; @ MIME encoded-word definition
(eval-when-compile
(concat (regexp-quote "=?")
"\\("
- mime-charset-regexp
+ mime-charset-regexp ; 1
"\\)"
+ "\\("
+ (regexp-quote "*")
+ mime-language-regexp ; 2
+ "\\)?"
(regexp-quote "?")
- "\\([BbQq]\\)"
+ "\\("
+ mime-encoding-regexp ; 3
+ "\\)"
(regexp-quote "?")
"\\("
- eword-encoded-text-regexp
+ eword-encoded-text-regexp ; 4
"\\)"
(regexp-quote "?="))))
)
if there are in decoded encoded-words (generated by bad manner MUA
such as a version of Net$cape)."
(setq string (std11-unfold-string string))
- (let ((dest "")(ew nil)
- beg end)
- (while (and (string-match eword-encoded-word-regexp string)
- (setq beg (match-beginning 0)
- end (match-end 0))
- )
- (if (> beg 0)
- (if (not
- (and (eq ew t)
- (string-match "^[ \t]+$" (substring string 0 beg))
- ))
- (setq dest (concat dest (substring string 0 beg)))
- )
- )
- (setq dest
- (concat dest
- (eword-decode-encoded-word
- (substring string beg end) must-unfold)
- ))
- (setq string (substring string end))
- (setq ew t)
- )
- (concat dest string)
- ))
+ (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)"))
+ (next 0)
+ match start words)
+ (while (setq match (string-match regexp string next))
+ (setq start (match-beginning 1)
+ words nil)
+ (while match
+ (setq next (match-end 0))
+ (push (list (match-string 2 string) ;; charset
+ (match-string 3 string) ;; language
+ (match-string 4 string) ;; encoding
+ (match-string 5 string) ;; encoded-text
+ (match-string 1 string)) ;; encoded-word
+ words)
+ (setq match (and (string-match regexp string next)
+ (= next (match-beginning 0)))))
+ (setq words (eword-decode-encoded-words (nreverse words) must-unfold)
+ string (concat (substring string 0 start)
+ words
+ (substring string next))
+ next (+ start (length words)))))
+ string)
(defun eword-decode-structured-field-body (string
&optional start-column max-column
start-column
&optional max-column
start)
- (if (and eword-max-size-to-decode
- (> (length string) eword-max-size-to-decode))
+ (if (and mime-field-decoding-max-size
+ (> (length string) mime-field-decoding-max-size))
string
(or max-column
(setq max-column fill-column))
(save-restriction
(narrow-to-region start end)
(if unfolding
- (eword-decode-unfold)
- )
+ (eword-decode-unfold))
(goto-char (point-min))
- (while (re-search-forward (concat "\\(" eword-encoded-word-regexp "\\)"
- "\\(\n?[ \t]\\)+"
- "\\(" eword-encoded-word-regexp "\\)")
- nil t)
- (replace-match "\\1\\6")
- (goto-char (point-min))
- )
- (while (re-search-forward eword-encoded-word-regexp nil t)
- (insert (eword-decode-encoded-word
- (prog1
- (buffer-substring (match-beginning 0) (match-end 0))
- (delete-region (match-beginning 0) (match-end 0))
- ) must-unfold))
- )
- )))
+ (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)"))
+ match words)
+ (while (setq match (re-search-forward regexp nil t))
+ (setq start (match-beginning 1)
+ words nil)
+ (while match
+ (goto-char (setq end (match-end 0)))
+ (push (list (match-string 2) ;; charset
+ (match-string 3) ;; language
+ (match-string 4) ;; encoding
+ (match-string 5) ;; encoded-text
+ (match-string 1)) ;; encoded-word
+ words)
+ (setq match (looking-at regexp)))
+ (delete-region start end)
+ (insert
+ (eword-decode-encoded-words (nreverse words) must-unfold)))))))
(defun eword-decode-unfold ()
(goto-char (point-min))
))
code-conversion))
-;; (define-obsolete-function-alias 'eword-decode-header
-;; 'mime-decode-header-in-buffer)
-
-
-;;; @ encoded-word decoder
-;;;
-
-(defvar eword-decode-encoded-word-error-handler
- 'eword-decode-encoded-word-default-error-handler)
+(defalias 'eword-decode-header 'mime-decode-header-in-buffer)
+(make-obsolete 'eword-decode-header 'mime-decode-header-in-buffer)
-(defvar eword-warning-face nil
- "Face used for invalid encoded-word.")
-(defun eword-decode-encoded-word-default-error-handler (word signal)
- (and (add-text-properties 0 (length word)
- (and eword-warning-face
- (list 'face eword-warning-face))
- word)
- word))
-
-(defun eword-decode-encoded-word (word &optional must-unfold)
- "Decode WORD if it is an encoded-word.
-
-If your emacs implementation can not decode the charset of WORD, it
-returns WORD. Similarly the encoded-word is broken, it returns WORD.
-
-If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-word (generated by bad manner MUA such
-as a version of Net$cape)."
- (or (if (string-match eword-encoded-word-regexp word)
- (let ((charset
- (substring word (match-beginning 1) (match-end 1))
- )
- (encoding
- (upcase
- (substring word (match-beginning 2) (match-end 2))
- ))
- (text
- (substring word (match-beginning 3) (match-end 3))
- ))
- (condition-case err
- (eword-decode-encoded-text charset encoding text must-unfold)
- (error
- (funcall eword-decode-encoded-word-error-handler word err)
- ))
- ))
- word))
-
-
-;;; @ encoded-text decoder
+;;; @ encoded-words decoder
;;;
-(defun eword-decode-encoded-text (charset encoding string
- &optional must-unfold)
- "Decode STRING as an encoded-text.
-
-If your emacs implementation can not decode CHARSET, it returns nil.
+(defvar eword-decode-allow-incomplete-encoded-text t
+ "*Non-nil means allow incomplete encoded-text in successive encoded-words.
+Dividing of encoded-text in the place other than character boundaries
+violates RFC2047 section 5, while we have a capability to decode it.
+If it is non-nil, the decoder will decode B- or Q-encoding in each
+encoded-word, concatenate them, and decode it by charset. Otherwise,
+the decoder will fully decode each encoded-word before concatenating
+them.")
-If ENCODING is not \"B\" or \"Q\", it occurs error.
-So you should write error-handling code if you don't want break by errors.
+(defun eword-decode-encoded-words (words must-unfold)
+ "Decode successive encoded-words in WORDS and return a decoded string.
+Each element of WORDS looks like (CHARSET LANGUAGE ENCODING ENCODED-TEXT
+ENCODED-WORD).
If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-text (generated by bad manner MUA such
-as a version of Net$cape)."
- (let ((cs (mime-charset-to-coding-system charset)))
- (if cs
- (let ((dest (encoded-text-decode-string string encoding)))
- (when dest
- (setq dest (decode-mime-charset-string dest charset))
- (if must-unfold
- (mapconcat (function
- (lambda (chr)
- (cond ((eq chr ?\n) "")
- ((eq chr ?\t) " ")
- (t (char-to-string chr)))
- ))
- (std11-unfold-string dest)
- "")
- dest))))))
-
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+ (let (word language charset encoding text rest)
+ (while words
+ (setq word (pop words)
+ language (nth 1 word))
+ (if (and (or (mime-charset-to-coding-system (setq charset (car word)))
+ (progn
+ (message "Unknown charset: %s" charset)
+ nil))
+ (cond ((member (setq encoding (nth 2 word)) '("B" "Q"))
+ t)
+ ((member encoding '("b" "q"))
+ (setq encoding (upcase encoding)))
+ (t
+ (message "Invalid encoding: %s" encoding)
+ nil))
+ (condition-case err
+ (setq text
+ (encoded-text-decode-string (nth 3 word) encoding))
+ (error
+ (message "%s" (error-message-string err))
+ nil)))
+ (if (and eword-decode-allow-incomplete-encoded-text
+ rest
+ (caaar rest)
+ (string-equal (downcase charset) (downcase (caaar rest)))
+ (equal language (cdaar rest)))
+ ;; Concatenate text of which the charset is the same.
+ (setcdr (car rest) (concat (cdar rest) text))
+ (push (cons (cons charset language) text) rest))
+ ;; Don't decode encoded-word.
+ (push (cons (cons nil language) (nth 4 word)) rest)))
+ (while rest
+ (setq word (or (and (setq charset (caaar rest))
+ (condition-case err
+ (decode-mime-charset-string (cdar rest) charset)
+ (error
+ (message "%s" (error-message-string err))
+ nil)))
+ (concat (when (cdr rest) " ")
+ (cdar rest)
+ (when (and words
+ (not (eq (string-to-char words) ? )))
+ " "))))
+ (when must-unfold
+ (setq word (mapconcat (lambda (chr)
+ (cond ((eq chr ?\n) "")
+ ((eq chr ?\r) "")
+ ((eq chr ?\t) " ")
+ (t (char-to-string chr))))
+ (std11-unfold-string word)
+ "")))
+ (when (setq language (cdaar rest))
+ (put-text-property 0 (length word) 'mime-language language word))
+ (setq words (concat word words)
+ rest (cdr rest)))
+ words))
;;; @ lexical analyze
;;;
"*Max position of eword-lexical-analyze-cache.
It is max size of eword-lexical-analyze-cache - 1.")
-(defcustom eword-lexical-analyzer
+(defvar mime-header-lexical-analyzer
'(eword-analyze-quoted-string
eword-analyze-domain-literal
eword-analyze-comment
Previous function is preferred to next function. If a function
returns nil, next function is used. Otherwise the return value will
-be the result."
- :group 'eword-decode
- :type '(repeat function))
+be the result.")
(defun eword-analyze-quoted-string (string start &optional must-unfold)
- (let ((p (std11-check-enclosure string ?\" ?\" nil start)))
- (if p
- (cons (cons 'quoted-string
- (decode-mime-charset-string
- (std11-strip-quoted-pair
- (substring string (1+ start) (1- p)))
- default-mime-charset))
- ;;(substring string p))
- p)
- )))
+ (let ((p (std11-check-enclosure string ?\" ?\" nil start))
+ ret)
+ (when p
+ (setq ret (decode-mime-charset-string
+ (std11-strip-quoted-pair
+ (substring string (1+ start) (1- p)))
+ default-mime-charset))
+ (if mime-header-accept-quoted-encoded-words
+ (setq ret (eword-decode-string ret)))
+ (cons (cons 'quoted-string ret)
+ p))))
(defun eword-analyze-domain-literal (string start &optional must-unfold)
(std11-analyze-domain-literal string start))
(std11-analyze-special string start))
(defun eword-analyze-encoded-word (string start &optional must-unfold)
- (if (and (string-match eword-encoded-word-regexp string start)
- (= (match-beginning 0) start))
- (let ((end (match-end 0))
- (dest (eword-decode-encoded-word (match-string 0 string)
- must-unfold))
- )
- ;;(setq string (substring string end))
- (setq start end)
- (while (and (string-match (eval-when-compile
- (concat "[ \t\n]*\\("
- eword-encoded-word-regexp
- "\\)"))
- string start)
- (= (match-beginning 0) start))
- (setq end (match-end 0))
- (setq dest
- (concat dest
- (eword-decode-encoded-word (match-string 1 string)
- must-unfold))
- ;;string (substring string end))
- start end)
- )
- (cons (cons 'atom dest) ;;string)
- end)
- )))
+ (let* ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)"))
+ (match (and (string-match regexp string start)
+ (= start (match-beginning 0))))
+ next words)
+ (while match
+ (setq next (match-end 0))
+ (push (list (match-string 2 string) ;; charset
+ (match-string 3 string) ;; language
+ (match-string 4 string) ;; encoding
+ (match-string 5 string) ;; encoded-text
+ (match-string 1 string)) ;; encoded-word
+ words)
+ (setq match (and (string-match regexp string next)
+ (= next (match-beginning 0)))))
+ (when words
+ (cons (cons 'atom (eword-decode-encoded-words (nreverse words)
+ must-unfold))
+ next))))
(defun eword-analyze-atom (string start &optional must-unfold)
(if (and (string-match std11-atom-regexp string start)
dest ret)
(while (< start len)
(setq ret
- (let ((rest eword-lexical-analyzer)
+ (let ((rest mime-header-lexical-analyzer)
func r)
(while (and (setq func (car rest))
(null
)
(setq rest (cdr rest)))
(or r
- (list (cons 'error (substring string start)) (1+ len)))
+ (cons (cons 'error (substring string start)) (1+ len)))
))
(setq dest (cons (car ret) dest)
start (cdr ret))