X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=eword-decode.el;h=ff380881e6fbe72ac5ca71e8656f13df820bbada;hb=44c6ba5bde5efca67e9e10efec973058674c2c88;hp=7a11ec3f19e5ee54ac2745dac14880789bffb8b4;hpb=25422ef4504d3fe059ae8936d9d9cfa0fc72753d;p=elisp%2Fflim.git diff --git a/eword-decode.el b/eword-decode.el index 7a11ec3..ff38088 100644 --- a/eword-decode.el +++ b/eword-decode.el @@ -1,10 +1,11 @@ ;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs -;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc. +;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, +;; 2005 Free Software Foundation, Inc. ;; Author: ENAMI Tsugutomo -;; MORIOKA Tomohiko -;; TANAKA Akira +;; MORIOKA Tomohiko +;; TANAKA Akira ;; Created: 1995/10/03 ;; Original: 1992/07/20 ENAMI Tsugutomo's `mime.el'. ;; Renamed: 1993/06/03 to tiny-mime.el by MORIOKA Tomohiko @@ -27,8 +28,8 @@ ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. ;;; Code: @@ -38,15 +39,11 @@ (eval-when-compile (require 'cl)) ; list*, pop -(defgroup eword-decode nil - "Encoded-word decoding" - :group 'mime) -(defcustom eword-max-size-to-decode 1000 - "*Max size to decode header field." - :group 'eword-decode - :type '(choice (integer :tag "Limit (bytes)") - (const :tag "Don't limit" nil))) +;;; @ Variables +;;; + +;; User options are defined in mime-def.el. ;;; @ MIME encoded-word definition @@ -59,13 +56,19 @@ (eval-when-compile (concat (regexp-quote "=?") "\\(" - mime-charset-regexp + mime-charset-regexp ; 1 "\\)" + "\\(" + (regexp-quote "*") + mime-language-regexp ; 2 + "\\)?" (regexp-quote "?") - "\\([BbQq]\\)" + "\\(" + mime-encoding-regexp ; 3 + "\\)" (regexp-quote "?") "\\(" - eword-encoded-text-regexp + eword-encoded-text-regexp ; 4 "\\)" (regexp-quote "?=")))) ) @@ -86,30 +89,28 @@ If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even if there are in decoded encoded-words (generated by bad manner MUA such as a version of Net$cape)." (setq string (std11-unfold-string string)) - (let ((dest "")(ew nil) - beg end) - (while (and (string-match eword-encoded-word-regexp string) - (setq beg (match-beginning 0) - end (match-end 0)) - ) - (if (> beg 0) - (if (not - (and (eq ew t) - (string-match "^[ \t]+$" (substring string 0 beg)) - )) - (setq dest (concat dest (substring string 0 beg))) - ) - ) - (setq dest - (concat dest - (eword-decode-encoded-word - (substring string beg end) must-unfold) - )) - (setq string (substring string end)) - (setq ew t) - ) - (concat dest string) - )) + (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)")) + (next 0) + match start words) + (while (setq match (string-match regexp string next)) + (setq start (match-beginning 1) + words nil) + (while match + (setq next (match-end 0)) + (push (list (match-string 2 string) ;; charset + (match-string 3 string) ;; language + (match-string 4 string) ;; encoding + (match-string 5 string) ;; encoded-text + (match-string 1 string)) ;; encoded-word + words) + (setq match (and (string-match regexp string next) + (= next (match-beginning 0))))) + (setq words (eword-decode-encoded-words (nreverse words) must-unfold) + string (concat (substring string 0 start) + words + (substring string next)) + next (+ start (length words))))) + string) (defun eword-decode-structured-field-body (string &optional start-column max-column @@ -152,8 +153,8 @@ decode the charset included in it, it is not decoded." start-column &optional max-column start) - (if (and eword-max-size-to-decode - (> (length string) eword-max-size-to-decode)) + (if (and mime-field-decoding-max-size + (> (length string) mime-field-decoding-max-size)) string (or max-column (setq max-column fill-column)) @@ -221,24 +222,25 @@ such as a version of Net$cape)." (save-restriction (narrow-to-region start end) (if unfolding - (eword-decode-unfold) - ) + (eword-decode-unfold)) (goto-char (point-min)) - (while (re-search-forward (concat "\\(" eword-encoded-word-regexp "\\)" - "\\(\n?[ \t]\\)+" - "\\(" eword-encoded-word-regexp "\\)") - nil t) - (replace-match "\\1\\6") - (goto-char (point-min)) - ) - (while (re-search-forward eword-encoded-word-regexp nil t) - (insert (eword-decode-encoded-word - (prog1 - (buffer-substring (match-beginning 0) (match-end 0)) - (delete-region (match-beginning 0) (match-end 0)) - ) must-unfold)) - ) - ))) + (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)")) + match words) + (while (setq match (re-search-forward regexp nil t)) + (setq start (match-beginning 1) + words nil) + (while match + (goto-char (setq end (match-end 0))) + (push (list (match-string 2) ;; charset + (match-string 3) ;; language + (match-string 4) ;; encoding + (match-string 5) ;; encoded-text + (match-string 1)) ;; encoded-word + words) + (setq match (looking-at regexp))) + (delete-region start end) + (insert + (eword-decode-encoded-words (nreverse words) must-unfold))))))) (defun eword-decode-unfold () (goto-char (point-min)) @@ -505,86 +507,86 @@ If SEPARATOR is not nil, it is used as header separator." )) code-conversion)) -(define-obsolete-function-alias 'eword-decode-header - 'mime-decode-header-in-buffer) - - -;;; @ encoded-word decoder -;;; - -(defvar eword-decode-encoded-word-error-handler - 'eword-decode-encoded-word-default-error-handler) +(defalias 'eword-decode-header 'mime-decode-header-in-buffer) +(make-obsolete 'eword-decode-header 'mime-decode-header-in-buffer) -(defvar eword-warning-face nil - "Face used for invalid encoded-word.") -(defun eword-decode-encoded-word-default-error-handler (word signal) - (and (add-text-properties 0 (length word) - (and eword-warning-face - (list 'face eword-warning-face)) - word) - word)) - -(defun eword-decode-encoded-word (word &optional must-unfold) - "Decode WORD if it is an encoded-word. - -If your emacs implementation can not decode the charset of WORD, it -returns WORD. Similarly the encoded-word is broken, it returns WORD. - -If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-word (generated by bad manner MUA such -as a version of Net$cape)." - (or (if (string-match eword-encoded-word-regexp word) - (let ((charset - (substring word (match-beginning 1) (match-end 1)) - ) - (encoding - (upcase - (substring word (match-beginning 2) (match-end 2)) - )) - (text - (substring word (match-beginning 3) (match-end 3)) - )) - (condition-case err - (eword-decode-encoded-text charset encoding text must-unfold) - (error - (funcall eword-decode-encoded-word-error-handler word err) - )) - )) - word)) - - -;;; @ encoded-text decoder +;;; @ encoded-words decoder ;;; -(defun eword-decode-encoded-text (charset encoding string - &optional must-unfold) - "Decode STRING as an encoded-text. - -If your emacs implementation can not decode CHARSET, it returns nil. +(defvar eword-decode-allow-incomplete-encoded-text t + "*Non-nil means allow incomplete encoded-text in successive encoded-words. +Dividing of encoded-text in the place other than character boundaries +violates RFC2047 section 5, while we have a capability to decode it. +If it is non-nil, the decoder will decode B- or Q-encoding in each +encoded-word, concatenate them, and decode it by charset. Otherwise, +the decoder will fully decode each encoded-word before concatenating +them.") -If ENCODING is not \"B\" or \"Q\", it occurs error. -So you should write error-handling code if you don't want break by errors. +(defun eword-decode-encoded-words (words must-unfold) + "Decode successive encoded-words in WORDS and return a decoded string. +Each element of WORDS looks like (CHARSET LANGUAGE ENCODING ENCODED-TEXT +ENCODED-WORD). If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-text (generated by bad manner MUA such -as a version of Net$cape)." - (let ((cs (mime-charset-to-coding-system charset))) - (if cs - (let ((dest (encoded-text-decode-string string encoding))) - (when dest - (setq dest (decode-mime-charset-string dest charset)) - (if must-unfold - (mapconcat (function - (lambda (chr) - (cond ((eq chr ?\n) "") - ((eq chr ?\t) " ") - (t (char-to-string chr))) - )) - (std11-unfold-string dest) - "") - dest)))))) - +if there are in decoded encoded-words (generated by bad manner MUA +such as a version of Net$cape)." + (let (word language charset encoding text rest) + (while words + (setq word (pop words) + language (nth 1 word)) + (if (and (or (mime-charset-to-coding-system (setq charset (car word))) + (progn + (message "Unknown charset: %s" charset) + nil)) + (cond ((member (setq encoding (nth 2 word)) '("B" "Q")) + t) + ((member encoding '("b" "q")) + (setq encoding (upcase encoding))) + (t + (message "Invalid encoding: %s" encoding) + nil)) + (condition-case err + (setq text + (encoded-text-decode-string (nth 3 word) encoding)) + (error + (message "%s" (error-message-string err)) + nil))) + (if (and eword-decode-allow-incomplete-encoded-text + rest + (caaar rest) + (string-equal (downcase charset) (downcase (caaar rest))) + (equal language (cdaar rest))) + ;; Concatenate text of which the charset is the same. + (setcdr (car rest) (concat (cdar rest) text)) + (push (cons (cons charset language) text) rest)) + ;; Don't decode encoded-word. + (push (cons (cons nil language) (nth 4 word)) rest))) + (while rest + (setq word (or (and (setq charset (caaar rest)) + (condition-case err + (decode-mime-charset-string (cdar rest) charset) + (error + (message "%s" (error-message-string err)) + nil))) + (concat (when (cdr rest) " ") + (cdar rest) + (when (and words + (not (eq (string-to-char words) ? ))) + " ")))) + (when must-unfold + (setq word (mapconcat (lambda (chr) + (cond ((eq chr ?\n) "") + ((eq chr ?\r) "") + ((eq chr ?\t) " ") + (t (char-to-string chr)))) + (std11-unfold-string word) + ""))) + (when (setq language (cdaar rest)) + (put-text-property 0 (length word) 'mime-language language word)) + (setq words (concat word words) + rest (cdr rest))) + words)) ;;; @ lexical analyze ;;; @@ -594,7 +596,7 @@ as a version of Net$cape)." "*Max position of eword-lexical-analyze-cache. It is max size of eword-lexical-analyze-cache - 1.") -(defcustom eword-lexical-analyzer +(defvar mime-header-lexical-analyzer '(eword-analyze-quoted-string eword-analyze-domain-literal eword-analyze-comment @@ -614,21 +616,20 @@ format. Previous function is preferred to next function. If a function returns nil, next function is used. Otherwise the return value will -be the result." - :group 'eword-decode - :type '(repeat function)) +be the result.") (defun eword-analyze-quoted-string (string start &optional must-unfold) - (let ((p (std11-check-enclosure string ?\" ?\" nil start))) - (if p - (cons (cons 'quoted-string - (decode-mime-charset-string - (std11-strip-quoted-pair - (substring string (1+ start) (1- p))) - default-mime-charset)) - ;;(substring string p)) - p) - ))) + (let ((p (std11-check-enclosure string ?\" ?\" nil start)) + ret) + (when p + (setq ret (decode-mime-charset-string + (std11-strip-quoted-pair + (substring string (1+ start) (1- p))) + default-mime-charset)) + (if mime-header-accept-quoted-encoded-words + (setq ret (eword-decode-string ret))) + (cons (cons 'quoted-string ret) + p)))) (defun eword-analyze-domain-literal (string start &optional must-unfold) (std11-analyze-domain-literal string start)) @@ -705,31 +706,24 @@ be the result." (std11-analyze-special string start)) (defun eword-analyze-encoded-word (string start &optional must-unfold) - (if (and (string-match eword-encoded-word-regexp string start) - (= (match-beginning 0) start)) - (let ((end (match-end 0)) - (dest (eword-decode-encoded-word (match-string 0 string) - must-unfold)) - ) - ;;(setq string (substring string end)) - (setq start end) - (while (and (string-match (eval-when-compile - (concat "[ \t\n]*\\(" - eword-encoded-word-regexp - "\\)")) - string start) - (= (match-beginning 0) start)) - (setq end (match-end 0)) - (setq dest - (concat dest - (eword-decode-encoded-word (match-string 1 string) - must-unfold)) - ;;string (substring string end)) - start end) - ) - (cons (cons 'atom dest) ;;string) - end) - ))) + (let* ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)")) + (match (and (string-match regexp string start) + (= start (match-beginning 0)))) + next words) + (while match + (setq next (match-end 0)) + (push (list (match-string 2 string) ;; charset + (match-string 3 string) ;; language + (match-string 4 string) ;; encoding + (match-string 5 string) ;; encoded-text + (match-string 1 string)) ;; encoded-word + words) + (setq match (and (string-match regexp string next) + (= next (match-beginning 0))))) + (when words + (cons (cons 'atom (eword-decode-encoded-words (nreverse words) + must-unfold)) + next)))) (defun eword-analyze-atom (string start &optional must-unfold) (if (and (string-match std11-atom-regexp string start) @@ -747,7 +741,7 @@ be the result." dest ret) (while (< start len) (setq ret - (let ((rest eword-lexical-analyzer) + (let ((rest mime-header-lexical-analyzer) func r) (while (and (setq func (car rest)) (null @@ -755,7 +749,7 @@ be the result." ) (setq rest (cdr rest))) (or r - (list (cons 'error (substring string start)) (1+ len))) + (cons (cons 'error (substring string start)) (1+ len))) )) (setq dest (cons (car ret) dest) start (cdr ret))