X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=eword-decode.el;h=ff380881e6fbe72ac5ca71e8656f13df820bbada;hb=44c6ba5bde5efca67e9e10efec973058674c2c88;hp=4365cd6a5efe1fe16f40f27f9bdb60c1249541e2;hpb=635edef501f7deb04cdf7e5c327e3c8afd67ca46;p=elisp%2Fflim.git diff --git a/eword-decode.el b/eword-decode.el index 4365cd6..ff38088 100644 --- a/eword-decode.el +++ b/eword-decode.el @@ -1,18 +1,20 @@ ;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs -;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc. +;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, +;; 2005 Free Software Foundation, Inc. ;; Author: ENAMI Tsugutomo -;; MORIOKA Tomohiko -;; Maintainer: MORIOKA Tomohiko +;; MORIOKA Tomohiko +;; TANAKA Akira ;; Created: 1995/10/03 ;; Original: 1992/07/20 ENAMI Tsugutomo's `mime.el'. -;; Renamed: 1993/06/03 to tiny-mime.el -;; Renamed: 1995/10/03 from tiny-mime.el (split off encoder) -;; Renamed: 1997/02/22 from tm-ew-d.el +;; Renamed: 1993/06/03 to tiny-mime.el by MORIOKA Tomohiko +;; Renamed: 1995/10/03 to tm-ew-d.el (split off encoder) +;; by MORIOKA Tomohiko +;; Renamed: 1997/02/22 from tm-ew-d.el by MORIOKA Tomohiko ;; Keywords: encoded-word, MIME, multilingual, header, mail, news -;; This file is part of SEMI (Spadework for Emacs MIME Interfaces). +;; This file is part of FLIM (Faithful Library about Internet Message). ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as @@ -26,75 +28,50 @@ ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. ;;; Code: -(require 'std11-parse) -(require 'mel) (require 'mime-def) +(require 'mel) +(require 'std11) -(defgroup eword-decode nil - "Encoded-word decoding" - :group 'mime) - -(defconst eword-decode-version "1.2.2") +(eval-when-compile (require 'cl)) ; list*, pop -;;; @ MIME encoded-word definition +;;; @ Variables ;;; -(defconst eword-encoded-text-regexp "[!->@-~]+") -(defconst eword-encoded-word-regexp - (concat (regexp-quote "=?") - "\\(" - mime-charset-regexp - "\\)" - (regexp-quote "?") - "\\(B\\|Q\\)" - (regexp-quote "?") - "\\(" - eword-encoded-text-regexp - "\\)" - (regexp-quote "?="))) - - -;;; @@ Base64 -;;; - -(defconst base64-token-regexp "[A-Za-z0-9+/]") -(defconst base64-token-padding-regexp "[A-Za-z0-9+/=]") - -(defconst eword-B-encoded-text-regexp - (concat "\\(\\(" - base64-token-regexp - base64-token-regexp - base64-token-regexp - base64-token-regexp - "\\)*" - base64-token-regexp - base64-token-regexp - base64-token-padding-regexp - base64-token-padding-regexp - "\\)")) +;; User options are defined in mime-def.el. -;; (defconst eword-B-encoding-and-encoded-text-regexp -;; (concat "\\(B\\)\\?" eword-B-encoded-text-regexp)) - -;;; @@ Quoted-Printable +;;; @ MIME encoded-word definition ;;; -(defconst quoted-printable-hex-chars "0123456789ABCDEF") -(defconst quoted-printable-octet-regexp - (concat "=[" quoted-printable-hex-chars - "][" quoted-printable-hex-chars "]")) - -(defconst eword-Q-encoded-text-regexp - (concat "\\([^=?]\\|" quoted-printable-octet-regexp "\\)+")) -;; (defconst eword-Q-encoding-and-encoded-text-regexp -;; (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp)) +(eval-and-compile + (defconst eword-encoded-text-regexp "[!->@-~]+") + + (defconst eword-encoded-word-regexp + (eval-when-compile + (concat (regexp-quote "=?") + "\\(" + mime-charset-regexp ; 1 + "\\)" + "\\(" + (regexp-quote "*") + mime-language-regexp ; 2 + "\\)?" + (regexp-quote "?") + "\\(" + mime-encoding-regexp ; 3 + "\\)" + (regexp-quote "?") + "\\(" + eword-encoded-text-regexp ; 4 + "\\)" + (regexp-quote "?=")))) + ) ;;; @ for string @@ -112,30 +89,121 @@ If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even if there are in decoded encoded-words (generated by bad manner MUA such as a version of Net$cape)." (setq string (std11-unfold-string string)) - (let ((dest "")(ew nil) - beg end) - (while (and (string-match eword-encoded-word-regexp string) - (setq beg (match-beginning 0) - end (match-end 0)) + (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)")) + (next 0) + match start words) + (while (setq match (string-match regexp string next)) + (setq start (match-beginning 1) + words nil) + (while match + (setq next (match-end 0)) + (push (list (match-string 2 string) ;; charset + (match-string 3 string) ;; language + (match-string 4 string) ;; encoding + (match-string 5 string) ;; encoded-text + (match-string 1 string)) ;; encoded-word + words) + (setq match (and (string-match regexp string next) + (= next (match-beginning 0))))) + (setq words (eword-decode-encoded-words (nreverse words) must-unfold) + string (concat (substring string 0 start) + words + (substring string next)) + next (+ start (length words))))) + string) + +(defun eword-decode-structured-field-body (string + &optional start-column max-column + start) + (let ((tokens (eword-lexical-analyze string start 'must-unfold)) + (result "") + token) + (while tokens + (setq token (car tokens)) + (setq result (concat result (eword-decode-token token))) + (setq tokens (cdr tokens))) + result)) + +(defun eword-decode-and-unfold-structured-field-body (string + &optional + start-column + max-column + start) + "Decode and unfold STRING as structured field body. +It decodes non us-ascii characters in FULL-NAME encoded as +encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii +characters are regarded as variable `default-mime-charset'. + +If an encoded-word is broken or your emacs implementation can not +decode the charset included in it, it is not decoded." + (let ((tokens (eword-lexical-analyze string start 'must-unfold)) + (result "")) + (while tokens + (let* ((token (car tokens)) + (type (car token))) + (setq tokens (cdr tokens)) + (setq result + (if (eq type 'spaces) + (concat result " ") + (concat result (eword-decode-token token)) + )))) + result)) + +(defun eword-decode-and-fold-structured-field-body (string + start-column + &optional max-column + start) + (if (and mime-field-decoding-max-size + (> (length string) mime-field-decoding-max-size)) + string + (or max-column + (setq max-column fill-column)) + (let ((c start-column) + (tokens (eword-lexical-analyze string start 'must-unfold)) + (result "") + token) + (while (and (setq token (car tokens)) + (setq tokens (cdr tokens))) + (let* ((type (car token))) + (if (eq type 'spaces) + (let* ((next-token (car tokens)) + (next-str (eword-decode-token next-token)) + (next-len (string-width next-str)) + (next-c (+ c next-len 1))) + (if (< next-c max-column) + (setq result (concat result " " next-str) + c next-c) + (setq result (concat result "\n " next-str) + c (1+ next-len))) + (setq tokens (cdr tokens)) ) - (if (> beg 0) - (if (not - (and (eq ew t) - (string-match "^[ \t]+$" (substring string 0 beg)) - )) - (setq dest (concat dest (substring string 0 beg))) - ) - ) - (setq dest - (concat dest - (eword-decode-encoded-word - (substring string beg end) must-unfold) - )) - (setq string (substring string end)) - (setq ew t) - ) - (concat dest string) - )) + (let* ((str (eword-decode-token token))) + (setq result (concat result str) + c (+ c (string-width str))) + )))) + (if token + (concat result (eword-decode-token token)) + result)))) + +(defun eword-decode-unstructured-field-body (string &optional start-column + max-column) + (eword-decode-string + (decode-mime-charset-string string default-mime-charset))) + +(defun eword-decode-and-unfold-unstructured-field-body (string + &optional start-column + max-column) + (eword-decode-string + (decode-mime-charset-string (std11-unfold-string string) + default-mime-charset) + 'must-unfold)) + +(defun eword-decode-unfolded-unstructured-field-body (string + &optional start-column + max-column) + (eword-decode-string + (decode-mime-charset-string string default-mime-charset) + 'must-unfold)) ;;; @ for region @@ -154,201 +222,371 @@ such as a version of Net$cape)." (save-restriction (narrow-to-region start end) (if unfolding - (eword-decode-unfold) - ) + (eword-decode-unfold)) (goto-char (point-min)) - (while (re-search-forward (concat "\\(" eword-encoded-word-regexp "\\)" - "\\(\n?[ \t]\\)+" - "\\(" eword-encoded-word-regexp "\\)") - nil t) - (replace-match "\\1\\6") - (goto-char (point-min)) - ) - (while (re-search-forward eword-encoded-word-regexp nil t) - (insert (eword-decode-encoded-word - (prog1 - (buffer-substring (match-beginning 0) (match-end 0)) - (delete-region (match-beginning 0) (match-end 0)) - ) must-unfold)) - ) + (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)")) + match words) + (while (setq match (re-search-forward regexp nil t)) + (setq start (match-beginning 1) + words nil) + (while match + (goto-char (setq end (match-end 0))) + (push (list (match-string 2) ;; charset + (match-string 3) ;; language + (match-string 4) ;; encoding + (match-string 5) ;; encoded-text + (match-string 1)) ;; encoded-word + words) + (setq match (looking-at regexp))) + (delete-region start end) + (insert + (eword-decode-encoded-words (nreverse words) must-unfold))))))) + +(defun eword-decode-unfold () + (goto-char (point-min)) + (let (field beg end) + (while (re-search-forward std11-field-head-regexp nil t) + (setq beg (match-beginning 0) + end (std11-field-end)) + (setq field (buffer-substring beg end)) + (if (string-match eword-encoded-word-regexp field) + (save-restriction + (narrow-to-region (goto-char beg) end) + (while (re-search-forward "\n\\([ \t]\\)" nil t) + (replace-match (match-string 1)) + ) + (goto-char (point-max)) + )) ))) ;;; @ for message header ;;; -(defcustom eword-decode-ignored-field-list - '(newsgroups path lines nntp-posting-host message-id date) - "*List of field-names to be ignored when decoding. -Each field name must be symbol." - :group 'eword-decode - :type '(repeat symbol)) - -(defcustom eword-decode-structured-field-list - '(reply-to resent-reply-to from resent-from sender resent-sender - to resent-to cc resent-cc bcc resent-bcc dcc - mime-version content-type content-transfer-encoding - content-disposition) - "*List of field-names to decode as structured field. -Each field name must be symbol." - :group 'eword-decode - :type '(repeat symbol)) - -(defun eword-decode-header (&optional code-conversion separator) - "Decode MIME encoded-words in header fields. +(defvar mime-field-decoder-alist nil) + +(defvar mime-field-decoder-cache nil) + +(defvar mime-update-field-decoder-cache 'mime-update-field-decoder-cache + "*Field decoder cache update function.") + +;;;###autoload +(defun mime-set-field-decoder (field &rest specs) + "Set decoder of FIELD. +SPECS must be like `MODE1 DECODER1 MODE2 DECODER2 ...'. +Each mode must be `nil', `plain', `wide', `summary' or `nov'. +If mode is `nil', corresponding decoder is set up for every modes." + (when specs + (let ((mode (pop specs)) + (function (pop specs))) + (if mode + (progn + (let ((cell (assq mode mime-field-decoder-alist))) + (if cell + (setcdr cell (put-alist field function (cdr cell))) + (setq mime-field-decoder-alist + (cons (cons mode (list (cons field function))) + mime-field-decoder-alist)) + )) + (apply (function mime-set-field-decoder) field specs) + ) + (mime-set-field-decoder field + 'plain function + 'wide function + 'summary function + 'nov function) + )))) + +;;;###autoload +(defmacro mime-find-field-presentation-method (name) + "Return field-presentation-method from NAME. +NAME must be `plain', `wide', `summary' or `nov'." + (cond ((eq name nil) + `(or (assq 'summary mime-field-decoder-cache) + '(summary)) + ) + ((and (consp name) + (car name) + (consp (cdr name)) + (symbolp (car (cdr name))) + (null (cdr (cdr name)))) + `(or (assq ,name mime-field-decoder-cache) + (cons ,name nil)) + ) + (t + `(or (assq (or ,name 'summary) mime-field-decoder-cache) + (cons (or ,name 'summary) nil)) + ))) + +(defun mime-find-field-decoder-internal (field &optional mode) + "Return function to decode field-body of FIELD in MODE. +Optional argument MODE must be object of field-presentation-method." + (cdr (or (assq field (cdr mode)) + (prog1 + (funcall mime-update-field-decoder-cache + field (car mode)) + (setcdr mode + (cdr (assq (car mode) mime-field-decoder-cache))) + )))) + +;;;###autoload +(defun mime-find-field-decoder (field &optional mode) + "Return function to decode field-body of FIELD in MODE. +Optional argument MODE must be object or name of +field-presentation-method. Name of field-presentation-method must be +`plain', `wide', `summary' or `nov'. +Default value of MODE is `summary'." + (if (symbolp mode) + (let ((p (cdr (mime-find-field-presentation-method mode)))) + (if (and p (setq p (assq field p))) + (cdr p) + (cdr (funcall mime-update-field-decoder-cache + field (or mode 'summary))))) + (inline (mime-find-field-decoder-internal field mode)) + )) + +;;;###autoload +(defun mime-update-field-decoder-cache (field mode &optional function) + "Update field decoder cache `mime-field-decoder-cache'." + (cond ((eq function 'identity) + (setq function nil) + ) + ((null function) + (let ((decoder-alist + (cdr (assq (or mode 'summary) mime-field-decoder-alist)))) + (setq function (cdr (or (assq field decoder-alist) + (assq t decoder-alist))))) + )) + (let ((cell (assq mode mime-field-decoder-cache)) + ret) + (if cell + (if (setq ret (assq field (cdr cell))) + (setcdr ret function) + (setcdr cell (cons (setq ret (cons field function)) (cdr cell)))) + (setq mime-field-decoder-cache + (cons (cons mode (list (setq ret (cons field function)))) + mime-field-decoder-cache))) + ret)) + +;; ignored fields +(mime-set-field-decoder 'Archive nil nil) +(mime-set-field-decoder 'Content-Md5 nil nil) +(mime-set-field-decoder 'Control nil nil) +(mime-set-field-decoder 'Date nil nil) +(mime-set-field-decoder 'Distribution nil nil) +(mime-set-field-decoder 'Followup-Host nil nil) +(mime-set-field-decoder 'Followup-To nil nil) +(mime-set-field-decoder 'Lines nil nil) +(mime-set-field-decoder 'Message-Id nil nil) +(mime-set-field-decoder 'Newsgroups nil nil) +(mime-set-field-decoder 'Nntp-Posting-Host nil nil) +(mime-set-field-decoder 'Path nil nil) +(mime-set-field-decoder 'Posted-And-Mailed nil nil) +(mime-set-field-decoder 'Received nil nil) +(mime-set-field-decoder 'Status nil nil) +(mime-set-field-decoder 'X-Face nil nil) +(mime-set-field-decoder 'X-Face-Version nil nil) +(mime-set-field-decoder 'X-Info nil nil) +(mime-set-field-decoder 'X-Pgp-Key-Info nil nil) +(mime-set-field-decoder 'X-Pgp-Sig nil nil) +(mime-set-field-decoder 'X-Pgp-Sig-Version nil nil) +(mime-set-field-decoder 'Xref nil nil) + +;; structured fields +(let ((fields + '(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender + To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc + Mail-Followup-To + Mime-Version Content-Type Content-Transfer-Encoding + Content-Disposition User-Agent)) + field) + (while fields + (setq field (pop fields)) + (mime-set-field-decoder + field + 'plain #'eword-decode-structured-field-body + 'wide #'eword-decode-and-fold-structured-field-body + 'summary #'eword-decode-and-unfold-structured-field-body + 'nov #'eword-decode-and-unfold-structured-field-body) + )) + +;; unstructured fields (default) +(mime-set-field-decoder + t + 'plain #'eword-decode-unstructured-field-body + 'wide #'eword-decode-unstructured-field-body + 'summary #'eword-decode-and-unfold-unstructured-field-body + 'nov #'eword-decode-unfolded-unstructured-field-body) + +;;;###autoload +(defun mime-decode-field-body (field-body field-name + &optional mode max-column) + "Decode FIELD-BODY as FIELD-NAME in MODE, and return the result. +Optional argument MODE must be `plain', `wide', `summary' or `nov'. +Default mode is `summary'. + +If MODE is `wide' and MAX-COLUMN is non-nil, the result is folded with +MAX-COLUMN. + +Non MIME encoded-word part in FILED-BODY is decoded with +`default-mime-charset'." + (let (field-name-symbol len decoder) + (if (symbolp field-name) + (setq field-name-symbol field-name + len (1+ (string-width (symbol-name field-name)))) + (setq field-name-symbol (intern (capitalize field-name)) + len (1+ (string-width field-name)))) + (setq decoder (mime-find-field-decoder field-name-symbol mode)) + (if decoder + (funcall decoder field-body len max-column) + ;; Don't decode + (if (eq mode 'summary) + (std11-unfold-string field-body) + field-body) + ))) + +;;;###autoload +(defun mime-decode-header-in-region (start end + &optional code-conversion) + "Decode MIME encoded-words in region between START and END. If CODE-CONVERSION is nil, it decodes only encoded-words. If it is mime-charset, it decodes non-ASCII bit patterns as the mime-charset. Otherwise it decodes non-ASCII bit patterns as the -default-mime-charset. -If SEPARATOR is not nil, it is used as header separator." - (interactive "*") +default-mime-charset." + (interactive "*r") (save-excursion (save-restriction - (std11-narrow-to-header separator) + (narrow-to-region start end) (let ((default-charset (if code-conversion (if (mime-charset-to-coding-system code-conversion) code-conversion default-mime-charset)))) (if default-charset - (let (beg p end field-name len) + (let ((mode-obj (mime-find-field-presentation-method 'wide)) + beg p end field-name len field-decoder) (goto-char (point-min)) (while (re-search-forward std11-field-head-regexp nil t) (setq beg (match-beginning 0) p (match-end 0) field-name (buffer-substring beg (1- p)) len (string-width field-name) - field-name (intern (downcase field-name)) - end (std11-field-end)) - (cond ((memq field-name eword-decode-ignored-field-list) - ;; Don't decode - ) - ((memq field-name eword-decode-structured-field-list) - ;; Decode as structured field - (let ((body (buffer-substring p end)) - (default-mime-charset default-charset)) - (delete-region p end) - (insert (eword-decode-and-fold-structured-field - body (1+ len))) - )) - (t - ;; Decode as unstructured field - (save-restriction - (narrow-to-region beg (1+ end)) - (decode-mime-charset-region p end default-charset) - (goto-char p) - (if (re-search-forward eword-encoded-word-regexp - nil t) - (eword-decode-region beg (point-max) 'unfold)) - ))))) + field-name (intern (capitalize field-name)) + field-decoder (inline + (mime-find-field-decoder-internal + field-name mode-obj))) + (when field-decoder + (setq end (std11-field-end)) + (let ((body (buffer-substring p end)) + (default-mime-charset default-charset)) + (delete-region p end) + (insert (funcall field-decoder body (1+ len))) + )) + )) (eword-decode-region (point-min) (point-max) t) ))))) -(defun eword-decode-unfold () - (goto-char (point-min)) - (let (field beg end) - (while (re-search-forward std11-field-head-regexp nil t) - (setq beg (match-beginning 0) - end (std11-field-end)) - (setq field (buffer-substring beg end)) - (if (string-match eword-encoded-word-regexp field) - (save-restriction - (narrow-to-region (goto-char beg) end) - (while (re-search-forward "\n\\([ \t]\\)" nil t) - (replace-match (match-string 1)) - ) - (goto-char (point-max)) - )) - ))) - - -;;; @ encoded-word decoder -;;; - -(defvar eword-warning-face nil "Face used for invalid encoded-word.") - -(defun eword-decode-encoded-word (word &optional must-unfold) - "Decode WORD if it is an encoded-word. - -If your emacs implementation can not decode the charset of WORD, it -returns WORD. Similarly the encoded-word is broken, it returns WORD. - -If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-word (generated by bad manner MUA such -as a version of Net$cape)." - (or (if (string-match eword-encoded-word-regexp word) - (let ((charset - (substring word (match-beginning 1) (match-end 1)) - ) - (encoding - (upcase - (substring word (match-beginning 2) (match-end 2)) - )) - (text - (substring word (match-beginning 3) (match-end 3)) - )) - (condition-case err - (eword-decode-encoded-text charset encoding text must-unfold) - (error - (and - (add-text-properties 0 (length word) - (and eword-warning-face - (list 'face eword-warning-face)) - word) - word))) - )) - word)) - - -;;; @ encoded-text decoder +;;;###autoload +(defun mime-decode-header-in-buffer (&optional code-conversion separator) + "Decode MIME encoded-words in header fields. +If CODE-CONVERSION is nil, it decodes only encoded-words. If it is +mime-charset, it decodes non-ASCII bit patterns as the mime-charset. +Otherwise it decodes non-ASCII bit patterns as the +default-mime-charset. +If SEPARATOR is not nil, it is used as header separator." + (interactive "*") + (mime-decode-header-in-region + (point-min) + (save-excursion + (goto-char (point-min)) + (if (re-search-forward + (concat "^\\(" (regexp-quote (or separator "")) "\\)?$") + nil t) + (match-beginning 0) + (point-max) + )) + code-conversion)) + +(defalias 'eword-decode-header 'mime-decode-header-in-buffer) +(make-obsolete 'eword-decode-header 'mime-decode-header-in-buffer) + + +;;; @ encoded-words decoder ;;; -(defun eword-decode-encoded-text (charset encoding string - &optional must-unfold) - "Decode STRING as an encoded-text. +(defvar eword-decode-allow-incomplete-encoded-text t + "*Non-nil means allow incomplete encoded-text in successive encoded-words. +Dividing of encoded-text in the place other than character boundaries +violates RFC2047 section 5, while we have a capability to decode it. +If it is non-nil, the decoder will decode B- or Q-encoding in each +encoded-word, concatenate them, and decode it by charset. Otherwise, +the decoder will fully decode each encoded-word before concatenating +them.") -If your emacs implementation can not decode CHARSET, it returns nil. - -If ENCODING is not \"B\" or \"Q\", it occurs error. -So you should write error-handling code if you don't want break by errors. +(defun eword-decode-encoded-words (words must-unfold) + "Decode successive encoded-words in WORDS and return a decoded string. +Each element of WORDS looks like (CHARSET LANGUAGE ENCODING ENCODED-TEXT +ENCODED-WORD). If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-text (generated by bad manner MUA such -as a version of Net$cape)." - (let ((cs (mime-charset-to-coding-system charset))) - (if cs - (let ((dest - (cond - ((string-equal "B" encoding) - (if (and (string-match eword-B-encoded-text-regexp string) - (string-equal string (match-string 0 string))) - (base64-decode-string string) - (error "Invalid encoded-text %s" string))) - ((string-equal "Q" encoding) - (if (and (string-match eword-Q-encoded-text-regexp string) - (string-equal string (match-string 0 string))) - (q-encoding-decode-string string) - (error "Invalid encoded-text %s" string))) - (t - (error "Invalid encoding %s" encoding) - ))) - ) - (if dest - (progn - (setq dest (decode-coding-string dest cs)) - (if must-unfold - (mapconcat (function - (lambda (chr) - (cond - ((eq chr ?\n) "") - ((eq chr ?\t) " ") - (t (char-to-string chr))) - )) - (std11-unfold-string dest) - "") - dest) - )))))) - +if there are in decoded encoded-words (generated by bad manner MUA +such as a version of Net$cape)." + (let (word language charset encoding text rest) + (while words + (setq word (pop words) + language (nth 1 word)) + (if (and (or (mime-charset-to-coding-system (setq charset (car word))) + (progn + (message "Unknown charset: %s" charset) + nil)) + (cond ((member (setq encoding (nth 2 word)) '("B" "Q")) + t) + ((member encoding '("b" "q")) + (setq encoding (upcase encoding))) + (t + (message "Invalid encoding: %s" encoding) + nil)) + (condition-case err + (setq text + (encoded-text-decode-string (nth 3 word) encoding)) + (error + (message "%s" (error-message-string err)) + nil))) + (if (and eword-decode-allow-incomplete-encoded-text + rest + (caaar rest) + (string-equal (downcase charset) (downcase (caaar rest))) + (equal language (cdaar rest))) + ;; Concatenate text of which the charset is the same. + (setcdr (car rest) (concat (cdar rest) text)) + (push (cons (cons charset language) text) rest)) + ;; Don't decode encoded-word. + (push (cons (cons nil language) (nth 4 word)) rest))) + (while rest + (setq word (or (and (setq charset (caaar rest)) + (condition-case err + (decode-mime-charset-string (cdar rest) charset) + (error + (message "%s" (error-message-string err)) + nil))) + (concat (when (cdr rest) " ") + (cdar rest) + (when (and words + (not (eq (string-to-char words) ? ))) + " ")))) + (when must-unfold + (setq word (mapconcat (lambda (chr) + (cond ((eq chr ?\n) "") + ((eq chr ?\r) "") + ((eq chr ?\t) " ") + (t (char-to-string chr)))) + (std11-unfold-string word) + ""))) + (when (setq language (cdaar rest)) + (put-text-property 0 (length word) 'mime-language language word)) + (setq words (concat word words) + rest (cdr rest))) + words)) ;;; @ lexical analyze ;;; @@ -358,7 +596,7 @@ as a version of Net$cape)." "*Max position of eword-lexical-analyze-cache. It is max size of eword-lexical-analyze-cache - 1.") -(defcustom eword-lexical-analyzers +(defvar mime-header-lexical-analyzer '(eword-analyze-quoted-string eword-analyze-domain-literal eword-analyze-comment @@ -367,8 +605,9 @@ It is max size of eword-lexical-analyze-cache - 1.") eword-analyze-encoded-word eword-analyze-atom) "*List of functions to return result of lexical analyze. -Each function must have two arguments: STRING and MUST-UNFOLD. +Each function must have three arguments: STRING, START and MUST-UNFOLD. STRING is the target string to be analyzed. +START is start position of STRING to analyze. If MUST-UNFOLD is not nil, each function must unfold and eliminate bare-CR and bare-LF from the result even if they are included in content of the encoded-word. @@ -377,106 +616,165 @@ format. Previous function is preferred to next function. If a function returns nil, next function is used. Otherwise the return value will -be the result." - :group 'eword-decode - :type '(repeat function)) - -(defun eword-analyze-quoted-string (string &optional must-unfold) - (let ((p (std11-check-enclosure string ?\" ?\"))) - (if p - (cons (cons 'quoted-string - (decode-mime-charset-string - (std11-strip-quoted-pair (substring string 1 (1- p))) - default-mime-charset)) - (substring string p)) - ))) - -(defun eword-analyze-domain-literal (string &optional must-unfold) - (std11-analyze-domain-literal string)) - -(defun eword-analyze-comment (string &optional must-unfold) - (let ((p (std11-check-enclosure string ?\( ?\) t))) - (if p - (cons (cons 'comment - (eword-decode-string - (decode-mime-charset-string - (std11-strip-quoted-pair (substring string 1 (1- p))) - default-mime-charset) - must-unfold)) - (substring string p)) - ))) - -(defun eword-analyze-spaces (string &optional must-unfold) - (std11-analyze-spaces string)) +be the result.") -(defun eword-analyze-special (string &optional must-unfold) - (std11-analyze-special string)) - -(defun eword-analyze-encoded-word (string &optional must-unfold) - (if (eq (string-match eword-encoded-word-regexp string) 0) - (let ((end (match-end 0)) - (dest (eword-decode-encoded-word (match-string 0 string) - must-unfold)) - ) - (setq string (substring string end)) - (while (eq (string-match `,(concat "[ \t\n]*\\(" - eword-encoded-word-regexp - "\\)") - string) - 0) - (setq end (match-end 0)) - (setq dest - (concat dest - (eword-decode-encoded-word (match-string 1 string) - must-unfold)) - string (substring string end)) - ) - (cons (cons 'atom dest) string) - ))) +(defun eword-analyze-quoted-string (string start &optional must-unfold) + (let ((p (std11-check-enclosure string ?\" ?\" nil start)) + ret) + (when p + (setq ret (decode-mime-charset-string + (std11-strip-quoted-pair + (substring string (1+ start) (1- p))) + default-mime-charset)) + (if mime-header-accept-quoted-encoded-words + (setq ret (eword-decode-string ret))) + (cons (cons 'quoted-string ret) + p)))) + +(defun eword-analyze-domain-literal (string start &optional must-unfold) + (std11-analyze-domain-literal string start)) + +(defun eword-analyze-comment (string from &optional must-unfold) + (let ((len (length string)) + (i (or from 0)) + dest last-str + chr ret) + (when (and (> len i) + (eq (aref string i) ?\()) + (setq i (1+ i) + from i) + (catch 'tag + (while (< i len) + (setq chr (aref string i)) + (cond ((eq chr ?\\) + (setq i (1+ i)) + (if (>= i len) + (throw 'tag nil) + ) + (setq last-str (concat last-str + (substring string from (1- i)) + (char-to-string (aref string i))) + i (1+ i) + from i) + ) + ((eq chr ?\)) + (setq ret (concat last-str + (substring string from i))) + (throw 'tag (cons + (cons 'comment + (nreverse + (if (string= ret "") + dest + (cons + (eword-decode-string + (decode-mime-charset-string + ret default-mime-charset) + must-unfold) + dest) + ))) + (1+ i))) + ) + ((eq chr ?\() + (if (setq ret (eword-analyze-comment string i must-unfold)) + (setq last-str + (concat last-str + (substring string from i)) + dest + (if (string= last-str "") + (cons (car ret) dest) + (list* (car ret) + (eword-decode-string + (decode-mime-charset-string + last-str default-mime-charset) + must-unfold) + dest) + ) + i (cdr ret) + from i + last-str "") + (throw 'tag nil) + )) + (t + (setq i (1+ i)) + )) + ))))) -(defun eword-analyze-atom (string &optional must-unfold) - (if (string-match std11-atom-regexp string) +(defun eword-analyze-spaces (string start &optional must-unfold) + (std11-analyze-spaces string start)) + +(defun eword-analyze-special (string start &optional must-unfold) + (std11-analyze-special string start)) + +(defun eword-analyze-encoded-word (string start &optional must-unfold) + (let* ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)")) + (match (and (string-match regexp string start) + (= start (match-beginning 0)))) + next words) + (while match + (setq next (match-end 0)) + (push (list (match-string 2 string) ;; charset + (match-string 3 string) ;; language + (match-string 4 string) ;; encoding + (match-string 5 string) ;; encoded-text + (match-string 1 string)) ;; encoded-word + words) + (setq match (and (string-match regexp string next) + (= next (match-beginning 0))))) + (when words + (cons (cons 'atom (eword-decode-encoded-words (nreverse words) + must-unfold)) + next)))) + +(defun eword-analyze-atom (string start &optional must-unfold) + (if (and (string-match std11-atom-regexp string start) + (= (match-beginning 0) start)) (let ((end (match-end 0))) (cons (cons 'atom (decode-mime-charset-string - (substring string 0 end) + (substring string start end) default-mime-charset)) - (substring string end) - )))) + ;;(substring string end) + end) + ))) -(defun eword-lexical-analyze-internal (string must-unfold) - (let (dest ret) - (while (not (string-equal string "")) +(defun eword-lexical-analyze-internal (string start must-unfold) + (let ((len (length string)) + dest ret) + (while (< start len) (setq ret - (let ((rest eword-lexical-analyzers) + (let ((rest mime-header-lexical-analyzer) func r) (while (and (setq func (car rest)) - (null (setq r (funcall func string must-unfold))) + (null + (setq r (funcall func string start must-unfold))) ) (setq rest (cdr rest))) - (or r `((error . ,string) . "")) + (or r + (cons (cons 'error (substring string start)) (1+ len))) )) - (setq dest (cons (car ret) dest)) - (setq string (cdr ret)) + (setq dest (cons (car ret) dest) + start (cdr ret)) ) (nreverse dest) )) -(defun eword-lexical-analyze (string &optional must-unfold) +(defun eword-lexical-analyze (string &optional start must-unfold) "Return lexical analyzed list corresponding STRING. It is like std11-lexical-analyze, but it decodes non us-ascii characters encoded as encoded-words or invalid \"raw\" format. \"Raw\" non us-ascii characters are regarded as variable `default-mime-charset'." - (let ((key (copy-sequence string)) - ret) + (let ((key (substring string (or start 0))) + ret cell) (set-text-properties 0 (length key) nil key) (if (setq ret (assoc key eword-lexical-analyze-cache)) (cdr ret) - (setq ret (eword-lexical-analyze-internal key must-unfold)) + (setq ret (eword-lexical-analyze-internal key 0 must-unfold)) (setq eword-lexical-analyze-cache (cons (cons key ret) - (last eword-lexical-analyze-cache - eword-lexical-analyze-cache-max))) + eword-lexical-analyze-cache)) + (if (cdr (setq cell (nthcdr eword-lexical-analyze-cache-max + eword-lexical-analyze-cache))) + (setcdr cell nil)) ret))) (defun eword-decode-token (token) @@ -485,118 +783,21 @@ characters encoded as encoded-words or invalid \"raw\" format. (cond ((eq type 'quoted-string) (std11-wrap-as-quoted-string value)) ((eq type 'comment) - (concat "(" (std11-wrap-as-quoted-pairs value '(?( ?))) ")")) + (let ((dest "")) + (while value + (setq dest (concat dest + (if (stringp (car value)) + (std11-wrap-as-quoted-pairs + (car value) '(?( ?))) + (eword-decode-token (car value)) + )) + value (cdr value)) + ) + (concat "(" dest ")") + )) (t value)))) -(defun eword-decode-and-fold-structured-field - (string start-column &optional max-column must-unfold) - "Decode and fold (fill) STRING as structured field body. -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded. - -If MAX-COLUMN is omitted, `fill-column' is used. - -If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-words (generated by bad manner MUA -such as a version of Net$cape)." - (or max-column - (setq max-column fill-column)) - (let ((c start-column) - (tokens (eword-lexical-analyze string must-unfold)) - (result "") - token) - (while (and (setq token (car tokens)) - (setq tokens (cdr tokens))) - (let* ((type (car token))) - (if (eq type 'spaces) - (let* ((next-token (car tokens)) - (next-str (eword-decode-token next-token)) - (next-len (string-width next-str)) - (next-c (+ c next-len 1))) - (if (< next-c max-column) - (setq result (concat result " " next-str) - c next-c) - (setq result (concat result "\n " next-str) - c (1+ next-len))) - (setq tokens (cdr tokens)) - ) - (let* ((str (eword-decode-token token))) - (setq result (concat result str) - c (+ c (string-width str))) - )))) - (if token - (concat result (eword-decode-token token)) - result))) - -(defun eword-decode-and-unfold-structured-field (string) - "Decode and unfold STRING as structured field body. -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded." - (let ((tokens (eword-lexical-analyze string 'must-unfold)) - (result "")) - (while tokens - (let* ((token (car tokens)) - (type (car token))) - (setq tokens (cdr tokens)) - (setq result - (if (eq type 'spaces) - (concat result " ") - (concat result (eword-decode-token token)) - )))) - result)) - -(defun eword-decode-structured-field-body (string &optional must-unfold - start-column max-column) - "Decode non us-ascii characters in STRING as structured field body. -STRING is unfolded before decoding. - -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded. - -If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-words (generated by bad manner MUA -such as a version of Net$cape)." - (if start-column - ;; fold with max-column - (eword-decode-and-fold-structured-field - string start-column max-column must-unfold) - ;; Don't fold - (mapconcat (function eword-decode-token) - (eword-lexical-analyze string must-unfold) - "") - )) - -(defun eword-decode-unstructured-field-body (string &optional must-unfold) - "Decode non us-ascii characters in STRING as unstructured field body. -STRING is unfolded before decoding. - -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded. - -If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-words (generated by bad manner MUA -such as a version of Net$cape)." - (eword-decode-string - (decode-mime-charset-string string default-mime-charset) - must-unfold)) - -(defun eword-extract-address-components (string) +(defun eword-extract-address-components (string &optional start) "Extract full name and canonical address from STRING. Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). If no name can be extracted, FULL-NAME will be nil. @@ -605,7 +806,8 @@ encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii characters are regarded as variable `default-mime-charset'." (let* ((structure (car (std11-parse-address (eword-lexical-analyze - (std11-unfold-string string) 'must-unfold)))) + (std11-unfold-string string) start + 'must-unfold)))) (phrase (std11-full-name-string structure)) (address (std11-address-string structure)) )