X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=eword-decode.el;h=f11b7cc4d448f24aafabe26c9032b4823b1834ce;hb=99c158b34f006c5840785347fc7e711a6b246c0c;hp=4f11e6b57f4a8c5121cbd25f207e85d01579e33f;hpb=7e7f84c58d5a26195f7308b23e01124042d75746;p=elisp%2Fflim.git diff --git a/eword-decode.el b/eword-decode.el index 4f11e6b..f11b7cc 100644 --- a/eword-decode.el +++ b/eword-decode.el @@ -32,14 +32,29 @@ ;;; Code: -(require 'std11-parse) +(require 'std11) (require 'mel) (require 'mime-def) +(require 'ew-dec) +(require 'ew-line) + +(eval-when-compile (require 'cl)) + (defgroup eword-decode nil "Encoded-word decoding" :group 'mime) +;;; TEST + +(defvar rotate-memo nil) +(defmacro rotate-memo (var val) + `(when rotate-memo + (unless (boundp ',var) (setq ,var ())) + (setq ,var (cons ,val ,var)) + (let ((tmp (last ,var (- (length ,var) 100)))) + (when tmp (setcdr tmp nil))) + ,var)) ;;; @ variables ;;; @@ -57,6 +72,12 @@ however this behaviour violates RFC2047." :group 'eword-decode :type 'boolean) +(defcustom eword-max-size-to-decode 1000 + "*Max size to decode header field." + :group 'eword-decode + :type '(choice (integer :tag "Limit (bytes)") + (const :tag "Don't limit" nil))) + ;;; @ MIME encoded-word definition ;;; @@ -82,7 +103,7 @@ however this behaviour violates RFC2047." (concat eword-encoded-word-prefix-regexp "\\(" eword-encoded-text-in-phrase-regexp "\\)" eword-encoded-word-suffix-regexp)) -(defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t(]\\|$\\)") +(defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t]\\|$\\)") (defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+") (defconst eword-encoded-word-in-comment-regexp @@ -103,43 +124,6 @@ however this behaviour violates RFC2047." (defconst eword-encoded-word-regexp eword-encoded-word-in-unstructured-regexp) -;;; @@ Base64 -;;; - -(defconst base64-token-regexp "[A-Za-z0-9+/]") -(defconst base64-token-padding-regexp "[A-Za-z0-9+/=]") - -(defconst eword-B-encoded-text-regexp - (concat "\\(\\(" - base64-token-regexp - base64-token-regexp - base64-token-regexp - base64-token-regexp - "\\)*" - base64-token-regexp - base64-token-regexp - base64-token-padding-regexp - base64-token-padding-regexp - "\\)")) - -;; (defconst eword-B-encoding-and-encoded-text-regexp -;; (concat "\\(B\\)\\?" eword-B-encoded-text-regexp)) - - -;;; @@ Quoted-Printable -;;; - -(defconst quoted-printable-hex-chars "0123456789ABCDEF") -(defconst quoted-printable-octet-regexp - (concat "=[" quoted-printable-hex-chars - "][" quoted-printable-hex-chars "]")) - -(defconst eword-Q-encoded-text-regexp - (concat "\\([^=?]\\|" quoted-printable-octet-regexp "\\)+")) -;; (defconst eword-Q-encoding-and-encoded-text-regexp -;; (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp)) - - ;;; @ internal utilities ;;; @@ -224,6 +208,7 @@ such as a version of Net$cape)." safe-regexp escape ; ?\\ or nil. delimiters ; list of chars. + chars-must-be-quote must-unfold code-conversion) (if (and code-conversion @@ -245,14 +230,14 @@ such as a version of Net$cape)." (setq dst (concat dst (std11-wrap-as-quoted-pairs (decode-mime-charset-string buf code-conversion) - delimiters)) + chars-must-be-quote)) buf "")) (cond (decoded (setq dst (concat dst (std11-wrap-as-quoted-pairs (car decoded) - delimiters)) + chars-must-be-quote)) src (cdr decoded))) ((memq ch delimiters) (setq dst (concat dst (list ch)) @@ -276,7 +261,7 @@ such as a version of Net$cape)." (setq dst (concat dst (std11-wrap-as-quoted-pairs (decode-mime-charset-string buf code-conversion) - delimiters)))) + chars-must-be-quote)))) dst)) @@ -291,6 +276,7 @@ such as a version of Net$cape)." "[^ \t\n=]*" nil nil + nil must-unfold code-conversion)) @@ -302,6 +288,7 @@ such as a version of Net$cape)." "[^ \t\n()\\\\=]*" ?\\ '(?\( ?\)) + '(?\( ?\) ?\\ ?\r ?\n) must-unfold code-conversion)) @@ -313,6 +300,7 @@ such as a version of Net$cape)." "[^ \t\n\"\\\\=]*" ?\\ '(?\") + '(?\" ?\\ ?\r ?\n) must-unfold code-conversion)) @@ -333,10 +321,61 @@ mime-charset, it decodes non-ASCII bit patterns as the mime-charset. Otherwise it decodes non-ASCII bit patterns as the default-mime-charset." (eword-decode-unstructured - code-conversion (std11-unfold-string string) + code-conversion must-unfold)) +(defun eword-decode-structured-field-body (string + &optional + start-column max-column) + (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11)) + (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string)))) + (ew-crlf-to-lf decoded))) + +(defun eword-decode-and-unfold-structured-field-body (string + &optional + start-column + max-column) + "Decode and unfold STRING as structured field body. +It decodes non us-ascii characters in FULL-NAME encoded as +encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii +characters are regarded as variable `default-mime-charset'. + +If an encoded-word is broken or your emacs implementation can not +decode the charset included in it, it is not decoded." + (let* ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string)))) + (ew-crlf-to-lf (ew-crlf-unfold decoded)))) + +(defun eword-decode-and-fold-structured-field-body (string + start-column + &optional max-column) + (or max-column + (setq max-column fill-column)) + (let* ((field-name (make-string (1- start-column) ?X)) + (field-body (ew-lf-crlf-to-crlf string)) + (ew-decode-field-default-syntax '(ew-scan-unibyte-std11)) + (decoded (ew-decode-field field-name field-body))) + (unless (equal field-body decoded) + (setq decoded (ew-crlf-refold decoded start-column max-column))) + (ew-crlf-to-lf decoded))) + +(defun eword-decode-unstructured-field-body (string &optional start-column + max-column) + (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string)))) + (ew-crlf-to-lf decoded))) + +(defun eword-decode-and-unfold-unstructured-field-body (string + &optional start-column + max-column) + (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string)))) + (ew-crlf-to-lf (ew-crlf-unfold decoded)))) + +(defun eword-decode-unfolded-unstructured-field-body (string + &optional start-column + max-column) + (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string)))) + (ew-crlf-to-lf decoded))) + ;;; @ for region ;;; @@ -356,6 +395,8 @@ mime-charset, it decodes non-ASCII bit patterns as the mime-charset. Otherwise it decodes non-ASCII bit patterns as the default-mime-charset." (interactive "*r") + (rotate-memo args-eword-decode-region + (list start end (buffer-substring start end) unfolding must-unfold code-conversion)) (save-excursion (save-restriction (narrow-to-region start end) @@ -363,79 +404,12 @@ default-mime-charset." (eword-decode-unfold) ) (let ((str (eword-decode-unstructured - code-conversion (buffer-substring (point-min) (point-max)) + code-conversion must-unfold))) (delete-region (point-min) (point-max)) (insert str))))) - -;;; @ for message header -;;; - -(defcustom eword-decode-ignored-field-list - '(newsgroups path lines nntp-posting-host received message-id date) - "*List of field-names to be ignored when decoding. -Each field name must be symbol." - :group 'eword-decode - :type '(repeat symbol)) - -(defcustom eword-decode-structured-field-list - '(reply-to resent-reply-to from resent-from sender resent-sender - to resent-to cc resent-cc bcc resent-bcc dcc - mime-version content-type content-transfer-encoding - content-disposition) - "*List of field-names to decode as structured field. -Each field name must be symbol." - :group 'eword-decode - :type '(repeat symbol)) - -(defun eword-decode-header (&optional code-conversion separator) - "Decode MIME encoded-words in header fields. -If CODE-CONVERSION is nil, it decodes only encoded-words. If it is -mime-charset, it decodes non-ASCII bit patterns as the mime-charset. -Otherwise it decodes non-ASCII bit patterns as the -default-mime-charset. -If SEPARATOR is not nil, it is used as header separator." - (interactive "*") - (if (and code-conversion - (not (mime-charset-to-coding-system code-conversion))) - (setq code-conversion default-mime-charset)) - (save-excursion - (save-restriction - (std11-narrow-to-header separator) - (if code-conversion - (let (beg p end field-name len) - (goto-char (point-min)) - (while (re-search-forward std11-field-head-regexp nil t) - (setq beg (match-beginning 0) - p (match-end 0) - field-name (buffer-substring beg (1- p)) - len (string-width field-name) - field-name (intern (downcase field-name)) - end (std11-field-end)) - (cond ((memq field-name eword-decode-ignored-field-list) - ;; Don't decode - ) - ((memq field-name eword-decode-structured-field-list) - ;; Decode as structured field - (let ((body (buffer-substring p end))) - (delete-region p end) - (insert (eword-decode-and-fold-structured-field - body (1+ len))) - )) - (t - ;; Decode as unstructured field - (save-restriction - (narrow-to-region beg (1+ end)) - (goto-char p) - (eword-decode-region beg (point-max) 'unfold nil - code-conversion) - (goto-char (point-max)) - ))))) - (eword-decode-region (point-min) (point-max) t nil nil) - )))) - (defun eword-decode-unfold () (goto-char (point-min)) (let (field beg end) @@ -453,11 +427,346 @@ If SEPARATOR is not nil, it is used as header separator." )) ))) +;;; @ for message header +;;; + +(defvar mime-field-decoder-alist nil) + +(defvar mime-field-decoder-cache nil) + +(defvar mime-update-field-decoder-cache 'ew-mime-update-field-decoder-cache + "*Field decoder cache update function.") + +;;;###autoload +(defun mime-set-field-decoder (field &rest specs) + "Set decoder of FILED. +SPECS must be like `MODE1 DECODER1 MODE2 DECODER2 ...'. +Each mode must be `nil', `plain', `wide', `summary' or `nov'. +If mode is `nil', corresponding decoder is set up for every modes." + (when specs + (let ((mode (pop specs)) + (function (pop specs))) + (if mode + (progn + (let ((cell (assq mode mime-field-decoder-alist))) + (if cell + (setcdr cell (put-alist field function (cdr cell))) + (setq mime-field-decoder-alist + (cons (cons mode (list (cons field function))) + mime-field-decoder-alist)) + )) + (apply (function mime-set-field-decoder) field specs) + ) + (mime-set-field-decoder field + 'plain function + 'wide function + 'summary function + 'nov function) + )))) + +;;;###autoload +(defmacro mime-find-field-presentation-method (name) + "Return field-presentation-method from NAME. +NAME must be `plain', `wide', `summary' or `nov'." + (cond ((eq name nil) + `(or (assq 'summary mime-field-decoder-cache) + '(summary)) + ) + ((and (consp name) + (car name) + (consp (cdr name)) + (symbolp (car (cdr name))) + (null (cdr (cdr name)))) + `(or (assq ,name mime-field-decoder-cache) + (cons ,name nil)) + ) + (t + `(or (assq (or ,name 'summary) mime-field-decoder-cache) + (cons (or ,name 'summary) nil)) + ))) + +(defun mime-find-field-decoder-internal (field &optional mode) + "Return function to decode field-body of FIELD in MODE. +Optional argument MODE must be object of field-presentation-method." + (cdr (or (assq field (cdr mode)) + (prog1 + (funcall mime-update-field-decoder-cache + field (car mode)) + (setcdr mode + (cdr (assq (car mode) mime-field-decoder-cache))) + )))) + +;;;###autoload +(defun mime-find-field-decoder (field &optional mode) + "Return function to decode field-body of FIELD in MODE. +Optional argument MODE must be object or name of +field-presentation-method. Name of field-presentation-method must be +`plain', `wide', `summary' or `nov'. +Default value of MODE is `summary'." + (if (symbolp mode) + (let ((p (cdr (mime-find-field-presentation-method mode)))) + (if (and p (setq p (assq field p))) + (cdr p) + (cdr (funcall mime-update-field-decoder-cache + field (or mode 'summary))))) + (inline (mime-find-field-decoder-internal field mode)) + )) + +;;;###autoload +(defun mime-update-field-decoder-cache (field mode &optional function) + "Update field decoder cache `mime-field-decoder-cache'." + (cond ((eq function 'identity) + (setq function nil) + ) + ((null function) + (let ((decoder-alist + (cdr (assq (or mode 'summary) mime-field-decoder-alist)))) + (setq function (cdr (or (assq field decoder-alist) + (assq t decoder-alist))))) + )) + (let ((cell (assq mode mime-field-decoder-cache)) + ret) + (if cell + (if (setq ret (assq field (cdr cell))) + (setcdr ret function) + (setcdr cell (cons (setq ret (cons field function)) (cdr cell)))) + (setq mime-field-decoder-cache + (cons (cons mode (list (setq ret (cons field function)))) + mime-field-decoder-cache))) + ret)) + +;; ignored fields +(mime-set-field-decoder 'Archive nil nil) +(mime-set-field-decoder 'Content-Md5 nil nil) +(mime-set-field-decoder 'Control nil nil) +(mime-set-field-decoder 'Date nil nil) +(mime-set-field-decoder 'Distribution nil nil) +(mime-set-field-decoder 'Followup-Host nil nil) +(mime-set-field-decoder 'Followup-To nil nil) +(mime-set-field-decoder 'Lines nil nil) +(mime-set-field-decoder 'Message-Id nil nil) +(mime-set-field-decoder 'Newsgroups nil nil) +(mime-set-field-decoder 'Nntp-Posting-Host nil nil) +(mime-set-field-decoder 'Path nil nil) +(mime-set-field-decoder 'Posted-And-Mailed nil nil) +(mime-set-field-decoder 'Received nil nil) +(mime-set-field-decoder 'Status nil nil) +(mime-set-field-decoder 'X-Face nil nil) +(mime-set-field-decoder 'X-Face-Version nil nil) +(mime-set-field-decoder 'X-Info nil nil) +(mime-set-field-decoder 'X-Pgp-Key-Info nil nil) +(mime-set-field-decoder 'X-Pgp-Sig nil nil) +(mime-set-field-decoder 'X-Pgp-Sig-Version nil nil) +(mime-set-field-decoder 'Xref nil nil) + +;; structured fields +(let ((fields + '(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender + To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc + Mail-Followup-To + Mime-Version Content-Type Content-Transfer-Encoding + Content-Disposition User-Agent)) + field) + (while fields + (setq field (pop fields)) + (mime-set-field-decoder + field + 'plain #'eword-decode-structured-field-body + 'wide #'eword-decode-and-fold-structured-field-body + 'summary #'eword-decode-and-unfold-structured-field-body + 'nov #'eword-decode-and-unfold-structured-field-body) + )) + +;; unstructured fields (default) +(mime-set-field-decoder + t + 'plain #'eword-decode-unstructured-field-body + 'wide #'eword-decode-unstructured-field-body + 'summary #'eword-decode-and-unfold-unstructured-field-body + 'nov #'eword-decode-unfolded-unstructured-field-body) + +;;;###autoload +(defun ew-mime-update-field-decoder-cache (field mode) + (let ((fun (cond + ((eq mode 'plain) + (lexical-let ((field-name (symbol-name field))) + (lambda (field-body &optional start-column max-column must-unfold) + (setq field-body (ew-lf-to-crlf field-body)) + (let ((res (ew-crlf-to-lf + (ew-decode-field field-name field-body)))) + (add-text-properties + 0 (length res) + (list 'original-field-name field-name + 'original-field-body field-body) + res) + res)))) + ((eq mode 'wide) + (lexical-let ((field-name (symbol-name field))) + (lambda (field-body &optional start-column max-column must-unfold) + (setq field-body (ew-lf-to-crlf field-body)) + (let ((res (ew-crlf-to-lf + (ew-crlf-refold + (ew-decode-field field-name field-body) + (length field-name) + (or max-column fill-column))))) + (add-text-properties + 0 (length res) + (list 'original-field-name field-name + 'original-field-body field-body) + res) + res)))) + ((eq mode 'summary) + (lexical-let ((field-name (symbol-name field))) + (lambda (field-body &optional start-column max-column must-unfold) + (setq field-body (ew-lf-to-crlf field-body)) + (let ((res (ew-crlf-to-lf + (ew-crlf-unfold + (ew-decode-field field-name field-body))))) + (add-text-properties + 0 (length res) + (list 'original-field-name field-name + 'original-field-body field-body) + res) + res)))) + ((eq mode 'nov) + (lexical-let ((field-name (symbol-name field))) + (lambda (field-body &optional start-column max-column must-unfold) + (setq field-body (ew-lf-to-crlf field-body)) + (require 'ew-var) + (let ((ew-ignore-76bytes-limit t)) + (let ((res (ew-crlf-to-lf + (ew-crlf-unfold + (ew-decode-field field-name field-body))))) + (add-text-properties + 0 (length res) + (list 'original-field-name field-name + 'original-field-body field-body) + res) + res))))) + (t + nil)))) + (mime-update-field-decoder-cache field mode fun))) + +;;;###autoload +(defun mime-decode-field-body (field-body field-name + &optional mode max-column) + "Decode FIELD-BODY as FIELD-NAME in MODE, and return the result. +Optional argument MODE must be `plain', `wide', `summary' or `nov'. +Default mode is `summary'. + +If MODE is `wide' and MAX-COLUMN is non-nil, the result is folded with +MAX-COLUMN. + +Non MIME encoded-word part in FILED-BODY is decoded with +`default-mime-charset'." + (unless mode (setq mode 'summary)) + (if (symbolp field-name) (setq field-name (symbol-name field-name))) + (let ((decoded + (if (eq mode 'nov) + (let ((ew-ignore-76bytes-limit t)) + (ew-decode-field + field-name (ew-lf-crlf-to-crlf field-body))) + (ew-decode-field + field-name (ew-lf-crlf-to-crlf field-body))))) + (if (and (eq mode 'wide) max-column) + (setq decoded (ew-crlf-refold + decoded + (1+ (string-width field-name)) + max-column)) + (if (not (eq mode 'plain)) + (setq decoded (ew-crlf-unfold decoded)))) + (setq decoded (ew-crlf-to-lf decoded)) + (add-text-properties 0 (length decoded) + (list 'original-field-name field-name + 'original-field-body field-body) + decoded) + decoded)) + +;;;###autoload +(defun mime-decode-header-in-region (start end + &optional code-conversion) + "Decode MIME encoded-words in region between START and END. +If CODE-CONVERSION is nil, it decodes only encoded-words. If it is +mime-charset, it decodes non-ASCII bit patterns as the mime-charset. +Otherwise it decodes non-ASCII bit patterns as the +default-mime-charset." + (interactive "*r") + (save-excursion + (save-restriction + (narrow-to-region start end) + (let ((default-charset + (if code-conversion + (if (mime-charset-to-coding-system code-conversion) + code-conversion + default-mime-charset)))) + (if default-charset + (let ((mode-obj (mime-find-field-presentation-method 'wide)) + beg p end len field-decoder + field-name field-body) + (goto-char (point-min)) + (while (re-search-forward std11-field-head-regexp nil t) + (setq beg (match-beginning 0) + p (match-end 0) + field-name (buffer-substring beg (1- p)) + len (string-width field-name) + field-decoder (inline + (mime-find-field-decoder-internal + (intern (capitalize field-name)) + mode-obj))) + (when field-decoder + (setq end (std11-field-end) + field-body (buffer-substring p end)) + (let ((default-mime-charset default-charset)) + (delete-region p end) + (insert (funcall field-decoder field-body (1+ len))) + )) + (add-text-properties beg (min (1+ (point)) (point-max)) + (list 'original-field-name field-name + 'original-field-body field-body)) + )) + (eword-decode-region (point-min) (point-max) t) + ))))) + +;;;###autoload +(defun mime-decode-header-in-buffer (&optional code-conversion separator) + "Decode MIME encoded-words in header fields. +If CODE-CONVERSION is nil, it decodes only encoded-words. If it is +mime-charset, it decodes non-ASCII bit patterns as the mime-charset. +Otherwise it decodes non-ASCII bit patterns as the +default-mime-charset. +If SEPARATOR is not nil, it is used as header separator." + (interactive "*") + (mime-decode-header-in-region + (point-min) + (save-excursion + (goto-char (point-min)) + (if (re-search-forward + (concat "^\\(" (regexp-quote (or separator "")) "\\)?$") + nil t) + (match-beginning 0) + (point-max) + )) + code-conversion)) + +(define-obsolete-function-alias 'eword-decode-header + 'mime-decode-header-in-buffer) + ;;; @ encoded-word decoder ;;; -(defvar eword-warning-face nil "Face used for invalid encoded-word.") +(defvar eword-decode-encoded-word-error-handler + 'eword-decode-encoded-word-default-error-handler) + +(defvar eword-warning-face nil + "Face used for invalid encoded-word.") + +(defun eword-decode-encoded-word-default-error-handler (word signal) + (and (add-text-properties 0 (length word) + (and eword-warning-face + (list 'face eword-warning-face)) + word) + word)) (defun eword-decode-encoded-word (word &optional must-unfold) "Decode WORD if it is an encoded-word. @@ -482,12 +791,8 @@ as a version of Net$cape)." (condition-case err (eword-decode-encoded-text charset encoding text must-unfold) (error - (and - (add-text-properties 0 (length word) - (and eword-warning-face - (list 'face eword-warning-face)) - word) - word))) + (funcall eword-decode-encoded-word-error-handler word err) + )) )) word)) @@ -509,37 +814,19 @@ if there are in decoded encoded-text (generated by bad manner MUA such as a version of Net$cape)." (let ((cs (mime-charset-to-coding-system charset))) (if cs - (let ((dest - (cond - ((string-equal "B" encoding) - (if (and (string-match eword-B-encoded-text-regexp string) - (string-equal string (match-string 0 string))) - (base64-decode-string string) - (error "Invalid encoded-text %s" string))) - ((string-equal "Q" encoding) - (if (and (string-match eword-Q-encoded-text-regexp string) - (string-equal string (match-string 0 string))) - (q-encoding-decode-string string) - (error "Invalid encoded-text %s" string))) - (t - (error "Invalid encoding %s" encoding) - ))) - ) - (if dest - (progn - (setq dest (decode-coding-string dest cs)) - (if must-unfold - (mapconcat (function - (lambda (chr) - (cond - ((eq chr ?\n) "") - ((eq chr ?\t) " ") - (t (char-to-string chr))) - )) - (std11-unfold-string dest) - "") - dest) - )))))) + (let ((dest (encoded-text-decode-string string encoding))) + (when dest + (setq dest (decode-mime-charset-string dest charset)) + (if must-unfold + (mapconcat (function + (lambda (chr) + (cond ((eq chr ?\n) "") + ((eq chr ?\t) " ") + (t (char-to-string chr))) + )) + (std11-unfold-string dest) + "") + dest)))))) ;;; @ lexical analyze @@ -576,12 +863,17 @@ be the result." (defun eword-analyze-quoted-string (string &optional must-unfold) (let ((p (std11-check-enclosure string ?\" ?\"))) (if p - (cons (cons 'quoted-string - (eword-decode-quoted-string - (substring string 0 p) - default-mime-charset)) - (substring string p)) - ))) + (cons (cons 'quoted-string + (if eword-decode-quoted-encoded-word + (eword-decode-quoted-string + (substring string 0 p) + default-mime-charset) + (std11-wrap-as-quoted-string + (decode-mime-charset-string + (std11-strip-quoted-pair (substring string 1 (1- p))) + default-mime-charset)))) + (substring string p))) + )) (defun eword-analyze-domain-literal (string &optional must-unfold) (std11-analyze-domain-literal string)) @@ -595,11 +887,12 @@ be the result." (setq p (or p len)) (cons (cons 'comment (eword-decode-comment - (substring string 0 p) + (std11-unfold-string (substring string 0 p)) default-mime-charset)) (substring string p))) nil))) + (defun eword-analyze-spaces (string &optional must-unfold) (std11-analyze-spaces string)) @@ -608,16 +901,23 @@ be the result." (defun eword-analyze-encoded-word (string &optional must-unfold) (let ((decoded (eword-decode-first-encoded-words - string - eword-encoded-word-in-phrase-regexp - eword-after-encoded-word-in-phrase-regexp - must-unfold))) + string + eword-encoded-word-in-phrase-regexp + eword-after-encoded-word-in-phrase-regexp + must-unfold))) (if decoded - (cons (cons 'atom (car decoded)) (cdr decoded))))) + (let ((s (car decoded))) + (while (or (string-match std11-atom-regexp s) + (string-match std11-spaces-regexp s)) + (setq s (substring s (match-end 0)))) + (if (= (length s) 0) + (cons (cons 'atom (car decoded)) (cdr decoded)) + (cons (cons 'quoted-string + (std11-wrap-as-quoted-string (car decoded))) + (cdr decoded))))))) (defun eword-analyze-atom (string &optional must-unfold) - (if (let ((enable-multibyte-characters nil)) - (string-match std11-atom-regexp string)) + (if (string-match std11-atom-regexp (string-as-unibyte string)) (let ((end (match-end 0))) (if (and eword-decode-sticked-encoded-word (string-match eword-encoded-word-in-phrase-regexp @@ -631,17 +931,24 @@ be the result." )))) (defun eword-lexical-analyze-internal (string must-unfold) - (let (dest ret) + (let ((last 'eword-analyze-spaces) + dest ret) (while (not (string-equal string "")) (setq ret - (let ((rest eword-lexical-analyzers) - func r) - (while (and (setq func (car rest)) - (null (setq r (funcall func string must-unfold))) - ) - (setq rest (cdr rest))) - (or r `((error . ,string) . "")) - )) + (let ((rest eword-lexical-analyzers) + func r) + (while (and (setq func (car rest)) + (or + (and + (not eword-decode-sticked-encoded-word) + (not (eq last 'eword-analyze-spaces)) + (eq func 'eword-analyze-encoded-word)) + (null (setq r (funcall func string must-unfold)))) + ) + (setq rest (cdr rest))) + (setq last func) + (or r `((error . ,string) . "")) + )) (setq dest (cons (car ret) dest)) (setq string (cdr ret)) ) @@ -670,112 +977,6 @@ characters encoded as encoded-words or invalid \"raw\" format. (defun eword-decode-token (token) (cdr token)) -(defun eword-decode-and-fold-structured-field - (string start-column &optional max-column must-unfold) - "Decode and fold (fill) STRING as structured field body. -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded. - -If MAX-COLUMN is omitted, `fill-column' is used. - -If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-words (generated by bad manner MUA -such as a version of Net$cape)." - (or max-column - (setq max-column fill-column)) - (let ((c start-column) - (tokens (eword-lexical-analyze string must-unfold)) - (result "") - token) - (while (and (setq token (car tokens)) - (setq tokens (cdr tokens))) - (let* ((type (car token))) - (if (eq type 'spaces) - (let* ((next-token (car tokens)) - (next-str (eword-decode-token next-token)) - (next-len (string-width next-str)) - (next-c (+ c next-len 1))) - (if (< next-c max-column) - (setq result (concat result " " next-str) - c next-c) - (setq result (concat result "\n " next-str) - c (1+ next-len))) - (setq tokens (cdr tokens)) - ) - (let* ((str (eword-decode-token token))) - (setq result (concat result str) - c (+ c (string-width str))) - )))) - (if token - (concat result (eword-decode-token token)) - result))) - -(defun eword-decode-and-unfold-structured-field (string) - "Decode and unfold STRING as structured field body. -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded." - (let ((tokens (eword-lexical-analyze string 'must-unfold)) - (result "")) - (while tokens - (let* ((token (car tokens)) - (type (car token))) - (setq tokens (cdr tokens)) - (setq result - (if (eq type 'spaces) - (concat result " ") - (concat result (eword-decode-token token)) - )))) - result)) - -(defun eword-decode-structured-field-body (string &optional must-unfold - start-column max-column) - "Decode non us-ascii characters in STRING as structured field body. -STRING is unfolded before decoding. - -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded. - -If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-words (generated by bad manner MUA -such as a version of Net$cape)." - (if start-column - ;; fold with max-column - (eword-decode-and-fold-structured-field - string start-column max-column must-unfold) - ;; Don't fold - (mapconcat (function eword-decode-token) - (eword-lexical-analyze string must-unfold) - "") - )) - -(defun eword-decode-unstructured-field-body (string &optional must-unfold) - "Decode non us-ascii characters in STRING as unstructured field body. -STRING is unfolded before decoding. - -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded. - -If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-words (generated by bad manner MUA -such as a version of Net$cape)." - (eword-decode-string string must-unfold default-mime-charset)) - (defun eword-extract-address-components (string) "Extract full name and canonical address from STRING. Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). @@ -783,6 +984,7 @@ If no name can be extracted, FULL-NAME will be nil. It decodes non us-ascii characters in FULL-NAME encoded as encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii characters are regarded as variable `default-mime-charset'." + (rotate-memo args-eword-extract-address-components (list string)) (let* ((structure (car (std11-parse-address (eword-lexical-analyze (std11-unfold-string string) 'must-unfold))))