X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=eword-decode.el;h=a1be3bcb1e1431e7e0c837a07dba430db4974d39;hb=3c07ee018fb2fa3178e4eef483aee0326a2a52a6;hp=938d6665f0d1f33273b6cb3eb175b8c4fe04b020;hpb=24febc0a2434ea9dbf0b72dbe1f068f1c5c1286e;p=elisp%2Fflim.git diff --git a/eword-decode.el b/eword-decode.el index 938d666..a1be3bc 100644 --- a/eword-decode.el +++ b/eword-decode.el @@ -4,16 +4,16 @@ ;; Author: ENAMI Tsugutomo ;; MORIOKA Tomohiko -;; Tanaka Akira -;; Maintainer: Tanaka Akira +;; TANAKA Akira ;; Created: 1995/10/03 ;; Original: 1992/07/20 ENAMI Tsugutomo's `mime.el'. -;; Renamed: 1993/06/03 to tiny-mime.el -;; Renamed: 1995/10/03 from tiny-mime.el (split off encoder) -;; Renamed: 1997/02/22 from tm-ew-d.el +;; Renamed: 1993/06/03 to tiny-mime.el by MORIOKA Tomohiko +;; Renamed: 1995/10/03 to tm-ew-d.el (split off encoder) +;; by MORIOKA Tomohiko +;; Renamed: 1997/02/22 from tm-ew-d.el by MORIOKA Tomohiko ;; Keywords: encoded-word, MIME, multilingual, header, mail, news -;; This file is part of FLAM (Faithful Library About MIME). +;; This file is part of FLIM (Faithful Library about Internet Message). ;; This program is free software; you can redistribute it and/or ;; modify it under the terms of the GNU General Public License as @@ -36,320 +36,186 @@ (require 'mel) (require 'mime-def) +(eval-when-compile (require 'cl)) + (defgroup eword-decode nil "Encoded-word decoding" :group 'mime) - -;;; @ variables -;;; - -(defcustom eword-decode-sticked-encoded-word nil - "*If non-nil, decode encoded-words sticked on atoms, -other encoded-words, etc. -however this behaviour violates RFC2047." - :group 'eword-decode - :type 'boolean) - -(defcustom eword-decode-quoted-encoded-word nil - "*If non-nil, decode encoded-words in quoted-string -however this behaviour violates RFC2047." +(defcustom eword-max-size-to-decode 1000 + "*Max size to decode header field." :group 'eword-decode - :type 'boolean) + :type '(choice (integer :tag "Limit (bytes)") + (const :tag "Don't limit" nil))) ;;; @ MIME encoded-word definition ;;; -(defconst eword-encoded-word-prefix-regexp - (concat (regexp-quote "=?") - "\\(" mime-charset-regexp "\\)" - (regexp-quote "?") - "\\(B\\|Q\\)" - (regexp-quote "?"))) -(defconst eword-encoded-word-suffix-regexp - (regexp-quote "?=")) - -(defconst eword-encoded-text-in-unstructured-regexp "[!->@-~]+") -(defconst eword-encoded-word-in-unstructured-regexp - (concat eword-encoded-word-prefix-regexp - "\\(" eword-encoded-text-in-unstructured-regexp "\\)" - eword-encoded-word-suffix-regexp)) -(defconst eword-after-encoded-word-in-unstructured-regexp "\\([ \t]\\|$\\)") - -(defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+") -(defconst eword-encoded-word-in-phrase-regexp - (concat eword-encoded-word-prefix-regexp - "\\(" eword-encoded-text-in-phrase-regexp "\\)" - eword-encoded-word-suffix-regexp)) -(defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t(]\\|$\\)") - -(defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+") -(defconst eword-encoded-word-in-comment-regexp - (concat eword-encoded-word-prefix-regexp - "\\(" eword-encoded-text-in-comment-regexp "\\)" - eword-encoded-word-suffix-regexp)) -(defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)") - -(defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+") -(defconst eword-encoded-word-in-quoted-string-regexp - (concat eword-encoded-word-prefix-regexp - "\\(" eword-encoded-text-in-quoted-string-regexp "\\)" - eword-encoded-word-suffix-regexp)) -(defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)") - -; obsolete -(defconst eword-encoded-text-regexp eword-encoded-text-in-unstructured-regexp) -(defconst eword-encoded-word-regexp eword-encoded-word-in-unstructured-regexp) - - -;;; @@ Base64 -;;; - -(defconst base64-token-regexp "[A-Za-z0-9+/]") -(defconst base64-token-padding-regexp "[A-Za-z0-9+/=]") - -(defconst eword-B-encoded-text-regexp - (concat "\\(\\(" - base64-token-regexp - base64-token-regexp - base64-token-regexp - base64-token-regexp - "\\)*" - base64-token-regexp - base64-token-regexp - base64-token-padding-regexp - base64-token-padding-regexp - "\\)")) +(eval-and-compile + (defconst eword-encoded-text-regexp "[!->@-~]+") + + (defconst eword-encoded-word-regexp + (eval-when-compile + (concat (regexp-quote "=?") + "\\(" + mime-charset-regexp + "\\)" + (regexp-quote "?") + "\\(B\\|Q\\)" + (regexp-quote "?") + "\\(" + eword-encoded-text-regexp + "\\)" + (regexp-quote "?=")))) + ) -;; (defconst eword-B-encoding-and-encoded-text-regexp -;; (concat "\\(B\\)\\?" eword-B-encoded-text-regexp)) - -;;; @@ Quoted-Printable +;;; @ for string ;;; -(defconst eword-Q-encoded-text-regexp - (concat "\\([^=?]\\|" quoted-printable-octet-regexp "\\)+")) -;; (defconst eword-Q-encoding-and-encoded-text-regexp -;; (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp)) - - -;;; @ internal utilities -;;; +(defun eword-decode-string (string &optional must-unfold) + "Decode MIME encoded-words in STRING. -(defun eword-decode-first-encoded-words (string - eword-regexp - after-regexp - &optional must-unfold) - "Decode MIME encoded-words in beginning of STRING. - -EWORD-REGEXP is the regexp that matches a encoded-word. -Usual value is -eword-encoded-word-in-unstructured-regexp, -eword-encoded-text-in-phrase-regexp, -eword-encoded-word-in-comment-regexp or -eword-encoded-word-in-quoted-string-regexp. - -AFTER-REGEXP is the regexp that matches a after encoded-word. -Usual value is -eword-after-encoded-word-in-unstructured-regexp, -eword-after-encoded-text-in-phrase-regexp, -eword-after-encoded-word-in-comment-regexp or -eword-after-encoded-word-in-quoted-string-regexp. - -If beginning of STRING matches EWORD-REGEXP with AFTER-REGEXP, -returns a cons cell of decoded string(sequence of characters) and -the rest(sequence of octets). - -If beginning of STRING does not matches EWORD-REGEXP and AFTER-REGEXP, -returns nil. +STRING is unfolded before decoding. If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is returned in decoded part -as encoded-word form. +decode the charset included in it, it is not decoded. If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even if there are in decoded encoded-words (generated by bad manner MUA such as a version of Net$cape)." - (if eword-decode-sticked-encoded-word (setq after-regexp "")) - (let* ((between-ewords-regexp - (if eword-decode-sticked-encoded-word - "\\(\n?[ \t]\\)*" - "\\(\n?[ \t]\\)+")) - (between-ewords-eword-after-regexp - (concat "\\`\\(" between-ewords-regexp "\\)" - "\\(" eword-regexp "\\)" - after-regexp)) - (eword-after-regexp - (concat "\\`\\(" eword-regexp "\\)" after-regexp)) - (src string) ; sequence of octets. - (dst "")) ; sequence of characters. - (if (string-match eword-after-regexp src) - (let* (p - (q (match-end 1)) - (ew (substring src 0 q)) - (dw (eword-decode-encoded-word ew must-unfold))) - (setq dst (concat dst dw) - src (substring src q)) - (if (not (string= ew dw)) - (progn - (while - (and - (string-match between-ewords-eword-after-regexp src) - (progn - (setq p (match-end 1) - q (match-end 3) - ew (substring src p q) - dw (eword-decode-encoded-word ew must-unfold)) - (if (string= ew dw) - (progn - (setq dst (concat dst (substring src 0 q)) - src (substring src q)) - nil) - t))) - (setq dst (concat dst dw) - src (substring src q))))) - (cons dst src)) - nil))) - -(defun eword-decode-entire-string (string - eword-regexp - after-regexp - safe-regexp - escape ; ?\\ or nil. - delimiters ; list of chars. - must-unfold - code-conversion) - (if (and code-conversion - (not (mime-charset-to-coding-system code-conversion))) - (setq code-conversion default-mime-charset)) - (let ((equal-safe-regexp (concat "\\`=?" safe-regexp)) - (dst "") - (buf "") - (src string) - (ew-enable t)) - (while (< 0 (length src)) - (let ((ch (aref src 0)) - (decoded (and - ew-enable - (eword-decode-first-encoded-words src - eword-regexp after-regexp must-unfold)))) - (if (and (not (string= buf "")) - (or decoded (memq ch delimiters))) - (setq dst (concat dst - (std11-wrap-as-quoted-pairs - (decode-mime-charset-string buf code-conversion) - delimiters)) - buf "")) - (cond - (decoded - (setq dst (concat dst - (std11-wrap-as-quoted-pairs - (car decoded) - delimiters)) - src (cdr decoded))) - ((memq ch delimiters) - (setq dst (concat dst (list ch)) - src (substring src 1) - ew-enable t)) - ((eq ch escape) - (setq buf (concat buf (list (aref src 1))) - src (substring src 2) - ew-enable t)) - ((string-match "\\`[ \t\n]+" src) - (setq buf (concat buf (substring src 0 (match-end 0))) - src (substring src (match-end 0)) - ew-enable t)) - ((and (string-match equal-safe-regexp src) - (< 0 (match-end 0))) - (setq buf (concat buf (substring src 0 (match-end 0))) - src (substring src (match-end 0)) - ew-enable eword-decode-sticked-encoded-word)) - (t (error "something wrong"))))) - (if (not (string= buf "")) - (setq dst (concat dst - (std11-wrap-as-quoted-pairs - (decode-mime-charset-string buf code-conversion) - delimiters)))) - dst)) - - -;;; @ for string -;;; + (setq string (std11-unfold-string string)) + (let ((dest "")(ew nil) + beg end) + (while (and (string-match eword-encoded-word-regexp string) + (setq beg (match-beginning 0) + end (match-end 0)) + ) + (if (> beg 0) + (if (not + (and (eq ew t) + (string-match "^[ \t]+$" (substring string 0 beg)) + )) + (setq dest (concat dest (substring string 0 beg))) + ) + ) + (setq dest + (concat dest + (eword-decode-encoded-word + (substring string beg end) must-unfold) + )) + (setq string (substring string end)) + (setq ew t) + ) + (concat dest string) + )) -(defun eword-decode-unstructured (string code-conversion &optional must-unfold) - (eword-decode-entire-string - string - eword-encoded-word-in-unstructured-regexp - eword-after-encoded-word-in-unstructured-regexp - "[^ \t\n=]*" - nil - nil - must-unfold - code-conversion)) - -(defun eword-decode-comment (string code-conversion &optional must-unfold) - (eword-decode-entire-string - string - eword-encoded-word-in-comment-regexp - eword-after-encoded-word-in-comment-regexp - "[^ \t\n()\\\\=]*" - ?\\ - '(?\( ?\)) - must-unfold - code-conversion)) - -(defun eword-decode-quoted-string (string code-conversion &optional must-unfold) - (eword-decode-entire-string - string - eword-encoded-word-in-quoted-string-regexp - eword-after-encoded-word-in-quoted-string-regexp - "[^ \t\n\"\\\\=]*" - ?\\ - '(?\") - must-unfold - code-conversion)) - -(defun eword-decode-string (string &optional must-unfold code-conversion) - "Decode MIME encoded-words in STRING. +(defun eword-decode-structured-field-body (string + &optional start-column max-column + start) + (let ((tokens (eword-lexical-analyze string start 'must-unfold)) + (result "") + token) + (while tokens + (setq token (car tokens)) + (setq result (concat result (eword-decode-token token))) + (setq tokens (cdr tokens))) + result)) -STRING is unfolded before decoding. +(defun eword-decode-and-unfold-structured-field-body (string + &optional + start-column + max-column + start) + "Decode and unfold STRING as structured field body. +It decodes non us-ascii characters in FULL-NAME encoded as +encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii +characters are regarded as variable `default-mime-charset'. If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded. - -If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-words (generated by bad manner MUA -such as a version of Net$cape). +decode the charset included in it, it is not decoded." + (let ((tokens (eword-lexical-analyze string start 'must-unfold)) + (result "")) + (while tokens + (let* ((token (car tokens)) + (type (car token))) + (setq tokens (cdr tokens)) + (setq result + (if (eq type 'spaces) + (concat result " ") + (concat result (eword-decode-token token)) + )))) + result)) -If CODE-CONVERSION is nil, it decodes only encoded-words. If it is -mime-charset, it decodes non-ASCII bit patterns as the mime-charset. -Otherwise it decodes non-ASCII bit patterns as the -default-mime-charset." - (eword-decode-unstructured - (std11-unfold-string string) - code-conversion - must-unfold)) +(defun eword-decode-and-fold-structured-field-body (string + start-column + &optional max-column + start) + (if (and eword-max-size-to-decode + (> (length string) eword-max-size-to-decode)) + string + (or max-column + (setq max-column fill-column)) + (let ((c start-column) + (tokens (eword-lexical-analyze string start 'must-unfold)) + (result "") + token) + (while (and (setq token (car tokens)) + (setq tokens (cdr tokens))) + (let* ((type (car token))) + (if (eq type 'spaces) + (let* ((next-token (car tokens)) + (next-str (eword-decode-token next-token)) + (next-len (string-width next-str)) + (next-c (+ c next-len 1))) + (if (< next-c max-column) + (setq result (concat result " " next-str) + c next-c) + (setq result (concat result "\n " next-str) + c (1+ next-len))) + (setq tokens (cdr tokens)) + ) + (let* ((str (eword-decode-token token))) + (setq result (concat result str) + c (+ c (string-width str))) + )))) + (if token + (concat result (eword-decode-token token)) + result)))) + +(defun eword-decode-unstructured-field-body (string &optional start-column + max-column) + (eword-decode-string + (decode-mime-charset-string string default-mime-charset))) + +(defun eword-decode-and-unfold-unstructured-field-body (string + &optional start-column + max-column) + (eword-decode-string + (decode-mime-charset-string (std11-unfold-string string) + default-mime-charset) + 'must-unfold)) + +(defun eword-decode-unfolded-unstructured-field-body (string + &optional start-column + max-column) + (eword-decode-string + (decode-mime-charset-string string default-mime-charset) + 'must-unfold)) ;;; @ for region ;;; -(defun eword-decode-region (start end &optional unfolding must-unfold - code-conversion) +(defun eword-decode-region (start end &optional unfolding must-unfold) "Decode MIME encoded-words in region between START and END. If UNFOLDING is not nil, it unfolds before decoding. If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even if there are in decoded encoded-words (generated by bad manner MUA -such as a version of Net$cape). - -If CODE-CONVERSION is nil, it decodes only encoded-words. If it is -mime-charset, it decodes non-ASCII bit patterns as the mime-charset. -Otherwise it decodes non-ASCII bit patterns as the -default-mime-charset." +such as a version of Net$cape)." (interactive "*r") (save-excursion (save-restriction @@ -357,79 +223,22 @@ default-mime-charset." (if unfolding (eword-decode-unfold) ) - (let ((str (eword-decode-unstructured - (buffer-substring (point-min) (point-max)) - code-conversion - must-unfold))) - (delete-region (point-min) (point-max)) - (insert str))))) - - -;;; @ for message header -;;; - -(defcustom eword-decode-ignored-field-list - '(newsgroups path lines nntp-posting-host received message-id date) - "*List of field-names to be ignored when decoding. -Each field name must be symbol." - :group 'eword-decode - :type '(repeat symbol)) - -(defcustom eword-decode-structured-field-list - '(reply-to resent-reply-to from resent-from sender resent-sender - to resent-to cc resent-cc bcc resent-bcc dcc - mime-version content-type content-transfer-encoding - content-disposition) - "*List of field-names to decode as structured field. -Each field name must be symbol." - :group 'eword-decode - :type '(repeat symbol)) - -(defun eword-decode-header (&optional code-conversion separator) - "Decode MIME encoded-words in header fields. -If CODE-CONVERSION is nil, it decodes only encoded-words. If it is -mime-charset, it decodes non-ASCII bit patterns as the mime-charset. -Otherwise it decodes non-ASCII bit patterns as the -default-mime-charset. -If SEPARATOR is not nil, it is used as header separator." - (interactive "*") - (if (and code-conversion - (not (mime-charset-to-coding-system code-conversion))) - (setq code-conversion default-mime-charset)) - (save-excursion - (save-restriction - (std11-narrow-to-header separator) - (if code-conversion - (let (beg p end field-name len) - (goto-char (point-min)) - (while (re-search-forward std11-field-head-regexp nil t) - (setq beg (match-beginning 0) - p (match-end 0) - field-name (buffer-substring beg (1- p)) - len (string-width field-name) - field-name (intern (downcase field-name)) - end (std11-field-end)) - (cond ((memq field-name eword-decode-ignored-field-list) - ;; Don't decode - ) - ((memq field-name eword-decode-structured-field-list) - ;; Decode as structured field - (let ((body (buffer-substring p end))) - (delete-region p end) - (insert (eword-decode-and-fold-structured-field - body (1+ len))) - )) - (t - ;; Decode as unstructured field - (save-restriction - (narrow-to-region beg (1+ end)) - (goto-char p) - (eword-decode-region beg (point-max) 'unfold nil - code-conversion) - (goto-char (point-max)) - ))))) - (eword-decode-region (point-min) (point-max) t nil nil) - )))) + (goto-char (point-min)) + (while (re-search-forward (concat "\\(" eword-encoded-word-regexp "\\)" + "\\(\n?[ \t]\\)+" + "\\(" eword-encoded-word-regexp "\\)") + nil t) + (replace-match "\\1\\6") + (goto-char (point-min)) + ) + (while (re-search-forward eword-encoded-word-regexp nil t) + (insert (eword-decode-encoded-word + (prog1 + (buffer-substring (match-beginning 0) (match-end 0)) + (delete-region (match-beginning 0) (match-end 0)) + ) must-unfold)) + ) + ))) (defun eword-decode-unfold () (goto-char (point-min)) @@ -449,6 +258,332 @@ If SEPARATOR is not nil, it is used as header separator." ))) +;;; @ for message header +;;; + +(defvar mime-field-decoder-alist nil) + +(defvar mime-field-decoder-cache nil) + +(defvar mime-update-field-decoder-cache 'ew-mime-update-field-decoder-cache + "*Field decoder cache update function.") + +(defun ew-mime-update-field-decoder-cache (field mode) + (require 'ew-dec) + (let ((fun (cond + ((eq mode 'plain) + (lexical-let ((field-name (symbol-name field))) + (lambda (field-body &optional start-column max-column must-unfold) + (setq field-body (ew-lf-to-crlf field-body)) + (let ((res (ew-crlf-to-lf + (ew-decode-field field-name field-body)))) + (add-text-properties + 0 (length res) + (list 'original-field-name field-name + 'original-field-body field-body) + res) + res)))) + ((eq mode 'wide) + (lexical-let ((field-name (symbol-name field))) + (lambda (field-body &optional start-column max-column must-unfold) + (setq field-body (ew-lf-to-crlf field-body)) + (let* ((res (ew-decode-field field-name field-body)) + (res (if (string= res field-body) + res + (ew-crlf-refold res + (length field-name) + (or max-column fill-column)))) + (res (ew-crlf-to-lf res))) + (add-text-properties + 0 (length res) + (list 'original-field-name field-name + 'original-field-body field-body) + res) + res)))) + ((eq mode 'summary) + (lexical-let ((field-name (symbol-name field))) + (lambda (field-body &optional start-column max-column must-unfold) + (setq field-body (ew-lf-to-crlf field-body)) + (let ((res (ew-crlf-to-lf + (ew-crlf-unfold + (ew-decode-field field-name field-body))))) + (add-text-properties + 0 (length res) + (list 'original-field-name field-name + 'original-field-body field-body) + res) + res)))) + ((eq mode 'nov) + (lexical-let ((field-name (symbol-name field))) + (lambda (field-body &optional start-column max-column must-unfold) + (setq field-body (ew-lf-to-crlf field-body)) + (require 'ew-var) + (let ((ew-ignore-76bytes-limit t)) + (let ((res (ew-crlf-to-lf + (ew-crlf-unfold + (ew-decode-field field-name field-body))))) + (add-text-properties + 0 (length res) + (list 'original-field-name field-name + 'original-field-body field-body) + res) + res))))) + (t + nil)))) + (mime-update-field-decoder-cache field mode fun))) + +;;;###autoload +(defun mime-set-field-decoder (field &rest specs) + "Set decoder of FILED. +SPECS must be like `MODE1 DECODER1 MODE2 DECODER2 ...'. +Each mode must be `nil', `plain', `wide', `summary' or `nov'. +If mode is `nil', corresponding decoder is set up for every modes." + (when specs + (let ((mode (pop specs)) + (function (pop specs))) + (if mode + (progn + (let ((cell (assq mode mime-field-decoder-alist))) + (if cell + (setcdr cell (put-alist field function (cdr cell))) + (setq mime-field-decoder-alist + (cons (cons mode (list (cons field function))) + mime-field-decoder-alist)) + )) + (apply (function mime-set-field-decoder) field specs) + ) + (mime-set-field-decoder field + 'plain function + 'wide function + 'summary function + 'nov function) + )))) + +;;;###autoload +(defmacro mime-find-field-presentation-method (name) + "Return field-presentation-method from NAME. +NAME must be `plain', `wide', `summary' or `nov'." + (cond ((eq name nil) + `(or (assq 'summary mime-field-decoder-cache) + '(summary)) + ) + ((and (consp name) + (car name) + (consp (cdr name)) + (symbolp (car (cdr name))) + (null (cdr (cdr name)))) + `(or (assq ,name mime-field-decoder-cache) + (cons ,name nil)) + ) + (t + `(or (assq (or ,name 'summary) mime-field-decoder-cache) + (cons (or ,name 'summary) nil)) + ))) + +(defun mime-find-field-decoder-internal (field &optional mode) + "Return function to decode field-body of FIELD in MODE. +Optional argument MODE must be object of field-presentation-method." + (cdr (or (assq field (cdr mode)) + (prog1 + (funcall mime-update-field-decoder-cache + field (car mode)) + (setcdr mode + (cdr (assq (car mode) mime-field-decoder-cache))) + )))) + +;;;###autoload +(defun mime-find-field-decoder (field &optional mode) + "Return function to decode field-body of FIELD in MODE. +Optional argument MODE must be object or name of +field-presentation-method. Name of field-presentation-method must be +`plain', `wide', `summary' or `nov'. +Default value of MODE is `summary'." + (if (symbolp mode) + (let ((p (cdr (mime-find-field-presentation-method mode)))) + (if (and p (setq p (assq field p))) + (cdr p) + (cdr (funcall mime-update-field-decoder-cache + field (or mode 'summary))))) + (inline (mime-find-field-decoder-internal field mode)) + )) + +;;;###autoload +(defun mime-update-field-decoder-cache (field mode &optional function) + "Update field decoder cache `mime-field-decoder-cache'." + (cond ((eq function 'identity) + (setq function nil) + ) + ((null function) + (let ((decoder-alist + (cdr (assq (or mode 'summary) mime-field-decoder-alist)))) + (setq function (cdr (or (assq field decoder-alist) + (assq t decoder-alist))))) + )) + (let ((cell (assq mode mime-field-decoder-cache)) + ret) + (if cell + (if (setq ret (assq field (cdr cell))) + (setcdr ret function) + (setcdr cell (cons (setq ret (cons field function)) (cdr cell)))) + (setq mime-field-decoder-cache + (cons (cons mode (list (setq ret (cons field function)))) + mime-field-decoder-cache))) + ret)) + +;; ignored fields +(mime-set-field-decoder 'Archive nil nil) +(mime-set-field-decoder 'Content-Md5 nil nil) +(mime-set-field-decoder 'Control nil nil) +(mime-set-field-decoder 'Date nil nil) +(mime-set-field-decoder 'Distribution nil nil) +(mime-set-field-decoder 'Followup-Host nil nil) +(mime-set-field-decoder 'Followup-To nil nil) +(mime-set-field-decoder 'Lines nil nil) +(mime-set-field-decoder 'Message-Id nil nil) +(mime-set-field-decoder 'Newsgroups nil nil) +(mime-set-field-decoder 'Nntp-Posting-Host nil nil) +(mime-set-field-decoder 'Path nil nil) +(mime-set-field-decoder 'Posted-And-Mailed nil nil) +(mime-set-field-decoder 'Received nil nil) +(mime-set-field-decoder 'Status nil nil) +(mime-set-field-decoder 'X-Face nil nil) +(mime-set-field-decoder 'X-Face-Version nil nil) +(mime-set-field-decoder 'X-Info nil nil) +(mime-set-field-decoder 'X-Pgp-Key-Info nil nil) +(mime-set-field-decoder 'X-Pgp-Sig nil nil) +(mime-set-field-decoder 'X-Pgp-Sig-Version nil nil) +(mime-set-field-decoder 'Xref nil nil) + +;; structured fields +(let ((fields + '(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender + To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc + Mail-Followup-To + Mime-Version Content-Type Content-Transfer-Encoding + Content-Disposition User-Agent)) + field) + (while fields + (setq field (pop fields)) + (mime-set-field-decoder + field + 'plain #'eword-decode-structured-field-body + 'wide #'eword-decode-and-fold-structured-field-body + 'summary #'eword-decode-and-unfold-structured-field-body + 'nov #'eword-decode-and-unfold-structured-field-body) + )) + +;; unstructured fields (default) +(mime-set-field-decoder + t + 'plain #'eword-decode-unstructured-field-body + 'wide #'eword-decode-unstructured-field-body + 'summary #'eword-decode-and-unfold-unstructured-field-body + 'nov #'eword-decode-unfolded-unstructured-field-body) + +;;;###autoload +(defun mime-decode-field-body (field-body field-name + &optional mode max-column) + "Decode FIELD-BODY as FIELD-NAME in MODE, and return the result. +Optional argument MODE must be `plain', `wide', `summary' or `nov'. +Default mode is `summary'. + +If MODE is `wide' and MAX-COLUMN is non-nil, the result is folded with +MAX-COLUMN. + +Non MIME encoded-word part in FILED-BODY is decoded with +`default-mime-charset'." + (unless mode (setq mode 'summary)) + (if (symbolp field-name) (setq field-name (symbol-name field-name))) + (let ((decoded + (if (eq mode 'nov) + (let ((ew-ignore-76bytes-limit t)) + (ew-decode-field + field-name (ew-lf-crlf-to-crlf field-body))) + (ew-decode-field + field-name (ew-lf-crlf-to-crlf field-body))))) + (if (and (eq mode 'wide) max-column) + (setq decoded (ew-crlf-refold + decoded + (1+ (string-width field-name)) + max-column)) + (if (not (eq mode 'plain)) + (setq decoded (ew-crlf-unfold decoded)))) + (setq decoded (ew-crlf-to-lf decoded)) + (add-text-properties 0 (length decoded) + (list 'original-field-name field-name + 'original-field-body field-body) + decoded) + decoded)) + +;;;###autoload +(defun mime-decode-header-in-region (start end + &optional code-conversion) + "Decode MIME encoded-words in region between START and END. +If CODE-CONVERSION is nil, it decodes only encoded-words. If it is +mime-charset, it decodes non-ASCII bit patterns as the mime-charset. +Otherwise it decodes non-ASCII bit patterns as the +default-mime-charset." + (interactive "*r") + (save-excursion + (save-restriction + (narrow-to-region start end) + (let ((default-charset + (if code-conversion + (if (mime-charset-to-coding-system code-conversion) + code-conversion + default-mime-charset)))) + (if default-charset + (let ((mode-obj (mime-find-field-presentation-method 'wide)) + beg p end field-name len field-decoder) + (goto-char (point-min)) + (while (re-search-forward std11-field-head-regexp nil t) + (setq beg (match-beginning 0) + p (match-end 0) + field-name (buffer-substring beg (1- p)) + len (string-width field-name) + field-decoder (inline + (mime-find-field-decoder-internal + (intern (capitalize field-name)) + mode-obj))) + (when field-decoder + (setq end (std11-field-end)) + (let ((body (buffer-substring p end)) + (default-mime-charset default-charset)) + (delete-region p end) + (insert (funcall field-decoder body (1+ len))) + (add-text-properties beg (min (1+ (point)) (point-max)) + (list 'original-field-name field-name + 'original-field-body field-body)) + )) + )) + (eword-decode-region (point-min) (point-max) t) + ))))) + +;;;###autoload +(defun mime-decode-header-in-buffer (&optional code-conversion separator) + "Decode MIME encoded-words in header fields. +If CODE-CONVERSION is nil, it decodes only encoded-words. If it is +mime-charset, it decodes non-ASCII bit patterns as the mime-charset. +Otherwise it decodes non-ASCII bit patterns as the +default-mime-charset. +If SEPARATOR is not nil, it is used as header separator." + (interactive "*") + (mime-decode-header-in-region + (point-min) + (save-excursion + (goto-char (point-min)) + (if (re-search-forward + (concat "^\\(" (regexp-quote (or separator "")) "\\)?$") + nil t) + (match-beginning 0) + (point-max) + )) + code-conversion)) + +(define-obsolete-function-alias 'eword-decode-header + 'mime-decode-header-in-buffer) + + ;;; @ encoded-word decoder ;;; @@ -511,37 +646,19 @@ if there are in decoded encoded-text (generated by bad manner MUA such as a version of Net$cape)." (let ((cs (mime-charset-to-coding-system charset))) (if cs - (let ((dest - (cond - ((string-equal "B" encoding) - (if (and (string-match eword-B-encoded-text-regexp string) - (string-equal string (match-string 0 string))) - (base64-decode-string string) - (error "Invalid encoded-text %s" string))) - ((string-equal "Q" encoding) - (if (and (string-match eword-Q-encoded-text-regexp string) - (string-equal string (match-string 0 string))) - (q-encoding-decode-string string) - (error "Invalid encoded-text %s" string))) - (t - (error "Invalid encoding %s" encoding) - ))) - ) - (if dest - (progn - (setq dest (decode-coding-string dest cs)) - (if must-unfold - (mapconcat (function - (lambda (chr) - (cond - ((eq chr ?\n) "") - ((eq chr ?\t) " ") - (t (char-to-string chr))) - )) - (std11-unfold-string dest) - "") - dest) - )))))) + (let ((dest (encoded-text-decode-string string encoding))) + (when dest + (setq dest (decode-mime-charset-string dest charset)) + (if must-unfold + (mapconcat (function + (lambda (chr) + (cond ((eq chr ?\n) "") + ((eq chr ?\t) " ") + (t (char-to-string chr))) + )) + (std11-unfold-string dest) + "") + dest)))))) ;;; @ lexical analyze @@ -552,7 +669,7 @@ as a version of Net$cape)." "*Max position of eword-lexical-analyze-cache. It is max size of eword-lexical-analyze-cache - 1.") -(defcustom eword-lexical-analyzers +(defcustom eword-lexical-analyzer '(eword-analyze-quoted-string eword-analyze-domain-literal eword-analyze-comment @@ -561,8 +678,9 @@ It is max size of eword-lexical-analyze-cache - 1.") eword-analyze-encoded-word eword-analyze-atom) "*List of functions to return result of lexical analyze. -Each function must have two arguments: STRING and MUST-UNFOLD. +Each function must have three arguments: STRING, START and MUST-UNFOLD. STRING is the target string to be analyzed. +START is start position of STRING to analyze. If MUST-UNFOLD is not nil, each function must unfold and eliminate bare-CR and bare-LF from the result even if they are included in content of the encoded-word. @@ -575,210 +693,212 @@ be the result." :group 'eword-decode :type '(repeat function)) -(defun eword-analyze-quoted-string (string &optional must-unfold) - (let ((p (std11-check-enclosure string ?\" ?\"))) +(defun eword-analyze-quoted-string-without-encoded-word (string start &optional must-unfold) + (let ((p (std11-check-enclosure string ?\" ?\" nil start))) + (if p + (cons (cons 'quoted-string + (decode-mime-charset-string + (std11-strip-quoted-pair + (substring string (1+ start) (1- p))) + default-mime-charset)) + ;;(substring string p)) + p) + ))) + +(defun eword-analyze-quoted-string-with-encoded-word (string start &optional must-unfold) + (let ((p (std11-check-enclosure string ?\" ?\" nil start))) (if p (cons (cons 'quoted-string - (eword-decode-quoted-string - (substring string 0 p) - default-mime-charset)) - (substring string p)) + (let ((str + (std11-strip-quoted-pair + (substring string (1+ start) (1- p))))) + (if (string-match eword-encoded-word-regexp str) + (eword-decode-encoded-word str) + (decode-mime-charset-string str default-mime-charset) + ))) + p) ))) -(defun eword-analyze-domain-literal (string &optional must-unfold) - (std11-analyze-domain-literal string)) - -(defun eword-analyze-comment (string &optional must-unfold) - (let ((len (length string))) - (if (and (< 0 len) (eq (aref string 0) ?\()) - (let ((p 0)) - (while (and p (< p len) (eq (aref string p) ?\()) - (setq p (std11-check-enclosure string ?\( ?\) t p))) - (setq p (or p len)) - (cons (cons 'comment - (eword-decode-comment - (substring string 0 p) - default-mime-charset)) - (substring string p))) - nil))) - -(defun eword-analyze-spaces (string &optional must-unfold) - (std11-analyze-spaces string)) - -(defun eword-analyze-special (string &optional must-unfold) - (std11-analyze-special string)) - -(defun eword-analyze-encoded-word (string &optional must-unfold) - (let ((decoded (eword-decode-first-encoded-words - string - eword-encoded-word-in-phrase-regexp - eword-after-encoded-word-in-phrase-regexp - must-unfold))) - (if decoded - (cons (cons 'atom (car decoded)) (cdr decoded))))) - -(defun eword-analyze-atom (string &optional must-unfold) - (if (let ((enable-multibyte-characters nil)) - (string-match std11-atom-regexp string)) +(defvar eword-analyze-quoted-encoded-word nil) +(defun eword-analyze-quoted-string (string start &optional must-unfold) + (if eword-analyze-quoted-encoded-word + (eword-analyze-quoted-string-with-encoded-word string start must-unfold) + (eword-analyze-quoted-string-without-encoded-word string start must-unfold))) + +(defun eword-analyze-domain-literal (string start &optional must-unfold) + (std11-analyze-domain-literal string start)) + +(defun eword-analyze-comment (string from &optional must-unfold) + (let ((len (length string)) + (i (or from 0)) + dest last-str + chr ret) + (when (and (> len i) + (eq (aref string i) ?\()) + (setq i (1+ i) + from i) + (catch 'tag + (while (< i len) + (setq chr (aref string i)) + (cond ((eq chr ?\\) + (setq i (1+ i)) + (if (>= i len) + (throw 'tag nil) + ) + (setq last-str (concat last-str + (substring string from (1- i)) + (char-to-string (aref string i))) + i (1+ i) + from i) + ) + ((eq chr ?\)) + (setq ret (concat last-str + (substring string from i))) + (throw 'tag (cons + (cons 'comment + (nreverse + (if (string= ret "") + dest + (cons + (eword-decode-string + (decode-mime-charset-string + ret default-mime-charset) + must-unfold) + dest) + ))) + (1+ i))) + ) + ((eq chr ?\() + (if (setq ret (eword-analyze-comment string i must-unfold)) + (setq last-str + (concat last-str + (substring string from i)) + dest + (if (string= last-str "") + (cons (car ret) dest) + (list* (car ret) + (eword-decode-string + (decode-mime-charset-string + last-str default-mime-charset) + must-unfold) + dest) + ) + i (cdr ret) + from i + last-str "") + (throw 'tag nil) + )) + (t + (setq i (1+ i)) + )) + ))))) + +(defun eword-analyze-spaces (string start &optional must-unfold) + (std11-analyze-spaces string start)) + +(defun eword-analyze-special (string start &optional must-unfold) + (std11-analyze-special string start)) + +(defun eword-analyze-encoded-word (string start &optional must-unfold) + (if (and (string-match eword-encoded-word-regexp string start) + (= (match-beginning 0) start)) + (let ((end (match-end 0)) + (dest (eword-decode-encoded-word (match-string 0 string) + must-unfold)) + ) + ;;(setq string (substring string end)) + (setq start end) + (while (and (string-match (eval-when-compile + (concat "[ \t\n]*\\(" + eword-encoded-word-regexp + "\\)")) + string start) + (= (match-beginning 0) start)) + (setq end (match-end 0)) + (setq dest + (concat dest + (eword-decode-encoded-word (match-string 1 string) + must-unfold)) + ;;string (substring string end)) + start end) + ) + (cons (cons 'atom dest) ;;string) + end) + ))) + +(defun eword-analyze-atom (string start &optional must-unfold) + (if (and (string-match std11-atom-regexp string start) + (= (match-beginning 0) start)) (let ((end (match-end 0))) - (if (and eword-decode-sticked-encoded-word - (string-match eword-encoded-word-in-phrase-regexp - (substring string 0 end)) - (< 0 (match-beginning 0))) - (setq end (match-beginning 0))) (cons (cons 'atom (decode-mime-charset-string - (substring string 0 end) + (substring string start end) default-mime-charset)) - (substring string end) - )))) - -(defun eword-lexical-analyze-internal (string must-unfold) - (let (dest ret) - (while (not (string-equal string "")) + ;;(substring string end) + end) + ))) + +(defun eword-lexical-analyze-internal (string start must-unfold) + (let ((len (length string)) + dest ret) + (while (< start len) (setq ret - (let ((rest eword-lexical-analyzers) + (let ((rest eword-lexical-analyzer) func r) (while (and (setq func (car rest)) - (null (setq r (funcall func string must-unfold))) + (null + (setq r (funcall func string start must-unfold))) ) (setq rest (cdr rest))) - (or r `((error . ,string) . "")) + (or r + (list (cons 'error (substring string start)) (1+ len))) )) - (setq dest (cons (car ret) dest)) - (setq string (cdr ret)) + (setq dest (cons (car ret) dest) + start (cdr ret)) ) (nreverse dest) )) -(defun eword-lexical-analyze (string &optional must-unfold) +(defun eword-lexical-analyze (string &optional start must-unfold) "Return lexical analyzed list corresponding STRING. It is like std11-lexical-analyze, but it decodes non us-ascii characters encoded as encoded-words or invalid \"raw\" format. \"Raw\" non us-ascii characters are regarded as variable `default-mime-charset'." - (let* ((str (copy-sequence string)) - (key (cons str (cons default-mime-charset must-unfold))) - ret) - (set-text-properties 0 (length str) nil str) + (let ((key (substring string (or start 0))) + ret cell) + (set-text-properties 0 (length key) nil key) (if (setq ret (assoc key eword-lexical-analyze-cache)) (cdr ret) - (setq ret (eword-lexical-analyze-internal str must-unfold)) + (setq ret (eword-lexical-analyze-internal key 0 must-unfold)) (setq eword-lexical-analyze-cache (cons (cons key ret) - (last eword-lexical-analyze-cache - eword-lexical-analyze-cache-max))) + eword-lexical-analyze-cache)) + (if (cdr (setq cell (nthcdr eword-lexical-analyze-cache-max + eword-lexical-analyze-cache))) + (setcdr cell nil)) ret))) (defun eword-decode-token (token) - (cdr token)) - -(defun eword-decode-and-fold-structured-field - (string start-column &optional max-column must-unfold) - "Decode and fold (fill) STRING as structured field body. -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded. - -If MAX-COLUMN is omitted, `fill-column' is used. - -If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-words (generated by bad manner MUA -such as a version of Net$cape)." - (or max-column - (setq max-column fill-column)) - (let ((c start-column) - (tokens (eword-lexical-analyze string must-unfold)) - (result "") - token) - (while (and (setq token (car tokens)) - (setq tokens (cdr tokens))) - (let* ((type (car token))) - (if (eq type 'spaces) - (let* ((next-token (car tokens)) - (next-str (eword-decode-token next-token)) - (next-len (string-width next-str)) - (next-c (+ c next-len 1))) - (if (< next-c max-column) - (setq result (concat result " " next-str) - c next-c) - (setq result (concat result "\n " next-str) - c (1+ next-len))) - (setq tokens (cdr tokens)) - ) - (let* ((str (eword-decode-token token))) - (setq result (concat result str) - c (+ c (string-width str))) - )))) - (if token - (concat result (eword-decode-token token)) - result))) - -(defun eword-decode-and-unfold-structured-field (string) - "Decode and unfold STRING as structured field body. -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded." - (let ((tokens (eword-lexical-analyze string 'must-unfold)) - (result "")) - (while tokens - (let* ((token (car tokens)) - (type (car token))) - (setq tokens (cdr tokens)) - (setq result - (if (eq type 'spaces) - (concat result " ") - (concat result (eword-decode-token token)) - )))) - result)) - -(defun eword-decode-structured-field-body (string &optional must-unfold - start-column max-column) - "Decode non us-ascii characters in STRING as structured field body. -STRING is unfolded before decoding. - -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded. - -If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-words (generated by bad manner MUA -such as a version of Net$cape)." - (if start-column - ;; fold with max-column - (eword-decode-and-fold-structured-field - string start-column max-column must-unfold) - ;; Don't fold - (mapconcat (function eword-decode-token) - (eword-lexical-analyze string must-unfold) - "") - )) - -(defun eword-decode-unstructured-field-body (string &optional must-unfold) - "Decode non us-ascii characters in STRING as unstructured field body. -STRING is unfolded before decoding. - -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded. - -If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-words (generated by bad manner MUA -such as a version of Net$cape)." - (eword-decode-string string must-unfold default-mime-charset)) - -(defun eword-extract-address-components (string) + (let ((type (car token)) + (value (cdr token))) + (cond ((eq type 'quoted-string) + (std11-wrap-as-quoted-string value)) + ((eq type 'comment) + (let ((dest "")) + (while value + (setq dest (concat dest + (if (stringp (car value)) + (std11-wrap-as-quoted-pairs + (car value) '(?( ?))) + (eword-decode-token (car value)) + )) + value (cdr value)) + ) + (concat "(" dest ")") + )) + (t value)))) + +(defun eword-extract-address-components (string &optional start) "Extract full name and canonical address from STRING. Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). If no name can be extracted, FULL-NAME will be nil. @@ -787,7 +907,8 @@ encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii characters are regarded as variable `default-mime-charset'." (let* ((structure (car (std11-parse-address (eword-lexical-analyze - (std11-unfold-string string) 'must-unfold)))) + (std11-unfold-string string) start + 'must-unfold)))) (phrase (std11-full-name-string structure)) (address (std11-address-string structure)) )