(require 'mel)
(require 'mime-def)
+(require 'ew-dec)
+
(defgroup eword-decode nil
"Encoded-word decoding"
:group 'mime)
+;;; TEST
+
+(defvar rotate-memo nil)
+(defmacro rotate-memo (var val)
+ `(when rotate-memo
+ (unless (boundp ',var) (setq ,var ()))
+ (setq ,var (cons ,val ,var))
+ (let ((tmp (last ,var (- (length ,var) 100))))
+ (when tmp (setcdr tmp nil)))
+ ,var))
;;; @ variables
;;;
:group 'eword-decode
:type 'boolean)
+(defcustom eword-max-size-to-decode 1000
+ "*Max size to decode header field."
+ :group 'eword-decode
+ :type '(choice (integer :tag "Limit (bytes)")
+ (const :tag "Don't limit" nil)))
+
;;; @ MIME encoded-word definition
;;;
(defconst eword-encoded-word-regexp eword-encoded-word-in-unstructured-regexp)
-;;; @@ Base64
-;;;
-
-(defconst base64-token-regexp "[A-Za-z0-9+/]")
-(defconst base64-token-padding-regexp "[A-Za-z0-9+/=]")
-
-(defconst eword-B-encoded-text-regexp
- (concat "\\(\\("
- base64-token-regexp
- base64-token-regexp
- base64-token-regexp
- base64-token-regexp
- "\\)*"
- base64-token-regexp
- base64-token-regexp
- base64-token-padding-regexp
- base64-token-padding-regexp
- "\\)"))
-
-;; (defconst eword-B-encoding-and-encoded-text-regexp
-;; (concat "\\(B\\)\\?" eword-B-encoded-text-regexp))
-
-
-;;; @@ Quoted-Printable
-;;;
-
-(defconst eword-Q-encoded-text-regexp
- (concat "\\([^=?]\\|" quoted-printable-octet-regexp "\\)+"))
-;; (defconst eword-Q-encoding-and-encoded-text-regexp
-;; (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp))
-
-
;;; @ internal utilities
;;;
code-conversion
must-unfold))
+(defun eword-decode-and-fold-structured-field
+ (string start-column &optional max-column must-unfold)
+ "Decode and fold (fill) STRING as structured field body.
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded.
+
+If MAX-COLUMN is omitted, `fill-column' is used.
+
+If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+ (rotate-memo args-eword-decode-and-fold-structured-field
+ (list string start-column max-column must-unfold))
+ (or max-column
+ (setq max-column fill-column))
+ (let* ((field-name (make-string (1- start-column) ?X))
+ (field-body (ew-lf-crlf-to-crlf string))
+ (ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+ (decoded (ew-decode-field field-name field-body)))
+ (unless (equal field-body decoded)
+ (setq decoded (ew-crlf-refold decoded start-column max-column)))
+ (ew-crlf-to-lf decoded)))
+
+(defun eword-decode-and-unfold-structured-field (string)
+ "Decode and unfold STRING as structured field body.
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded."
+ (rotate-memo args-eword-decode-and-unfold-structured-field (list string))
+ (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+ (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf (ew-crlf-unfold decoded))))
+
+(defun eword-decode-structured-field-body (string &optional must-unfold
+ start-column max-column)
+ "Decode non us-ascii characters in STRING as structured field body.
+STRING is unfolded before decoding.
+
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded.
+
+If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+ (rotate-memo args-eword-decode-structured-field-body
+ (list string must-unfold start-column max-column))
+ (if start-column
+ ;; fold with max-column
+ (eword-decode-and-fold-structured-field
+ string start-column max-column must-unfold)
+ ;; Don't fold
+ (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+ (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf decoded))))
+
+(defun eword-decode-unstructured-field-body (string &optional must-unfold)
+ "Decode non us-ascii characters in STRING as unstructured field body.
+STRING is unfolded before decoding.
+
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded.
+
+If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+ (rotate-memo args-eword-decode-unstructured-field-body
+ (list string must-unfold))
+ (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf (ew-crlf-unfold decoded))))
+
+(defun eword-decode-and-unfold-unstructured-field (string)
+ "Decode and unfold STRING as unstructured field body.
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded."
+ (rotate-memo args-eword-decode-and-unfold-unstructured-field
+ (list string))
+ (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf (ew-crlf-unfold decoded))))
+
;;; @ for region
;;;
Otherwise it decodes non-ASCII bit patterns as the
default-mime-charset."
(interactive "*r")
+ (rotate-memo args-eword-decode-region
+ (list start end (buffer-substring start end) unfolding must-unfold code-conversion))
(save-excursion
(save-restriction
(narrow-to-region start end)
(defcustom eword-decode-structured-field-list
'(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender
To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc
+ Mail-Followup-To
Mime-Version Content-Type Content-Transfer-Encoding
- Content-Disposition)
+ Content-Disposition User-Agent)
"*List of field-names to decode as structured field.
Each field name must be symbol."
:group 'eword-decode
:type '(repeat symbol))
+(defun eword-decode-field-body
+ (field-body field-name &optional unfolded max-column)
+ "Decode FIELD-BODY as FIELD-NAME, and return the result.
+
+If UNFOLDED is non-nil, it is assumed that FIELD-BODY is
+already unfolded.
+
+If MAX-COLUMN is non-nil, the result is folded with MAX-COLUMN
+or `fill-column' if MAX-COLUMN is t.
+Otherwise, the result is unfolded.
+
+MIME encoded-word in FIELD-BODY is recognized according to
+`eword-decode-ignored-field-list',
+`eword-decode-structured-field-list' and FIELD-NAME.
+
+Non MIME encoded-word part in FILED-BODY is decoded with
+`default-mime-charset'."
+ (if (symbolp field-name) (setq field-name (symbol-name field-name)))
+ (let ((decoded
+ (if unfolded
+ (let ((ew-ignore-76bytes-limit t))
+ (ew-decode-field
+ field-name (ew-lf-crlf-to-crlf field-body)))
+ (ew-decode-field
+ field-name (ew-lf-crlf-to-crlf field-body)))))
+ (if max-column
+ (setq decoded (ew-crlf-refold
+ decoded
+ (1+ (string-width field-name))
+ (if (eq max-column t) fill-column max-column)))
+ (setq decoded (ew-crlf-unfold decoded)))
+ (ew-crlf-to-lf decoded)))
+
(defun eword-decode-header (&optional code-conversion separator)
"Decode MIME encoded-words in header fields.
If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
default-mime-charset.
If SEPARATOR is not nil, it is used as header separator."
(interactive "*")
+ (rotate-memo args-eword-decode-header (list code-conversion))
+ (unless code-conversion
+ (message "eword-decode-header is called without code-conversion")
+ (sit-for 2))
(if (and code-conversion
(not (mime-charset-to-coding-system code-conversion)))
(setq code-conversion default-mime-charset))
(save-excursion
(save-restriction
(std11-narrow-to-header separator)
+ (rotate-memo args-h-eword-decode-header (buffer-substring (point-min) (point-max)))
(if code-conversion
- (let (beg p end field-name len)
+ (let (beg p end field-name field-body decoded)
(goto-char (point-min))
(while (re-search-forward std11-field-head-regexp nil t)
(setq beg (match-beginning 0)
p (match-end 0)
field-name (buffer-substring beg (1- p))
- len (string-width field-name)
- field-name (intern (capitalize field-name))
- end (std11-field-end))
- (cond ((memq field-name eword-decode-ignored-field-list)
- ;; Don't decode
- )
- ((memq field-name eword-decode-structured-field-list)
- ;; Decode as structured field
- (let ((body (buffer-substring p end)))
- (delete-region p end)
- (insert (eword-decode-and-fold-structured-field
- body (1+ len)))
- ))
- (t
- ;; Decode as unstructured field
- (save-restriction
- (narrow-to-region beg (1+ end))
- (goto-char p)
- (eword-decode-region beg (point-max) 'unfold nil
- code-conversion)
- (goto-char (point-max))
- )))))
+ end (std11-field-end)
+ field-body (ew-lf-crlf-to-crlf
+ (buffer-substring p end))
+ decoded (ew-decode-field
+ field-name field-body))
+ (unless (equal field-body decoded)
+ (setq decoded (ew-crlf-refold
+ decoded
+ (1+ (string-width field-name))
+ fill-column)))
+ (delete-region p end)
+ (insert (ew-crlf-to-lf decoded))
+ (add-text-properties beg (min (1+ (point)) (point-max))
+ (list 'original-field-name field-name
+ 'original-field-body field-body))
+ ))
(eword-decode-region (point-min) (point-max) t nil nil)
))))
as a version of Net$cape)."
(let ((cs (mime-charset-to-coding-system charset)))
(if cs
- (let ((dest
- (cond
- ((string-equal "B" encoding)
- (if (and (string-match eword-B-encoded-text-regexp string)
- (string-equal string (match-string 0 string)))
- (base64-decode-string string)
- (error "Invalid encoded-text %s" string)))
- ((string-equal "Q" encoding)
- (if (and (string-match eword-Q-encoded-text-regexp string)
- (string-equal string (match-string 0 string)))
- (q-encoding-decode-string string)
- (error "Invalid encoded-text %s" string)))
- (t
- (error "Invalid encoding %s" encoding)
- )))
- )
+ (let ((dest (encoded-text-decode-string string encoding)))
(when dest
(setq dest (decode-mime-charset-string dest charset))
(if must-unfold
(cdr decoded)))))))
(defun eword-analyze-atom (string &optional must-unfold)
- (if (let ((enable-multibyte-characters nil))
- (string-match std11-atom-regexp string))
+ (if (string-match std11-atom-regexp (string-as-unibyte string))
(let ((end (match-end 0)))
(if (and eword-decode-sticked-encoded-word
(string-match eword-encoded-word-in-phrase-regexp
(defun eword-decode-token (token)
(cdr token))
-(defun eword-decode-and-fold-structured-field
- (string start-column &optional max-column must-unfold)
- "Decode and fold (fill) STRING as structured field body.
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded.
-
-If MAX-COLUMN is omitted, `fill-column' is used.
-
-If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-words (generated by bad manner MUA
-such as a version of Net$cape)."
- (or max-column
- (setq max-column fill-column))
- (let ((c start-column)
- (tokens (eword-lexical-analyze string must-unfold))
- (result "")
- token)
- (while (and (setq token (car tokens))
- (setq tokens (cdr tokens)))
- (let* ((type (car token)))
- (if (eq type 'spaces)
- (let* ((next-token (car tokens))
- (next-str (eword-decode-token next-token))
- (next-len (string-width next-str))
- (next-c (+ c next-len 1)))
- (if (< next-c max-column)
- (setq result (concat result " " next-str)
- c next-c)
- (setq result (concat result "\n " next-str)
- c (1+ next-len)))
- (setq tokens (cdr tokens))
- )
- (let* ((str (eword-decode-token token)))
- (setq result (concat result str)
- c (+ c (string-width str)))
- ))))
- (if token
- (concat result (eword-decode-token token))
- result)))
-
-(defun eword-decode-and-unfold-structured-field (string)
- "Decode and unfold STRING as structured field body.
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded."
- (let ((tokens (eword-lexical-analyze string 'must-unfold))
- (result ""))
- (while tokens
- (let* ((token (car tokens))
- (type (car token)))
- (setq tokens (cdr tokens))
- (setq result
- (if (eq type 'spaces)
- (concat result " ")
- (concat result (eword-decode-token token))
- ))))
- result))
-
-(defun eword-decode-structured-field-body (string &optional must-unfold
- start-column max-column)
- "Decode non us-ascii characters in STRING as structured field body.
-STRING is unfolded before decoding.
-
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded.
-
-If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-words (generated by bad manner MUA
-such as a version of Net$cape)."
- (if start-column
- ;; fold with max-column
- (eword-decode-and-fold-structured-field
- string start-column max-column must-unfold)
- ;; Don't fold
- (mapconcat (function eword-decode-token)
- (eword-lexical-analyze string must-unfold)
- "")
- ))
-
-(defun eword-decode-unstructured-field-body (string &optional must-unfold)
- "Decode non us-ascii characters in STRING as unstructured field body.
-STRING is unfolded before decoding.
-
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded.
-
-If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-words (generated by bad manner MUA
-such as a version of Net$cape)."
- (eword-decode-string string must-unfold default-mime-charset))
-
(defun eword-extract-address-components (string)
"Extract full name and canonical address from STRING.
Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
It decodes non us-ascii characters in FULL-NAME encoded as
encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
characters are regarded as variable `default-mime-charset'."
+ (rotate-memo args-eword-extract-address-components (list string))
(let* ((structure (car (std11-parse-address
(eword-lexical-analyze
(std11-unfold-string string) 'must-unfold))))