;;; TEST
+(defvar rotate-memo nil)
(defmacro rotate-memo (var val)
- `(progn
+ `(when rotate-memo
(unless (boundp ',var) (setq ,var ()))
(setq ,var (cons ,val ,var))
(let ((tmp (last ,var (- (length ,var) 100))))
:group 'eword-decode
:type 'boolean)
+(defcustom eword-max-size-to-decode 1000
+ "*Max size to decode header field."
+ :group 'eword-decode
+ :type '(choice (integer :tag "Limit (bytes)")
+ (const :tag "Don't limit" nil)))
+
;;; @ MIME encoded-word definition
;;;
(defconst eword-encoded-word-regexp eword-encoded-word-in-unstructured-regexp)
-;;; @@ Base64
-;;;
-
-(defconst base64-token-regexp "[A-Za-z0-9+/]")
-(defconst base64-token-padding-regexp "[A-Za-z0-9+/=]")
-
-(defconst eword-B-encoded-text-regexp
- (concat "\\(\\("
- base64-token-regexp
- base64-token-regexp
- base64-token-regexp
- base64-token-regexp
- "\\)*"
- base64-token-regexp
- base64-token-regexp
- base64-token-padding-regexp
- base64-token-padding-regexp
- "\\)"))
-
-;; (defconst eword-B-encoding-and-encoded-text-regexp
-;; (concat "\\(B\\)\\?" eword-B-encoded-text-regexp))
-
-
-;;; @@ Quoted-Printable
-;;;
-
-(defconst eword-Q-encoded-text-regexp
- (concat "\\([^=?]\\|" quoted-printable-octet-regexp "\\)+"))
-;; (defconst eword-Q-encoding-and-encoded-text-regexp
-;; (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp))
-
-
;;; @ internal utilities
;;;
code-conversion
must-unfold))
+(defun eword-decode-and-fold-structured-field
+ (string start-column &optional max-column must-unfold)
+ "Decode and fold (fill) STRING as structured field body.
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded.
+
+If MAX-COLUMN is omitted, `fill-column' is used.
+
+If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+ (rotate-memo args-eword-decode-and-fold-structured-field
+ (list string start-column max-column must-unfold))
+ (or max-column
+ (setq max-column fill-column))
+ (let* ((field-name (make-string (1- start-column) ?X))
+ (field-body (ew-lf-crlf-to-crlf string))
+ (ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+ (decoded (ew-decode-field field-name field-body)))
+ (unless (equal field-body decoded)
+ (setq decoded (ew-crlf-refold decoded start-column max-column)))
+ (ew-crlf-to-lf decoded)))
+
+(defun eword-decode-and-unfold-structured-field (string)
+ "Decode and unfold STRING as structured field body.
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded."
+ (rotate-memo args-eword-decode-and-unfold-structured-field (list string))
+ (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+ (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf (ew-crlf-unfold decoded))))
+
+(defun eword-decode-structured-field-body (string &optional must-unfold
+ start-column max-column)
+ "Decode non us-ascii characters in STRING as structured field body.
+STRING is unfolded before decoding.
+
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded.
+
+If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+ (rotate-memo args-eword-decode-structured-field-body
+ (list string must-unfold start-column max-column))
+ (if start-column
+ ;; fold with max-column
+ (eword-decode-and-fold-structured-field
+ string start-column max-column must-unfold)
+ ;; Don't fold
+ (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+ (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf decoded))))
+
+(defun eword-decode-unstructured-field-body (string &optional must-unfold)
+ "Decode non us-ascii characters in STRING as unstructured field body.
+STRING is unfolded before decoding.
+
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded.
+
+If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+ (rotate-memo args-eword-decode-unstructured-field-body
+ (list string must-unfold))
+ (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf (ew-crlf-unfold decoded))))
+
+(defun eword-decode-and-unfold-unstructured-field (string)
+ "Decode and unfold STRING as unstructured field body.
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded."
+ (rotate-memo args-eword-decode-and-unfold-unstructured-field
+ (list string))
+ (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf (ew-crlf-unfold decoded))))
+
;;; @ for region
;;;
(defcustom eword-decode-structured-field-list
'(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender
To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc
+ Mail-Followup-To
Mime-Version Content-Type Content-Transfer-Encoding
- Content-Disposition)
+ Content-Disposition User-Agent)
"*List of field-names to decode as structured field.
Each field name must be symbol."
:group 'eword-decode
:type '(repeat symbol))
+(defun eword-decode-field-body
+ (field-body field-name &optional unfolded max-column)
+ "Decode FIELD-BODY as FIELD-NAME, and return the result.
+
+If UNFOLDED is non-nil, it is assumed that FIELD-BODY is
+already unfolded.
+
+If MAX-COLUMN is non-nil, the result is folded with MAX-COLUMN
+or `fill-column' if MAX-COLUMN is t.
+Otherwise, the result is unfolded.
+
+MIME encoded-word in FIELD-BODY is recognized according to
+`eword-decode-ignored-field-list',
+`eword-decode-structured-field-list' and FIELD-NAME.
+
+Non MIME encoded-word part in FILED-BODY is decoded with
+`default-mime-charset'."
+ (if (symbolp field-name) (setq field-name (symbol-name field-name)))
+ (let ((decoded
+ (if unfolded
+ (let ((ew-ignore-76bytes-limit t))
+ (ew-decode-field
+ field-name (ew-lf-crlf-to-crlf field-body)))
+ (ew-decode-field
+ field-name (ew-lf-crlf-to-crlf field-body)))))
+ (if max-column
+ (setq decoded (ew-crlf-refold
+ decoded
+ (1+ (string-width field-name))
+ (if (eq max-column t) fill-column max-column)))
+ (setq decoded (ew-crlf-unfold decoded)))
+ (ew-crlf-to-lf decoded)))
+
(defun eword-decode-header (&optional code-conversion separator)
"Decode MIME encoded-words in header fields.
If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
default-mime-charset.
If SEPARATOR is not nil, it is used as header separator."
(interactive "*")
- (rotate-memo args-eword-decode-header
- (list code-conversion))
+ (rotate-memo args-eword-decode-header (list code-conversion))
(unless code-conversion
- (message "eword-decode-header is called with no code-conversion"))
+ (message "eword-decode-header is called without code-conversion")
+ (sit-for 2))
(if (and code-conversion
(not (mime-charset-to-coding-system code-conversion)))
(setq code-conversion default-mime-charset))
(save-excursion
(save-restriction
(std11-narrow-to-header separator)
+ (rotate-memo args-h-eword-decode-header (buffer-substring (point-min) (point-max)))
(if code-conversion
- (let (beg p end field-name field-body len)
+ (let (beg p end field-name field-body decoded)
(goto-char (point-min))
(while (re-search-forward std11-field-head-regexp nil t)
(setq beg (match-beginning 0)
p (match-end 0)
field-name (buffer-substring beg (1- p))
end (std11-field-end)
- field-body (buffer-substring p end))
+ field-body (ew-lf-crlf-to-crlf
+ (buffer-substring p end))
+ decoded (ew-decode-field
+ field-name field-body))
+ (unless (equal field-body decoded)
+ (setq decoded (ew-crlf-refold
+ decoded
+ (1+ (string-width field-name))
+ fill-column)))
(delete-region p end)
- (insert (ew-decode-field field-name (ew-lf-crlf-to-crlf field-body)))
+ (insert (ew-crlf-to-lf decoded))
+ (add-text-properties beg (min (1+ (point)) (point-max))
+ (list 'original-field-name field-name
+ 'original-field-body field-body))
))
(eword-decode-region (point-min) (point-max) t nil nil)
))))
as a version of Net$cape)."
(let ((cs (mime-charset-to-coding-system charset)))
(if cs
- (let ((dest
- (cond
- ((string-equal "B" encoding)
- (if (and (string-match eword-B-encoded-text-regexp string)
- (string-equal string (match-string 0 string)))
- (base64-decode-string string)
- (error "Invalid encoded-text %s" string)))
- ((string-equal "Q" encoding)
- (if (and (string-match eword-Q-encoded-text-regexp string)
- (string-equal string (match-string 0 string)))
- (q-encoding-decode-string string)
- (error "Invalid encoded-text %s" string)))
- (t
- (error "Invalid encoding %s" encoding)
- )))
- )
+ (let ((dest (encoded-text-decode-string string encoding)))
(when dest
(setq dest (decode-mime-charset-string dest charset))
(if must-unfold
(cdr decoded)))))))
(defun eword-analyze-atom (string &optional must-unfold)
- (if (let ((enable-multibyte-characters nil))
- (string-match std11-atom-regexp string))
+ (if (string-match std11-atom-regexp (string-as-unibyte string))
(let ((end (match-end 0)))
(if (and eword-decode-sticked-encoded-word
(string-match eword-encoded-word-in-phrase-regexp
(defun eword-decode-token (token)
(cdr token))
-(defun eword-decode-and-fold-structured-field
- (string start-column &optional max-column must-unfold)
- "Decode and fold (fill) STRING as structured field body.
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded.
-
-If MAX-COLUMN is omitted, `fill-column' is used.
-
-If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-words (generated by bad manner MUA
-such as a version of Net$cape)."
- (rotate-memo args-eword-decode-and-fold-structured-field
- (list string start-column max-column must-unfold))
- (or max-column
- (setq max-column fill-column))
- (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
- (decoded (ew-decode-field (make-string (1- start-column) ?X)
- (ew-lf-crlf-to-crlf string)
- (if must-unfold 'ew-cut-cr-lf)))
- column)
- (setq decoded (ew-crlf-to-lf decoded))
- (setq column 0)
- (ew-lf-line-convert decoded
- (lambda (line)
- (if (<= (length line) max-column)
- line
- (let ((start 0) index)
- (catch 'loop
- (while (< (+ column start) max-column)
- (if (string-match " " decoded start)
- (progn
- (setq start (match-end 0))
- (when (< (match-beginning 0) max-column)
- (setq index (match-beginning 0))))
- (throw 'loop nil)))
- (setq index (string-match " " decoded start)))
- (if index
- (concat (substring decoded 0 index)
- "\n"
- (substring decoded index))
- decoded))))
- (lambda (str) (setq column 1) str)
- (lambda (str) (setq column 0) str))))
-
-(defun eword-decode-and-unfold-structured-field (string)
- "Decode and unfold STRING as structured field body.
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded."
- (rotate-memo args-eword-decode-and-unfold-structured-field
- (list string))
- (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
- (decoded (ew-decode-field ""
- (ew-lf-crlf-to-crlf string)
- 'ew-cut-cr-lf)))
- (ew-cut-cr-lf decoded)))
-
-(defun eword-decode-structured-field-body (string &optional must-unfold
- start-column max-column)
- "Decode non us-ascii characters in STRING as structured field body.
-STRING is unfolded before decoding.
-
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded.
-
-If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-words (generated by bad manner MUA
-such as a version of Net$cape)."
- (rotate-memo args-eword-decode-structured-field-body
- (list string must-unfold start-column max-column))
- (if start-column
- ;; fold with max-column (folding is not implemented.)
- (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
- (decoded (ew-decode-field (make-string (1- start-column) ?X)
- (ew-lf-crlf-to-crlf string)
- (if must-unfold 'ew-cut-cr-lf))))
- (if must-unfold (ew-cut-cr-lf decoded) decoded))
- ;; Don't fold
- (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
- (decoded (ew-decode-field ""
- (ew-lf-crlf-to-crlf string)
- (if must-unfold 'ew-cut-cr-lf))))
- (if must-unfold (ew-cut-cr-lf decoded) decoded))))
-
-(defun eword-decode-unstructured-field-body (string &optional must-unfold)
- "Decode non us-ascii characters in STRING as unstructured field body.
-STRING is unfolded before decoding.
-
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded.
-
-If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-words (generated by bad manner MUA
-such as a version of Net$cape)."
- (rotate-memo args-eword-decode-unstructured-field-body
- (list string must-unfold))
- (let ((decoded (ew-decode-field ""
- (ew-lf-crlf-to-crlf string)
- (if must-unfold 'ew-cut-cr-lf))))
- (if must-unfold
- (ew-cut-cr-lf decoded)
- decoded)))
-
(defun eword-extract-address-components (string)
"Extract full name and canonical address from STRING.
Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
It decodes non us-ascii characters in FULL-NAME encoded as
encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
characters are regarded as variable `default-mime-charset'."
- (rotate-memo args-eword-extract-address-components
- (list string))
+ (rotate-memo args-eword-extract-address-components (list string))
(let* ((structure (car (std11-parse-address
(eword-lexical-analyze
(std11-unfold-string string) 'must-unfold))))