(require 'mime-def)
(require 'ew-dec)
+(require 'ew-line)
+
+(eval-when-compile (require 'cl))
(defgroup eword-decode nil
"Encoded-word decoding"
:group 'eword-decode
:type 'boolean)
+(defcustom eword-max-size-to-decode 1000
+ "*Max size to decode header field."
+ :group 'eword-decode
+ :type '(choice (integer :tag "Limit (bytes)")
+ (const :tag "Don't limit" nil)))
+
;;; @ MIME encoded-word definition
;;;
code-conversion
must-unfold))
+(defun eword-decode-structured-field-body (string
+ &optional
+ start-column max-column)
+ (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+ (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf decoded)))
+
+(defun eword-decode-and-unfold-structured-field-body (string
+ &optional
+ start-column
+ max-column)
+ "Decode and unfold STRING as structured field body.
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded."
+ (let* ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf (ew-crlf-unfold decoded))))
+
+(defun eword-decode-and-fold-structured-field-body (string
+ start-column
+ &optional max-column)
+ (or max-column
+ (setq max-column fill-column))
+ (let* ((field-name (make-string (1- start-column) ?X))
+ (field-body (ew-lf-crlf-to-crlf string))
+ (ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+ (decoded (ew-decode-field field-name field-body)))
+ (unless (equal field-body decoded)
+ (setq decoded (ew-crlf-refold decoded start-column max-column)))
+ (ew-crlf-to-lf decoded)))
+
+(defun eword-decode-unstructured-field-body (string &optional start-column
+ max-column)
+ (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf decoded)))
+
+(defun eword-decode-and-unfold-unstructured-field-body (string
+ &optional start-column
+ max-column)
+ (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf (ew-crlf-unfold decoded))))
+
+(defun eword-decode-unfolded-unstructured-field-body (string
+ &optional start-column
+ max-column)
+ (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf decoded)))
+
;;; @ for region
;;;
(delete-region (point-min) (point-max))
(insert str)))))
-
-;;; @ for message header
-;;;
-
-(defcustom eword-decode-ignored-field-list
- '(Newsgroups Path Lines Nntp-Posting-Host Received Message-Id Date)
- "*List of field-names to be ignored when decoding.
-Each field name must be symbol."
- :group 'eword-decode
- :type '(repeat symbol))
-
-(defcustom eword-decode-structured-field-list
- '(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender
- To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc
- Mail-Followup-To
- Mime-Version Content-Type Content-Transfer-Encoding
- Content-Disposition User-Agent)
- "*List of field-names to decode as structured field.
-Each field name must be symbol."
- :group 'eword-decode
- :type '(repeat symbol))
-
-(defun eword-decode-header (&optional code-conversion separator)
- "Decode MIME encoded-words in header fields.
-If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
-mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
-Otherwise it decodes non-ASCII bit patterns as the
-default-mime-charset.
-If SEPARATOR is not nil, it is used as header separator."
- (interactive "*")
- (rotate-memo args-eword-decode-header (list code-conversion))
- (unless code-conversion
- (message "eword-decode-header is called without code-conversion")
- (sit-for 2))
- (if (and code-conversion
- (not (mime-charset-to-coding-system code-conversion)))
- (setq code-conversion default-mime-charset))
- (save-excursion
- (save-restriction
- (std11-narrow-to-header separator)
- (rotate-memo args-h-eword-decode-header (buffer-substring (point-min) (point-max)))
- (if code-conversion
- (let (beg p end field-name field-body decoded)
- (goto-char (point-min))
- (while (re-search-forward std11-field-head-regexp nil t)
- (setq beg (match-beginning 0)
- p (match-end 0)
- field-name (buffer-substring beg (1- p))
- end (std11-field-end)
- field-body (ew-lf-crlf-to-crlf
- (buffer-substring p end))
- decoded (ew-decode-field
- field-name field-body))
- (unless (equal field-body decoded)
- (setq decoded (ew-crlf-refold
- decoded
- (1+ (string-width field-name))
- fill-column)))
- (delete-region p end)
- (insert (ew-crlf-to-lf decoded))
- (add-text-properties beg (min (1+ (point)) (point-max))
- (list 'original-field-name field-name
- 'original-field-body field-body))
- ))
- (eword-decode-region (point-min) (point-max) t nil nil)
- ))))
-
(defun eword-decode-unfold ()
(goto-char (point-min))
(let (field beg end)
))
)))
+;;; @ for message header
+;;;
+
+(defvar mime-field-decoder-alist nil)
+
+(defvar mime-field-decoder-cache nil)
+
+(defvar mime-update-field-decoder-cache 'ew-mime-update-field-decoder-cache
+ "*Field decoder cache update function.")
+
+;;;###autoload
+(defun mime-set-field-decoder (field &rest specs)
+ "Set decoder of FILED.
+SPECS must be like `MODE1 DECODER1 MODE2 DECODER2 ...'.
+Each mode must be `nil', `plain', `wide', `summary' or `nov'.
+If mode is `nil', corresponding decoder is set up for every modes."
+ (when specs
+ (let ((mode (pop specs))
+ (function (pop specs)))
+ (if mode
+ (progn
+ (let ((cell (assq mode mime-field-decoder-alist)))
+ (if cell
+ (setcdr cell (put-alist field function (cdr cell)))
+ (setq mime-field-decoder-alist
+ (cons (cons mode (list (cons field function)))
+ mime-field-decoder-alist))
+ ))
+ (apply (function mime-set-field-decoder) field specs)
+ )
+ (mime-set-field-decoder field
+ 'plain function
+ 'wide function
+ 'summary function
+ 'nov function)
+ ))))
+
+;;;###autoload
+(defmacro mime-find-field-presentation-method (name)
+ "Return field-presentation-method from NAME.
+NAME must be `plain', `wide', `summary' or `nov'."
+ (cond ((eq name nil)
+ `(or (assq 'summary mime-field-decoder-cache)
+ '(summary))
+ )
+ ((and (consp name)
+ (car name)
+ (consp (cdr name))
+ (symbolp (car (cdr name)))
+ (null (cdr (cdr name))))
+ `(or (assq ,name mime-field-decoder-cache)
+ (cons ,name nil))
+ )
+ (t
+ `(or (assq (or ,name 'summary) mime-field-decoder-cache)
+ (cons (or ,name 'summary) nil))
+ )))
+
+(defun mime-find-field-decoder-internal (field &optional mode)
+ "Return function to decode field-body of FIELD in MODE.
+Optional argument MODE must be object of field-presentation-method."
+ (cdr (or (assq field (cdr mode))
+ (prog1
+ (funcall mime-update-field-decoder-cache
+ field (car mode))
+ (setcdr mode
+ (cdr (assq (car mode) mime-field-decoder-cache)))
+ ))))
+
+;;;###autoload
+(defun mime-find-field-decoder (field &optional mode)
+ "Return function to decode field-body of FIELD in MODE.
+Optional argument MODE must be object or name of
+field-presentation-method. Name of field-presentation-method must be
+`plain', `wide', `summary' or `nov'.
+Default value of MODE is `summary'."
+ (if (symbolp mode)
+ (let ((p (cdr (mime-find-field-presentation-method mode))))
+ (if (and p (setq p (assq field p)))
+ (cdr p)
+ (cdr (funcall mime-update-field-decoder-cache
+ field (or mode 'summary)))))
+ (inline (mime-find-field-decoder-internal field mode))
+ ))
+
+;;;###autoload
+(defun mime-update-field-decoder-cache (field mode &optional function)
+ "Update field decoder cache `mime-field-decoder-cache'."
+ (cond ((eq function 'identity)
+ (setq function nil)
+ )
+ ((null function)
+ (let ((decoder-alist
+ (cdr (assq (or mode 'summary) mime-field-decoder-alist))))
+ (setq function (cdr (or (assq field decoder-alist)
+ (assq t decoder-alist)))))
+ ))
+ (let ((cell (assq mode mime-field-decoder-cache))
+ ret)
+ (if cell
+ (if (setq ret (assq field (cdr cell)))
+ (setcdr ret function)
+ (setcdr cell (cons (setq ret (cons field function)) (cdr cell))))
+ (setq mime-field-decoder-cache
+ (cons (cons mode (list (setq ret (cons field function))))
+ mime-field-decoder-cache)))
+ ret))
+
+;; ignored fields
+(mime-set-field-decoder 'Archive nil nil)
+(mime-set-field-decoder 'Content-Md5 nil nil)
+(mime-set-field-decoder 'Control nil nil)
+(mime-set-field-decoder 'Date nil nil)
+(mime-set-field-decoder 'Distribution nil nil)
+(mime-set-field-decoder 'Followup-Host nil nil)
+(mime-set-field-decoder 'Followup-To nil nil)
+(mime-set-field-decoder 'Lines nil nil)
+(mime-set-field-decoder 'Message-Id nil nil)
+(mime-set-field-decoder 'Newsgroups nil nil)
+(mime-set-field-decoder 'Nntp-Posting-Host nil nil)
+(mime-set-field-decoder 'Path nil nil)
+(mime-set-field-decoder 'Posted-And-Mailed nil nil)
+(mime-set-field-decoder 'Received nil nil)
+(mime-set-field-decoder 'Status nil nil)
+(mime-set-field-decoder 'X-Face nil nil)
+(mime-set-field-decoder 'X-Face-Version nil nil)
+(mime-set-field-decoder 'X-Info nil nil)
+(mime-set-field-decoder 'X-Pgp-Key-Info nil nil)
+(mime-set-field-decoder 'X-Pgp-Sig nil nil)
+(mime-set-field-decoder 'X-Pgp-Sig-Version nil nil)
+(mime-set-field-decoder 'Xref nil nil)
+
+;; structured fields
+(let ((fields
+ '(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender
+ To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc
+ Mail-Followup-To
+ Mime-Version Content-Type Content-Transfer-Encoding
+ Content-Disposition User-Agent))
+ field)
+ (while fields
+ (setq field (pop fields))
+ (mime-set-field-decoder
+ field
+ 'plain #'eword-decode-structured-field-body
+ 'wide #'eword-decode-and-fold-structured-field-body
+ 'summary #'eword-decode-and-unfold-structured-field-body
+ 'nov #'eword-decode-and-unfold-structured-field-body)
+ ))
+
+;; unstructured fields (default)
+(mime-set-field-decoder
+ t
+ 'plain #'eword-decode-unstructured-field-body
+ 'wide #'eword-decode-unstructured-field-body
+ 'summary #'eword-decode-and-unfold-unstructured-field-body
+ 'nov #'eword-decode-unfolded-unstructured-field-body)
+
+;;;###autoload
+(defun ew-mime-update-field-decoder-cache (field mode)
+ (let ((fun (cond
+ ((eq mode 'plain)
+ (lexical-let ((field-name (symbol-name field)))
+ (lambda (field-body &optional start-column max-column must-unfold)
+ (setq field-body (ew-lf-to-crlf field-body))
+ (let ((res (ew-crlf-to-lf
+ (ew-decode-field field-name field-body))))
+ (add-text-properties
+ 0 (length res)
+ (list 'original-field-name field-name
+ 'original-field-body field-body)
+ res)
+ res))))
+ ((eq mode 'wide)
+ (lexical-let ((field-name (symbol-name field)))
+ (lambda (field-body &optional start-column max-column must-unfold)
+ (setq field-body (ew-lf-to-crlf field-body))
+ (let ((res (ew-crlf-to-lf
+ (ew-crlf-refold
+ (ew-decode-field field-name field-body)
+ (length field-name)
+ (or max-column fill-column)))))
+ (add-text-properties
+ 0 (length res)
+ (list 'original-field-name field-name
+ 'original-field-body field-body)
+ res)
+ res))))
+ ((eq mode 'summary)
+ (lexical-let ((field-name (symbol-name field)))
+ (lambda (field-body &optional start-column max-column must-unfold)
+ (setq field-body (ew-lf-to-crlf field-body))
+ (let ((res (ew-crlf-to-lf
+ (ew-crlf-unfold
+ (ew-decode-field field-name field-body)))))
+ (add-text-properties
+ 0 (length res)
+ (list 'original-field-name field-name
+ 'original-field-body field-body)
+ res)
+ res))))
+ ((eq mode 'nov)
+ (lexical-let ((field-name (symbol-name field)))
+ (lambda (field-body &optional start-column max-column must-unfold)
+ (setq field-body (ew-lf-to-crlf field-body))
+ (require 'ew-var)
+ (let ((ew-ignore-76bytes-limit t))
+ (let ((res (ew-crlf-to-lf
+ (ew-crlf-unfold
+ (ew-decode-field field-name field-body)))))
+ (add-text-properties
+ 0 (length res)
+ (list 'original-field-name field-name
+ 'original-field-body field-body)
+ res)
+ res)))))
+ (t
+ nil))))
+ (mime-update-field-decoder-cache field mode fun)))
+
+;;;###autoload
+(defun mime-decode-field-body (field-body field-name
+ &optional mode max-column)
+ "Decode FIELD-BODY as FIELD-NAME in MODE, and return the result.
+Optional argument MODE must be `plain', `wide', `summary' or `nov'.
+Default mode is `summary'.
+
+If MODE is `wide' and MAX-COLUMN is non-nil, the result is folded with
+MAX-COLUMN.
+
+Non MIME encoded-word part in FILED-BODY is decoded with
+`default-mime-charset'."
+ (unless mode (setq mode 'summary))
+ (if (symbolp field-name) (setq field-name (symbol-name field-name)))
+ (let ((decoded
+ (if (eq mode 'nov)
+ (let ((ew-ignore-76bytes-limit t))
+ (ew-decode-field
+ field-name (ew-lf-crlf-to-crlf field-body)))
+ (ew-decode-field
+ field-name (ew-lf-crlf-to-crlf field-body)))))
+ (if (and (eq mode 'wide) max-column)
+ (setq decoded (ew-crlf-refold
+ decoded
+ (1+ (string-width field-name))
+ max-column))
+ (if (not (eq mode 'plain))
+ (setq decoded (ew-crlf-unfold decoded))))
+ (setq decoded (ew-crlf-to-lf decoded))
+ (add-text-properties 0 (length decoded)
+ (list 'original-field-name field-name
+ 'original-field-body field-body)
+ decoded)
+ decoded))
+
+;;;###autoload
+(defun mime-decode-header-in-region (start end
+ &optional code-conversion)
+ "Decode MIME encoded-words in region between START and END.
+If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
+mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
+Otherwise it decodes non-ASCII bit patterns as the
+default-mime-charset."
+ (interactive "*r")
+ (save-excursion
+ (save-restriction
+ (narrow-to-region start end)
+ (let ((default-charset
+ (if code-conversion
+ (if (mime-charset-to-coding-system code-conversion)
+ code-conversion
+ default-mime-charset))))
+ (if default-charset
+ (let ((mode-obj (mime-find-field-presentation-method 'wide))
+ beg p end len field-decoder
+ field-name field-body)
+ (goto-char (point-min))
+ (while (re-search-forward std11-field-head-regexp nil t)
+ (setq beg (match-beginning 0)
+ p (match-end 0)
+ field-name (buffer-substring beg (1- p))
+ len (string-width field-name)
+ field-decoder (inline
+ (mime-find-field-decoder-internal
+ (intern (capitalize field-name))
+ mode-obj)))
+ (when field-decoder
+ (setq end (std11-field-end)
+ field-body (buffer-substring p end))
+ (let ((default-mime-charset default-charset))
+ (delete-region p end)
+ (insert (funcall field-decoder field-body (1+ len)))
+ ))
+ (add-text-properties beg (min (1+ (point)) (point-max))
+ (list 'original-field-name field-name
+ 'original-field-body field-body))
+ ))
+ (eword-decode-region (point-min) (point-max) t)
+ )))))
+
+;;;###autoload
+(defun mime-decode-header-in-buffer (&optional code-conversion separator)
+ "Decode MIME encoded-words in header fields.
+If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
+mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
+Otherwise it decodes non-ASCII bit patterns as the
+default-mime-charset.
+If SEPARATOR is not nil, it is used as header separator."
+ (interactive "*")
+ (mime-decode-header-in-region
+ (point-min)
+ (save-excursion
+ (goto-char (point-min))
+ (if (re-search-forward
+ (concat "^\\(" (regexp-quote (or separator "")) "\\)?$")
+ nil t)
+ (match-beginning 0)
+ (point-max)
+ ))
+ code-conversion))
+
+(define-obsolete-function-alias 'eword-decode-header
+ 'mime-decode-header-in-buffer)
+
;;; @ encoded-word decoder
;;;
(substring string p)))
nil)))
+
(defun eword-analyze-spaces (string &optional must-unfold)
(std11-analyze-spaces string))
(cdr decoded)))))))
(defun eword-analyze-atom (string &optional must-unfold)
- (if (let ((enable-multibyte-characters nil))
- (string-match std11-atom-regexp string))
+ (if (string-match std11-atom-regexp (string-as-unibyte string))
(let ((end (match-end 0)))
(if (and eword-decode-sticked-encoded-word
(string-match eword-encoded-word-in-phrase-regexp
(defun eword-decode-token (token)
(cdr token))
-(defun eword-decode-and-fold-structured-field
- (string start-column &optional max-column must-unfold)
- "Decode and fold (fill) STRING as structured field body.
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded.
-
-If MAX-COLUMN is omitted, `fill-column' is used.
-
-If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-words (generated by bad manner MUA
-such as a version of Net$cape)."
- (rotate-memo args-eword-decode-and-fold-structured-field
- (list string start-column max-column must-unfold))
- (or max-column
- (setq max-column fill-column))
- (let* ((field-name (make-string (1- start-column) ?X))
- (field-body (ew-lf-crlf-to-crlf string))
- (ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
- (decoded (ew-decode-field field-name field-body)))
- (unless (equal field-body decoded)
- (setq decoded (ew-crlf-refold decoded start-column max-column)))
- (ew-crlf-to-lf decoded)))
-
-(defun eword-decode-and-unfold-structured-field (string)
- "Decode and unfold STRING as structured field body.
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded."
- (rotate-memo args-eword-decode-and-unfold-structured-field (list string))
- (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
- (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
- (ew-crlf-to-lf (ew-crlf-unfold decoded))))
-
-(defun eword-decode-structured-field-body (string &optional must-unfold
- start-column max-column)
- "Decode non us-ascii characters in STRING as structured field body.
-STRING is unfolded before decoding.
-
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded.
-
-If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-words (generated by bad manner MUA
-such as a version of Net$cape)."
- (rotate-memo args-eword-decode-structured-field-body
- (list string must-unfold start-column max-column))
- (if start-column
- ;; fold with max-column
- (eword-decode-and-fold-structured-field
- string start-column max-column must-unfold)
- ;; Don't fold
- (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
- (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
- (ew-crlf-to-lf decoded))))
-
-(defun eword-decode-unstructured-field-body (string &optional must-unfold)
- "Decode non us-ascii characters in STRING as unstructured field body.
-STRING is unfolded before decoding.
-
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded.
-
-If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-words (generated by bad manner MUA
-such as a version of Net$cape)."
- (rotate-memo args-eword-decode-unstructured-field-body
- (list string must-unfold))
- (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
- (ew-crlf-to-lf (ew-crlf-unfold decoded))))
-
(defun eword-extract-address-components (string)
"Extract full name and canonical address from STRING.
Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).