X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=eword-decode.el;h=f11b7cc4d448f24aafabe26c9032b4823b1834ce;hb=99c158b34f006c5840785347fc7e711a6b246c0c;hp=9982cbdad229103ac451305736ca86345c882194;hpb=a0a1ddc99e2a091b842f608db40de667b57858d2;p=elisp%2Fflim.git diff --git a/eword-decode.el b/eword-decode.el index 9982cbd..f11b7cc 100644 --- a/eword-decode.el +++ b/eword-decode.el @@ -37,6 +37,9 @@ (require 'mime-def) (require 'ew-dec) +(require 'ew-line) + +(eval-when-compile (require 'cl)) (defgroup eword-decode nil "Encoded-word decoding" @@ -69,6 +72,12 @@ however this behaviour violates RFC2047." :group 'eword-decode :type 'boolean) +(defcustom eword-max-size-to-decode 1000 + "*Max size to decode header field." + :group 'eword-decode + :type '(choice (integer :tag "Limit (bytes)") + (const :tag "Don't limit" nil))) + ;;; @ MIME encoded-word definition ;;; @@ -316,6 +325,57 @@ default-mime-charset." code-conversion must-unfold)) +(defun eword-decode-structured-field-body (string + &optional + start-column max-column) + (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11)) + (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string)))) + (ew-crlf-to-lf decoded))) + +(defun eword-decode-and-unfold-structured-field-body (string + &optional + start-column + max-column) + "Decode and unfold STRING as structured field body. +It decodes non us-ascii characters in FULL-NAME encoded as +encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii +characters are regarded as variable `default-mime-charset'. + +If an encoded-word is broken or your emacs implementation can not +decode the charset included in it, it is not decoded." + (let* ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string)))) + (ew-crlf-to-lf (ew-crlf-unfold decoded)))) + +(defun eword-decode-and-fold-structured-field-body (string + start-column + &optional max-column) + (or max-column + (setq max-column fill-column)) + (let* ((field-name (make-string (1- start-column) ?X)) + (field-body (ew-lf-crlf-to-crlf string)) + (ew-decode-field-default-syntax '(ew-scan-unibyte-std11)) + (decoded (ew-decode-field field-name field-body))) + (unless (equal field-body decoded) + (setq decoded (ew-crlf-refold decoded start-column max-column))) + (ew-crlf-to-lf decoded))) + +(defun eword-decode-unstructured-field-body (string &optional start-column + max-column) + (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string)))) + (ew-crlf-to-lf decoded))) + +(defun eword-decode-and-unfold-unstructured-field-body (string + &optional start-column + max-column) + (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string)))) + (ew-crlf-to-lf (ew-crlf-unfold decoded)))) + +(defun eword-decode-unfolded-unstructured-field-body (string + &optional start-column + max-column) + (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string)))) + (ew-crlf-to-lf decoded))) + ;;; @ for region ;;; @@ -350,73 +410,6 @@ default-mime-charset." (delete-region (point-min) (point-max)) (insert str))))) - -;;; @ for message header -;;; - -(defcustom eword-decode-ignored-field-list - '(Newsgroups Path Lines Nntp-Posting-Host Received Message-Id Date) - "*List of field-names to be ignored when decoding. -Each field name must be symbol." - :group 'eword-decode - :type '(repeat symbol)) - -(defcustom eword-decode-structured-field-list - '(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender - To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc - Mail-Followup-To - Mime-Version Content-Type Content-Transfer-Encoding - Content-Disposition User-Agent) - "*List of field-names to decode as structured field. -Each field name must be symbol." - :group 'eword-decode - :type '(repeat symbol)) - -(defun eword-decode-header (&optional code-conversion separator) - "Decode MIME encoded-words in header fields. -If CODE-CONVERSION is nil, it decodes only encoded-words. If it is -mime-charset, it decodes non-ASCII bit patterns as the mime-charset. -Otherwise it decodes non-ASCII bit patterns as the -default-mime-charset. -If SEPARATOR is not nil, it is used as header separator." - (interactive "*") - (rotate-memo args-eword-decode-header (list code-conversion)) - (unless code-conversion - (message "eword-decode-header is called without code-conversion") - (sit-for 2)) - (if (and code-conversion - (not (mime-charset-to-coding-system code-conversion))) - (setq code-conversion default-mime-charset)) - (save-excursion - (save-restriction - (std11-narrow-to-header separator) - (rotate-memo args-h-eword-decode-header (buffer-substring (point-min) (point-max))) - (if code-conversion - (let (beg p end field-name field-body decoded) - (goto-char (point-min)) - (while (re-search-forward std11-field-head-regexp nil t) - (setq beg (match-beginning 0) - p (match-end 0) - field-name (buffer-substring beg (1- p)) - end (std11-field-end) - field-body (ew-lf-crlf-to-crlf - (buffer-substring p end)) - decoded (ew-decode-field - field-name field-body)) - (unless (equal field-body decoded) - (setq decoded (ew-crlf-refold - decoded - (1+ (string-width field-name)) - fill-column))) - (delete-region p end) - (insert (ew-crlf-to-lf decoded)) - (add-text-properties beg (min (1+ (point)) (point-max)) - (list 'original-field-name field-name - 'original-field-body field-body)) - )) - (eword-decode-region (point-min) (point-max) t nil nil) - )))) - (defun eword-decode-unfold () (goto-char (point-min)) (let (field beg end) @@ -434,6 +427,330 @@ If SEPARATOR is not nil, it is used as header separator." )) ))) +;;; @ for message header +;;; + +(defvar mime-field-decoder-alist nil) + +(defvar mime-field-decoder-cache nil) + +(defvar mime-update-field-decoder-cache 'ew-mime-update-field-decoder-cache + "*Field decoder cache update function.") + +;;;###autoload +(defun mime-set-field-decoder (field &rest specs) + "Set decoder of FILED. +SPECS must be like `MODE1 DECODER1 MODE2 DECODER2 ...'. +Each mode must be `nil', `plain', `wide', `summary' or `nov'. +If mode is `nil', corresponding decoder is set up for every modes." + (when specs + (let ((mode (pop specs)) + (function (pop specs))) + (if mode + (progn + (let ((cell (assq mode mime-field-decoder-alist))) + (if cell + (setcdr cell (put-alist field function (cdr cell))) + (setq mime-field-decoder-alist + (cons (cons mode (list (cons field function))) + mime-field-decoder-alist)) + )) + (apply (function mime-set-field-decoder) field specs) + ) + (mime-set-field-decoder field + 'plain function + 'wide function + 'summary function + 'nov function) + )))) + +;;;###autoload +(defmacro mime-find-field-presentation-method (name) + "Return field-presentation-method from NAME. +NAME must be `plain', `wide', `summary' or `nov'." + (cond ((eq name nil) + `(or (assq 'summary mime-field-decoder-cache) + '(summary)) + ) + ((and (consp name) + (car name) + (consp (cdr name)) + (symbolp (car (cdr name))) + (null (cdr (cdr name)))) + `(or (assq ,name mime-field-decoder-cache) + (cons ,name nil)) + ) + (t + `(or (assq (or ,name 'summary) mime-field-decoder-cache) + (cons (or ,name 'summary) nil)) + ))) + +(defun mime-find-field-decoder-internal (field &optional mode) + "Return function to decode field-body of FIELD in MODE. +Optional argument MODE must be object of field-presentation-method." + (cdr (or (assq field (cdr mode)) + (prog1 + (funcall mime-update-field-decoder-cache + field (car mode)) + (setcdr mode + (cdr (assq (car mode) mime-field-decoder-cache))) + )))) + +;;;###autoload +(defun mime-find-field-decoder (field &optional mode) + "Return function to decode field-body of FIELD in MODE. +Optional argument MODE must be object or name of +field-presentation-method. Name of field-presentation-method must be +`plain', `wide', `summary' or `nov'. +Default value of MODE is `summary'." + (if (symbolp mode) + (let ((p (cdr (mime-find-field-presentation-method mode)))) + (if (and p (setq p (assq field p))) + (cdr p) + (cdr (funcall mime-update-field-decoder-cache + field (or mode 'summary))))) + (inline (mime-find-field-decoder-internal field mode)) + )) + +;;;###autoload +(defun mime-update-field-decoder-cache (field mode &optional function) + "Update field decoder cache `mime-field-decoder-cache'." + (cond ((eq function 'identity) + (setq function nil) + ) + ((null function) + (let ((decoder-alist + (cdr (assq (or mode 'summary) mime-field-decoder-alist)))) + (setq function (cdr (or (assq field decoder-alist) + (assq t decoder-alist))))) + )) + (let ((cell (assq mode mime-field-decoder-cache)) + ret) + (if cell + (if (setq ret (assq field (cdr cell))) + (setcdr ret function) + (setcdr cell (cons (setq ret (cons field function)) (cdr cell)))) + (setq mime-field-decoder-cache + (cons (cons mode (list (setq ret (cons field function)))) + mime-field-decoder-cache))) + ret)) + +;; ignored fields +(mime-set-field-decoder 'Archive nil nil) +(mime-set-field-decoder 'Content-Md5 nil nil) +(mime-set-field-decoder 'Control nil nil) +(mime-set-field-decoder 'Date nil nil) +(mime-set-field-decoder 'Distribution nil nil) +(mime-set-field-decoder 'Followup-Host nil nil) +(mime-set-field-decoder 'Followup-To nil nil) +(mime-set-field-decoder 'Lines nil nil) +(mime-set-field-decoder 'Message-Id nil nil) +(mime-set-field-decoder 'Newsgroups nil nil) +(mime-set-field-decoder 'Nntp-Posting-Host nil nil) +(mime-set-field-decoder 'Path nil nil) +(mime-set-field-decoder 'Posted-And-Mailed nil nil) +(mime-set-field-decoder 'Received nil nil) +(mime-set-field-decoder 'Status nil nil) +(mime-set-field-decoder 'X-Face nil nil) +(mime-set-field-decoder 'X-Face-Version nil nil) +(mime-set-field-decoder 'X-Info nil nil) +(mime-set-field-decoder 'X-Pgp-Key-Info nil nil) +(mime-set-field-decoder 'X-Pgp-Sig nil nil) +(mime-set-field-decoder 'X-Pgp-Sig-Version nil nil) +(mime-set-field-decoder 'Xref nil nil) + +;; structured fields +(let ((fields + '(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender + To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc + Mail-Followup-To + Mime-Version Content-Type Content-Transfer-Encoding + Content-Disposition User-Agent)) + field) + (while fields + (setq field (pop fields)) + (mime-set-field-decoder + field + 'plain #'eword-decode-structured-field-body + 'wide #'eword-decode-and-fold-structured-field-body + 'summary #'eword-decode-and-unfold-structured-field-body + 'nov #'eword-decode-and-unfold-structured-field-body) + )) + +;; unstructured fields (default) +(mime-set-field-decoder + t + 'plain #'eword-decode-unstructured-field-body + 'wide #'eword-decode-unstructured-field-body + 'summary #'eword-decode-and-unfold-unstructured-field-body + 'nov #'eword-decode-unfolded-unstructured-field-body) + +;;;###autoload +(defun ew-mime-update-field-decoder-cache (field mode) + (let ((fun (cond + ((eq mode 'plain) + (lexical-let ((field-name (symbol-name field))) + (lambda (field-body &optional start-column max-column must-unfold) + (setq field-body (ew-lf-to-crlf field-body)) + (let ((res (ew-crlf-to-lf + (ew-decode-field field-name field-body)))) + (add-text-properties + 0 (length res) + (list 'original-field-name field-name + 'original-field-body field-body) + res) + res)))) + ((eq mode 'wide) + (lexical-let ((field-name (symbol-name field))) + (lambda (field-body &optional start-column max-column must-unfold) + (setq field-body (ew-lf-to-crlf field-body)) + (let ((res (ew-crlf-to-lf + (ew-crlf-refold + (ew-decode-field field-name field-body) + (length field-name) + (or max-column fill-column))))) + (add-text-properties + 0 (length res) + (list 'original-field-name field-name + 'original-field-body field-body) + res) + res)))) + ((eq mode 'summary) + (lexical-let ((field-name (symbol-name field))) + (lambda (field-body &optional start-column max-column must-unfold) + (setq field-body (ew-lf-to-crlf field-body)) + (let ((res (ew-crlf-to-lf + (ew-crlf-unfold + (ew-decode-field field-name field-body))))) + (add-text-properties + 0 (length res) + (list 'original-field-name field-name + 'original-field-body field-body) + res) + res)))) + ((eq mode 'nov) + (lexical-let ((field-name (symbol-name field))) + (lambda (field-body &optional start-column max-column must-unfold) + (setq field-body (ew-lf-to-crlf field-body)) + (require 'ew-var) + (let ((ew-ignore-76bytes-limit t)) + (let ((res (ew-crlf-to-lf + (ew-crlf-unfold + (ew-decode-field field-name field-body))))) + (add-text-properties + 0 (length res) + (list 'original-field-name field-name + 'original-field-body field-body) + res) + res))))) + (t + nil)))) + (mime-update-field-decoder-cache field mode fun))) + +;;;###autoload +(defun mime-decode-field-body (field-body field-name + &optional mode max-column) + "Decode FIELD-BODY as FIELD-NAME in MODE, and return the result. +Optional argument MODE must be `plain', `wide', `summary' or `nov'. +Default mode is `summary'. + +If MODE is `wide' and MAX-COLUMN is non-nil, the result is folded with +MAX-COLUMN. + +Non MIME encoded-word part in FILED-BODY is decoded with +`default-mime-charset'." + (unless mode (setq mode 'summary)) + (if (symbolp field-name) (setq field-name (symbol-name field-name))) + (let ((decoded + (if (eq mode 'nov) + (let ((ew-ignore-76bytes-limit t)) + (ew-decode-field + field-name (ew-lf-crlf-to-crlf field-body))) + (ew-decode-field + field-name (ew-lf-crlf-to-crlf field-body))))) + (if (and (eq mode 'wide) max-column) + (setq decoded (ew-crlf-refold + decoded + (1+ (string-width field-name)) + max-column)) + (if (not (eq mode 'plain)) + (setq decoded (ew-crlf-unfold decoded)))) + (setq decoded (ew-crlf-to-lf decoded)) + (add-text-properties 0 (length decoded) + (list 'original-field-name field-name + 'original-field-body field-body) + decoded) + decoded)) + +;;;###autoload +(defun mime-decode-header-in-region (start end + &optional code-conversion) + "Decode MIME encoded-words in region between START and END. +If CODE-CONVERSION is nil, it decodes only encoded-words. If it is +mime-charset, it decodes non-ASCII bit patterns as the mime-charset. +Otherwise it decodes non-ASCII bit patterns as the +default-mime-charset." + (interactive "*r") + (save-excursion + (save-restriction + (narrow-to-region start end) + (let ((default-charset + (if code-conversion + (if (mime-charset-to-coding-system code-conversion) + code-conversion + default-mime-charset)))) + (if default-charset + (let ((mode-obj (mime-find-field-presentation-method 'wide)) + beg p end len field-decoder + field-name field-body) + (goto-char (point-min)) + (while (re-search-forward std11-field-head-regexp nil t) + (setq beg (match-beginning 0) + p (match-end 0) + field-name (buffer-substring beg (1- p)) + len (string-width field-name) + field-decoder (inline + (mime-find-field-decoder-internal + (intern (capitalize field-name)) + mode-obj))) + (when field-decoder + (setq end (std11-field-end) + field-body (buffer-substring p end)) + (let ((default-mime-charset default-charset)) + (delete-region p end) + (insert (funcall field-decoder field-body (1+ len))) + )) + (add-text-properties beg (min (1+ (point)) (point-max)) + (list 'original-field-name field-name + 'original-field-body field-body)) + )) + (eword-decode-region (point-min) (point-max) t) + ))))) + +;;;###autoload +(defun mime-decode-header-in-buffer (&optional code-conversion separator) + "Decode MIME encoded-words in header fields. +If CODE-CONVERSION is nil, it decodes only encoded-words. If it is +mime-charset, it decodes non-ASCII bit patterns as the mime-charset. +Otherwise it decodes non-ASCII bit patterns as the +default-mime-charset. +If SEPARATOR is not nil, it is used as header separator." + (interactive "*") + (mime-decode-header-in-region + (point-min) + (save-excursion + (goto-char (point-min)) + (if (re-search-forward + (concat "^\\(" (regexp-quote (or separator "")) "\\)?$") + nil t) + (match-beginning 0) + (point-max) + )) + code-conversion)) + +(define-obsolete-function-alias 'eword-decode-header + 'mime-decode-header-in-buffer) + ;;; @ encoded-word decoder ;;; @@ -575,6 +892,7 @@ be the result." (substring string p))) nil))) + (defun eword-analyze-spaces (string &optional must-unfold) (std11-analyze-spaces string)) @@ -599,8 +917,7 @@ be the result." (cdr decoded))))))) (defun eword-analyze-atom (string &optional must-unfold) - (if (let ((enable-multibyte-characters nil)) - (string-match std11-atom-regexp string)) + (if (string-match std11-atom-regexp (string-as-unibyte string)) (let ((end (match-end 0))) (if (and eword-decode-sticked-encoded-word (string-match eword-encoded-word-in-phrase-regexp @@ -660,91 +977,6 @@ characters encoded as encoded-words or invalid \"raw\" format. (defun eword-decode-token (token) (cdr token)) -(defun eword-decode-and-fold-structured-field - (string start-column &optional max-column must-unfold) - "Decode and fold (fill) STRING as structured field body. -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded. - -If MAX-COLUMN is omitted, `fill-column' is used. - -If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-words (generated by bad manner MUA -such as a version of Net$cape)." - (rotate-memo args-eword-decode-and-fold-structured-field - (list string start-column max-column must-unfold)) - (or max-column - (setq max-column fill-column)) - (let* ((field-name (make-string (1- start-column) ?X)) - (field-body (ew-lf-crlf-to-crlf string)) - (ew-decode-field-default-syntax '(ew-scan-unibyte-std11)) - (decoded (ew-decode-field field-name field-body))) - (unless (equal field-body decoded) - (setq decoded (ew-crlf-refold decoded start-column max-column))) - (ew-crlf-to-lf decoded))) - -(defun eword-decode-and-unfold-structured-field (string) - "Decode and unfold STRING as structured field body. -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded." - (rotate-memo args-eword-decode-and-unfold-structured-field (list string)) - (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11)) - (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string)))) - (ew-crlf-to-lf (ew-crlf-unfold decoded)))) - -(defun eword-decode-structured-field-body (string &optional must-unfold - start-column max-column) - "Decode non us-ascii characters in STRING as structured field body. -STRING is unfolded before decoding. - -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded. - -If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-words (generated by bad manner MUA -such as a version of Net$cape)." - (rotate-memo args-eword-decode-structured-field-body - (list string must-unfold start-column max-column)) - (if start-column - ;; fold with max-column - (eword-decode-and-fold-structured-field - string start-column max-column must-unfold) - ;; Don't fold - (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11)) - (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string)))) - (ew-crlf-to-lf decoded)))) - -(defun eword-decode-unstructured-field-body (string &optional must-unfold) - "Decode non us-ascii characters in STRING as unstructured field body. -STRING is unfolded before decoding. - -It decodes non us-ascii characters in FULL-NAME encoded as -encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii -characters are regarded as variable `default-mime-charset'. - -If an encoded-word is broken or your emacs implementation can not -decode the charset included in it, it is not decoded. - -If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-words (generated by bad manner MUA -such as a version of Net$cape)." - (rotate-memo args-eword-decode-unstructured-field-body - (list string must-unfold)) - (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string)))) - (ew-crlf-to-lf (ew-crlf-unfold decoded)))) - (defun eword-extract-address-components (string) "Extract full name and canonical address from STRING. Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).