X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Frfc2047.el;h=1f07cb6df98a8fb1b15fb21b48325034e336da57;hb=5357096aae0887174a86d6a6796112d6b3d2d53e;hp=17493afd73f95a501e8fb92d0645c4c2ab7a1b24;hpb=288df404798143bcebde31f44f2041f786424fa6;p=elisp%2Fgnus.git- diff --git a/lisp/rfc2047.el b/lisp/rfc2047.el index 17493af..1f07cb6 100644 --- a/lisp/rfc2047.el +++ b/lisp/rfc2047.el @@ -97,6 +97,25 @@ quoted-printable and base64 respectively.") ;;; Functions for encoding RFC2047 messages ;;; +(defun rfc2047-qp-or-base64 () + "Return the type with which to encode the buffer. +This is either `base64' or `quoted-printable'." + (save-excursion + (let ((limit (min (point-max) (+ 2000 (point-min)))) + (n8bit 0)) + (goto-char (point-min)) + (skip-chars-forward "\x20-\x7f\r\n\t" limit) + (while (< (point) limit) + (incf n8bit) + (forward-char 1) + (skip-chars-forward "\x20-\x7f\r\n\t" limit)) + (if (or (< (* 6 n8bit) (- limit (point-min))) + ;; Don't base64, say, a short line with a single + ;; non-ASCII char when splitting parts by charset. + (= n8bit 1)) + 'quoted-printable + 'base64)))) + (defun rfc2047-narrow-to-field () "Narrow the buffer to the header on the current line." (beginning-of-line) @@ -382,12 +401,12 @@ By default, the region is treated as containing addresses (see ;; encoding, choose the one that's shorter. (save-restriction (narrow-to-region b e) - (if (eq (mm-qp-or-base64) 'base64) + (if (eq (rfc2047-qp-or-base64) 'base64) 'B 'Q)))) (start (concat "=?" (downcase (symbol-name mime-charset)) "?" - (downcase (symbol-name encoding)) "?")) + (upcase (symbol-name encoding)) "?")) (factor (case mime-charset ((iso-8859-5 iso-8859-7 iso-8859-8 koi8-r) 1) ((big5 gb2312 euc-kr) 2) @@ -588,8 +607,8 @@ By default, the region is treated as containing addresses (see (eval-and-compile (defconst rfc2047-encoded-word-regexp - "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\ -\\?\\([!->@-~ +]*\\)\\?=")) + "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\ +\\?\\(B\\|Q\\)\\?\\([!->@-~ ]*\\)\\?=")) ;; Fixme: This should decode in place, not cons intermediate strings. ;; Also check whether it needs to worry about delimiting fields like @@ -668,7 +687,20 @@ By default, the region is treated as containing addresses (see mail-parse-charset (not (eq mail-parse-charset 'us-ascii)) (not (eq mail-parse-charset 'gnus-decoded))) - (mm-decode-coding-string string mail-parse-charset) + ;; `decode-coding-string' in Emacs offers a third optional + ;; arg NOCOPY to avoid consing a new string if the decoding + ;; is "trivial". Unfortunately it currently doesn't + ;; consider anything else than a `nil' coding system + ;; trivial. + ;; `rfc2047-decode-string' is called multiple times for each + ;; article during summary buffer generation, and we really + ;; want to avoid unnecessary consing. So we bypass + ;; `decode-coding-string' if the string is purely ASCII. + (if (and (fboundp 'detect-coding-string) + ;; string is purely ASCII + (eq (detect-coding-string string t) 'undecided)) + string + (mm-decode-coding-string string mail-parse-charset)) (mm-string-as-multibyte string))))) (defun rfc2047-parse-and-decode (word) @@ -681,7 +713,7 @@ decodable." (condition-case nil (rfc2047-decode (match-string 1 word) - (upcase (match-string 2 word)) + (string-to-char (match-string 2 word)) (match-string 3 word)) (error word)) word))) ; un-decodable @@ -691,15 +723,19 @@ decodable." ;; Be more liberal to accept buggy base64 strings. If ;; base64-decode-string accepts buggy strings, this function could ;; be aliased to identity. - (case (mod (length string) 4) - (0 string) - (1 string) ;; Error, don't pad it. - (2 (concat string "==")) - (3 (concat string "=")))) + (if (= 0 (mod (length string) 4)) + string + (when (string-match "=+$" string) + (setq string (substring string 0 (match-beginning 0)))) + (case (mod (length string) 4) + (0 string) + (1 string) ;; Error, don't pad it. + (2 (concat string "==")) + (3 (concat string "="))))) (defun rfc2047-decode (charset encoding string) "Decode STRING from the given MIME CHARSET in the given ENCODING. -Valid ENCODINGs are \"B\" and \"Q\". +Valid ENCODINGs are the characters \"B\" and \"Q\". If your Emacs implementation can't decode CHARSET, return nil." (if (stringp charset) (setq charset (intern (downcase charset)))) @@ -719,13 +755,13 @@ If your Emacs implementation can't decode CHARSET, return nil." (setq cs mail-parse-charset)) (mm-decode-coding-string (cond - ((equal "B" encoding) + ((char-equal ?B encoding) (base64-decode-string (rfc2047-pad-base64 string))) - ((equal "Q" encoding) + ((char-equal ?Q encoding) (quoted-printable-decode-string - (mm-replace-chars-in-string string ?_ ? ))) - (t (error "Invalid encoding: %s" encoding))) + (mm-subst-char-in-string ?_ ? string t))) + (t (error "Invalid encoding: %c" encoding))) cs)))) (provide 'rfc2047)