From 737e77d969a6f684d3ed0acfd51d5c90a8939e03 Mon Sep 17 00:00:00 2001 From: yamaoka Date: Thu, 22 Jul 2004 09:37:38 +0000 Subject: [PATCH] Synch to No Gnus 200407220937. --- lisp/ChangeLog | 11 ++++++++ lisp/mml.el | 2 +- lisp/rfc2047.el | 71 +++++++++++++++++++++++++++++++++++++++++++------- texi/ChangeLog | 4 +++ texi/emacs-mime.texi | 46 ++++++++++++++++++++------------ 5 files changed, 106 insertions(+), 28 deletions(-) diff --git a/lisp/ChangeLog b/lisp/ChangeLog index 7e7aaed..3f8dbde 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,14 @@ +2004-07-22 Katsumi Yamaoka + + * rfc2047.el (rfc2047-encode-region): Check carefully whether to + encode special characters; fix some kind of misconfigured headers; + signal a real error if debug-on-quit or debug-on-error is non-nil. + (rfc2047-encode-max-chars): New variable. + (rfc2047-encode-1): Use it. + (rfc2047-encode-parameter): New function. + + * mml.el (mml-insert-parameter): Remove an excessive space. + 2004-07-17 Simon Josefsson * gnus-group.el (gnus-group-make-group-simple): Add, suggested by diff --git a/lisp/mml.el b/lisp/mml.el index 68370d6..162bca9 100644 --- a/lisp/mml.el +++ b/lisp/mml.el @@ -803,7 +803,7 @@ If HANDLES is non-nil, use it instead reparsing the buffer." (insert " " param) (when (> (current-column) 71) (goto-char point) - (insert "\n ") + (insert "\n") (end-of-line))))) ;;; diff --git a/lisp/rfc2047.el b/lisp/rfc2047.el index 81a3699..ac892ee 100644 --- a/lisp/rfc2047.el +++ b/lisp/rfc2047.el @@ -319,7 +319,7 @@ Dynamically bind `rfc2047-encoding-type' to change that." ;; `address-mime' case -- take care of quoted words, comments. (with-syntax-table rfc2047-syntax-table (goto-char (point-min)) - (condition-case nil ; in case of unbalanced quotes + (condition-case err ; in case of unbalanced quotes ;; Look for rfc2822-style: sequences of atoms, quoted ;; strings, specials, whitespace. (Specials mustn't be ;; encoded.) 
@@ -396,14 +396,23 @@ Dynamically bind `rfc2047-encoding-type' to change that." (goto-char (match-beginning 0)) (setq end nil))) (goto-char end)))) + ;; Where the value nil of `end' means there may be + ;; text to have to be encoded following the point. + ;; Otherwise, the point reached to the end of ASCII + ;; words separated by whitespace or a special char. (unless end - (setq end t) (when (looking-at encodable-regexp) - (goto-char (match-end 0)) + (goto-char (setq begin (match-end 0))) (while (and (looking-at "[ \t\n]+\\([^ \t\n]+\\)") (setq end (match-end 0)) - (string-match encodable-regexp - (match-string 1))) + (progn + (while (re-search-forward + encodable-regexp end t)) + (< begin (point))) + (or (not (re-search-forward "\\Sw" end t)) + (progn + (goto-char (match-beginning 0)) + nil))) (goto-char end)) (when (looking-at "[^ \t\n]+") (setq end (match-end 0)) @@ -417,6 +426,8 @@ Dynamically bind `rfc2047-encoding-type' to change that." (t (goto-char (1- (match-end 0))) (unless (= (point) (match-beginning 0)) + ;; Separate encodable text and + ;; delimiter. (insert " ")))) (goto-char end) (skip-chars-forward " \t\n") @@ -430,12 +441,19 @@ Dynamically bind `rfc2047-encoding-type' to change that." (goto-char start) (if (re-search-forward encodable-regexp end 'move) (progn - (rfc2047-encode start end) + (goto-char start) + (unless (memq (char-before) '(nil ?\t ? )) + ;; Separate encodable text and delimiter. 
+ (insert " ") + (setq end (1+ end))) + (rfc2047-encode (point) end) (setq last-encoded t)) (setq last-encoded nil))))) (error - (error "Invalid data for rfc2047 encoding: %s" - (mm-replace-in-string orig-text "[ \t\n]+" " "))))))) + (if (or debug-on-quit debug-on-error) + (signal (car err) (cdr err)) + (error "Invalid data for rfc2047 encoding: %s" + (mm-replace-in-string orig-text "[ \t\n]+" " ")))))))) (rfc2047-fold-region b (point)) (goto-char (point-max)))) @@ -448,11 +466,22 @@ By default, the string is treated as containing addresses (see (rfc2047-encode-region (point-min) (point-max)) (buffer-string))) +(defvar rfc2047-encode-max-chars 76 + "Maximum characters of each header line that contain encoded-words. +If it is nil, encoded-words will not be folded. Too small value may +cause an error. Don't change this for no particular reason.") + (defun rfc2047-encode-1 (column string cs encoder start space &optional eword) "Subroutine used by `rfc2047-encode'." (cond ((string-equal string "") (or eword "")) - ((>= column 76) + ((not rfc2047-encode-max-chars) + (concat start + (funcall encoder (if cs + (mm-encode-coding-string string cs) + string)) + "?=")) + ((>= column rfc2047-encode-max-chars) (when (and eword (string-match "\n[ \t]+\\'" eword)) ;; Reomove a superfluous empty line. @@ -474,7 +503,7 @@ By default, the string is treated as containing addresses (see cs) (substring string 0 (1+ index)))) "?=")) - (if (<= (+ column (length next)) 76) + (if (<= (+ column (length next)) rfc2047-encode-max-chars) (setq prev next index (1+ index)) (setq next prev @@ -669,6 +698,28 @@ Point moves to the end of the region." (subst-char-in-region (point-min) (point-max) ? ?_) (buffer-string))) +(defun rfc2047-encode-parameter (param value) + "Return and PARAM=VALUE string encoded in the RFC2047-like style. +This is a replacement for the `rfc2231-encode-string' function. 
+ +When attaching files as MIME parts, we should use the RFC2231 encoding +to specify the file names containing non-ASCII characters. However, +much mail software doesn't support it in practice and recipients won't +be able to extract files with correct names. Instead, the RFC2047-like +encoding is generally accepted. This function provides just such an +RFC2047-like encoding, resigned to this regrettable trend. To use +it, put the following line in your ~/.gnus.el file: + +\(defalias 'mail-header-encode-parameter 'rfc2047-encode-parameter) +" + (let* ((rfc2047-encoding-type 'mime) + (rfc2047-encode-max-chars nil) + (string (rfc2047-encode-string value))) + (if (string-match "[][()<>@,;:\\\"/?=]" ;; tspecials + string) + (concat param "=" (format "%S" string)) + (concat param "=" string)))) + ;;; ;;; Functions for decoding RFC2047 messages ;;; diff --git a/texi/ChangeLog b/texi/ChangeLog index 20c97c8..64456e6 100644 --- a/texi/ChangeLog +++ b/texi/ChangeLog @@ -1,3 +1,7 @@ +2004-07-22 Katsumi Yamaoka + + * emacs-mime.texi (rfc2047): Update. + 2004-07-12 Katsumi Yamaoka * gnus.texi (Splitting Mail): Add nnmail-split-lowercase-expanded. diff --git a/texi/emacs-mime.texi b/texi/emacs-mime.texi index 00e8f44..5c6a371 100644 --- a/texi/emacs-mime.texi +++ b/texi/emacs-mime.texi @@ -1336,11 +1336,6 @@ library does. The following variables are tweakable: @table @code -@item rfc2047-default-charset -@vindex rfc2047-default-charset -Characters in this charset should not be decoded by this library. -This defaults to @code{iso-8859-1}. - @item rfc2047-header-encoding-alist @vindex rfc2047-header-encoding-alist This is an alist of header / encoding-type pairs. Its main purpose is @@ -1348,9 +1343,10 @@ to prevent encoding of certain headers. The keys can either be header regexps, or @code{t}. -The values can be either @code{nil}, in which case the header(s) in -question won't be encoded, or @code{mime}, which means that they will be -encoded. 
+The values can be @code{nil}, in which case the header(s) in question +won't be encoded, @code{mime}, which means that they will be encoded, or +@code{address-mime}, which means the header(s) will be encoded carefully +assuming they contain addresses. @item rfc2047-charset-encoding-alist @vindex rfc2047-charset-encoding-alist @@ -1358,22 +1354,20 @@ RFC2047 specifies two forms of encoding---@code{Q} (a Quoted-Printable-like encoding) and @code{B} (base64). This alist specifies which charset should use which encoding. -@item rfc2047-encoding-function-alist -@vindex rfc2047-encoding-function-alist +@item rfc2047-encode-function-alist +@vindex rfc2047-encode-function-alist This is an alist of encoding / function pairs. The encodings are @code{Q}, @code{B} and @code{nil}. -@item rfc2047-q-encoding-alist -@vindex rfc2047-q-encoding-alist -The @code{Q} encoding isn't quite the same for all headers. Some -headers allow a narrower range of characters, and that is what this -variable is for. It's an alist of header regexps / allowable character -ranges. - @item rfc2047-encoded-word-regexp @vindex rfc2047-encoded-word-regexp When decoding words, this library looks for matches to this regexp. +@item rfc2047-encode-encoded-words +@vindex rfc2047-encode-encoded-words +The boolean variable specifies whether encoded words +(e.g. @samp{=?hello?=}) should be encoded again. + @end table Those were the variables, and these are this functions: @@ -1404,6 +1398,24 @@ Decode the encoded words in the region. @findex rfc2047-decode-string Decode a string and return the results. +@item rfc2047-encode-parameter +@findex rfc2047-encode-parameter +Encode a parameter in the RFC2047-like style. This is a replacement for +the @code{rfc2231-encode-string} function. @xref{rfc2231}. + +When attaching files as @acronym{MIME} parts, we should use the RFC2231 +encoding to specify the file names containing non-@acronym{ASCII} +characters. 
However, much mail software doesn't support it in practice +and recipients won't be able to extract files with correct names. +Instead, the RFC2047-like encoding is generally accepted. This +function provides just such an RFC2047-like encoding, resigned to this +regrettable trend. To use it, put the following line in your +@file{~/.gnus.el} file: + +@lisp +(defalias 'mail-header-encode-parameter 'rfc2047-encode-parameter) +@end lisp + @end table -- 1.7.10.4