X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Frfc2047.el;h=3b76718755a7bbce51590571e0ccd40b3dcf4dd3;hb=e2696774a2e225ea60d46cc665d4232c80412731;hp=2493bba8565111a0011736418f8a5b28f0b96bed;hpb=9f2825563aa260ff40fb0c1e344db537fe110aab;p=elisp%2Fgnus.git- diff --git a/lisp/rfc2047.el b/lisp/rfc2047.el index 2493bba..3b76718 100644 --- a/lisp/rfc2047.el +++ b/lisp/rfc2047.el @@ -169,14 +169,15 @@ Should be called narrowed to the head of the message." (mm-charset-to-coding-system (car message-posting-charset)))) ;; No encoding necessary, but folding is nice - (rfc2047-fold-region - (save-excursion - (goto-char (point-min)) - (skip-chars-forward "^:") - (when (looking-at ": ") - (forward-char 2)) - (point)) - (point-max))) + (when nil + (rfc2047-fold-region + (save-excursion + (goto-char (point-min)) + (skip-chars-forward "^:") + (when (looking-at ": ") + (forward-char 2)) + (point)) + (point-max)))) ;; We found something that may perhaps be encoded. (setq method nil alist rfc2047-header-encoding-alist) @@ -186,7 +187,6 @@ Should be called narrowed to the head of the message." (eq (car elem) t)) (setq alist nil method (cdr elem)))) - (goto-char (point-min)) (re-search-forward "^[^:]+: *" nil t) (cond ((eq method 'address-mime) @@ -242,7 +242,9 @@ The buffer may be narrowed." (mm-find-mime-charset-region (point-min) (point-max)))) (goto-char (point-min)) (or (and rfc2047-encode-encoded-words - (search-forward "=?" nil t)) + (prog1 + (search-forward "=?" nil t) + (goto-char (point-min)))) (and charsets (not (equal charsets (list (car message-posting-charset)))))))) @@ -268,8 +270,8 @@ The buffer may be narrowed." table)))) (modify-syntax-entry ?\\ "\\" table) (modify-syntax-entry ?\" "\"" table) - (modify-syntax-entry ?\( "." table) - (modify-syntax-entry ?\) "." table) + (modify-syntax-entry ?\( "(" table) + (modify-syntax-entry ?\) ")" table) (modify-syntax-entry ?\< "." table) (modify-syntax-entry ?\> "." table) (modify-syntax-entry ?\[ "." table) @@ -287,13 +289,14 @@ Dynamically bind `rfc2047-encoding-type' to change that." (save-restriction (narrow-to-region b e) (let ((encodable-regexp (if rfc2047-encode-encoded-words - "[^\000-\177]\\|=\\?" - "[^\000-\177]")) + "[^\000-\177]+\\|=\\?" + "[^\000-\177]+")) start ; start of current token - end ; end of current token + end begin ;; Whether there's an encoded word before the current token, ;; either immediately or separated by space. - last-encoded) + last-encoded + (orig-text (buffer-substring-no-properties b e))) (if (eq 'mime rfc2047-encoding-type) ;; Simple case. Continuous words in which all those contain ;; non-ASCII characters are encoded collectively. Encoding @@ -333,111 +336,109 @@ Dynamically bind `rfc2047-encoding-type' to change that." (setq end (point)) ;; Does it need encoding? (goto-char start) - (skip-chars-forward "\000-\177" end) - (if (= end (point)) - (setq last-encoded nil) - ;; It needs encoding. Strip the quotes first, - ;; since encoded words can't occur in quotes. - (goto-char end) - (delete-backward-char 1) - (goto-char start) - (delete-char 1) - (when last-encoded - ;; There was a preceding quoted word. We need - ;; to include any separating whitespace in this - ;; word to avoid it getting lost. - (skip-chars-backward " \t") - ;; A space is needed between the encoded words. - (insert ? ) - (setq start (point) - end (1+ end))) - ;; Adjust the end position for the deleted quotes. - (rfc2047-encode start (- end 2)) - (setq last-encoded t))) ; record that it was encoded + (if (re-search-forward encodable-regexp end 'move) + ;; It needs encoding. Strip the quotes first, + ;; since encoded words can't occur in quotes. + (progn + (goto-char end) + (delete-backward-char 1) + (goto-char start) + (delete-char 1) + (when last-encoded + ;; There was a preceding quoted word. We need + ;; to include any separating whitespace in this + ;; word to avoid it getting lost. + (skip-chars-backward " \t") + ;; A space is needed between the encoded words. + (insert ? ) + (setq start (point) + end (1+ end))) + ;; Adjust the end position for the deleted quotes. + (rfc2047-encode start (- end 2)) + (setq last-encoded t)) ; record that it was encoded + (setq last-encoded nil))) ((eq ?. (char-syntax (char-after))) ;; Skip other delimiters, but record that they've ;; potentially separated quoted words. (forward-char) (setq last-encoded nil)) + ((eq ?\) (char-syntax (char-after))) + (error "Unbalanced parentheses")) + ((eq ?\( (char-syntax (char-after))) + ;; Look for the end of parentheses. + (forward-list) + ;; Encode text as an unstructured field. + (let ((rfc2047-encoding-type 'mime)) + (rfc2047-encode-region (1+ start) (1- (point))) + (forward-char))) (t ; normal token/whitespace sequence ;; Find the end. + ;; Skip one ASCII word, or encode continuous words + ;; in which all those contain non-ASCII characters. + (setq end nil) + (while (not end) + (when (looking-at "[\000-\177]+") + (setq begin (point) + end (match-end 0)) + (if (re-search-forward "[ \t\n]\\|\\Sw" end 'move) + (progn + (setq end (match-beginning 0)) + (if rfc2047-encode-encoded-words + (progn + (goto-char begin) + (when (search-forward "=?" end 'move) + (goto-char (match-beginning 0)) + (setq end nil))) + (goto-char end))) + (setq end nil))) + (unless end + (setq end t) + (when (looking-at encodable-regexp) + (goto-char (match-end 0)) + (while (and (looking-at "[ \t\n]+\\([^ \t\n]+\\)") + (setq end (match-end 0)) + (string-match encodable-regexp + (match-string 1))) + (goto-char end)) + (when (looking-at "[^ \t\n]+") + (setq end (match-end 0)) + (if (re-search-forward "\\Sw+" end t) + ;; There are special characters better + ;; to be encoded so that MTAs may parse + ;; them safely. + (cond ((= end (point))) + ((looking-at encodable-regexp) + (setq end nil)) + (t + (goto-char (1- (match-end 0))) + (unless (= (point) (match-beginning 0)) + (insert " ")))) + (goto-char end) + (skip-chars-forward " \t\n") + (if (and (looking-at "[^ \t\n]+") + (string-match encodable-regexp + (match-string 0))) + (setq end nil) + (goto-char end))))))) (skip-chars-backward " \t\n") - (if (and (eq (char-before) ?\() - ;; Look for the end of parentheses. - (let ((string (buffer-substring (point) - (point-max))) - (default-major-mode 'fundamental-mode)) - ;; Use `standard-syntax-table'. - (with-temp-buffer - (insert "(" string) - (goto-char (point-min)) - (condition-case nil - (progn - (forward-list 1) - (setq end (- (point) 3))) - (error nil))))) - ;; Encode text as an unstructured field. - (let ((rfc2047-encoding-type 'mime)) - (rfc2047-encode-region start (+ (point) end)) - (forward-char)) - ;; Skip one ASCII word, or encode continuous words - ;; in which all those contain non-ASCII characters. - (skip-chars-forward " \t\n") - (setq end nil) - (while (not end) - (when (looking-at "[\000-\177]+") - (setq end (match-end 0)) - (if (re-search-forward "[ \t\n]\\|\\Sw" end t) - (goto-char (match-beginning 0)) - (goto-char end) - (setq end nil))) - (unless end - (setq end t) - (when (looking-at "[^\000-\177]+") - (goto-char (match-end 0)) - (while (and (looking-at "[ \t\n]+\\([^ \t\n]+\\)") - (setq end (match-end 0)) - (string-match "[^\000-\177]" - (match-string 1))) - (goto-char end)) - (when (looking-at "[^ \t\n]+") - (setq end (match-end 0)) - (if (re-search-forward "\\Sw+" end t) - ;; There are special characters better - ;; to be encoded so that MTAs may parse - ;; them safely. - (cond ((= end (point))) - ((looking-at "[^\000-\177]") - (setq end nil)) - (t - (goto-char (1- (match-end 0))) - (unless (= (point) (match-beginning 0)) - (insert " ")))) - (goto-char end) - (skip-chars-forward " \t\n") - (if (and (looking-at "[^ \t\n]+") - (string-match "[^\000-\177]" - (match-string 0))) - (setq end nil) - (goto-char end))))))) - (skip-chars-backward " \t\n") - (setq end (point)) - (goto-char start) - (skip-chars-forward "\000-\177" end) - (if (= end (point)) - (setq last-encoded nil) - (rfc2047-encode start end) - (setq last-encoded t)))))) + (setq end (point)) + (goto-char start) + (if (re-search-forward encodable-regexp end 'move) + (progn + (rfc2047-encode start end) + (setq last-encoded t)) + (setq last-encoded nil))))) (error (error "Invalid data for rfc2047 encoding: %s" - (buffer-substring b e))))))) - (rfc2047-fold-region b (point)))) + (mm-replace-in-string orig-text "[ \t\n]+" " "))))))) + (rfc2047-fold-region b (point)) + (goto-char (point-max)))) (defun rfc2047-encode-string (string) "Encode words in STRING. By default, the string is treated as containing addresses (see `rfc2047-encoding-type')." - (with-temp-buffer + (mm-with-multibyte-buffer (insert string) (rfc2047-encode-region (point-min) (point-max)) (buffer-string))) @@ -485,8 +486,7 @@ By default, the string is treated as containing addresses (see (defun rfc2047-encode (b e) "Encode the word(s) in the region B to E. -By default, the region is treated as containing addresses (see -`rfc2047-encoding-type')." +Point moves to the end of the region." (let ((mime-charset (or (mm-find-mime-charset-region b e) (list 'us-ascii))) cs encoding space eword) (cond ((> (length mime-charset) 1) @@ -510,6 +510,9 @@ By default, the region is treated as containing addresses (see 'Q))) (widen) (goto-char b) + (setq b (point-marker) + e (set-marker (make-marker) e)) + (rfc2047-fold-region (point-at-bol) b) (unless (= 0 (skip-chars-backward " \t")) (setq space (buffer-substring-no-properties (point) b))) (setq eword (rfc2047-encode-1 @@ -529,6 +532,8 @@ By default, the region is treated as containing addresses (see (goto-char b)) e) (insert eword) + (set-marker b nil) + (set-marker e nil) (unless (or (eolp) (looking-at "[ \t\n)]")) (insert " ")))) @@ -600,9 +605,9 @@ By default, the region is treated as containing addresses (see (goto-char (or break qword-break)) (setq break nil qword-break nil) - (if (looking-at "[ \t]") - (insert ?\n) - (insert "\n ")) + (if (looking-at "[ \t]") + (insert ?\n) + (insert "\n ")) (setq bol (1- (point))) ;; Don't break before the first non-LWSP characters. (skip-chars-forward " \t") @@ -808,9 +813,8 @@ If your Emacs implementation can't decode CHARSET, return nil." (memq 'gnus-unknown mail-parse-ignored-charsets)) (setq cs (mm-charset-to-coding-system mail-parse-charset))) (when cs - (when (and (eq cs 'ascii) - mail-parse-charset) - (setq cs mail-parse-charset)) + (when (eq cs 'ascii) + (setq cs (or mail-parse-charset 'raw-text))) (mm-decode-coding-string (cond ((char-equal ?B encoding)