X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Frfc2047.el;h=ff07547c6d28e3c709ab8fc582b34f452c0abac1;hb=2ee42624a6069cf91f228bdf578e3e5d1f044d5d;hp=0fe732824b44568b6496675d355e23c0de19d415;hpb=08a04d10607c813f9ae8da149d1eb0a3ec9aa692;p=elisp%2Fgnus.git- diff --git a/lisp/rfc2047.el b/lisp/rfc2047.el index 0fe7328..ff07547 100644 --- a/lisp/rfc2047.el +++ b/lisp/rfc2047.el @@ -1,5 +1,5 @@ ;;; rfc2047.el --- Functions for encoding and decoding rfc2047 messages -;; Copyright (C) 1998 Free Software Foundation, Inc. +;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; MORIOKA Tomohiko @@ -25,13 +25,14 @@ ;;; Code: (eval-and-compile - (if (not (fboundp 'base64-encode-string)) - (require 'base64))) + (eval + '(unless (fboundp 'base64-decode-string) + (require 'base64)))) + (require 'qp) (require 'mm-util) - -(defvar rfc2047-default-charset 'iso-8859-1 - "Default MIME charset -- does not need encoding.") +(require 'ietf-drums) +(require 'mail-prsvr) (defvar rfc2047-header-encoding-alist '(("Newsgroups" . nil) @@ -45,7 +46,7 @@ The values can be: 1) nil, in which case no encoding is done; 2) `mime', in which case the header will be encoded according to RFC2047; -3) a charset, in which case it will be encoded as that charse; +3) a charset, in which case it will be encoded as that charset; 4) `default', in which case the field will be encoded as the rest of the article.") @@ -55,8 +56,8 @@ The values can be: (iso-8859-2 . Q) (iso-8859-3 . Q) (iso-8859-4 . Q) - (iso-8859-5 . Q) - (koi8-r . Q) + (iso-8859-5 . B) + (koi8-r . B) (iso-8859-7 . Q) (iso-8859-8 . Q) (iso-8859-9 . Q) @@ -73,13 +74,13 @@ Valid encodings are nil, `Q' and `B'.") (defvar rfc2047-encoding-function-alist '((Q . rfc2047-q-encode-region) - (B . base64-encode-region) + (B . rfc2047-b-encode-region) (nil . ignore)) "Alist of RFC2047 encodings to encoding functions.") (defvar rfc2047-q-encoding-alist - '(("\\(From\\|Cc\\|To\\|Bcc\||Reply-To\\):" . "[^-A-Za-z0-9!*+/=_]") - ("." . "[\000-\007\013\015-\037\200-\377=_?]")) + '(("\\(From\\|Cc\\|To\\|Bcc\||Reply-To\\):" . "-A-Za-z0-9!*+/=_") + ("." . "^\000-\007\011\013\015-\037\200-\377=_?")) "Alist of header regexps and valid Q characters.") ;;; @@ -100,76 +101,133 @@ Valid encodings are nil, `Q' and `B'.") (point-max)))) (goto-char (point-min))) -;;;###autoload (defun rfc2047-encode-message-header () "Encode the message header according to `rfc2047-header-encoding-alist'. Should be called narrowed to the head of the message." (interactive "*") - (when (featurep 'mule) - (save-excursion - (let ((alist rfc2047-header-encoding-alist) - elem method) - (while (not (eobp)) - (save-restriction - (rfc2047-narrow-to-field) - (when (rfc2047-encodable-p) - ;; We found something that may perhaps be encoded. - (while (setq elem (pop alist)) - (when (or (and (stringp (car elem)) - (looking-at (car elem))) - (eq (car elem) t)) - (setq alist nil - method (cdr elem)))) - (when method - (cond - ((eq method 'mime) - (rfc2047-encode-region (point-min) (point-max))) - ;; Hm. - (t)))) - (goto-char (point-max)))))))) - -(defun rfc2047-encodable-p () - "Say whether the current (narrowed) buffer contains characters that need encoding." - (let ((charsets (mapcar - 'mm-mule-charset-to-mime-charset - (find-charset-region (point-min) (point-max)))) - (cs (list 'us-ascii rfc2047-default-charset)) + (save-excursion + (goto-char (point-min)) + (let ((alist rfc2047-header-encoding-alist) + elem method) + (while (not (eobp)) + (save-restriction + (rfc2047-narrow-to-field) + (if (not (rfc2047-encodable-p)) + (if (and (eq (mm-body-7-or-8) '8bit) + (mm-multibyte-p) + (mm-coding-system-p + (car message-posting-charset))) + ;; 8 bit must be decoded. + ;; Is message-posting-charset a coding system? + (mm-encode-coding-region + (point-min) (point-max) + (car message-posting-charset))) + ;; We found something that may perhaps be encoded. + (while (setq elem (pop alist)) + (when (or (and (stringp (car elem)) + (looking-at (car elem))) + (eq (car elem) t)) + (setq alist nil + method (cdr elem)))) + (cond + ((eq method 'mime) + (rfc2047-encode-region (point-min) (point-max)) + (rfc2047-fold-region (point-min) (point-max))) + ;; Hm. + (t))) + (goto-char (point-max))))))) + +(defun rfc2047-encodable-p (&optional header) + "Say whether the current (narrowed) buffer contains characters that need encoding in headers." + (let ((charsets + (mapcar + 'mm-mime-charset + (mm-find-charset-region (point-min) (point-max)))) + (cs (list 'us-ascii (car message-posting-charset))) found) (while charsets (unless (memq (pop charsets) cs) (setq found t))) found)) +(defun rfc2047-dissect-region (b e) + "Dissect the region between B and E into words." + (let ((all-specials (concat ietf-drums-tspecials " \t\n\r")) + (special-list (mapcar 'identity ietf-drums-tspecials)) + (blank-list '(? ?\t ?\n ?\r)) + words current cs state mail-parse-mule-charset) + (save-restriction + (narrow-to-region b e) + (goto-char (point-min)) + (skip-chars-forward all-specials) + (setq b (point)) + (while (not (eobp)) + (cond + ((not state) + (setq state 'word) + (if (not (eq (setq cs (mm-charset-after)) 'ascii)) + (setq current cs)) + (setq b (point))) + ((eq state 'blank) + (cond + ((memq (char-after) special-list) + (setq state nil)) + ((memq (char-after) blank-list)) + (t + (setq state 'word) + (unless b + (setq b (point))) + (if (not (eq (setq cs (mm-charset-after)) 'ascii)) + (setq current cs))))) + ((eq state 'word) + (cond + ((memq (char-after) special-list) + (setq state nil) + (push (list b (point) current) words) + (setq current nil)) + ((memq (char-after) blank-list) + (setq state 'blank) + (if (not current) + (setq b nil) + (push (list b (point) current) words) + (setq b (point)) + (setq current nil))) + ((or (eq (setq cs (mm-charset-after)) 'ascii) + (if current + (eq current cs) + (setq current cs)))) + (t + (push (list b (point) current) words) + (setq current cs) + (setq b (point)))))) + (if state + (forward-char) + (skip-chars-forward all-specials))) + (if (eq state 'word) + (push (list b (point) current) words))) + words)) + (defun rfc2047-encode-region (b e) "Encode all encodable words in REGION." - (let (prev c start qstart qprev qend) - (save-excursion - (goto-char b) - (while (re-search-forward "[^ \t\n]+" nil t) - (save-restriction - (narrow-to-region (match-beginning 0) (match-end 0)) - (goto-char (setq start (point-min))) - (setq prev nil) - (while (not (eobp)) - (unless (eq (setq c (char-charset (following-char))) 'ascii) - (cond - ((eq c prev) - ) - ((null prev) - (setq qstart (or qstart start) - qend (point-max) - qprev c) - (setq prev c)) - (t - ;(rfc2047-encode start (setq start (point)) prev) - (setq prev c)))) - (forward-char 1))) - (when (and (not prev) qstart) - (rfc2047-encode qstart qend qprev) - (setq qstart nil))) - (when qstart - (rfc2047-encode qstart qend qprev) - (setq qstart nil))))) + (let ((words (rfc2047-dissect-region b e)) + beg end current word) + (while (setq word (pop words)) + (if (equal (nth 2 word) current) + (setq beg (nth 0 word)) + (when current + (if (and (eq beg (nth 1 word)) (nth 2 word)) + (progn + ;; There might be a bug in Emacs Mule. + ;; A space must be inserted before encoding. + (goto-char beg) + (insert " ") + (rfc2047-encode (1+ beg) (1+ end) current)) + (rfc2047-encode beg end current))) + (setq current (nth 2 word) + beg (nth 0 word) + end (nth 1 word)))) + (when current + (rfc2047-encode beg end current)))) (defun rfc2047-encode-string (string) "Encode words in STRING." @@ -180,29 +238,69 @@ Should be called narrowed to the head of the message." (defun rfc2047-encode (b e charset) "Encode the word in the region with CHARSET." - (let* ((mime-charset (mm-mule-charset-to-mime-charset charset)) - (encoding (cdr (assq mime-charset - rfc2047-charset-encoding-alist))) + (let* ((mime-charset (mm-mime-charset charset)) + (encoding (or (cdr (assq mime-charset + rfc2047-charset-encoding-alist)) + 'B)) (start (concat "=?" (downcase (symbol-name mime-charset)) "?" - (downcase (symbol-name encoding)) "?"))) + (downcase (symbol-name encoding)) "?")) + (first t)) (save-restriction (narrow-to-region b e) - (mm-encode-coding-region b e mime-charset) + (when (eq encoding 'B) + ;; break into lines before encoding + (goto-char (point-min)) + (while (not (eobp)) + (goto-char (min (point-max) (+ 15 (point)))) + (unless (eobp) + (insert "\n")))) + (if (and (mm-multibyte-p) + (mm-coding-system-p mime-charset)) + (mm-encode-coding-region (point-min) (point-max) mime-charset)) (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) (point-min) (point-max)) (goto-char (point-min)) - (insert start) - (goto-char (point-max)) - (insert "?=") - ;; Encoded words can't be more than 75 chars long, so we have to - ;; split the long ones up. - (end-of-line) - (while (> (current-column) 74) - (beginning-of-line) - (forward-char 73) - (insert "?=\n " start) - (end-of-line))))) + (while (not (eobp)) + (unless first + (insert " ")) + (setq first nil) + (insert start) + (end-of-line) + (insert "?=") + (forward-line 1))))) + +(defun rfc2047-fold-region (b e) + "Fold the long lines in the region." + (save-restriction + (narrow-to-region b e) + (goto-char (point-min)) + (let ((break nil)) + (while (not (eobp)) + (cond + ((memq (char-after) '(? ?\t)) + (setq break (point))) + ((and (not break) + (looking-at "=\\?")) + (setq break (point))) + ((and break + (looking-at "\\?=") + (> (- (point) (save-excursion (beginning-of-line) (point))) 76)) + (goto-char break) + (setq break nil) + (insert "\n "))) + (unless (eobp) + (forward-char 1)))))) + +(defun rfc2047-b-encode-region (b e) + "Encode the header contained in REGION with the B encoding." + (save-restriction + (narrow-to-region (goto-char b) e) + (while (not (eobp)) + (base64-encode-region (point) (progn (end-of-line) (point)) t) + (if (and (bolp) (eolp)) + (delete-backward-char 1)) + (forward-line)))) (defun rfc2047-q-encode-region (b e) "Encode the header contained in REGION with the Q encoding." @@ -213,17 +311,23 @@ Should be called narrowed to the head of the message." (while alist (when (looking-at (caar alist)) (quoted-printable-encode-region b e nil (cdar alist)) - (subst-char-in-region (point-min) (point-max) ? ?_)) - (pop alist)))))) + (subst-char-in-region (point-min) (point-max) ? ?_) + (setq alist nil)) + (pop alist)) + (goto-char (point-min)) + (while (not (eobp)) + (goto-char (min (point-max) (+ 64 (point)))) + (search-backward "=" (- (point) 2) t) + (unless (eobp) + (insert "\n"))))))) ;;; ;;; Functions for decoding RFC2047 messages ;;; (defvar rfc2047-encoded-word-regexp - "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\\([!->@-~ ]+\\)\\?=") + "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\\([!->@-~ +]+\\)\\?=") -;;;###autoload (defun rfc2047-decode-region (start end) "Decode MIME-encoded words in region between START and END." (interactive "r") @@ -248,19 +352,27 @@ Should be called narrowed to the head of the message." (prog1 (match-string 0) (delete-region (match-beginning 0) (match-end 0))))) - (mm-decode-coding-region b e rfc2047-default-charset) + (when (and (mm-multibyte-p) + mail-parse-charset + (not (eq mail-parse-charset 'gnus-decoded))) + (mm-decode-coding-region b e mail-parse-charset)) (setq b (point))) - (mm-decode-coding-region b (point-max) rfc2047-default-charset))))) + (when (and (mm-multibyte-p) + mail-parse-charset + (not (eq mail-parse-charset 'us-ascii)) + (not (eq mail-parse-charset 'gnus-decoded))) + (mm-decode-coding-region b (point-max) mail-parse-charset)))))) -;;;###autoload (defun rfc2047-decode-string (string) - "Decode the quoted-printable-encoded STRING and return the results." - (with-temp-buffer - (mm-enable-multibyte) - (insert string) - (inline - (rfc2047-decode-region (point-min) (point-max))) - (buffer-string))) + "Decode the quoted-printable-encoded STRING and return the results." + (let ((m (mm-multibyte-p))) + (with-temp-buffer + (when m + (mm-enable-multibyte)) + (insert string) + (inline + (rfc2047-decode-region (point-min) (point-max))) + (buffer-string)))) (defun rfc2047-parse-and-decode (word) "Decode WORD and return it if it is an encoded word. @@ -280,14 +392,26 @@ Return WORD if not." "Decode STRING that uses CHARSET with ENCODING. Valid ENCODINGs are \"B\" and \"Q\". If your Emacs implementation can't decode CHARSET, it returns nil." + (if (stringp charset) + (setq charset (intern (downcase charset)))) + (if (or (not charset) + (eq 'gnus-all mail-parse-ignored-charsets) + (memq 'gnus-all mail-parse-ignored-charsets) + (memq charset mail-parse-ignored-charsets)) + (setq charset mail-parse-charset)) (let ((cs (mm-charset-to-coding-system charset))) + (if (and (not cs) charset + (listp mail-parse-ignored-charsets) + (memq 'gnus-unknown mail-parse-ignored-charsets)) + (setq cs (mm-charset-to-coding-system mail-parse-charset))) (when cs + (when (and (eq cs 'ascii) + mail-parse-charset) + (setq cs mail-parse-charset)) (mm-decode-coding-string (cond ((equal "B" encoding) - (if (fboundp 'base64-decode-string) - (base64-decode-string string) - (base64-decode string))) + (base64-decode-string string)) ((equal "Q" encoding) (quoted-printable-decode-string (mm-replace-chars-in-string string ?_ ? )))