From 299de10fd37e6c3a691bc1ec44b1748390cea9d5 Mon Sep 17 00:00:00 2001 From: akr Date: Fri, 3 Apr 1998 15:22:27 +0000 Subject: [PATCH] merged branch semi-1_2_0 and akr. --- eword-decode.el | 389 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 302 insertions(+), 87 deletions(-) diff --git a/eword-decode.el b/eword-decode.el index 3f5a4c7..9a0b946 100644 --- a/eword-decode.el +++ b/eword-decode.el @@ -59,6 +59,52 @@ eword-encoded-text-regexp "\\)" (regexp-quote "?="))) +(defconst eword-after-encoded-word-regexp "\\([ \t]\\|$\\)") + +(defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+") +(defconst eword-encoded-word-in-phrase-regexp + (concat (regexp-quote "=?") + "\\(" + mime-charset-regexp + "\\)" + (regexp-quote "?") + "\\(B\\|Q\\)" + (regexp-quote "?") + "\\(" + eword-encoded-text-in-phrase-regexp + "\\)" + (regexp-quote "?="))) +(defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t(]\\|$\\)") + +(defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+") +(defconst eword-encoded-word-in-comment-regexp + (concat (regexp-quote "=?") + "\\(" + mime-charset-regexp + "\\)" + (regexp-quote "?") + "\\(B\\|Q\\)" + (regexp-quote "?") + "\\(" + eword-encoded-text-in-comment-regexp + "\\)" + (regexp-quote "?="))) +(defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)") + +(defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+") +(defconst eword-encoded-word-in-quoted-string-regexp + (concat (regexp-quote "=?") + "\\(" + mime-charset-regexp + "\\)" + (regexp-quote "?") + "\\(B\\|Q\\)" + (regexp-quote "?") + "\\(" + eword-encoded-text-in-quoted-string-regexp + "\\)" + (regexp-quote "?="))) +(defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)") ;;; @@ Base64 @@ -101,6 +147,230 @@ ;;; @ for string ;;; +(defvar eword-decode-sticked-encoded-word nil + "*If non-nil, decode encoded-words sticked on atoms, +other encoded-words, etc. +however this behaviour violates RFC2047.") + +(defvar eword-decode-quoted-encoded-word nil + "*If non-nil, decode encoded-words in quoted-string +however this behaviour violates RFC2047.") + +(defun eword-decode-first-encoded-words (string + eword-regexp + after-regexp + &optional must-unfold) + "Decode MIME encoded-words in beginning of STRING. + +EWORD-REGEXP is the regexp that matches a encoded-word. +Usual value is eword-encoded-word-regexp, +eword-encoded-text-in-phrase-regexp, +eword-encoded-word-in-comment-regexp or +eword-encoded-word-in-quoted-string-regexp. + +AFTER-REGEXP is the regexp that matches a after encoded-word. +Usual value is eword-after-encoded-word-regexp, +eword-after-encoded-text-in-phrase-regexp, +eword-after-encoded-word-in-comment-regexp or +eword-after-encoded-word-in-quoted-string-regexp. + +If beginning of STRING matches EWORD-REGEXP and AFTER-REGEXP, +returns a cons cell of decoded string(sequence of characters) and +the rest(sequence of octets). + +If beginning of STRING does not matches EWORD-REGEXP and AFTER-REGEXP, +returns nil. + +If an encoded-word is broken or your emacs implementation can not +decode the charset included in it, it is returned in decoded part +as encoded-word form. + +If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even +if there are in decoded encoded-words (generated by bad manner MUA +such as a version of Net$cape)." + (if eword-decode-sticked-encoded-word (setq after-regexp "")) + (let ((between-ewords-regexp + (if eword-decode-sticked-encoded-word + "\\(\n?[ \t]\\)*" + "\\(\n?[ \t]\\)+")) + (src string) ; sequence of octets. + (dst "")) ; sequence of characters. + (if (string-match + (concat "\\`\\(" eword-regexp "\\)" after-regexp) src) + (let* (p + (q (match-end 1)) + (ew (substring src 0 q)) + (dw (eword-decode-encoded-word ew must-unfold))) + (setq dst (concat dst dw) + src (substring src q)) + (if (not (string= ew dw)) + (progn + (while + (and + (string-match + (concat "\\`\\(" between-ewords-regexp "\\)" + "\\(" eword-regexp "\\)" + after-regexp) + src) + (progn + (setq p (match-end 1) + q (match-end 3) + ew (substring src p q) + dw (eword-decode-encoded-word ew must-unfold)) + (if (string= ew dw) + (progn + (setq dst (concat dst (substring src 0 q)) + src (substring src q)) + nil) + t))) + (setq dst (concat dst dw) + src (substring src q))))) + (cons dst src)) + nil))) + +(defun eword-decode-comment-string (string &optional must-unfold) + (let ((src string) + (buf "") + (dst "") + (flag-ew t)) + (while (< 0 (length src)) + (let ((ch (aref src 0)) + (decoded (and + flag-ew + (eword-decode-first-encoded-words src + eword-encoded-word-in-comment-regexp + eword-after-encoded-word-in-comment-regexp + must-unfold)))) + (if (and (not (string= buf "")) + (or decoded (eq ch ?\() (eq ch ?\)))) + (setq dst (concat dst + (std11-wrap-as-quoted-pairs + (decode-mime-charset-string buf + default-mime-charset) + '(?\( ?\)))) + buf "")) + (cond + (decoded + (setq dst (concat dst + (std11-wrap-as-quoted-pairs + (car decoded) + '(?( ?)))) + src (cdr decoded))) + ((or (eq ch ?\() (eq ch ?\))) + (setq dst (concat dst (list ch)) + src (substring src 1) + flag-ew t)) + ((eq ch ?\\) + (setq buf (concat buf (list (aref src 1))) + src (substring src 2) + flag-ew t)) + ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n)) + (setq buf (concat buf (list ch)) + src (substring src 1) + flag-ew t)) + ((string-match "\\`=?[^ \t\n()\\\\=]*" src) + (setq buf (concat buf (substring src 0 (match-end 0))) + src (substring src (match-end 0)) + flag-ew eword-decode-sticked-encoded-word)) + (t (error "something wrong"))))) + (if (not (string= buf "")) + (setq dst (concat dst + (std11-wrap-as-quoted-pairs + (decode-mime-charset-string buf + default-mime-charset) + '(?\( ?\)))))) + dst)) + +(defun eword-decode-quoted-string (string &optional must-unfold) + (let ((src string) + (buf "") + (dst "") + (flag-ew t)) + (while (< 0 (length src)) + (let ((ch (aref src 0)) + (decoded (and + eword-decode-quoted-encoded-word + flag-ew + (eword-decode-first-encoded-words src + eword-encoded-word-in-quoted-string-regexp + eword-after-encoded-word-in-quoted-string-regexp + must-unfold)))) + (if (and (not (string= buf "")) + (or decoded (eq ch ?\"))) + (setq dst (concat dst + (std11-wrap-as-quoted-pairs + (decode-mime-charset-string buf + default-mime-charset) + '(?\"))) + buf "")) + (cond + (decoded + (setq dst (concat dst + (std11-wrap-as-quoted-pairs + (car decoded) + '(?\"))) + src (cdr decoded))) + ((or (eq ch ?\")) + (setq dst (concat dst (list ch)) + src (substring src 1) + flag-ew t)) + ((eq ch ?\\) + (setq buf (concat buf (list (aref src 1))) + src (substring src 2) + flag-ew t)) + ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n)) + (setq buf (concat buf (list ch)) + src (substring src 1) + flag-ew t)) + ((string-match "\\`=?[^ \t\n\"\\\\=]*" src) + (setq buf (concat buf (substring src 0 (match-end 0))) + src (substring src (match-end 0)) + flag-ew eword-decode-sticked-encoded-word)) + (t (error "something wrong"))))) + (if (not (string= buf "")) + (setq dst (concat dst + (std11-wrap-as-quoted-pairs + (decode-mime-charset-string buf + default-mime-charset) + '(?\"))))) + dst)) + +(defun eword-decode-unstructured-string (string &optional must-unfold) + (let ((src string) + (buf "") + (dst "") + (flag-ew t)) + (while (< 0 (length src)) + (let ((ch (aref src 0)) + (decoded (and flag-ew (eword-decode-first-encoded-words src + eword-encoded-word-regexp + eword-after-encoded-word-regexp + must-unfold)))) + (if (and (not (string= buf "")) + decoded) + (setq dst (concat dst + (decode-mime-charset-string buf + default-mime-charset)) + buf "")) + (cond + (decoded + (setq dst (concat dst (car decoded)) + src (cdr decoded))) + ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n)) + (setq buf (concat buf (list ch)) + src (substring src 1) + flag-ew t)) + ((string-match "\\`=?[^ \t\n=]*" src) + (setq buf (concat buf (substring src 0 (match-end 0))) + src (substring src (match-end 0)) + flag-ew eword-decode-sticked-encoded-word)) + (t (error "something wrong"))))) + (if (not (string= buf "")) + (setq dst (concat dst + (decode-mime-charset-string buf + default-mime-charset)))) + dst)) + (defun eword-decode-string (string &optional must-unfold) "Decode MIME encoded-words in STRING. @@ -112,31 +382,9 @@ decode the charset included in it, it is not decoded. If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even if there are in decoded encoded-words (generated by bad manner MUA such as a version of Net$cape)." - (setq string (std11-unfold-string string)) - (let ((dest "")(ew nil) - beg end) - (while (and (string-match eword-encoded-word-regexp string) - (setq beg (match-beginning 0) - end (match-end 0)) - ) - (if (> beg 0) - (if (not - (and (eq ew t) - (string-match "^[ \t]+$" (substring string 0 beg)) - )) - (setq dest (concat dest (substring string 0 beg))) - ) - ) - (setq dest - (concat dest - (eword-decode-encoded-word - (substring string beg end) must-unfold) - )) - (setq string (substring string end)) - (setq ew t) - ) - (concat dest string) - )) + (eword-decode-unstructured-string + (std11-unfold-string string) + must-unfold)) ;;; @ for region @@ -157,22 +405,11 @@ such as a version of Net$cape)." (if unfolding (eword-decode-unfold) ) - (goto-char (point-min)) - (while (re-search-forward (concat "\\(" eword-encoded-word-regexp "\\)" - "\\(\n?[ \t]\\)+" - "\\(" eword-encoded-word-regexp "\\)") - nil t) - (replace-match "\\1\\6") - (goto-char (point-min)) - ) - (while (re-search-forward eword-encoded-word-regexp nil t) - (insert (eword-decode-encoded-word - (prog1 - (buffer-substring (match-beginning 0) (match-end 0)) - (delete-region (match-beginning 0) (match-end 0)) - ) must-unfold)) - ) - ))) + (let ((str (eword-decode-unstructured-string + (buffer-substring (point-min) (point-max)) + must-unfold))) + (delete-region (point-min) (point-max)) + (insert str))))) ;;; @ for message header @@ -236,11 +473,10 @@ If SEPARATOR is not nil, it is used as header separator." ;; Decode as unstructured field (save-restriction (narrow-to-region beg (1+ end)) - (decode-mime-charset-region p end default-charset) (goto-char p) - (if (re-search-forward eword-encoded-word-regexp - nil t) - (eword-decode-region beg (point-max) 'unfold)) + (let ((default-mime-charset default-charset)) + (eword-decode-region beg (point-max) 'unfold)) + (goto-char (point-max)) ))))) (eword-decode-region (point-min) (point-max) t) ))))) @@ -386,9 +622,7 @@ be the result." (let ((p (std11-check-enclosure string ?\" ?\"))) (if p (cons (cons 'quoted-string - (decode-mime-charset-string - (std11-strip-quoted-pair (substring string 1 (1- p))) - default-mime-charset)) + (eword-decode-quoted-string (substring string 0 p))) (substring string p)) ))) @@ -399,11 +633,7 @@ be the result." (let ((p (std11-check-enclosure string ?\( ?\) t))) (if p (cons (cons 'comment - (eword-decode-string - (decode-mime-charset-string - (std11-strip-quoted-pair (substring string 1 (1- p))) - default-mime-charset) - must-unfold)) + (eword-decode-comment-string (substring string 0 p))) (substring string p)) ))) @@ -414,30 +644,22 @@ be the result." (std11-analyze-special string)) (defun eword-analyze-encoded-word (string &optional must-unfold) - (if (eq (string-match eword-encoded-word-regexp string) 0) - (let ((end (match-end 0)) - (dest (eword-decode-encoded-word (match-string 0 string) - must-unfold)) - ) - (setq string (substring string end)) - (while (eq (string-match `,(concat "[ \t\n]*\\(" - eword-encoded-word-regexp - "\\)") - string) - 0) - (setq end (match-end 0)) - (setq dest - (concat dest - (eword-decode-encoded-word (match-string 1 string) - must-unfold)) - string (substring string end)) - ) - (cons (cons 'atom dest) string) - ))) + (let ((decoded (eword-decode-first-encoded-words + string + eword-encoded-word-in-phrase-regexp + eword-after-encoded-word-in-phrase-regexp + must-unfold))) + (if decoded + (cons (cons 'atom (car decoded)) (cdr decoded))))) (defun eword-analyze-atom (string &optional must-unfold) (if (string-match std11-atom-regexp string) (let ((end (match-end 0))) + (if (and eword-decode-sticked-encoded-word + (string-match eword-encoded-word-in-phrase-regexp + (substring string 0 end)) + (< 0 (match-beginning 0))) + (setq end (match-beginning 0))) (cons (cons 'atom (decode-mime-charset-string (substring string 0 end) default-mime-charset)) @@ -468,12 +690,13 @@ It is like std11-lexical-analyze, but it decodes non us-ascii characters encoded as encoded-words or invalid \"raw\" format. \"Raw\" non us-ascii characters are regarded as variable `default-mime-charset'." - (let ((key (copy-sequence string)) - ret) - (set-text-properties 0 (length key) nil key) + (let* ((str (copy-sequence string)) + (key (cons str (cons default-mime-charset must-unfold))) + ret) + (set-text-properties 0 (length str) nil str) (if (setq ret (assoc key eword-lexical-analyze-cache)) (cdr ret) - (setq ret (eword-lexical-analyze-internal key must-unfold)) + (setq ret (eword-lexical-analyze-internal str must-unfold)) (setq eword-lexical-analyze-cache (cons (cons key ret) (last eword-lexical-analyze-cache @@ -481,13 +704,7 @@ characters encoded as encoded-words or invalid \"raw\" format. ret))) (defun eword-decode-token (token) - (let ((type (car token)) - (value (cdr token))) - (cond ((eq type 'quoted-string) - (std11-wrap-as-quoted-string value)) - ((eq type 'comment) - (concat "(" (std11-wrap-as-quoted-pairs value '(?( ?))) ")")) - (t value)))) + (cdr token)) (defun eword-decode-and-fold-structured-field (string start-column &optional max-column must-unfold) @@ -593,9 +810,7 @@ decode the charset included in it, it is not decoded. If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even if there are in decoded encoded-words (generated by bad manner MUA such as a version of Net$cape)." - (eword-decode-string - (decode-mime-charset-string string default-mime-charset) - must-unfold)) + (eword-decode-string string must-unfold)) (defun eword-extract-address-components (string) "Extract full name and canonical address from STRING. -- 1.7.10.4