From c9a63a37ce872f67ccb7a6cbd85f0b63d05bd263 Mon Sep 17 00:00:00 2001 From: akr Date: Thu, 23 Apr 1998 17:04:57 +0000 Subject: [PATCH] * eword-decode.el (eword-encoded-word-prefix-regexp): New constant. (eword-encoded-word-suffix-regexp): New constant. (eword-encoded-text-in-unstructured-regexp): New constant. (eword-encoded-word-in-unstructured-regexp): New constant. (eword-after-encoded-word-in-unstructured-regexp): New constant. (eword-decode-entire-string): New function. (eword-decode-unstructured): Use `eword-decode-entire-string'. (eword-decode-comment): Use `eword-decode-entire-string'. (eword-decode-quoted-string): Use `eword-decode-entire-string'. (eword-decode-string): Add optional argument `default-mime-charset'. (eword-decode-region): Add optional argument `default-mime-charset'. (eword-decode-header): Refine `code-conversion' treatment. --- ChangeLog | 15 +++ eword-decode.el | 368 +++++++++++++++++++++++-------------------------------- mime-def.el | 2 +- 3 files changed, 169 insertions(+), 216 deletions(-) diff --git a/ChangeLog b/ChangeLog index 91ffb8d..77843e4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +1998-04-23 Tanaka Akira + + * eword-decode.el (eword-encoded-word-prefix-regexp): New constant. + (eword-encoded-word-suffix-regexp): New constant. + (eword-encoded-text-in-unstructured-regexp): New constant. + (eword-encoded-word-in-unstructured-regexp): New constant. + (eword-after-encoded-word-in-unstructured-regexp): New constant. + (eword-decode-entire-string): New function. + (eword-decode-unstructured): Use `eword-decode-entire-string'. + (eword-decode-comment): Use `eword-decode-entire-string'. + (eword-decode-quoted-string): Use `eword-decode-entire-string'. + (eword-decode-string): Add optional argument `default-mime-charset'. + (eword-decode-region): Add optional argument `default-mime-charset'. + (eword-decode-header): Refine `code-conversion' treatment. + 1998-04-21 Tanaka Akira * eword-decode.el (eword-decode-header): code-conversion is now diff --git a/eword-decode.el b/eword-decode.el index 7b116f1..9b87b82 100644 --- a/eword-decode.el +++ b/eword-decode.el @@ -41,69 +41,67 @@ :group 'mime) +;;; @ variables +;;; + +(defcustom eword-decode-sticked-encoded-word nil + "*If non-nil, decode encoded-words sticked on atoms, +other encoded-words, etc. +however this behaviour violates RFC2047." + :group 'eword-decode + :type 'boolean) + +(defcustom eword-decode-quoted-encoded-word nil + "*If non-nil, decode encoded-words in quoted-string +however this behaviour violates RFC2047." + :group 'eword-decode + :type 'boolean) + + ;;; @ MIME encoded-word definition ;;; -(defconst eword-encoded-text-regexp "[!->@-~]+") -(defconst eword-encoded-word-regexp +(defconst eword-encoded-word-prefix-regexp (concat (regexp-quote "=?") - "\\(" - mime-charset-regexp - "\\)" + "\\(" mime-charset-regexp "\\)" (regexp-quote "?") "\\(B\\|Q\\)" - (regexp-quote "?") - "\\(" - eword-encoded-text-regexp - "\\)" - (regexp-quote "?="))) -(defconst eword-after-encoded-word-regexp "\\([ \t]\\|$\\)") + (regexp-quote "?"))) +(defconst eword-encoded-word-suffix-regexp + (regexp-quote "?=")) + +(defconst eword-encoded-text-in-unstructured-regexp "[!->@-~]+") +(defconst eword-encoded-word-in-unstructured-regexp + (concat eword-encoded-word-prefix-regexp + "\\(" eword-encoded-text-in-unstructured-regexp "\\)" + eword-encoded-word-suffix-regexp)) +(defconst eword-after-encoded-word-in-unstructured-regexp "\\([ \t]\\|$\\)") (defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+") (defconst eword-encoded-word-in-phrase-regexp - (concat (regexp-quote "=?") - "\\(" - mime-charset-regexp - "\\)" - (regexp-quote "?") - "\\(B\\|Q\\)" - (regexp-quote "?") - "\\(" - eword-encoded-text-in-phrase-regexp - "\\)" - (regexp-quote "?="))) + (concat eword-encoded-word-prefix-regexp + "\\(" eword-encoded-text-in-phrase-regexp "\\)" + eword-encoded-word-suffix-regexp)) (defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t(]\\|$\\)") (defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+") (defconst eword-encoded-word-in-comment-regexp - (concat (regexp-quote "=?") - "\\(" - mime-charset-regexp - "\\)" - (regexp-quote "?") - "\\(B\\|Q\\)" - (regexp-quote "?") - "\\(" - eword-encoded-text-in-comment-regexp - "\\)" - (regexp-quote "?="))) + (concat eword-encoded-word-prefix-regexp + "\\(" eword-encoded-text-in-comment-regexp "\\)" + eword-encoded-word-suffix-regexp)) (defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)") (defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+") (defconst eword-encoded-word-in-quoted-string-regexp - (concat (regexp-quote "=?") - "\\(" - mime-charset-regexp - "\\)" - (regexp-quote "?") - "\\(B\\|Q\\)" - (regexp-quote "?") - "\\(" - eword-encoded-text-in-quoted-string-regexp - "\\)" - (regexp-quote "?="))) + (concat eword-encoded-word-prefix-regexp + "\\(" eword-encoded-text-in-quoted-string-regexp "\\)" + eword-encoded-word-suffix-regexp)) (defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)") +; obsolete +(defconst eword-encoded-text-regexp eword-encoded-text-in-unstructured-regexp) +(defconst eword-encoded-word-regexp eword-encoded-word-in-unstructured-regexp) + ;;; @@ Base64 ;;; @@ -142,22 +140,9 @@ ;; (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp)) -;;; @ for string +;;; @ internal utilities ;;; -(defcustom eword-decode-sticked-encoded-word nil - "*If non-nil, decode encoded-words sticked on atoms, -other encoded-words, etc. -however this behaviour violates RFC2047." - :group 'eword-decode - :type 'boolean) - -(defcustom eword-decode-quoted-encoded-word nil - "*If non-nil, decode encoded-words in quoted-string -however this behaviour violates RFC2047." - :group 'eword-decode - :type 'boolean) - (defun eword-decode-first-encoded-words (string eword-regexp after-regexp @@ -165,13 +150,15 @@ however this behaviour violates RFC2047." "Decode MIME encoded-words in beginning of STRING. EWORD-REGEXP is the regexp that matches a encoded-word. -Usual value is eword-encoded-word-regexp, +Usual value is +eword-encoded-word-in-unstructured-regexp, eword-encoded-text-in-phrase-regexp, eword-encoded-word-in-comment-regexp or eword-encoded-word-in-quoted-string-regexp. AFTER-REGEXP is the regexp that matches a after encoded-word. -Usual value is eword-after-encoded-word-regexp, +Usual value is +eword-after-encoded-word-in-unstructured-regexp, eword-after-encoded-text-in-phrase-regexp, eword-after-encoded-word-in-comment-regexp or eword-after-encoded-word-in-quoted-string-regexp. @@ -230,150 +217,101 @@ such as a version of Net$cape)." (cons dst src)) nil))) -(defun eword-decode-comment-string (string &optional must-unfold) - (let ((src string) +(defun eword-decode-entire-string (string + eword-regexp + after-regexp + safe-regexp + escape ; ?\\ or nil. + delimiters ; list of chars. + default-charset + must-unfold) + (let ((dst "") (buf "") - (dst "") - (flag-ew t)) + (src string) + (ew-enable t)) (while (< 0 (length src)) (let ((ch (aref src 0)) (decoded (and - flag-ew + ew-enable (eword-decode-first-encoded-words src - eword-encoded-word-in-comment-regexp - eword-after-encoded-word-in-comment-regexp - must-unfold)))) + eword-regexp after-regexp must-unfold)))) (if (and (not (string= buf "")) - (or decoded (eq ch ?\() (eq ch ?\)))) + (or decoded (memq ch delimiters))) (setq dst (concat dst (std11-wrap-as-quoted-pairs - (decode-mime-charset-string buf - default-mime-charset) - '(?\( ?\)))) + (decode-mime-charset-string buf default-charset) + delimiters)) buf "")) (cond (decoded (setq dst (concat dst (std11-wrap-as-quoted-pairs (car decoded) - '(?( ?)))) + delimiters)) src (cdr decoded))) - ((or (eq ch ?\() (eq ch ?\))) + ((memq ch delimiters) (setq dst (concat dst (list ch)) src (substring src 1) - flag-ew t)) - ((eq ch ?\\) + ew-enable t)) + ((eq ch escape) (setq buf (concat buf (list (aref src 1))) src (substring src 2) - flag-ew t)) - ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n)) - (setq buf (concat buf (list ch)) - src (substring src 1) - flag-ew t)) - ((string-match "\\`=?[^ \t\n()\\\\=]*" src) + ew-enable t)) + ((string-match "\\`[ \t\n]+" src) (setq buf (concat buf (substring src 0 (match-end 0))) src (substring src (match-end 0)) - flag-ew eword-decode-sticked-encoded-word)) - (t (error "something wrong"))))) - (if (not (string= buf "")) - (setq dst (concat dst - (std11-wrap-as-quoted-pairs - (decode-mime-charset-string buf - default-mime-charset) - '(?\( ?\)))))) - dst)) - -(defun eword-decode-quoted-string (string &optional must-unfold) - (let ((src string) - (buf "") - (dst "") - (flag-ew t)) - (while (< 0 (length src)) - (let ((ch (aref src 0)) - (decoded (and - eword-decode-quoted-encoded-word - flag-ew - (eword-decode-first-encoded-words src - eword-encoded-word-in-quoted-string-regexp - eword-after-encoded-word-in-quoted-string-regexp - must-unfold)))) - (if (and (not (string= buf "")) - (or decoded (eq ch ?\"))) - (setq dst (concat dst - (std11-wrap-as-quoted-pairs - (decode-mime-charset-string buf - default-mime-charset) - '(?\"))) - buf "")) - (cond - (decoded - (setq dst (concat dst - (std11-wrap-as-quoted-pairs - (car decoded) - '(?\"))) - src (cdr decoded))) - ((or (eq ch ?\")) - (setq dst (concat dst (list ch)) - src (substring src 1) - flag-ew t)) - ((eq ch ?\\) - (setq buf (concat buf (list (aref src 1))) - src (substring src 2) - flag-ew t)) - ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n)) - (setq buf (concat buf (list ch)) - src (substring src 1) - flag-ew t)) - ((string-match "\\`=?[^ \t\n\"\\\\=]*" src) + ew-enable t)) + ((and (string-match (concat "\\`=?" safe-regexp) src) + (< 0 (match-end 0))) (setq buf (concat buf (substring src 0 (match-end 0))) src (substring src (match-end 0)) - flag-ew eword-decode-sticked-encoded-word)) + ew-enable eword-decode-sticked-encoded-word)) (t (error "something wrong"))))) (if (not (string= buf "")) (setq dst (concat dst (std11-wrap-as-quoted-pairs - (decode-mime-charset-string buf - default-mime-charset) - '(?\"))))) + (decode-mime-charset-string buf default-charset) + delimiters)))) dst)) -(defun eword-decode-unstructured-string (string &optional must-unfold) - (let ((src string) - (buf "") - (dst "") - (flag-ew t)) - (while (< 0 (length src)) - (let ((ch (aref src 0)) - (decoded (and flag-ew (eword-decode-first-encoded-words src - eword-encoded-word-regexp - eword-after-encoded-word-regexp - must-unfold)))) - (if (and (not (string= buf "")) - decoded) - (setq dst (concat dst - (decode-mime-charset-string buf - default-mime-charset)) - buf "")) - (cond - (decoded - (setq dst (concat dst (car decoded)) - src (cdr decoded))) - ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n)) - (setq buf (concat buf (list ch)) - src (substring src 1) - flag-ew t)) - ((string-match "\\`=?[^ \t\n=]*" src) - (setq buf (concat buf (substring src 0 (match-end 0))) - src (substring src (match-end 0)) - flag-ew eword-decode-sticked-encoded-word)) - (t (error "something wrong"))))) - (if (not (string= buf "")) - (setq dst (concat dst - (decode-mime-charset-string buf - default-mime-charset)))) - dst)) -(defun eword-decode-string (string &optional must-unfold) +;;; @ for string +;;; + +(defun eword-decode-unstructured (string &optional must-unfold) + (eword-decode-entire-string + string + eword-encoded-word-in-unstructured-regexp + eword-after-encoded-word-in-unstructured-regexp + "[^ \t\n=]*" + nil + nil + default-mime-charset + must-unfold)) + +(defun eword-decode-comment (string &optional must-unfold) + (eword-decode-entire-string + string + eword-encoded-word-in-comment-regexp + eword-after-encoded-word-in-comment-regexp + "[^ \t\n()\\\\=]*" + ?\\ + '(?\( ?\)) + default-mime-charset + must-unfold)) + +(defun eword-decode-quoted-string (string &optional must-unfold) + (eword-decode-entire-string + string + eword-encoded-word-in-quoted-string-regexp + eword-after-encoded-word-in-quoted-string-regexp + "[^ \t\n\"\\\\=]*" + ?\\ + '(?\") + default-mime-charset + must-unfold)) + +(defun eword-decode-string (string &optional must-unfold default-mime-charset) "Decode MIME encoded-words in STRING. STRING is unfolded before decoding. @@ -384,7 +322,7 @@ decode the charset included in it, it is not decoded. If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even if there are in decoded encoded-words (generated by bad manner MUA such as a version of Net$cape)." - (eword-decode-unstructured-string + (eword-decode-unstructured (std11-unfold-string string) must-unfold)) @@ -392,7 +330,8 @@ such as a version of Net$cape)." ;;; @ for region ;;; -(defun eword-decode-region (start end &optional unfolding must-unfold) +(defun eword-decode-region (start end &optional unfolding must-unfold + default-mime-charset) "Decode MIME encoded-words in region between START and END. If UNFOLDING is not nil, it unfolds before decoding. @@ -407,8 +346,8 @@ such as a version of Net$cape)." (if unfolding (eword-decode-unfold) ) - (let ((str (eword-decode-unstructured-string - (buffer-substring (point-min) (point-max)) + (let ((str (eword-decode-unstructured + (buffer-substring (point-min) (point-max)) must-unfold))) (delete-region (point-min) (point-max)) (insert str))))) @@ -442,44 +381,43 @@ Otherwise it decodes non-ASCII bit patterns as the default-mime-charset. If SEPARATOR is not nil, it is used as header separator." (interactive "*") + (if (and code-conversion + (not (mime-charset-to-coding-system code-conversion))) + (setq code-conversion default-mime-charset)) (save-excursion (save-restriction (std11-narrow-to-header separator) - (let ((default-mime-charset - (if code-conversion - (if (mime-charset-to-coding-system code-conversion) - code-conversion - default-mime-charset)))) - (if default-mime-charset - (let (beg p end field-name len) - (goto-char (point-min)) - (while (re-search-forward std11-field-head-regexp nil t) - (setq beg (match-beginning 0) - p (match-end 0) - field-name (buffer-substring beg (1- p)) - len (string-width field-name) - field-name (intern (downcase field-name)) - end (std11-field-end)) - (cond ((memq field-name eword-decode-ignored-field-list) - ;; Don't decode - ) - ((memq field-name eword-decode-structured-field-list) - ;; Decode as structured field - (let ((body (buffer-substring p end))) - (delete-region p end) - (insert (eword-decode-and-fold-structured-field - body (1+ len))) - )) - (t - ;; Decode as unstructured field - (save-restriction - (narrow-to-region beg (1+ end)) - (goto-char p) - (eword-decode-region beg (point-max) 'unfold) - (goto-char (point-max)) - ))))) - (eword-decode-region (point-min) (point-max) t) - ))))) + (if code-conversion + (let (beg p end field-name len) + (goto-char (point-min)) + (while (re-search-forward std11-field-head-regexp nil t) + (setq beg (match-beginning 0) + p (match-end 0) + field-name (buffer-substring beg (1- p)) + len (string-width field-name) + field-name (intern (downcase field-name)) + end (std11-field-end)) + (cond ((memq field-name eword-decode-ignored-field-list) + ;; Don't decode + ) + ((memq field-name eword-decode-structured-field-list) + ;; Decode as structured field + (let ((body (buffer-substring p end))) + (delete-region p end) + (insert (eword-decode-and-fold-structured-field + body (1+ len))) + )) + (t + ;; Decode as unstructured field + (save-restriction + (narrow-to-region beg (1+ end)) + (goto-char p) + (eword-decode-region beg (point-max) 'unfold nil + code-conversion) + (goto-char (point-max)) + ))))) + (eword-decode-region (point-min) (point-max) t nil code-conversion) + )))) (defun eword-decode-unfold () (goto-char (point-min)) @@ -637,7 +575,7 @@ be the result." (setq p (std11-check-enclosure string ?\( ?\) t p))) (setq p (or p len)) (cons (cons 'comment - (eword-decode-comment-string (substring string 0 p))) + (eword-decode-comment (substring string 0 p))) (substring string p))) nil))) @@ -815,7 +753,7 @@ decode the charset included in it, it is not decoded. If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even if there are in decoded encoded-words (generated by bad manner MUA such as a version of Net$cape)." - (eword-decode-string string must-unfold)) + (eword-decode-string string must-unfold default-mime-charset)) (defun eword-extract-address-components (string) "Extract full name and canonical address from STRING. diff --git a/mime-def.el b/mime-def.el index c513993..873ece5 100644 --- a/mime-def.el +++ b/mime-def.el @@ -25,7 +25,7 @@ ;;; Code: (defconst mime-spadework-module-version-string - "FLIM-FLAM 1.0.0 - \"蘇芳\" 2.5R3.0/3.0") + "FLIM-FLAM 1.0.1 - \"紅梅\" 2.5R7.0/8.0") (require 'custom) -- 1.7.10.4