From 5a3f15341b335283f27ba514dafc529c72460cb3 Mon Sep 17 00:00:00 2001 From: akr Date: Sun, 22 Mar 1998 19:37:33 +0000 Subject: [PATCH] * eword-decode.el (eword-after-encoded-word-regexp): New constant. eword-encoded-text-in-phrase-regexp: New constant. eword-encoded-word-in-phrase-regexp: New constant. eword-after-encoded-word-in-phrase-regexp: New constant. eword-encoded-text-in-comment-regexp: New constant. eword-encoded-word-in-comment-regexp: New constant. eword-after-encoded-word-in-comment-regexp: New constant. eword-encoded-text-in-quoted-string-regexp: New constant. eword-encoded-word-in-quoted-string-regexp: New constant. eword-after-encoded-word-in-quoted-string-regexp: New constant. eword-decode-sticked-encoded-word: Update DOC-STRING. eword-decode-quoted-encoded-word: Update DOC-STRING. eword-decode-first-encoded-words: Add argument eword-regexp. eword-decode-comment-string: Use `eword-encoded-word-in-comment-regexp' and `eword-after-encoded-word-in-comment-regexp'. eword-decode-quoted-string : Use `eword-encoded-word-in-quoted-string-regexp' and `eword-after-encoded-word-in-quoted-string-regexp'. eword-decode-unstructured-string: Use `eword-encoded-word-regexp' and `eword-after-encoded-word-regexp'. eword-analyze-encoded-word: Use `eword-encoded-word-in-phrase-regexp' and `eword-after-encoded-word-in-phrase-regexp' eword-lexical-analyze: Add `default-mime-charset' and `must-unfold' to key of cache. --- ChangeLog | 27 +++++++++ eword-decode.el | 166 +++++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 169 insertions(+), 24 deletions(-) diff --git a/ChangeLog b/ChangeLog index 431fce0..30c0a2c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,30 @@ +1998-03-22 Tanaka Akira + + * eword-decode.el (eword-after-encoded-word-regexp): New constant. + eword-encoded-text-in-phrase-regexp: New constant. + eword-encoded-word-in-phrase-regexp: New constant. + eword-after-encoded-word-in-phrase-regexp: New constant. 
+ eword-encoded-text-in-comment-regexp: New constant. + eword-encoded-word-in-comment-regexp: New constant. + eword-after-encoded-word-in-comment-regexp: New constant. + eword-encoded-text-in-quoted-string-regexp: New constant. + eword-encoded-word-in-quoted-string-regexp: New constant. + eword-after-encoded-word-in-quoted-string-regexp: New constant. + eword-decode-sticked-encoded-word: Update DOC-STRING. + eword-decode-quoted-encoded-word: Update DOC-STRING. + eword-decode-first-encoded-words: Add argument eword-regexp. + eword-decode-comment-string: Use `eword-encoded-word-in-comment-regexp' + and `eword-after-encoded-word-in-comment-regexp'. + eword-decode-quoted-string: Use + `eword-encoded-word-in-quoted-string-regexp' and + `eword-after-encoded-word-in-quoted-string-regexp'. + eword-decode-unstructured-string: Use `eword-encoded-word-regexp' and + `eword-after-encoded-word-regexp'. + eword-analyze-encoded-word: Use `eword-encoded-word-in-phrase-regexp' + and `eword-after-encoded-word-in-phrase-regexp'. + eword-lexical-analyze: Add `default-mime-charset' and `must-unfold' to + key of cache. 
+ 1998-03-21 Shuhei KOBAYASHI * eword-decode.el (eword-lexical-analyze-internal): Fixed return diff --git a/eword-decode.el b/eword-decode.el index 83e2842..3b52aa4 100644 --- a/eword-decode.el +++ b/eword-decode.el @@ -59,6 +59,52 @@ eword-encoded-text-regexp "\\)" (regexp-quote "?="))) +(defconst eword-after-encoded-word-regexp "\\([ \t]\\|$\\)") + +(defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+") +(defconst eword-encoded-word-in-phrase-regexp + (concat (regexp-quote "=?") + "\\(" + mime-charset-regexp + "\\)" + (regexp-quote "?") + "\\(B\\|Q\\)" + (regexp-quote "?") + "\\(" + eword-encoded-text-in-phrase-regexp + "\\)" + (regexp-quote "?="))) +(defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t(]\\|$\\)") + +(defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+") +(defconst eword-encoded-word-in-comment-regexp + (concat (regexp-quote "=?") + "\\(" + mime-charset-regexp + "\\)" + (regexp-quote "?") + "\\(B\\|Q\\)" + (regexp-quote "?") + "\\(" + eword-encoded-text-in-comment-regexp + "\\)" + (regexp-quote "?="))) +(defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)") + +(defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+") +(defconst eword-encoded-word-in-quoted-string-regexp + (concat (regexp-quote "=?") + "\\(" + mime-charset-regexp + "\\)" + (regexp-quote "?") + "\\(B\\|Q\\)" + (regexp-quote "?") + "\\(" + eword-encoded-text-in-quoted-string-regexp + "\\)" + (regexp-quote "?="))) +(defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)") ;;; @@ Base64 @@ -102,19 +148,55 @@ ;;; (defvar eword-decode-sticked-encoded-word nil - "*If non-nil, decode encoded-words sticked on atoms, other encoded-words, etc. + "*If non-nil, decode encoded-words sticked on atoms, +other encoded-words, etc. 
however this behaviour violates RFC2047.") (defvar eword-decode-quoted-encoded-word nil - "*If non-nil, decode encoded-words in quoted-string + "*If non-nil, decode encoded-words in quoted-string however this behaviour violates RFC2047.") -(defun eword-decode-first-encoded-words (string after-regexp &optional must-unfold) +(defun eword-decode-first-encoded-words (string + eword-regexp + after-regexp + &optional must-unfold) + "Decode MIME encoded-words in beginning of STRING. + +EWORD-REGEXP is the regexp that matches an encoded-word. +Usual value is eword-encoded-word-regexp, +eword-encoded-word-in-phrase-regexp, +eword-encoded-word-in-comment-regexp or +eword-encoded-word-in-quoted-string-regexp. + +AFTER-REGEXP is the regexp that matches the text after an encoded-word. +Usual value is eword-after-encoded-word-regexp, +eword-after-encoded-word-in-phrase-regexp, +eword-after-encoded-word-in-comment-regexp or +eword-after-encoded-word-in-quoted-string-regexp. + +If beginning of STRING matches EWORD-REGEXP and AFTER-REGEXP, +returns a cons cell of decoded string(sequence of characters) and +the rest(sequence of octets). + +If beginning of STRING does not match EWORD-REGEXP and AFTER-REGEXP, +returns nil. + +If an encoded-word is broken or your emacs implementation can not +decode the charset included in it, it is returned in decoded part +as encoded-word form. + +If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even +if there are in decoded encoded-words (generated by bad manner MUA +such as a version of Net$cape)." (if eword-decode-sticked-encoded-word (setq after-regexp "")) - (let ((between-ewords-regexp (if eword-decode-sticked-encoded-word "\\(\n?[ \t]\\)*" "\\(\n?[ \t]\\)+")) + (let ((between-ewords-regexp + (if eword-decode-sticked-encoded-word + "\\(\n?[ \t]\\)*" + "\\(\n?[ \t]\\)+")) (src string) ; sequence of octets. (dst "")) ; sequence of characters. 
- (if (string-match (concat "\\`\\(" eword-encoded-word-regexp "\\)" after-regexp) src) + (if (string-match + (concat "\\`\\(" eword-regexp "\\)" after-regexp) src) (let* (p (q (match-end 1)) (ew (substring src 0 q)) @@ -126,7 +208,9 @@ however this behaviour violates RFC2047.") (while (and (string-match - (concat "\\`\\(" between-ewords-regexp "\\)\\(" eword-encoded-word-regexp "\\)" after-regexp) + (concat "\\`\\(" between-ewords-regexp "\\)" + "\\(" eword-regexp "\\)" + after-regexp) src) (progn (setq p (match-end 1) @@ -154,14 +238,23 @@ however this behaviour violates RFC2047.") (decoded (and flag-ew (eword-decode-first-encoded-words src - "\\([ \t()\\\\]\\|$\\)" must-unfold)))) + eword-encoded-word-in-comment-regexp + eword-after-encoded-word-in-comment-regexp + must-unfold)))) (if (and (not (string= buf "")) (or decoded (eq ch ?\() (eq ch ?\)))) - (setq dst (concat dst (std11-wrap-as-quoted-pairs (decode-mime-charset-string buf default-mime-charset) '(?\( ?\)))) + (setq dst (concat dst + (std11-wrap-as-quoted-pairs + (decode-mime-charset-string buf + default-mime-charset) + '(?\( ?\)))) buf "")) (cond (decoded - (setq dst (concat dst (std11-wrap-as-quoted-pairs (car decoded) '(?( ?)))) + (setq dst (concat dst + (std11-wrap-as-quoted-pairs + (car decoded) + '(?( ?)))) src (cdr decoded))) ((or (eq ch ?\() (eq ch ?\))) (setq dst (concat dst (list ch)) @@ -181,7 +274,11 @@ however this behaviour violates RFC2047.") flag-ew eword-decode-sticked-encoded-word)) (t (error "something wrong"))))) (if (not (string= buf "")) - (setq dst (concat dst (std11-wrap-as-quoted-pairs (decode-mime-charset-string buf default-mime-charset) '(?\( ?\)))))) + (setq dst (concat dst + (std11-wrap-as-quoted-pairs + (decode-mime-charset-string buf + default-mime-charset) + '(?\( ?\)))))) dst)) (defun eword-decode-quoted-string (string &optional must-unfold) @@ -195,18 +292,23 @@ however this behaviour violates RFC2047.") eword-decode-quoted-encoded-word flag-ew 
(eword-decode-first-encoded-words src - "\\([ \t\"\\\\]\\|$\\)" must-unfold)))) + eword-encoded-word-in-quoted-string-regexp + eword-after-encoded-word-in-quoted-string-regexp + must-unfold)))) (if (and (not (string= buf "")) (or decoded (eq ch ?\"))) (setq dst (concat dst (std11-wrap-as-quoted-pairs - (decode-mime-charset-string buf default-mime-charset) + (decode-mime-charset-string buf + default-mime-charset) '(?\"))) buf "")) (cond (decoded (setq dst (concat dst - (std11-wrap-as-quoted-pairs (car decoded) '(?\"))) + (std11-wrap-as-quoted-pairs + (car decoded) + '(?\"))) src (cdr decoded))) ((or (eq ch ?\")) (setq dst (concat dst (list ch)) @@ -228,7 +330,8 @@ however this behaviour violates RFC2047.") (if (not (string= buf "")) (setq dst (concat dst (std11-wrap-as-quoted-pairs - (decode-mime-charset-string buf default-mime-charset) + (decode-mime-charset-string buf + default-mime-charset) '(?\"))))) dst)) @@ -239,10 +342,15 @@ however this behaviour violates RFC2047.") (flag-ew t)) (while (< 0 (length src)) (let ((ch (aref src 0)) - (decoded (and flag-ew (eword-decode-first-encoded-words src "\\([ \t]\\|$\\)" must-unfold)))) + (decoded (and flag-ew (eword-decode-first-encoded-words src + eword-encoded-word-regexp + eword-after-encoded-word-regexp + must-unfold)))) (if (and (not (string= buf "")) decoded) - (setq dst (concat dst (decode-mime-charset-string buf default-mime-charset)) + (setq dst (concat dst + (decode-mime-charset-string buf + default-mime-charset)) buf "")) (cond (decoded @@ -258,7 +366,9 @@ however this behaviour violates RFC2047.") flag-ew eword-decode-sticked-encoded-word)) (t (error "something wrong"))))) (if (not (string= buf "")) - (setq dst (concat dst (decode-mime-charset-string buf default-mime-charset)))) + (setq dst (concat dst + (decode-mime-charset-string buf + default-mime-charset)))) dst)) (defun eword-decode-string (string &optional must-unfold) @@ -272,7 +382,9 @@ decode the charset included in it, it is not decoded. 
If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even if there are in decoded encoded-words (generated by bad manner MUA such as a version of Net$cape)." - (eword-decode-unstructured-string (std11-unfold-string string) must-unfold)) + (eword-decode-unstructured-string + (std11-unfold-string string) + must-unfold)) ;;; @ for region @@ -293,7 +405,9 @@ such as a version of Net$cape)." (if unfolding (eword-decode-unfold) ) - (let ((str (eword-decode-unstructured-string (buffer-substring (point-min) (point-max)) must-unfold))) + (let ((str (eword-decode-unstructured-string + (buffer-substring (point-min) (point-max)) + must-unfold))) (delete-region (point-min) (point-max)) (insert str))))) @@ -529,7 +643,10 @@ be the result." (std11-analyze-special string)) (defun eword-analyze-encoded-word (string &optional must-unfold) - (let ((decoded (eword-decode-first-encoded-words string "\\([ \t(]\\|$\\)" must-unfold))) + (let ((decoded (eword-decode-first-encoded-words string + eword-encoded-word-in-phrase-regexp + eword-after-encoded-word-in-phrase-regexp + must-unfold))) (if decoded (cons (cons 'atom (car decoded)) (cdr decoded))))) @@ -566,12 +683,13 @@ It is like std11-lexical-analyze, but it decodes non us-ascii characters encoded as encoded-words or invalid \"raw\" format. \"Raw\" non us-ascii characters are regarded as variable `default-mime-charset'." - (let ((key (copy-sequence string)) - ret) - (set-text-properties 0 (length key) nil key) + (let* ((str (copy-sequence string)) + (key (cons str (cons default-mime-charset must-unfold))) + ret) + (set-text-properties 0 (length str) nil str) (if (setq ret (assoc key eword-lexical-analyze-cache)) (cdr ret) - (setq ret (eword-lexical-analyze-internal key must-unfold)) + (setq ret (eword-lexical-analyze-internal str must-unfold)) (setq eword-lexical-analyze-cache (cons (cons key ret) (last eword-lexical-analyze-cache -- 1.7.10.4