From 9b45a561eafdbc82b9a23a85fc33f1ad60749498 Mon Sep 17 00:00:00 2001 From: akr Date: Tue, 14 Apr 1998 01:14:03 +0000 Subject: [PATCH] eword-decode.el: Copied from AKEMI branch of SEMI. --- ChangeLog | 4 + eword-decode.el | 406 ++++++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 317 insertions(+), 93 deletions(-) diff --git a/ChangeLog b/ChangeLog index d7e5507..82ae004 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +1998-04-13 Tanaka Akira + + * eword-decode.el: Copied from AKEMI branch of SEMI. + 1998-04-13 MORIOKA Tomohiko * FLIM: Version 1.0.0 was released. diff --git a/eword-decode.el b/eword-decode.el index 4365cd6..c5d17c5 100644 --- a/eword-decode.el +++ b/eword-decode.el @@ -58,6 +58,52 @@ eword-encoded-text-regexp "\\)" (regexp-quote "?="))) +(defconst eword-after-encoded-word-regexp "\\([ \t]\\|$\\)") + +(defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+") +(defconst eword-encoded-word-in-phrase-regexp + (concat (regexp-quote "=?") + "\\(" + mime-charset-regexp + "\\)" + (regexp-quote "?") + "\\(B\\|Q\\)" + (regexp-quote "?") + "\\(" + eword-encoded-text-in-phrase-regexp + "\\)" + (regexp-quote "?="))) +(defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t(]\\|$\\)") + +(defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+") +(defconst eword-encoded-word-in-comment-regexp + (concat (regexp-quote "=?") + "\\(" + mime-charset-regexp + "\\)" + (regexp-quote "?") + "\\(B\\|Q\\)" + (regexp-quote "?") + "\\(" + eword-encoded-text-in-comment-regexp + "\\)" + (regexp-quote "?="))) +(defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)") + +(defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+") +(defconst eword-encoded-word-in-quoted-string-regexp + (concat (regexp-quote "=?") + "\\(" + mime-charset-regexp + "\\)" + (regexp-quote "?") + "\\(B\\|Q\\)" + (regexp-quote "?") + "\\(" + eword-encoded-text-in-quoted-string-regexp + "\\)" + (regexp-quote "?="))) +(defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)") ;;; @@ Base64 @@ -100,6 +146,230 @@ ;;; @ for string ;;; +(defvar eword-decode-sticked-encoded-word nil + "*If non-nil, decode encoded-words sticked on atoms, +other encoded-words, etc. +however this behaviour violates RFC2047.") + +(defvar eword-decode-quoted-encoded-word nil + "*If non-nil, decode encoded-words in quoted-string +however this behaviour violates RFC2047.") + +(defun eword-decode-first-encoded-words (string + eword-regexp + after-regexp + &optional must-unfold) + "Decode MIME encoded-words in beginning of STRING. + +EWORD-REGEXP is the regexp that matches a encoded-word. +Usual value is eword-encoded-word-regexp, +eword-encoded-text-in-phrase-regexp, +eword-encoded-word-in-comment-regexp or +eword-encoded-word-in-quoted-string-regexp. + +AFTER-REGEXP is the regexp that matches a after encoded-word. +Usual value is eword-after-encoded-word-regexp, +eword-after-encoded-text-in-phrase-regexp, +eword-after-encoded-word-in-comment-regexp or +eword-after-encoded-word-in-quoted-string-regexp. + +If beginning of STRING matches EWORD-REGEXP and AFTER-REGEXP, +returns a cons cell of decoded string(sequence of characters) and +the rest(sequence of octets). + +If beginning of STRING does not matches EWORD-REGEXP and AFTER-REGEXP, +returns nil. + +If an encoded-word is broken or your emacs implementation can not +decode the charset included in it, it is returned in decoded part +as encoded-word form. + +If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even +if there are in decoded encoded-words (generated by bad manner MUA +such as a version of Net$cape)." + (if eword-decode-sticked-encoded-word (setq after-regexp "")) + (let ((between-ewords-regexp + (if eword-decode-sticked-encoded-word + "\\(\n?[ \t]\\)*" + "\\(\n?[ \t]\\)+")) + (src string) ; sequence of octets. + (dst "")) ; sequence of characters. + (if (string-match + (concat "\\`\\(" eword-regexp "\\)" after-regexp) src) + (let* (p + (q (match-end 1)) + (ew (substring src 0 q)) + (dw (eword-decode-encoded-word ew must-unfold))) + (setq dst (concat dst dw) + src (substring src q)) + (if (not (string= ew dw)) + (progn + (while + (and + (string-match + (concat "\\`\\(" between-ewords-regexp "\\)" + "\\(" eword-regexp "\\)" + after-regexp) + src) + (progn + (setq p (match-end 1) + q (match-end 3) + ew (substring src p q) + dw (eword-decode-encoded-word ew must-unfold)) + (if (string= ew dw) + (progn + (setq dst (concat dst (substring src 0 q)) + src (substring src q)) + nil) + t))) + (setq dst (concat dst dw) + src (substring src q))))) + (cons dst src)) + nil))) + +(defun eword-decode-comment-string (string &optional must-unfold) + (let ((src string) + (buf "") + (dst "") + (flag-ew t)) + (while (< 0 (length src)) + (let ((ch (aref src 0)) + (decoded (and + flag-ew + (eword-decode-first-encoded-words src + eword-encoded-word-in-comment-regexp + eword-after-encoded-word-in-comment-regexp + must-unfold)))) + (if (and (not (string= buf "")) + (or decoded (eq ch ?\() (eq ch ?\)))) + (setq dst (concat dst + (std11-wrap-as-quoted-pairs + (decode-mime-charset-string buf + default-mime-charset) + '(?\( ?\)))) + buf "")) + (cond + (decoded + (setq dst (concat dst + (std11-wrap-as-quoted-pairs + (car decoded) + '(?( ?)))) + src (cdr decoded))) + ((or (eq ch ?\() (eq ch ?\))) + (setq dst (concat dst (list ch)) + src (substring src 1) + flag-ew t)) + ((eq ch ?\\) + (setq buf (concat buf (list (aref src 1))) + src (substring src 2) + flag-ew t)) + ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n)) + (setq buf (concat buf (list ch)) + src (substring src 1) + flag-ew t)) + ((string-match "\\`=?[^ \t\n()\\\\=]*" src) + (setq buf (concat buf (substring src 0 (match-end 0))) + src (substring src (match-end 0)) + flag-ew eword-decode-sticked-encoded-word)) + (t (error "something wrong"))))) + (if (not (string= buf "")) + (setq dst (concat dst + (std11-wrap-as-quoted-pairs + (decode-mime-charset-string buf + default-mime-charset) + '(?\( ?\)))))) + dst)) + +(defun eword-decode-quoted-string (string &optional must-unfold) + (let ((src string) + (buf "") + (dst "") + (flag-ew t)) + (while (< 0 (length src)) + (let ((ch (aref src 0)) + (decoded (and + eword-decode-quoted-encoded-word + flag-ew + (eword-decode-first-encoded-words src + eword-encoded-word-in-quoted-string-regexp + eword-after-encoded-word-in-quoted-string-regexp + must-unfold)))) + (if (and (not (string= buf "")) + (or decoded (eq ch ?\"))) + (setq dst (concat dst + (std11-wrap-as-quoted-pairs + (decode-mime-charset-string buf + default-mime-charset) + '(?\"))) + buf "")) + (cond + (decoded + (setq dst (concat dst + (std11-wrap-as-quoted-pairs + (car decoded) + '(?\"))) + src (cdr decoded))) + ((or (eq ch ?\")) + (setq dst (concat dst (list ch)) + src (substring src 1) + flag-ew t)) + ((eq ch ?\\) + (setq buf (concat buf (list (aref src 1))) + src (substring src 2) + flag-ew t)) + ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n)) + (setq buf (concat buf (list ch)) + src (substring src 1) + flag-ew t)) + ((string-match "\\`=?[^ \t\n\"\\\\=]*" src) + (setq buf (concat buf (substring src 0 (match-end 0))) + src (substring src (match-end 0)) + flag-ew eword-decode-sticked-encoded-word)) + (t (error "something wrong"))))) + (if (not (string= buf "")) + (setq dst (concat dst + (std11-wrap-as-quoted-pairs + (decode-mime-charset-string buf + default-mime-charset) + '(?\"))))) + dst)) + +(defun eword-decode-unstructured-string (string &optional must-unfold) + (let ((src string) + (buf "") + (dst "") + (flag-ew t)) + (while (< 0 (length src)) + (let ((ch (aref src 0)) + (decoded (and flag-ew (eword-decode-first-encoded-words src + eword-encoded-word-regexp + eword-after-encoded-word-regexp + must-unfold)))) + (if (and (not (string= buf "")) + decoded) + (setq dst (concat dst + (decode-mime-charset-string buf + default-mime-charset)) + buf "")) + (cond + (decoded + (setq dst (concat dst (car decoded)) + src (cdr decoded))) + ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n)) + (setq buf (concat buf (list ch)) + src (substring src 1) + flag-ew t)) + ((string-match "\\`=?[^ \t\n=]*" src) + (setq buf (concat buf (substring src 0 (match-end 0))) + src (substring src (match-end 0)) + flag-ew eword-decode-sticked-encoded-word)) + (t (error "something wrong"))))) + (if (not (string= buf "")) + (setq dst (concat dst + (decode-mime-charset-string buf + default-mime-charset)))) + dst)) + (defun eword-decode-string (string &optional must-unfold) "Decode MIME encoded-words in STRING. @@ -111,31 +381,9 @@ decode the charset included in it, it is not decoded. If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even if there are in decoded encoded-words (generated by bad manner MUA such as a version of Net$cape)." - (setq string (std11-unfold-string string)) - (let ((dest "")(ew nil) - beg end) - (while (and (string-match eword-encoded-word-regexp string) - (setq beg (match-beginning 0) - end (match-end 0)) - ) - (if (> beg 0) - (if (not - (and (eq ew t) - (string-match "^[ \t]+$" (substring string 0 beg)) - )) - (setq dest (concat dest (substring string 0 beg))) - ) - ) - (setq dest - (concat dest - (eword-decode-encoded-word - (substring string beg end) must-unfold) - )) - (setq string (substring string end)) - (setq ew t) - ) - (concat dest string) - )) + (eword-decode-unstructured-string + (std11-unfold-string string) + must-unfold)) ;;; @ for region @@ -156,22 +404,11 @@ such as a version of Net$cape)." (if unfolding (eword-decode-unfold) ) - (goto-char (point-min)) - (while (re-search-forward (concat "\\(" eword-encoded-word-regexp "\\)" - "\\(\n?[ \t]\\)+" - "\\(" eword-encoded-word-regexp "\\)") - nil t) - (replace-match "\\1\\6") - (goto-char (point-min)) - ) - (while (re-search-forward eword-encoded-word-regexp nil t) - (insert (eword-decode-encoded-word - (prog1 - (buffer-substring (match-beginning 0) (match-end 0)) - (delete-region (match-beginning 0) (match-end 0)) - ) must-unfold)) - ) - ))) + (let ((str (eword-decode-unstructured-string + (buffer-substring (point-min) (point-max)) + must-unfold))) + (delete-region (point-min) (point-max)) + (insert str))))) ;;; @ for message header @@ -235,11 +472,10 @@ If SEPARATOR is not nil, it is used as header separator." ;; Decode as unstructured field (save-restriction (narrow-to-region beg (1+ end)) - (decode-mime-charset-region p end default-charset) (goto-char p) - (if (re-search-forward eword-encoded-word-regexp - nil t) - (eword-decode-region beg (point-max) 'unfold)) + (let ((default-mime-charset default-charset)) + (eword-decode-region beg (point-max) 'unfold)) + (goto-char (point-max)) ))))) (eword-decode-region (point-min) (point-max) t) ))))) @@ -385,9 +621,7 @@ be the result." (let ((p (std11-check-enclosure string ?\" ?\"))) (if p (cons (cons 'quoted-string - (decode-mime-charset-string - (std11-strip-quoted-pair (substring string 1 (1- p))) - default-mime-charset)) + (eword-decode-quoted-string (substring string 0 p))) (substring string p)) ))) @@ -395,16 +629,16 @@ be the result." (std11-analyze-domain-literal string)) (defun eword-analyze-comment (string &optional must-unfold) - (let ((p (std11-check-enclosure string ?\( ?\) t))) - (if p - (cons (cons 'comment - (eword-decode-string - (decode-mime-charset-string - (std11-strip-quoted-pair (substring string 1 (1- p))) - default-mime-charset) - must-unfold)) - (substring string p)) - ))) + (let ((len (length string))) + (if (and (< 0 len) (eq (aref string 0) ?\()) + (let ((p 0)) + (while (and p (< p len) (eq (aref string p) ?\()) + (setq p (std11-check-enclosure string ?\( ?\) t p))) + (setq p (or p len)) + (cons (cons 'comment + (eword-decode-comment-string (substring string 0 p))) + (substring string p))) + nil))) (defun eword-analyze-spaces (string &optional must-unfold) (std11-analyze-spaces string)) @@ -413,30 +647,23 @@ be the result." (std11-analyze-special string)) (defun eword-analyze-encoded-word (string &optional must-unfold) - (if (eq (string-match eword-encoded-word-regexp string) 0) - (let ((end (match-end 0)) - (dest (eword-decode-encoded-word (match-string 0 string) - must-unfold)) - ) - (setq string (substring string end)) - (while (eq (string-match `,(concat "[ \t\n]*\\(" - eword-encoded-word-regexp - "\\)") - string) - 0) - (setq end (match-end 0)) - (setq dest - (concat dest - (eword-decode-encoded-word (match-string 1 string) - must-unfold)) - string (substring string end)) - ) - (cons (cons 'atom dest) string) - ))) + (let ((decoded (eword-decode-first-encoded-words + string + eword-encoded-word-in-phrase-regexp + eword-after-encoded-word-in-phrase-regexp + must-unfold))) + (if decoded + (cons (cons 'atom (car decoded)) (cdr decoded))))) (defun eword-analyze-atom (string &optional must-unfold) - (if (string-match std11-atom-regexp string) + (if (let ((enable-multibyte-characters nil)) + (string-match std11-atom-regexp string)) (let ((end (match-end 0))) + (if (and eword-decode-sticked-encoded-word + (string-match eword-encoded-word-in-phrase-regexp + (substring string 0 end)) + (< 0 (match-beginning 0))) + (setq end (match-beginning 0))) (cons (cons 'atom (decode-mime-charset-string (substring string 0 end) default-mime-charset)) @@ -467,12 +694,13 @@ It is like std11-lexical-analyze, but it decodes non us-ascii characters encoded as encoded-words or invalid \"raw\" format. \"Raw\" non us-ascii characters are regarded as variable `default-mime-charset'." - (let ((key (copy-sequence string)) - ret) - (set-text-properties 0 (length key) nil key) + (let* ((str (copy-sequence string)) + (key (cons str (cons default-mime-charset must-unfold))) + ret) + (set-text-properties 0 (length str) nil str) (if (setq ret (assoc key eword-lexical-analyze-cache)) (cdr ret) - (setq ret (eword-lexical-analyze-internal key must-unfold)) + (setq ret (eword-lexical-analyze-internal str must-unfold)) (setq eword-lexical-analyze-cache (cons (cons key ret) (last eword-lexical-analyze-cache @@ -480,13 +708,7 @@ characters encoded as encoded-words or invalid \"raw\" format. ret))) (defun eword-decode-token (token) - (let ((type (car token)) - (value (cdr token))) - (cond ((eq type 'quoted-string) - (std11-wrap-as-quoted-string value)) - ((eq type 'comment) - (concat "(" (std11-wrap-as-quoted-pairs value '(?( ?))) ")")) - (t value)))) + (cdr token)) (defun eword-decode-and-fold-structured-field (string start-column &optional max-column must-unfold) @@ -592,9 +814,7 @@ decode the charset included in it, it is not decoded. If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even if there are in decoded encoded-words (generated by bad manner MUA such as a version of Net$cape)." - (eword-decode-string - (decode-mime-charset-string string default-mime-charset) - must-unfold)) + (eword-decode-string string must-unfold)) (defun eword-extract-address-components (string) "Extract full name and canonical address from STRING. -- 1.7.10.4