(require 'mel)
(require 'mime-def)
+(require 'ew-dec)
+
(defgroup eword-decode nil
"Encoded-word decoding"
:group 'mime)
+;;; TEST -- debugging aid: memo of the most recent call arguments
+
+(defvar rotate-memo nil) ; debug switch: the rotate-memo macro records nothing while this is nil
+(defmacro rotate-memo (var val) ; push VAL onto the list stored in the symbol VAR (VAR itself is not evaluated)
+ `(when rotate-memo ; VAL is evaluated only when the switch is non-nil
+ (unless (boundp ',var) (setq ,var ())) ; create VAR as an empty list on first use
+ (setq ,var (cons ,val ,var)) ; newest entry goes to the front
+ (let ((tmp (last ,var (- (length ,var) 100)))) ; tail that begins after the first 100 entries; nil while the list has 100 or fewer (per the semantics of last)
+ (when tmp (setcdr tmp nil))) ; cut right after that cons, so at most 101 entries survive
+ ,var)) ; the expansion yields the updated list, or nil when the switch is off
;;; @ variables
;;;
safe-regexp
escape ; ?\\ or nil.
delimiters ; list of chars.
+ chars-must-be-quote
must-unfold
code-conversion)
(if (and code-conversion
(setq dst (concat dst
(std11-wrap-as-quoted-pairs
(decode-mime-charset-string buf code-conversion)
- delimiters))
+ chars-must-be-quote))
buf ""))
(cond
(decoded
(setq dst (concat dst
(std11-wrap-as-quoted-pairs
(car decoded)
- delimiters))
+ chars-must-be-quote))
src (cdr decoded)))
((memq ch delimiters)
(setq dst (concat dst (list ch))
(setq dst (concat dst
(std11-wrap-as-quoted-pairs
(decode-mime-charset-string buf code-conversion)
- delimiters))))
+ chars-must-be-quote))))
dst))
"[^ \t\n=]*"
nil
nil
+ nil
must-unfold
code-conversion))
"[^ \t\n()\\\\=]*"
?\\
'(?\( ?\))
+ '(?\( ?\) ?\\ ?\r ?\n)
must-unfold
code-conversion))
"[^ \t\n\"\\\\=]*"
?\\
'(?\")
+ '(?\" ?\\ ?\r ?\n)
must-unfold
code-conversion))
Otherwise it decodes non-ASCII bit patterns as the
default-mime-charset."
(interactive "*r")
+ (rotate-memo args-eword-decode-region
+ (list start end (buffer-substring start end) unfolding must-unfold code-conversion))
(save-excursion
(save-restriction
(narrow-to-region start end)
default-mime-charset.
If SEPARATOR is not nil, it is used as header separator."
(interactive "*")
+ (rotate-memo args-eword-decode-header (list code-conversion))
+ (unless code-conversion
+ (message "eword-decode-header is called with no code-conversion")
+ (sit-for 2))
(if (and code-conversion
(not (mime-charset-to-coding-system code-conversion)))
(setq code-conversion default-mime-charset))
(save-excursion
(save-restriction
(std11-narrow-to-header separator)
+ (rotate-memo args-h-eword-decode-header (buffer-substring (point-min) (point-max)))
(if code-conversion
- (let (beg p end field-name len)
+ (let (beg p end field-name field-body decoded)
(goto-char (point-min))
(while (re-search-forward std11-field-head-regexp nil t)
(setq beg (match-beginning 0)
p (match-end 0)
field-name (buffer-substring beg (1- p))
- len (string-width field-name)
- field-name (intern (capitalize field-name))
- end (std11-field-end))
- (cond ((memq field-name eword-decode-ignored-field-list)
- ;; Don't decode
- )
- ((memq field-name eword-decode-structured-field-list)
- ;; Decode as structured field
- (let ((body (buffer-substring p end)))
- (delete-region p end)
- (insert (eword-decode-and-fold-structured-field
- body (1+ len)))
- ))
- (t
- ;; Decode as unstructured field
- (save-restriction
- (narrow-to-region beg (1+ end))
- (goto-char p)
- (eword-decode-region beg (point-max) 'unfold nil
- code-conversion)
- (goto-char (point-max))
- )))))
+ end (std11-field-end)
+ field-body (ew-lf-crlf-to-crlf
+ (buffer-substring p end))
+ decoded (ew-decode-field
+ field-name field-body))
+ (unless (equal field-body decoded)
+ (setq decoded (ew-crlf-refold
+ decoded
+ (1+ (string-width field-name))
+ fill-column)))
+ (delete-region p end)
+ (insert (ew-crlf-to-lf decoded))
+ (add-text-properties beg (min (1+ (point)) (point-max))
+ (list 'original-field-name field-name
+ 'original-field-body field-body))
+ ))
(eword-decode-region (point-min) (point-max) t nil nil)
))))
(defun eword-analyze-quoted-string (string &optional must-unfold)
(let ((p (std11-check-enclosure string ?\" ?\")))
(if p
- (cons (cons 'quoted-string
- (if eword-decode-quoted-encoded-word
- (eword-decode-quoted-string
- (substring string 0 p)
- default-mime-charset)
- (decode-mime-charset-string
- (std11-strip-quoted-pair (substring string 0 p))
- default-mime-charset)))
- (substring string p)))
- ))
+ (cons (cons 'quoted-string ; result shape: ((quoted-string . TEXT) . REST)
+ (if eword-decode-quoted-encoded-word
+ (eword-decode-quoted-string
+ (substring string 0 p)
+ default-mime-charset)
+ (std11-wrap-as-quoted-string ; re-wrap the decoded text; the replaced code returned it unwrapped
+ (decode-mime-charset-string
+ (std11-strip-quoted-pair (substring string 1 (1- p))) ; drops the first and last character of the enclosure -- presumably the double quotes, confirm against std11-check-enclosure
+ default-mime-charset))))
+ (substring string p))) ; REST: everything from position P onward
+ ))
(defun eword-analyze-domain-literal (string &optional must-unfold)
(std11-analyze-domain-literal string))
(defun eword-analyze-encoded-word (string &optional must-unfold)
(let ((decoded (eword-decode-first-encoded-words
- string
- eword-encoded-word-in-phrase-regexp
- eword-after-encoded-word-in-phrase-regexp
- must-unfold)))
+ string
+ eword-encoded-word-in-phrase-regexp
+ eword-after-encoded-word-in-phrase-regexp
+ must-unfold)))
(if decoded
- (cons (cons 'atom (car decoded)) (cdr decoded)))))
+ (let ((s (car decoded))) ; S: working copy of the decoded text, consumed by the loop below
+ (while (or (string-match std11-atom-regexp s)
+ (string-match std11-spaces-regexp s))
+ (setq s (substring s (match-end 0)))) ; NOTE(review): cuts at match-end without checking match-beginning, so any text before a match is discarded too -- confirm both regexps are anchored at the start and cannot match the empty string
+ (if (= (length s) 0)
+ (cons (cons 'atom (car decoded)) (cdr decoded)) ; S fully consumed: tag the decoded text as an atom
+ (cons (cons 'quoted-string ; something was left over: tag it as a quoted-string instead
+ (std11-wrap-as-quoted-string (car decoded)))
+ (cdr decoded)))))))
(defun eword-analyze-atom (string &optional must-unfold)
(if (let ((enable-multibyte-characters nil))
))))
(defun eword-lexical-analyze-internal (string must-unfold)
- (let (dest ret)
+ (let ((last 'eword-analyze-spaces)
+ dest ret)
(while (not (string-equal string ""))
(setq ret
- (let ((rest eword-lexical-analyzers)
- func r)
- (while (and (setq func (car rest))
- (null (setq r (funcall func string must-unfold)))
- )
- (setq rest (cdr rest)))
- (or r `((error . ,string) . ""))
- ))
+ (let ((rest eword-lexical-analyzers)
+ func r)
+ (while (and (setq func (car rest))
+ (or
+ (and
+ (not eword-decode-sticked-encoded-word)
+ (not (eq last 'eword-analyze-spaces))
+ (eq func 'eword-analyze-encoded-word))
+ (null (setq r (funcall func string must-unfold))))
+ )
+ (setq rest (cdr rest)))
+ (setq last func)
+ (or r `((error . ,string) . ""))
+ ))
(setq dest (cons (car ret) dest))
(setq string (cdr ret))
)
If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
if there are in decoded encoded-words (generated by bad manner MUA
such as a version of Net$cape)."
+ (rotate-memo args-eword-decode-and-fold-structured-field
+ (list string start-column max-column must-unfold))
(or max-column
(setq max-column fill-column))
- (let ((c start-column)
- (tokens (eword-lexical-analyze string must-unfold))
- (result "")
- token)
- (while (and (setq token (car tokens))
- (setq tokens (cdr tokens)))
- (let* ((type (car token)))
- (if (eq type 'spaces)
- (let* ((next-token (car tokens))
- (next-str (eword-decode-token next-token))
- (next-len (string-width next-str))
- (next-c (+ c next-len 1)))
- (if (< next-c max-column)
- (setq result (concat result " " next-str)
- c next-c)
- (setq result (concat result "\n " next-str)
- c (1+ next-len)))
- (setq tokens (cdr tokens))
- )
- (let* ((str (eword-decode-token token)))
- (setq result (concat result str)
- c (+ c (string-width str)))
- ))))
- (if token
- (concat result (eword-decode-token token))
- result)))
+ (let* ((field-name (make-string (1- start-column) ?X))
+ (field-body (ew-lf-crlf-to-crlf string))
+ (ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+ (decoded (ew-decode-field field-name field-body)))
+ (unless (equal field-body decoded)
+ (setq decoded (ew-crlf-refold decoded start-column max-column)))
+ (ew-crlf-to-lf decoded)))
(defun eword-decode-and-unfold-structured-field (string)
"Decode and unfold STRING as structured field body.
If an encoded-word is broken or your emacs implementation can not
decode the charset included in it, it is not decoded."
- (let ((tokens (eword-lexical-analyze string 'must-unfold))
- (result ""))
- (while tokens
- (let* ((token (car tokens))
- (type (car token)))
- (setq tokens (cdr tokens))
- (setq result
- (if (eq type 'spaces)
- (concat result " ")
- (concat result (eword-decode-token token))
- ))))
- result))
+ (rotate-memo args-eword-decode-and-unfold-structured-field (list string)) ; debug trace of the argument; recorded only while the variable rotate-memo is non-nil
+ (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11)) ; bound around the ew-decode-field call below -- presumably consulted there, its definition is outside this view
+ (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string)))) ; an empty string is passed as the field name
+ (ew-crlf-to-lf (ew-crlf-unfold decoded)))) ; unfold first, then pass through ew-crlf-to-lf for the return value
(defun eword-decode-structured-field-body (string &optional must-unfold
start-column max-column)
If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
if there are in decoded encoded-words (generated by bad manner MUA
such as a version of Net$cape)."
+ (rotate-memo args-eword-decode-structured-field-body
+ (list string must-unfold start-column max-column))
(if start-column
;; fold with max-column
(eword-decode-and-fold-structured-field
string start-column max-column must-unfold)
;; Don't fold
- (mapconcat (function eword-decode-token)
- (eword-lexical-analyze string must-unfold)
- "")
- ))
+ (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+ (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf decoded))))
(defun eword-decode-unstructured-field-body (string &optional must-unfold)
"Decode non us-ascii characters in STRING as unstructured field body.
If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
if there are in decoded encoded-words (generated by bad manner MUA
such as a version of Net$cape)."
- (eword-decode-string string must-unfold default-mime-charset))
+ (rotate-memo args-eword-decode-unstructured-field-body
+ (list string must-unfold)) ; debug trace only; NOTE(review): this is the sole remaining use of MUST-UNFOLD in the new body
+ (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string)))) ; empty field name; default-mime-charset is no longer passed explicitly
+ (ew-crlf-to-lf (ew-crlf-unfold decoded)))) ; always unfolds, whatever MUST-UNFOLD says
(defun eword-extract-address-components (string)
"Extract full name and canonical address from STRING.
It decodes non us-ascii characters in FULL-NAME encoded as
encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
characters are regarded as variable `default-mime-charset'."
+ (rotate-memo args-eword-extract-address-components (list string))
(let* ((structure (car (std11-parse-address
(eword-lexical-analyze
(std11-unfold-string string) 'must-unfold))))