eword-encoded-text-regexp
"\\)"
(regexp-quote "?=")))
+(defconst eword-after-encoded-word-regexp "\\([ \t]\\|$\\)")
+
+(defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+")
+(defconst eword-encoded-word-in-phrase-regexp
+ (concat (regexp-quote "=?")
+ "\\("
+ mime-charset-regexp
+ "\\)"
+ (regexp-quote "?")
+ "\\(B\\|Q\\)"
+ (regexp-quote "?")
+ "\\("
+ eword-encoded-text-in-phrase-regexp
+ "\\)"
+ (regexp-quote "?=")))
+(defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t(]\\|$\\)")
+
+(defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+")
+(defconst eword-encoded-word-in-comment-regexp
+ (concat (regexp-quote "=?")
+ "\\("
+ mime-charset-regexp
+ "\\)"
+ (regexp-quote "?")
+ "\\(B\\|Q\\)"
+ (regexp-quote "?")
+ "\\("
+ eword-encoded-text-in-comment-regexp
+ "\\)"
+ (regexp-quote "?=")))
+(defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)")
+
+(defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+")
+(defconst eword-encoded-word-in-quoted-string-regexp
+ (concat (regexp-quote "=?")
+ "\\("
+ mime-charset-regexp
+ "\\)"
+ (regexp-quote "?")
+ "\\(B\\|Q\\)"
+ (regexp-quote "?")
+ "\\("
+ eword-encoded-text-in-quoted-string-regexp
+ "\\)"
+ (regexp-quote "?=")))
+(defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)")
;;; @@ Base64
;;; @ for string
;;;
+(defvar eword-decode-sticked-encoded-word nil
+ "*If non-nil, decode encoded-words sticked on atoms,
+other encoded-words, etc.
+however this behaviour violates RFC2047.")
+
+(defvar eword-decode-quoted-encoded-word nil
+ "*If non-nil, decode encoded-words in quoted-string
+however this behaviour violates RFC2047.")
+
+(defun eword-decode-first-encoded-words (string
+ eword-regexp
+ after-regexp
+ &optional must-unfold)
+ "Decode MIME encoded-words in beginning of STRING.
+
+EWORD-REGEXP is the regexp that matches a encoded-word.
+Usual value is eword-encoded-word-regexp,
+eword-encoded-text-in-phrase-regexp,
+eword-encoded-word-in-comment-regexp or
+eword-encoded-word-in-quoted-string-regexp.
+
+AFTER-REGEXP is the regexp that matches a after encoded-word.
+Usual value is eword-after-encoded-word-regexp,
+eword-after-encoded-text-in-phrase-regexp,
+eword-after-encoded-word-in-comment-regexp or
+eword-after-encoded-word-in-quoted-string-regexp.
+
+If beginning of STRING matches EWORD-REGEXP and AFTER-REGEXP,
+returns a cons cell of decoded string(sequence of characters) and
+the rest(sequence of octets).
+
+If beginning of STRING does not matches EWORD-REGEXP and AFTER-REGEXP,
+returns nil.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is returned in decoded part
+as encoded-word form.
+
+If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+ (if eword-decode-sticked-encoded-word (setq after-regexp ""))
+ (let ((between-ewords-regexp
+ (if eword-decode-sticked-encoded-word
+ "\\(\n?[ \t]\\)*"
+ "\\(\n?[ \t]\\)+"))
+ (src string) ; sequence of octets.
+ (dst "")) ; sequence of characters.
+ (if (string-match
+ (concat "\\`\\(" eword-regexp "\\)" after-regexp) src)
+ (let* (p
+ (q (match-end 1))
+ (ew (substring src 0 q))
+ (dw (eword-decode-encoded-word ew must-unfold)))
+ (setq dst (concat dst dw)
+ src (substring src q))
+ (if (not (string= ew dw))
+ (progn
+ (while
+ (and
+ (string-match
+ (concat "\\`\\(" between-ewords-regexp "\\)"
+ "\\(" eword-regexp "\\)"
+ after-regexp)
+ src)
+ (progn
+ (setq p (match-end 1)
+ q (match-end 3)
+ ew (substring src p q)
+ dw (eword-decode-encoded-word ew must-unfold))
+ (if (string= ew dw)
+ (progn
+ (setq dst (concat dst (substring src 0 q))
+ src (substring src q))
+ nil)
+ t)))
+ (setq dst (concat dst dw)
+ src (substring src q)))))
+ (cons dst src))
+ nil)))
+
+(defun eword-decode-comment-string (string &optional must-unfold)
+ (let ((src string)
+ (buf "")
+ (dst "")
+ (flag-ew t))
+ (while (< 0 (length src))
+ (let ((ch (aref src 0))
+ (decoded (and
+ flag-ew
+ (eword-decode-first-encoded-words src
+ eword-encoded-word-in-comment-regexp
+ eword-after-encoded-word-in-comment-regexp
+ must-unfold))))
+ (if (and (not (string= buf ""))
+ (or decoded (eq ch ?\() (eq ch ?\))))
+ (setq dst (concat dst
+ (std11-wrap-as-quoted-pairs
+ (decode-mime-charset-string buf
+ default-mime-charset)
+ '(?\( ?\))))
+ buf ""))
+ (cond
+ (decoded
+ (setq dst (concat dst
+ (std11-wrap-as-quoted-pairs
+ (car decoded)
+ '(?( ?))))
+ src (cdr decoded)))
+ ((or (eq ch ?\() (eq ch ?\)))
+ (setq dst (concat dst (list ch))
+ src (substring src 1)
+ flag-ew t))
+ ((eq ch ?\\)
+ (setq buf (concat buf (list (aref src 1)))
+ src (substring src 2)
+ flag-ew t))
+ ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n))
+ (setq buf (concat buf (list ch))
+ src (substring src 1)
+ flag-ew t))
+ ((string-match "\\`=?[^ \t\n()\\\\=]*" src)
+ (setq buf (concat buf (substring src 0 (match-end 0)))
+ src (substring src (match-end 0))
+ flag-ew eword-decode-sticked-encoded-word))
+ (t (error "something wrong")))))
+ (if (not (string= buf ""))
+ (setq dst (concat dst
+ (std11-wrap-as-quoted-pairs
+ (decode-mime-charset-string buf
+ default-mime-charset)
+ '(?\( ?\))))))
+ dst))
+
+(defun eword-decode-quoted-string (string &optional must-unfold)
+ (let ((src string)
+ (buf "")
+ (dst "")
+ (flag-ew t))
+ (while (< 0 (length src))
+ (let ((ch (aref src 0))
+ (decoded (and
+ eword-decode-quoted-encoded-word
+ flag-ew
+ (eword-decode-first-encoded-words src
+ eword-encoded-word-in-quoted-string-regexp
+ eword-after-encoded-word-in-quoted-string-regexp
+ must-unfold))))
+ (if (and (not (string= buf ""))
+ (or decoded (eq ch ?\")))
+ (setq dst (concat dst
+ (std11-wrap-as-quoted-pairs
+ (decode-mime-charset-string buf
+ default-mime-charset)
+ '(?\")))
+ buf ""))
+ (cond
+ (decoded
+ (setq dst (concat dst
+ (std11-wrap-as-quoted-pairs
+ (car decoded)
+ '(?\")))
+ src (cdr decoded)))
+ ((or (eq ch ?\"))
+ (setq dst (concat dst (list ch))
+ src (substring src 1)
+ flag-ew t))
+ ((eq ch ?\\)
+ (setq buf (concat buf (list (aref src 1)))
+ src (substring src 2)
+ flag-ew t))
+ ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n))
+ (setq buf (concat buf (list ch))
+ src (substring src 1)
+ flag-ew t))
+ ((string-match "\\`=?[^ \t\n\"\\\\=]*" src)
+ (setq buf (concat buf (substring src 0 (match-end 0)))
+ src (substring src (match-end 0))
+ flag-ew eword-decode-sticked-encoded-word))
+ (t (error "something wrong")))))
+ (if (not (string= buf ""))
+ (setq dst (concat dst
+ (std11-wrap-as-quoted-pairs
+ (decode-mime-charset-string buf
+ default-mime-charset)
+ '(?\")))))
+ dst))
+
+(defun eword-decode-unstructured-string (string &optional must-unfold)
+ (let ((src string)
+ (buf "")
+ (dst "")
+ (flag-ew t))
+ (while (< 0 (length src))
+ (let ((ch (aref src 0))
+ (decoded (and flag-ew (eword-decode-first-encoded-words src
+ eword-encoded-word-regexp
+ eword-after-encoded-word-regexp
+ must-unfold))))
+ (if (and (not (string= buf ""))
+ decoded)
+ (setq dst (concat dst
+ (decode-mime-charset-string buf
+ default-mime-charset))
+ buf ""))
+ (cond
+ (decoded
+ (setq dst (concat dst (car decoded))
+ src (cdr decoded)))
+ ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n))
+ (setq buf (concat buf (list ch))
+ src (substring src 1)
+ flag-ew t))
+ ((string-match "\\`=?[^ \t\n=]*" src)
+ (setq buf (concat buf (substring src 0 (match-end 0)))
+ src (substring src (match-end 0))
+ flag-ew eword-decode-sticked-encoded-word))
+ (t (error "something wrong")))))
+ (if (not (string= buf ""))
+ (setq dst (concat dst
+ (decode-mime-charset-string buf
+ default-mime-charset))))
+ dst))
+
(defun eword-decode-string (string &optional must-unfold)
"Decode MIME encoded-words in STRING.
If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
if there are in decoded encoded-words (generated by bad manner MUA
such as a version of Net$cape)."
- (setq string (std11-unfold-string string))
- (let ((dest "")(ew nil)
- beg end)
- (while (and (string-match eword-encoded-word-regexp string)
- (setq beg (match-beginning 0)
- end (match-end 0))
- )
- (if (> beg 0)
- (if (not
- (and (eq ew t)
- (string-match "^[ \t]+$" (substring string 0 beg))
- ))
- (setq dest (concat dest (substring string 0 beg)))
- )
- )
- (setq dest
- (concat dest
- (eword-decode-encoded-word
- (substring string beg end) must-unfold)
- ))
- (setq string (substring string end))
- (setq ew t)
- )
- (concat dest string)
- ))
+ (eword-decode-unstructured-string
+ (std11-unfold-string string)
+ must-unfold))
;;; @ for region
(if unfolding
(eword-decode-unfold)
)
- (goto-char (point-min))
- (while (re-search-forward (concat "\\(" eword-encoded-word-regexp "\\)"
- "\\(\n?[ \t]\\)+"
- "\\(" eword-encoded-word-regexp "\\)")
- nil t)
- (replace-match "\\1\\6")
- (goto-char (point-min))
- )
- (while (re-search-forward eword-encoded-word-regexp nil t)
- (insert (eword-decode-encoded-word
- (prog1
- (buffer-substring (match-beginning 0) (match-end 0))
- (delete-region (match-beginning 0) (match-end 0))
- ) must-unfold))
- )
- )))
+ (let ((str (eword-decode-unstructured-string
+ (buffer-substring (point-min) (point-max))
+ must-unfold)))
+ (delete-region (point-min) (point-max))
+ (insert str)))))
;;; @ for message header
;; Decode as unstructured field
(save-restriction
(narrow-to-region beg (1+ end))
- (decode-mime-charset-region p end default-charset)
(goto-char p)
- (if (re-search-forward eword-encoded-word-regexp
- nil t)
- (eword-decode-region beg (point-max) 'unfold))
+ (let ((default-mime-charset default-charset))
+ (eword-decode-region beg (point-max) 'unfold))
+ (goto-char (point-max))
)))))
(eword-decode-region (point-min) (point-max) t)
)))))
(let ((p (std11-check-enclosure string ?\" ?\")))
(if p
(cons (cons 'quoted-string
- (decode-mime-charset-string
- (std11-strip-quoted-pair (substring string 1 (1- p)))
- default-mime-charset))
+ (eword-decode-quoted-string (substring string 0 p)))
(substring string p))
)))
(std11-analyze-domain-literal string))
(defun eword-analyze-comment (string &optional must-unfold)
- (let ((p (std11-check-enclosure string ?\( ?\) t)))
- (if p
- (cons (cons 'comment
- (eword-decode-string
- (decode-mime-charset-string
- (std11-strip-quoted-pair (substring string 1 (1- p)))
- default-mime-charset)
- must-unfold))
- (substring string p))
- )))
+ (let ((len (length string)))
+ (if (and (< 0 len) (eq (aref string 0) ?\())
+ (let ((p 0))
+ (while (and p (< p len) (eq (aref string p) ?\())
+ (setq p (std11-check-enclosure string ?\( ?\) t p)))
+ (setq p (or p len))
+ (cons (cons 'comment
+ (eword-decode-comment-string (substring string 0 p)))
+ (substring string p)))
+ nil)))
(defun eword-analyze-spaces (string &optional must-unfold)
(std11-analyze-spaces string))
(std11-analyze-special string))
(defun eword-analyze-encoded-word (string &optional must-unfold)
- (if (eq (string-match eword-encoded-word-regexp string) 0)
- (let ((end (match-end 0))
- (dest (eword-decode-encoded-word (match-string 0 string)
- must-unfold))
- )
- (setq string (substring string end))
- (while (eq (string-match `,(concat "[ \t\n]*\\("
- eword-encoded-word-regexp
- "\\)")
- string)
- 0)
- (setq end (match-end 0))
- (setq dest
- (concat dest
- (eword-decode-encoded-word (match-string 1 string)
- must-unfold))
- string (substring string end))
- )
- (cons (cons 'atom dest) string)
- )))
+ (let ((decoded (eword-decode-first-encoded-words
+ string
+ eword-encoded-word-in-phrase-regexp
+ eword-after-encoded-word-in-phrase-regexp
+ must-unfold)))
+ (if decoded
+ (cons (cons 'atom (car decoded)) (cdr decoded)))))
(defun eword-analyze-atom (string &optional must-unfold)
- (if (string-match std11-atom-regexp string)
+ (if (let ((enable-multibyte-characters nil))
+ (string-match std11-atom-regexp string))
(let ((end (match-end 0)))
+ (if (and eword-decode-sticked-encoded-word
+ (string-match eword-encoded-word-in-phrase-regexp
+ (substring string 0 end))
+ (< 0 (match-beginning 0)))
+ (setq end (match-beginning 0)))
(cons (cons 'atom (decode-mime-charset-string
(substring string 0 end)
default-mime-charset))
characters encoded as encoded-words or invalid \"raw\" format.
\"Raw\" non us-ascii characters are regarded as variable
`default-mime-charset'."
- (let ((key (copy-sequence string))
- ret)
- (set-text-properties 0 (length key) nil key)
+ (let* ((str (copy-sequence string))
+ (key (cons str (cons default-mime-charset must-unfold)))
+ ret)
+ (set-text-properties 0 (length str) nil str)
(if (setq ret (assoc key eword-lexical-analyze-cache))
(cdr ret)
- (setq ret (eword-lexical-analyze-internal key must-unfold))
+ (setq ret (eword-lexical-analyze-internal str must-unfold))
(setq eword-lexical-analyze-cache
(cons (cons key ret)
(last eword-lexical-analyze-cache
ret)))
(defun eword-decode-token (token)
- (let ((type (car token))
- (value (cdr token)))
- (cond ((eq type 'quoted-string)
- (std11-wrap-as-quoted-string value))
- ((eq type 'comment)
- (concat "(" (std11-wrap-as-quoted-pairs value '(?( ?))) ")"))
- (t value))))
+ (cdr token))
(defun eword-decode-and-fold-structured-field
(string start-column &optional max-column must-unfold)
If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
if there are in decoded encoded-words (generated by bad manner MUA
such as a version of Net$cape)."
- (eword-decode-string
- (decode-mime-charset-string string default-mime-charset)
- must-unfold))
+ (eword-decode-string string must-unfold))
(defun eword-extract-address-components (string)
"Extract full name and canonical address from STRING.