;;; Code:
-(require 'std11-parse)
+(require 'std11)
(require 'mel)
(require 'mime-def)
+(require 'ew-dec)
+(require 'ew-line)
+
+(eval-when-compile (require 'cl))
+
(defgroup eword-decode nil
"Encoded-word decoding"
:group 'mime)
+;;; TEST
+
+(defvar rotate-memo nil)
+(defmacro rotate-memo (var val)
+ `(when rotate-memo
+ (unless (boundp ',var) (setq ,var ()))
+ (setq ,var (cons ,val ,var))
+ (let ((tmp (last ,var (- (length ,var) 100))))
+ (when tmp (setcdr tmp nil)))
+ ,var))
;;; @ variables
;;;
:group 'eword-decode
:type 'boolean)
+(defcustom eword-max-size-to-decode 1000
+ "*Max size to decode header field."
+ :group 'eword-decode
+ :type '(choice (integer :tag "Limit (bytes)")
+ (const :tag "Don't limit" nil)))
+
;;; @ MIME encoded-word definition
;;;
(concat eword-encoded-word-prefix-regexp
"\\(" eword-encoded-text-in-phrase-regexp "\\)"
eword-encoded-word-suffix-regexp))
-(defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t(]\\|$\\)")
+(defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t]\\|$\\)")
(defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+")
(defconst eword-encoded-word-in-comment-regexp
(defconst eword-encoded-word-regexp eword-encoded-word-in-unstructured-regexp)
-;;; @@ Base64
-;;;
-
-(defconst base64-token-regexp "[A-Za-z0-9+/]")
-(defconst base64-token-padding-regexp "[A-Za-z0-9+/=]")
-
-(defconst eword-B-encoded-text-regexp
- (concat "\\(\\("
- base64-token-regexp
- base64-token-regexp
- base64-token-regexp
- base64-token-regexp
- "\\)*"
- base64-token-regexp
- base64-token-regexp
- base64-token-padding-regexp
- base64-token-padding-regexp
- "\\)"))
-
-;; (defconst eword-B-encoding-and-encoded-text-regexp
-;; (concat "\\(B\\)\\?" eword-B-encoded-text-regexp))
-
-
-;;; @@ Quoted-Printable
-;;;
-
-(defconst quoted-printable-hex-chars "0123456789ABCDEF")
-(defconst quoted-printable-octet-regexp
- (concat "=[" quoted-printable-hex-chars
- "][" quoted-printable-hex-chars "]"))
-
-(defconst eword-Q-encoded-text-regexp
- (concat "\\([^=?]\\|" quoted-printable-octet-regexp "\\)+"))
-;; (defconst eword-Q-encoding-and-encoded-text-regexp
-;; (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp))
-
-
;;; @ internal utilities
;;;
safe-regexp
escape ; ?\\ or nil.
delimiters ; list of chars.
+ chars-must-be-quote
must-unfold
code-conversion)
(if (and code-conversion
(setq dst (concat dst
(std11-wrap-as-quoted-pairs
(decode-mime-charset-string buf code-conversion)
- delimiters))
+ chars-must-be-quote))
buf ""))
(cond
(decoded
(setq dst (concat dst
(std11-wrap-as-quoted-pairs
(car decoded)
- delimiters))
+ chars-must-be-quote))
src (cdr decoded)))
((memq ch delimiters)
(setq dst (concat dst (list ch))
(setq dst (concat dst
(std11-wrap-as-quoted-pairs
(decode-mime-charset-string buf code-conversion)
- delimiters))))
+ chars-must-be-quote))))
dst))
"[^ \t\n=]*"
nil
nil
+ nil
must-unfold
code-conversion))
"[^ \t\n()\\\\=]*"
?\\
'(?\( ?\))
+ '(?\( ?\) ?\\ ?\r ?\n)
must-unfold
code-conversion))
"[^ \t\n\"\\\\=]*"
?\\
'(?\")
+ '(?\" ?\\ ?\r ?\n)
must-unfold
code-conversion))
Otherwise it decodes non-ASCII bit patterns as the
default-mime-charset."
(eword-decode-unstructured
- code-conversion
(std11-unfold-string string)
+ code-conversion
must-unfold))
+(defun eword-decode-structured-field-body (string
+ &optional
+ start-column max-column)
+ (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+ (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf decoded)))
+
+(defun eword-decode-and-unfold-structured-field-body (string
+ &optional
+ start-column
+ max-column)
+ "Decode and unfold STRING as structured field body.
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded."
+ (let* ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf (ew-crlf-unfold decoded))))
+
+(defun eword-decode-and-fold-structured-field-body (string
+ start-column
+ &optional max-column)
+ (or max-column
+ (setq max-column fill-column))
+ (let* ((field-name (make-string (1- start-column) ?X))
+ (field-body (ew-lf-crlf-to-crlf string))
+ (ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+ (decoded (ew-decode-field field-name field-body)))
+ (unless (equal field-body decoded)
+ (setq decoded (ew-crlf-refold decoded start-column max-column)))
+ (ew-crlf-to-lf decoded)))
+
+(defun eword-decode-unstructured-field-body (string &optional start-column
+ max-column)
+ (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf decoded)))
+
+(defun eword-decode-and-unfold-unstructured-field-body (string
+ &optional start-column
+ max-column)
+ (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf (ew-crlf-unfold decoded))))
+
+(defun eword-decode-unfolded-unstructured-field-body (string
+ &optional start-column
+ max-column)
+ (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
+ (ew-crlf-to-lf decoded)))
+
;;; @ for region
;;;
Otherwise it decodes non-ASCII bit patterns as the
default-mime-charset."
(interactive "*r")
+ (rotate-memo args-eword-decode-region
+ (list start end (buffer-substring start end) unfolding must-unfold code-conversion))
(save-excursion
(save-restriction
(narrow-to-region start end)
(eword-decode-unfold)
)
(let ((str (eword-decode-unstructured
- code-conversion
(buffer-substring (point-min) (point-max))
+ code-conversion
must-unfold)))
(delete-region (point-min) (point-max))
(insert str)))))
-
-;;; @ for message header
-;;;
-
-(defcustom eword-decode-ignored-field-list
- '(newsgroups path lines nntp-posting-host received message-id date)
- "*List of field-names to be ignored when decoding.
-Each field name must be symbol."
- :group 'eword-decode
- :type '(repeat symbol))
-
-(defcustom eword-decode-structured-field-list
- '(reply-to resent-reply-to from resent-from sender resent-sender
- to resent-to cc resent-cc bcc resent-bcc dcc
- mime-version content-type content-transfer-encoding
- content-disposition)
- "*List of field-names to decode as structured field.
-Each field name must be symbol."
- :group 'eword-decode
- :type '(repeat symbol))
-
-(defun eword-decode-header (&optional code-conversion separator)
- "Decode MIME encoded-words in header fields.
-If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
-mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
-Otherwise it decodes non-ASCII bit patterns as the
-default-mime-charset.
-If SEPARATOR is not nil, it is used as header separator."
- (interactive "*")
- (if (and code-conversion
- (not (mime-charset-to-coding-system code-conversion)))
- (setq code-conversion default-mime-charset))
- (save-excursion
- (save-restriction
- (std11-narrow-to-header separator)
- (if code-conversion
- (let (beg p end field-name len)
- (goto-char (point-min))
- (while (re-search-forward std11-field-head-regexp nil t)
- (setq beg (match-beginning 0)
- p (match-end 0)
- field-name (buffer-substring beg (1- p))
- len (string-width field-name)
- field-name (intern (downcase field-name))
- end (std11-field-end))
- (cond ((memq field-name eword-decode-ignored-field-list)
- ;; Don't decode
- )
- ((memq field-name eword-decode-structured-field-list)
- ;; Decode as structured field
- (let ((body (buffer-substring p end)))
- (delete-region p end)
- (insert (eword-decode-and-fold-structured-field
- body (1+ len)))
- ))
- (t
- ;; Decode as unstructured field
- (save-restriction
- (narrow-to-region beg (1+ end))
- (goto-char p)
- (eword-decode-region beg (point-max) 'unfold nil
- code-conversion)
- (goto-char (point-max))
- )))))
- (eword-decode-region (point-min) (point-max) t nil nil)
- ))))
-
(defun eword-decode-unfold ()
(goto-char (point-min))
(let (field beg end)
))
)))
+;;; @ for message header
+;;;
+
+(defvar mime-field-decoder-alist nil)
+
+(defvar mime-field-decoder-cache nil)
+
+(defvar mime-update-field-decoder-cache 'ew-mime-update-field-decoder-cache
+ "*Field decoder cache update function.")
+
+;;;###autoload
+(defun mime-set-field-decoder (field &rest specs)
+ "Set decoder of FILED.
+SPECS must be like `MODE1 DECODER1 MODE2 DECODER2 ...'.
+Each mode must be `nil', `plain', `wide', `summary' or `nov'.
+If mode is `nil', corresponding decoder is set up for every modes."
+ (when specs
+ (let ((mode (pop specs))
+ (function (pop specs)))
+ (if mode
+ (progn
+ (let ((cell (assq mode mime-field-decoder-alist)))
+ (if cell
+ (setcdr cell (put-alist field function (cdr cell)))
+ (setq mime-field-decoder-alist
+ (cons (cons mode (list (cons field function)))
+ mime-field-decoder-alist))
+ ))
+ (apply (function mime-set-field-decoder) field specs)
+ )
+ (mime-set-field-decoder field
+ 'plain function
+ 'wide function
+ 'summary function
+ 'nov function)
+ ))))
+
+;;;###autoload
+(defmacro mime-find-field-presentation-method (name)
+ "Return field-presentation-method from NAME.
+NAME must be `plain', `wide', `summary' or `nov'."
+ (cond ((eq name nil)
+ `(or (assq 'summary mime-field-decoder-cache)
+ '(summary))
+ )
+ ((and (consp name)
+ (car name)
+ (consp (cdr name))
+ (symbolp (car (cdr name)))
+ (null (cdr (cdr name))))
+ `(or (assq ,name mime-field-decoder-cache)
+ (cons ,name nil))
+ )
+ (t
+ `(or (assq (or ,name 'summary) mime-field-decoder-cache)
+ (cons (or ,name 'summary) nil))
+ )))
+
+(defun mime-find-field-decoder-internal (field &optional mode)
+ "Return function to decode field-body of FIELD in MODE.
+Optional argument MODE must be object of field-presentation-method."
+ (cdr (or (assq field (cdr mode))
+ (prog1
+ (funcall mime-update-field-decoder-cache
+ field (car mode))
+ (setcdr mode
+ (cdr (assq (car mode) mime-field-decoder-cache)))
+ ))))
+
+;;;###autoload
+(defun mime-find-field-decoder (field &optional mode)
+ "Return function to decode field-body of FIELD in MODE.
+Optional argument MODE must be object or name of
+field-presentation-method. Name of field-presentation-method must be
+`plain', `wide', `summary' or `nov'.
+Default value of MODE is `summary'."
+ (if (symbolp mode)
+ (let ((p (cdr (mime-find-field-presentation-method mode))))
+ (if (and p (setq p (assq field p)))
+ (cdr p)
+ (cdr (funcall mime-update-field-decoder-cache
+ field (or mode 'summary)))))
+ (inline (mime-find-field-decoder-internal field mode))
+ ))
+
+;;;###autoload
+(defun mime-update-field-decoder-cache (field mode &optional function)
+ "Update field decoder cache `mime-field-decoder-cache'."
+ (cond ((eq function 'identity)
+ (setq function nil)
+ )
+ ((null function)
+ (let ((decoder-alist
+ (cdr (assq (or mode 'summary) mime-field-decoder-alist))))
+ (setq function (cdr (or (assq field decoder-alist)
+ (assq t decoder-alist)))))
+ ))
+ (let ((cell (assq mode mime-field-decoder-cache))
+ ret)
+ (if cell
+ (if (setq ret (assq field (cdr cell)))
+ (setcdr ret function)
+ (setcdr cell (cons (setq ret (cons field function)) (cdr cell))))
+ (setq mime-field-decoder-cache
+ (cons (cons mode (list (setq ret (cons field function))))
+ mime-field-decoder-cache)))
+ ret))
+
+;; ignored fields
+(mime-set-field-decoder 'Archive nil nil)
+(mime-set-field-decoder 'Content-Md5 nil nil)
+(mime-set-field-decoder 'Control nil nil)
+(mime-set-field-decoder 'Date nil nil)
+(mime-set-field-decoder 'Distribution nil nil)
+(mime-set-field-decoder 'Followup-Host nil nil)
+(mime-set-field-decoder 'Followup-To nil nil)
+(mime-set-field-decoder 'Lines nil nil)
+(mime-set-field-decoder 'Message-Id nil nil)
+(mime-set-field-decoder 'Newsgroups nil nil)
+(mime-set-field-decoder 'Nntp-Posting-Host nil nil)
+(mime-set-field-decoder 'Path nil nil)
+(mime-set-field-decoder 'Posted-And-Mailed nil nil)
+(mime-set-field-decoder 'Received nil nil)
+(mime-set-field-decoder 'Status nil nil)
+(mime-set-field-decoder 'X-Face nil nil)
+(mime-set-field-decoder 'X-Face-Version nil nil)
+(mime-set-field-decoder 'X-Info nil nil)
+(mime-set-field-decoder 'X-Pgp-Key-Info nil nil)
+(mime-set-field-decoder 'X-Pgp-Sig nil nil)
+(mime-set-field-decoder 'X-Pgp-Sig-Version nil nil)
+(mime-set-field-decoder 'Xref nil nil)
+
+;; structured fields
+(let ((fields
+ '(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender
+ To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc
+ Mail-Followup-To
+ Mime-Version Content-Type Content-Transfer-Encoding
+ Content-Disposition User-Agent))
+ field)
+ (while fields
+ (setq field (pop fields))
+ (mime-set-field-decoder
+ field
+ 'plain #'eword-decode-structured-field-body
+ 'wide #'eword-decode-and-fold-structured-field-body
+ 'summary #'eword-decode-and-unfold-structured-field-body
+ 'nov #'eword-decode-and-unfold-structured-field-body)
+ ))
+
+;; unstructured fields (default)
+(mime-set-field-decoder
+ t
+ 'plain #'eword-decode-unstructured-field-body
+ 'wide #'eword-decode-unstructured-field-body
+ 'summary #'eword-decode-and-unfold-unstructured-field-body
+ 'nov #'eword-decode-unfolded-unstructured-field-body)
+
+;;;###autoload
+(defun ew-mime-update-field-decoder-cache (field mode)
+ (let ((fun (cond
+ ((eq mode 'plain)
+ (lexical-let ((field-name (symbol-name field)))
+ (lambda (field-body &optional start-column max-column must-unfold)
+ (setq field-body (ew-lf-to-crlf field-body))
+ (let ((res (ew-crlf-to-lf
+ (ew-decode-field field-name field-body))))
+ (add-text-properties
+ 0 (length res)
+ (list 'original-field-name field-name
+ 'original-field-body field-body)
+ res)
+ res))))
+ ((eq mode 'wide)
+ (lexical-let ((field-name (symbol-name field)))
+ (lambda (field-body &optional start-column max-column must-unfold)
+ (setq field-body (ew-lf-to-crlf field-body))
+ (let ((res (ew-crlf-to-lf
+ (ew-crlf-refold
+ (ew-decode-field field-name field-body)
+ (length field-name)
+ (or max-column fill-column)))))
+ (add-text-properties
+ 0 (length res)
+ (list 'original-field-name field-name
+ 'original-field-body field-body)
+ res)
+ res))))
+ ((eq mode 'summary)
+ (lexical-let ((field-name (symbol-name field)))
+ (lambda (field-body &optional start-column max-column must-unfold)
+ (setq field-body (ew-lf-to-crlf field-body))
+ (let ((res (ew-crlf-to-lf
+ (ew-crlf-unfold
+ (ew-decode-field field-name field-body)))))
+ (add-text-properties
+ 0 (length res)
+ (list 'original-field-name field-name
+ 'original-field-body field-body)
+ res)
+ res))))
+ ((eq mode 'nov)
+ (lexical-let ((field-name (symbol-name field)))
+ (lambda (field-body &optional start-column max-column must-unfold)
+ (setq field-body (ew-lf-to-crlf field-body))
+ (require 'ew-var)
+ (let ((ew-ignore-76bytes-limit t))
+ (let ((res (ew-crlf-to-lf
+ (ew-crlf-unfold
+ (ew-decode-field field-name field-body)))))
+ (add-text-properties
+ 0 (length res)
+ (list 'original-field-name field-name
+ 'original-field-body field-body)
+ res)
+ res)))))
+ (t
+ nil))))
+ (mime-update-field-decoder-cache field mode fun)))
+
+;;;###autoload
+(defun mime-decode-field-body (field-body field-name
+ &optional mode max-column)
+ "Decode FIELD-BODY as FIELD-NAME in MODE, and return the result.
+Optional argument MODE must be `plain', `wide', `summary' or `nov'.
+Default mode is `summary'.
+
+If MODE is `wide' and MAX-COLUMN is non-nil, the result is folded with
+MAX-COLUMN.
+
+Non MIME encoded-word part in FILED-BODY is decoded with
+`default-mime-charset'."
+ (unless mode (setq mode 'summary))
+ (if (symbolp field-name) (setq field-name (symbol-name field-name)))
+ (let ((decoded
+ (if (eq mode 'nov)
+ (let ((ew-ignore-76bytes-limit t))
+ (ew-decode-field
+ field-name (ew-lf-crlf-to-crlf field-body)))
+ (ew-decode-field
+ field-name (ew-lf-crlf-to-crlf field-body)))))
+ (if (and (eq mode 'wide) max-column)
+ (setq decoded (ew-crlf-refold
+ decoded
+ (1+ (string-width field-name))
+ max-column))
+ (if (not (eq mode 'plain))
+ (setq decoded (ew-crlf-unfold decoded))))
+ (setq decoded (ew-crlf-to-lf decoded))
+ (add-text-properties 0 (length decoded)
+ (list 'original-field-name field-name
+ 'original-field-body field-body)
+ decoded)
+ decoded))
+
+;;;###autoload
+(defun mime-decode-header-in-region (start end
+ &optional code-conversion)
+ "Decode MIME encoded-words in region between START and END.
+If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
+mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
+Otherwise it decodes non-ASCII bit patterns as the
+default-mime-charset."
+ (interactive "*r")
+ (save-excursion
+ (save-restriction
+ (narrow-to-region start end)
+ (let ((default-charset
+ (if code-conversion
+ (if (mime-charset-to-coding-system code-conversion)
+ code-conversion
+ default-mime-charset))))
+ (if default-charset
+ (let ((mode-obj (mime-find-field-presentation-method 'wide))
+ beg p end len field-decoder
+ field-name field-body)
+ (goto-char (point-min))
+ (while (re-search-forward std11-field-head-regexp nil t)
+ (setq beg (match-beginning 0)
+ p (match-end 0)
+ field-name (buffer-substring beg (1- p))
+ len (string-width field-name)
+ field-decoder (inline
+ (mime-find-field-decoder-internal
+ (intern (capitalize field-name))
+ mode-obj)))
+ (when field-decoder
+ (setq end (std11-field-end)
+ field-body (buffer-substring p end))
+ (let ((default-mime-charset default-charset))
+ (delete-region p end)
+ (insert (funcall field-decoder field-body (1+ len)))
+ ))
+ (add-text-properties beg (min (1+ (point)) (point-max))
+ (list 'original-field-name field-name
+ 'original-field-body field-body))
+ ))
+ (eword-decode-region (point-min) (point-max) t)
+ )))))
+
+;;;###autoload
+(defun mime-decode-header-in-buffer (&optional code-conversion separator)
+ "Decode MIME encoded-words in header fields.
+If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
+mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
+Otherwise it decodes non-ASCII bit patterns as the
+default-mime-charset.
+If SEPARATOR is not nil, it is used as header separator."
+ (interactive "*")
+ (mime-decode-header-in-region
+ (point-min)
+ (save-excursion
+ (goto-char (point-min))
+ (if (re-search-forward
+ (concat "^\\(" (regexp-quote (or separator "")) "\\)?$")
+ nil t)
+ (match-beginning 0)
+ (point-max)
+ ))
+ code-conversion))
+
+(define-obsolete-function-alias 'eword-decode-header
+ 'mime-decode-header-in-buffer)
+
;;; @ encoded-word decoder
;;;
-(defvar eword-warning-face nil "Face used for invalid encoded-word.")
+(defvar eword-decode-encoded-word-error-handler
+ 'eword-decode-encoded-word-default-error-handler)
+
+(defvar eword-warning-face nil
+ "Face used for invalid encoded-word.")
+
+(defun eword-decode-encoded-word-default-error-handler (word signal)
+ (and (add-text-properties 0 (length word)
+ (and eword-warning-face
+ (list 'face eword-warning-face))
+ word)
+ word))
(defun eword-decode-encoded-word (word &optional must-unfold)
"Decode WORD if it is an encoded-word.
(condition-case err
(eword-decode-encoded-text charset encoding text must-unfold)
(error
- (and
- (add-text-properties 0 (length word)
- (and eword-warning-face
- (list 'face eword-warning-face))
- word)
- word)))
+ (funcall eword-decode-encoded-word-error-handler word err)
+ ))
))
word))
as a version of Net$cape)."
(let ((cs (mime-charset-to-coding-system charset)))
(if cs
- (let ((dest
- (cond
- ((string-equal "B" encoding)
- (if (and (string-match eword-B-encoded-text-regexp string)
- (string-equal string (match-string 0 string)))
- (base64-decode-string string)
- (error "Invalid encoded-text %s" string)))
- ((string-equal "Q" encoding)
- (if (and (string-match eword-Q-encoded-text-regexp string)
- (string-equal string (match-string 0 string)))
- (q-encoding-decode-string string)
- (error "Invalid encoded-text %s" string)))
- (t
- (error "Invalid encoding %s" encoding)
- )))
- )
- (if dest
- (progn
- (setq dest (decode-coding-string dest cs))
- (if must-unfold
- (mapconcat (function
- (lambda (chr)
- (cond
- ((eq chr ?\n) "")
- ((eq chr ?\t) " ")
- (t (char-to-string chr)))
- ))
- (std11-unfold-string dest)
- "")
- dest)
- ))))))
+ (let ((dest (encoded-text-decode-string string encoding)))
+ (when dest
+ (setq dest (decode-mime-charset-string dest charset))
+ (if must-unfold
+ (mapconcat (function
+ (lambda (chr)
+ (cond ((eq chr ?\n) "")
+ ((eq chr ?\t) " ")
+ (t (char-to-string chr)))
+ ))
+ (std11-unfold-string dest)
+ "")
+ dest))))))
;;; @ lexical analyze
(defun eword-analyze-quoted-string (string &optional must-unfold)
(let ((p (std11-check-enclosure string ?\" ?\")))
(if p
- (cons (cons 'quoted-string
- (eword-decode-quoted-string
- (substring string 0 p)
- default-mime-charset))
- (substring string p))
- )))
+ (cons (cons 'quoted-string
+ (if eword-decode-quoted-encoded-word
+ (eword-decode-quoted-string
+ (substring string 0 p)
+ default-mime-charset)
+ (std11-wrap-as-quoted-string
+ (decode-mime-charset-string
+ (std11-strip-quoted-pair (substring string 1 (1- p)))
+ default-mime-charset))))
+ (substring string p)))
+ ))
(defun eword-analyze-domain-literal (string &optional must-unfold)
(std11-analyze-domain-literal string))
(setq p (or p len))
(cons (cons 'comment
(eword-decode-comment
- (substring string 0 p)
+ (std11-unfold-string (substring string 0 p))
default-mime-charset))
(substring string p)))
nil)))
+
(defun eword-analyze-spaces (string &optional must-unfold)
(std11-analyze-spaces string))
(defun eword-analyze-encoded-word (string &optional must-unfold)
(let ((decoded (eword-decode-first-encoded-words
- string
- eword-encoded-word-in-phrase-regexp
- eword-after-encoded-word-in-phrase-regexp
- must-unfold)))
+ string
+ eword-encoded-word-in-phrase-regexp
+ eword-after-encoded-word-in-phrase-regexp
+ must-unfold)))
(if decoded
- (cons (cons 'atom (car decoded)) (cdr decoded)))))
+ (let ((s (car decoded)))
+ (while (or (string-match std11-atom-regexp s)
+ (string-match std11-spaces-regexp s))
+ (setq s (substring s (match-end 0))))
+ (if (= (length s) 0)
+ (cons (cons 'atom (car decoded)) (cdr decoded))
+ (cons (cons 'quoted-string
+ (std11-wrap-as-quoted-string (car decoded)))
+ (cdr decoded)))))))
(defun eword-analyze-atom (string &optional must-unfold)
- (if (let ((enable-multibyte-characters nil))
- (string-match std11-atom-regexp string))
+ (if (string-match std11-atom-regexp (string-as-unibyte string))
(let ((end (match-end 0)))
(if (and eword-decode-sticked-encoded-word
(string-match eword-encoded-word-in-phrase-regexp
))))
(defun eword-lexical-analyze-internal (string must-unfold)
- (let (dest ret)
+ (let ((last 'eword-analyze-spaces)
+ dest ret)
(while (not (string-equal string ""))
(setq ret
- (let ((rest eword-lexical-analyzers)
- func r)
- (while (and (setq func (car rest))
- (null (setq r (funcall func string must-unfold)))
- )
- (setq rest (cdr rest)))
- (or r `((error . ,string) . ""))
- ))
+ (let ((rest eword-lexical-analyzers)
+ func r)
+ (while (and (setq func (car rest))
+ (or
+ (and
+ (not eword-decode-sticked-encoded-word)
+ (not (eq last 'eword-analyze-spaces))
+ (eq func 'eword-analyze-encoded-word))
+ (null (setq r (funcall func string must-unfold))))
+ )
+ (setq rest (cdr rest)))
+ (setq last func)
+ (or r `((error . ,string) . ""))
+ ))
(setq dest (cons (car ret) dest))
(setq string (cdr ret))
)
(defun eword-decode-token (token)
(cdr token))
-(defun eword-decode-and-fold-structured-field
- (string start-column &optional max-column must-unfold)
- "Decode and fold (fill) STRING as structured field body.
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded.
-
-If MAX-COLUMN is omitted, `fill-column' is used.
-
-If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-words (generated by bad manner MUA
-such as a version of Net$cape)."
- (or max-column
- (setq max-column fill-column))
- (let ((c start-column)
- (tokens (eword-lexical-analyze string must-unfold))
- (result "")
- token)
- (while (and (setq token (car tokens))
- (setq tokens (cdr tokens)))
- (let* ((type (car token)))
- (if (eq type 'spaces)
- (let* ((next-token (car tokens))
- (next-str (eword-decode-token next-token))
- (next-len (string-width next-str))
- (next-c (+ c next-len 1)))
- (if (< next-c max-column)
- (setq result (concat result " " next-str)
- c next-c)
- (setq result (concat result "\n " next-str)
- c (1+ next-len)))
- (setq tokens (cdr tokens))
- )
- (let* ((str (eword-decode-token token)))
- (setq result (concat result str)
- c (+ c (string-width str)))
- ))))
- (if token
- (concat result (eword-decode-token token))
- result)))
-
-(defun eword-decode-and-unfold-structured-field (string)
- "Decode and unfold STRING as structured field body.
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded."
- (let ((tokens (eword-lexical-analyze string 'must-unfold))
- (result ""))
- (while tokens
- (let* ((token (car tokens))
- (type (car token)))
- (setq tokens (cdr tokens))
- (setq result
- (if (eq type 'spaces)
- (concat result " ")
- (concat result (eword-decode-token token))
- ))))
- result))
-
-(defun eword-decode-structured-field-body (string &optional must-unfold
- start-column max-column)
- "Decode non us-ascii characters in STRING as structured field body.
-STRING is unfolded before decoding.
-
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded.
-
-If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-words (generated by bad manner MUA
-such as a version of Net$cape)."
- (if start-column
- ;; fold with max-column
- (eword-decode-and-fold-structured-field
- string start-column max-column must-unfold)
- ;; Don't fold
- (mapconcat (function eword-decode-token)
- (eword-lexical-analyze string must-unfold)
- "")
- ))
-
-(defun eword-decode-unstructured-field-body (string &optional must-unfold)
- "Decode non us-ascii characters in STRING as unstructured field body.
-STRING is unfolded before decoding.
-
-It decodes non us-ascii characters in FULL-NAME encoded as
-encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
-characters are regarded as variable `default-mime-charset'.
-
-If an encoded-word is broken or your emacs implementation can not
-decode the charset included in it, it is not decoded.
-
-If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
-if there are in decoded encoded-words (generated by bad manner MUA
-such as a version of Net$cape)."
- (eword-decode-string string must-unfold default-mime-charset))
-
(defun eword-extract-address-components (string)
"Extract full name and canonical address from STRING.
Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
It decodes non us-ascii characters in FULL-NAME encoded as
encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
characters are regarded as variable `default-mime-charset'."
+ (rotate-memo args-eword-extract-address-components (list string))
(let* ((structure (car (std11-parse-address
(eword-lexical-analyze
(std11-unfold-string string) 'must-unfold))))