* ChangeLog: Fix typo.
[elisp/flim.git] / eword-decode.el
index 46063ff..affdce6 100644 (file)
 (require 'mel)
 (require 'mime-def)
 
+(require 'ew-dec)
+
 (defgroup eword-decode nil
   "Encoded-word decoding"
   :group 'mime)
 
+;;; TEST
+
+(defmacro rotate-memo (var val)
+  `(progn
+     (unless (boundp ',var) (setq ,var ()))
+     (setq ,var (cons ,val ,var))
+     (let ((tmp (last ,var (- (length ,var) 100))))
+       (when tmp (setcdr tmp nil)))
+     ,var))
 
 ;;; @ variables
 ;;;
@@ -219,6 +230,7 @@ such as a version of Net$cape)."
                                   safe-regexp
                                   escape ; ?\\ or nil.
                                   delimiters ; list of chars.
+                                   chars-must-be-quote
                                   must-unfold
                                   code-conversion)
   (if (and code-conversion
@@ -240,14 +252,14 @@ such as a version of Net$cape)."
          (setq dst (concat dst
                      (std11-wrap-as-quoted-pairs
                        (decode-mime-charset-string buf code-conversion)
-                       delimiters))
+                       chars-must-be-quote))
                buf ""))
        (cond
          (decoded
            (setq dst (concat dst
                        (std11-wrap-as-quoted-pairs
                          (car decoded)
-                         delimiters))
+                         chars-must-be-quote))
                  src (cdr decoded)))
          ((memq ch delimiters)
            (setq dst (concat dst (list ch))
@@ -271,7 +283,7 @@ such as a version of Net$cape)."
       (setq dst (concat dst
                  (std11-wrap-as-quoted-pairs
                    (decode-mime-charset-string buf code-conversion)
-                   delimiters))))
+                   chars-must-be-quote))))
     dst))
 
 
@@ -286,6 +298,7 @@ such as a version of Net$cape)."
     "[^ \t\n=]*"
     nil
     nil
+    nil
     must-unfold
     code-conversion))
 
@@ -297,6 +310,7 @@ such as a version of Net$cape)."
     "[^ \t\n()\\\\=]*"
     ?\\
     '(?\( ?\))
+    '(?\( ?\) ?\\ ?\r ?\n)
     must-unfold
     code-conversion))
 
@@ -308,6 +322,7 @@ such as a version of Net$cape)."
     "[^ \t\n\"\\\\=]*"
     ?\\
     '(?\")
+    '(?\" ?\\ ?\r ?\n)
     must-unfold
     code-conversion))
 
@@ -369,17 +384,17 @@ default-mime-charset."
 ;;;
 
 (defcustom eword-decode-ignored-field-list
-  '(newsgroups path lines nntp-posting-host received message-id date)
+  '(Newsgroups Path Lines Nntp-Posting-Host Received Message-Id Date)
   "*List of field-names to be ignored when decoding.
 Each field name must be symbol."
   :group 'eword-decode
   :type '(repeat symbol))
 
 (defcustom eword-decode-structured-field-list
-  '(reply-to resent-reply-to from resent-from sender resent-sender
-            to resent-to cc resent-cc bcc resent-bcc dcc
-            mime-version content-type content-transfer-encoding
-            content-disposition)
+  '(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender
+            To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc
+            Mime-Version Content-Type Content-Transfer-Encoding
+            Content-Disposition)
   "*List of field-names to decode as structured field.
 Each field name must be symbol."
   :group 'eword-decode
@@ -393,6 +408,10 @@ Otherwise it decodes non-ASCII bit patterns as the
 default-mime-charset.
 If SEPARATOR is not nil, it is used as header separator."
   (interactive "*")
+  (rotate-memo args-eword-decode-header
+              (list code-conversion))
+  (unless code-conversion
+    (message "eword-decode-header is called with no code-conversion"))
   (if (and code-conversion
           (not (mime-charset-to-coding-system code-conversion)))
       (setq code-conversion default-mime-charset))
@@ -400,34 +419,17 @@ If SEPARATOR is not nil, it is used as header separator."
     (save-restriction
       (std11-narrow-to-header separator)
       (if code-conversion
-         (let (beg p end field-name len)
+         (let (beg p end field-name field-body len)
            (goto-char (point-min))
            (while (re-search-forward std11-field-head-regexp nil t)
              (setq beg (match-beginning 0)
                    p (match-end 0)
                    field-name (buffer-substring beg (1- p))
-                   len (string-width field-name)
-                   field-name (intern (downcase field-name))
-                   end (std11-field-end))
-             (cond ((memq field-name eword-decode-ignored-field-list)
-                    ;; Don't decode
-                    )
-                   ((memq field-name eword-decode-structured-field-list)
-                    ;; Decode as structured field
-                    (let ((body (buffer-substring p end)))
-                      (delete-region p end)
-                      (insert (eword-decode-and-fold-structured-field
-                               body (1+ len)))
-                      ))
-                   (t
-                    ;; Decode as unstructured field
-                    (save-restriction
-                      (narrow-to-region beg (1+ end))
-                      (goto-char p)
-                      (eword-decode-region beg (point-max) 'unfold nil
-                        code-conversion)
-                      (goto-char (point-max))
-                      )))))
+                   end (std11-field-end)
+                   field-body (buffer-substring p end))
+             (delete-region p end)
+             (insert (ew-decode-field field-name (ew-lf-crlf-to-crlf field-body)))
+             ))
        (eword-decode-region (point-min) (point-max) t nil nil)
        ))))
 
@@ -527,21 +529,18 @@ as a version of Net$cape)."
                  (error "Invalid encoding %s" encoding)
                  )))
               )
-         (if dest
-             (progn
-               (setq dest (decode-coding-string dest cs))
-               (if must-unfold
-                   (mapconcat (function
-                               (lambda (chr)
-                                 (cond
-                                   ((eq chr ?\n) "")
-                                   ((eq chr ?\t) " ")
-                                   (t (char-to-string chr)))
-                                 ))
-                              (std11-unfold-string dest)
-                              "")
-                 dest)
-               ))))))
+         (when dest
+           (setq dest (decode-mime-charset-string dest charset))
+           (if must-unfold
+               (mapconcat (function
+                           (lambda (chr)
+                             (cond ((eq chr ?\n) "")
+                                   ((eq chr ?\t) " ")
+                                   (t (char-to-string chr)))
+                             ))
+                          (std11-unfold-string dest)
+                          "")
+             dest))))))
 
 
 ;;; @ lexical analyze
@@ -578,16 +577,17 @@ be the result."
 (defun eword-analyze-quoted-string (string &optional must-unfold)
   (let ((p (std11-check-enclosure string ?\" ?\")))
     (if p
-       (cons (cons 'quoted-string
-                   (if eword-decode-quoted-encoded-word
-                       (eword-decode-quoted-string
-                        (substring string 0 p)
-                        default-mime-charset)
-                     (decode-mime-charset-string
-                      (std11-strip-quoted-pair (substring string 0 p))
-                      default-mime-charset)))
-             (substring string p)))
-      ))
+        (cons (cons 'quoted-string
+                    (if eword-decode-quoted-encoded-word
+                        (eword-decode-quoted-string
+                          (substring string 0 p)
+                          default-mime-charset)
+                      (std11-wrap-as-quoted-string
+                       (decode-mime-charset-string
+                        (std11-strip-quoted-pair (substring string 1 (1- p)))
+                        default-mime-charset))))
+              (substring string p)))
+    ))
 
 (defun eword-analyze-domain-literal (string &optional must-unfold)
   (std11-analyze-domain-literal string))
@@ -614,12 +614,20 @@ be the result."
 
 (defun eword-analyze-encoded-word (string &optional must-unfold)
   (let ((decoded (eword-decode-first-encoded-words
-                  string
-                  eword-encoded-word-in-phrase-regexp
-                  eword-after-encoded-word-in-phrase-regexp
-                  must-unfold)))
+                  string
+                  eword-encoded-word-in-phrase-regexp
+                  eword-after-encoded-word-in-phrase-regexp
+                  must-unfold)))
     (if decoded
-      (cons (cons 'atom (car decoded)) (cdr decoded)))))
+        (let ((s (car decoded)))
+          (while (or (string-match std11-atom-regexp s)
+                     (string-match std11-spaces-regexp s))
+            (setq s (substring s (match-end 0))))
+          (if (= (length s) 0)
+              (cons (cons 'atom (car decoded)) (cdr decoded))
+            (cons (cons 'quoted-string
+                        (std11-wrap-as-quoted-string (car decoded)))
+                  (cdr decoded)))))))
 
 (defun eword-analyze-atom (string &optional must-unfold)
   (if (let ((enable-multibyte-characters nil))
@@ -637,17 +645,24 @@ be the result."
              ))))
 
 (defun eword-lexical-analyze-internal (string must-unfold)
-  (let (dest ret)
+  (let ((last 'eword-analyze-spaces)
+        dest ret)
     (while (not (string-equal string ""))
       (setq ret
-           (let ((rest eword-lexical-analyzers)
-                 func r)
-             (while (and (setq func (car rest))
-                         (null (setq r (funcall func string must-unfold)))
-                         )
-               (setq rest (cdr rest)))
-             (or r `((error . ,string) . ""))
-             ))
+            (let ((rest eword-lexical-analyzers)
+                  func r)
+              (while (and (setq func (car rest))
+                          (or
+                           (and
+                            (not eword-decode-sticked-encoded-word)
+                            (not (eq last 'eword-analyze-spaces))
+                            (eq func 'eword-analyze-encoded-word))
+                           (null (setq r (funcall func string must-unfold))))
+                          )
+                (setq rest (cdr rest)))
+              (setq last func)
+              (or r `((error . ,string) . ""))
+              ))
       (setq dest (cons (car ret) dest))
       (setq string (cdr ret))
       )
@@ -728,6 +743,8 @@ characters are regarded as variable `default-mime-charset'.
 
 If an encoded-word is broken or your emacs implementation can not
 decode the charset included in it, it is not decoded."
+  (rotate-memo args-eword-decode-and-unfold-structured-field
+              (list string))
   (let ((tokens (eword-lexical-analyze string 'must-unfold))
        (result ""))
     (while tokens
@@ -756,15 +773,21 @@ decode the charset included in it, it is not decoded.
 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 if there are in decoded encoded-words (generated by bad manner MUA
 such as a version of Net$cape)."
+  (rotate-memo args-eword-decode-structured-field-body
+              (list string must-unfold start-column max-column))
   (if start-column
-      ;; fold with max-column
-      (eword-decode-and-fold-structured-field
-       string start-column max-column must-unfold)
+      ;; fold with max-column (folding is not implemented.)
+      (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+            (decoded (ew-decode-field (make-string (1- start-column) ?X)
+                                      (ew-lf-crlf-to-crlf string)
+                                      (if must-unfold 'ew-cut-cr-lf))))
+       (if must-unfold (ew-cut-cr-lf decoded) decoded))
     ;; Don't fold
-    (mapconcat (function eword-decode-token)
-              (eword-lexical-analyze string must-unfold)
-              "")
-    ))
+    (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
+          (decoded (ew-decode-field ""
+                                    (ew-lf-crlf-to-crlf string)
+                                    (if must-unfold 'ew-cut-cr-lf))))
+      (if must-unfold (ew-cut-cr-lf decoded) decoded))))
 
 (defun eword-decode-unstructured-field-body (string &optional must-unfold)
   "Decode non us-ascii characters in STRING as unstructured field body.
@@ -780,7 +803,14 @@ decode the charset included in it, it is not decoded.
 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 if there are in decoded encoded-words (generated by bad manner MUA
 such as a version of Net$cape)."
-  (eword-decode-string string must-unfold default-mime-charset))
+  (rotate-memo args-eword-decode-unstructured-field-body
+              (list string must-unfold))
+  (let ((decoded (ew-decode-field ""
+                                 (ew-lf-crlf-to-crlf string)
+                                 (if must-unfold 'ew-cut-cr-lf))))
+    (if must-unfold
+       (ew-cut-cr-lf decoded)
+      decoded)))
 
 (defun eword-extract-address-components (string)
   "Extract full name and canonical address from STRING.