* eword-decode.el (eword-after-encoded-word-regexp): New constant.
author akr <akr>
Sun, 22 Mar 1998 19:37:33 +0000 (19:37 +0000)
committer akr <akr>
Sun, 22 Mar 1998 19:37:33 +0000 (19:37 +0000)
eword-encoded-text-in-phrase-regexp: New constant.
eword-encoded-word-in-phrase-regexp: New constant.
eword-after-encoded-word-in-phrase-regexp: New constant.
eword-encoded-text-in-comment-regexp: New constant.
eword-encoded-word-in-comment-regexp: New constant.
eword-after-encoded-word-in-comment-regexp: New constant.
eword-encoded-text-in-quoted-string-regexp: New constant.
eword-encoded-word-in-quoted-string-regexp: New constant.
eword-after-encoded-word-in-quoted-string-regexp: New constant.
eword-decode-sticked-encoded-word: Update DOC-STRING.
eword-decode-quoted-encoded-word: Update DOC-STRING.
eword-decode-first-encoded-words: Add argument eword-regexp.
eword-decode-comment-string: Use `eword-encoded-word-in-comment-regexp'
and `eword-after-encoded-word-in-comment-regexp'.
eword-decode-quoted-string : Use
`eword-encoded-word-in-quoted-string-regexp' and
`eword-after-encoded-word-in-quoted-string-regexp'.
eword-decode-unstructured-string: Use `eword-encoded-word-regexp' and
`eword-after-encoded-word-regexp'.
eword-analyze-encoded-word: Use `eword-encoded-word-in-phrase-regexp'
and `eword-after-encoded-word-in-phrase-regexp'
eword-lexical-analyze: Add `default-mime-charset' and `must-unfold' to
key of cache.

ChangeLog
eword-decode.el

index 431fce0..30c0a2c 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,30 @@
+1998-03-22  Tanaka Akira  <akr@jaist.ac.jp>
+
+       * eword-decode.el (eword-after-encoded-word-regexp): New constant.
+       eword-encoded-text-in-phrase-regexp: New constant.
+       eword-encoded-word-in-phrase-regexp: New constant.
+       eword-after-encoded-word-in-phrase-regexp: New constant.
+       eword-encoded-text-in-comment-regexp: New constant.
+       eword-encoded-word-in-comment-regexp: New constant.
+       eword-after-encoded-word-in-comment-regexp: New constant.
+       eword-encoded-text-in-quoted-string-regexp: New constant.
+       eword-encoded-word-in-quoted-string-regexp: New constant.
+       eword-after-encoded-word-in-quoted-string-regexp: New constant.
+       eword-decode-sticked-encoded-word: Update DOC-STRING.
+       eword-decode-quoted-encoded-word: Update DOC-STRING.
+       eword-decode-first-encoded-words: Add argument eword-regexp.
+       eword-decode-comment-string: Use `eword-encoded-word-in-comment-regexp'
+       and `eword-after-encoded-word-in-comment-regexp'.
+       eword-decode-quoted-string : Use
+       `eword-encoded-word-in-quoted-string-regexp' and
+       `eword-after-encoded-word-in-quoted-string-regexp'.
+       eword-decode-unstructured-string: Use `eword-encoded-word-regexp' and
+       `eword-after-encoded-word-regexp'.
+       eword-analyze-encoded-word: Use `eword-encoded-word-in-phrase-regexp'
+       and `eword-after-encoded-word-in-phrase-regexp'
+       eword-lexical-analyze: Add `default-mime-charset' and `must-unfold' to
+       key of cache.
 1998-03-21  Shuhei KOBAYASHI  <shuhei-k@jaist.ac.jp>
 
        * eword-decode.el (eword-lexical-analyze-internal): Fixed return
index 83e2842..3b52aa4 100644 (file)
          eword-encoded-text-regexp
          "\\)"
          (regexp-quote "?=")))
+(defconst eword-after-encoded-word-regexp "\\([ \t]\\|$\\)")
+
+(defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+")
+(defconst eword-encoded-word-in-phrase-regexp
+  (concat (regexp-quote "=?")
+         "\\("
+         mime-charset-regexp
+         "\\)"
+         (regexp-quote "?")
+         "\\(B\\|Q\\)"
+         (regexp-quote "?")
+         "\\("
+         eword-encoded-text-in-phrase-regexp
+         "\\)"
+         (regexp-quote "?=")))
+(defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t(]\\|$\\)")
+
+(defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+")
+(defconst eword-encoded-word-in-comment-regexp
+  (concat (regexp-quote "=?")
+         "\\("
+         mime-charset-regexp
+         "\\)"
+         (regexp-quote "?")
+         "\\(B\\|Q\\)"
+         (regexp-quote "?")
+         "\\("
+         eword-encoded-text-in-comment-regexp
+         "\\)"
+         (regexp-quote "?=")))
+(defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)")
+
+(defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+")
+(defconst eword-encoded-word-in-quoted-string-regexp
+  (concat (regexp-quote "=?")
+         "\\("
+         mime-charset-regexp
+         "\\)"
+         (regexp-quote "?")
+         "\\(B\\|Q\\)"
+         (regexp-quote "?")
+         "\\("
+         eword-encoded-text-in-quoted-string-regexp
+         "\\)"
+         (regexp-quote "?=")))
+(defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)")
 
 
 ;;; @@ Base64
 ;;;
 
 (defvar eword-decode-sticked-encoded-word nil
-  "*If non-nil, decode encoded-words sticked on atoms, other encoded-words, etc.
+  "*If non-nil, decode encoded-words sticked on atoms,
+other encoded-words, etc.
 however this behaviour violates RFC2047.")
 
 (defvar eword-decode-quoted-encoded-word nil
-  "*If non-nil, decode encoded-words in quoted-string 
+  "*If non-nil, decode encoded-words in quoted-string
 however this behaviour violates RFC2047.")
 
-(defun eword-decode-first-encoded-words (string after-regexp &optional must-unfold)
+(defun eword-decode-first-encoded-words (string
+                                        eword-regexp
+                                        after-regexp
+                                        &optional must-unfold)
+  "Decode MIME encoded-words in beginning of STRING.
+
+EWORD-REGEXP is the regexp that matches an encoded-word.
+Usual value is eword-encoded-word-regexp,
+eword-encoded-word-in-phrase-regexp,
+eword-encoded-word-in-comment-regexp or
+eword-encoded-word-in-quoted-string-regexp.
+
+AFTER-REGEXP is the regexp that matches the text following an encoded-word.
+Usual value is eword-after-encoded-word-regexp,
+eword-after-encoded-word-in-phrase-regexp,
+eword-after-encoded-word-in-comment-regexp or
+eword-after-encoded-word-in-quoted-string-regexp.
+
+If beginning of STRING matches EWORD-REGEXP and AFTER-REGEXP,
+returns a cons cell of decoded string (sequence of characters) and
+the rest (sequence of octets).
+
+If beginning of STRING does not match EWORD-REGEXP and AFTER-REGEXP,
+returns nil.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is returned in decoded part
+as encoded-word form.
+
+If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
   (if eword-decode-sticked-encoded-word (setq after-regexp ""))
-  (let ((between-ewords-regexp (if eword-decode-sticked-encoded-word "\\(\n?[ \t]\\)*" "\\(\n?[ \t]\\)+"))
+  (let ((between-ewords-regexp
+         (if eword-decode-sticked-encoded-word
+           "\\(\n?[ \t]\\)*"
+           "\\(\n?[ \t]\\)+"))
        (src string)    ; sequence of octets.
        (dst ""))       ; sequence of characters.
-    (if (string-match (concat "\\`\\(" eword-encoded-word-regexp "\\)" after-regexp) src)
+    (if (string-match
+         (concat "\\`\\(" eword-regexp "\\)" after-regexp) src)
       (let* (p
             (q (match-end 1))
             (ew (substring src 0 q))
@@ -126,7 +208,9 @@ however this behaviour violates RFC2047.")
            (while
              (and
                (string-match
-                 (concat "\\`\\(" between-ewords-regexp "\\)\\(" eword-encoded-word-regexp "\\)" after-regexp)
+                 (concat "\\`\\(" between-ewords-regexp "\\)"
+                            "\\(" eword-regexp "\\)"
+                            after-regexp)
                  src)
                (progn
                  (setq p (match-end 1)
@@ -154,14 +238,23 @@ however this behaviour violates RFC2047.")
            (decoded (and
                        flag-ew
                        (eword-decode-first-encoded-words src
-                         "\\([ \t()\\\\]\\|$\\)" must-unfold))))
+                         eword-encoded-word-in-comment-regexp
+                         eword-after-encoded-word-in-comment-regexp
+                         must-unfold))))
        (if (and (not (string= buf ""))
                 (or decoded (eq ch ?\() (eq ch ?\))))
-         (setq dst (concat dst (std11-wrap-as-quoted-pairs (decode-mime-charset-string buf default-mime-charset) '(?\( ?\))))
+         (setq dst (concat dst
+                     (std11-wrap-as-quoted-pairs
+                       (decode-mime-charset-string buf
+                         default-mime-charset)
+                       '(?\( ?\))))
                buf ""))
        (cond
          (decoded
-           (setq dst (concat dst (std11-wrap-as-quoted-pairs (car decoded) '(?( ?))))
+           (setq dst (concat dst
+                       (std11-wrap-as-quoted-pairs
+                         (car decoded)
+                         '(?( ?))))
                  src (cdr decoded)))
          ((or (eq ch ?\() (eq ch ?\)))
            (setq dst (concat dst (list ch))
@@ -181,7 +274,11 @@ however this behaviour violates RFC2047.")
                  flag-ew eword-decode-sticked-encoded-word))
          (t (error "something wrong")))))
     (if (not (string= buf ""))
-      (setq dst (concat dst (std11-wrap-as-quoted-pairs (decode-mime-charset-string buf default-mime-charset) '(?\( ?\))))))
+      (setq dst (concat dst
+                 (std11-wrap-as-quoted-pairs
+                   (decode-mime-charset-string buf
+                     default-mime-charset)
+                   '(?\( ?\))))))
     dst))
 
 (defun eword-decode-quoted-string (string &optional must-unfold)
@@ -195,18 +292,23 @@ however this behaviour violates RFC2047.")
                        eword-decode-quoted-encoded-word
                        flag-ew
                        (eword-decode-first-encoded-words src
-                         "\\([ \t\"\\\\]\\|$\\)" must-unfold))))
+                         eword-encoded-word-in-quoted-string-regexp
+                         eword-after-encoded-word-in-quoted-string-regexp
+                         must-unfold))))
        (if (and (not (string= buf ""))
                 (or decoded (eq ch ?\")))
          (setq dst (concat dst
                      (std11-wrap-as-quoted-pairs
-                       (decode-mime-charset-string buf default-mime-charset)
+                       (decode-mime-charset-string buf
+                       default-mime-charset)
                        '(?\")))
                buf ""))
        (cond
          (decoded
            (setq dst (concat dst
-                       (std11-wrap-as-quoted-pairs (car decoded) '(?\")))
+                       (std11-wrap-as-quoted-pairs
+                         (car decoded)
+                         '(?\")))
                  src (cdr decoded)))
          ((or (eq ch ?\"))
            (setq dst (concat dst (list ch))
@@ -228,7 +330,8 @@ however this behaviour violates RFC2047.")
     (if (not (string= buf ""))
       (setq dst (concat dst
                  (std11-wrap-as-quoted-pairs
-                   (decode-mime-charset-string buf default-mime-charset)
+                   (decode-mime-charset-string buf
+                     default-mime-charset)
                    '(?\")))))
     dst))
 
@@ -239,10 +342,15 @@ however this behaviour violates RFC2047.")
        (flag-ew t))
     (while (< 0 (length src))
       (let ((ch (aref src 0))
-           (decoded (and flag-ew (eword-decode-first-encoded-words src "\\([ \t]\\|$\\)" must-unfold))))
+           (decoded (and flag-ew (eword-decode-first-encoded-words src
+                                   eword-encoded-word-regexp
+                                   eword-after-encoded-word-regexp
+                                   must-unfold))))
        (if (and (not (string= buf ""))
                 decoded)
-         (setq dst (concat dst (decode-mime-charset-string buf default-mime-charset))
+         (setq dst (concat dst
+                     (decode-mime-charset-string buf
+                       default-mime-charset))
                buf ""))
        (cond
          (decoded
@@ -258,7 +366,9 @@ however this behaviour violates RFC2047.")
                  flag-ew eword-decode-sticked-encoded-word))
          (t (error "something wrong")))))
     (if (not (string= buf ""))
-      (setq dst (concat dst (decode-mime-charset-string buf default-mime-charset))))
+      (setq dst (concat dst
+                 (decode-mime-charset-string buf
+                   default-mime-charset))))
     dst))
 
 (defun eword-decode-string (string &optional must-unfold)
@@ -272,7 +382,9 @@ decode the charset included in it, it is not decoded.
 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 if there are in decoded encoded-words (generated by bad manner MUA
 such as a version of Net$cape)."
-  (eword-decode-unstructured-string (std11-unfold-string string) must-unfold))
+  (eword-decode-unstructured-string
+    (std11-unfold-string string)
+    must-unfold))
 
 
 ;;; @ for region
@@ -293,7 +405,9 @@ such as a version of Net$cape)."
       (if unfolding
          (eword-decode-unfold)
        )
-      (let ((str (eword-decode-unstructured-string (buffer-substring (point-min) (point-max)) must-unfold)))
+      (let ((str (eword-decode-unstructured-string
+                  (buffer-substring (point-min) (point-max))
+                  must-unfold)))
        (delete-region (point-min) (point-max))
        (insert str)))))
 
@@ -529,7 +643,10 @@ be the result."
   (std11-analyze-special string))
 
 (defun eword-analyze-encoded-word (string &optional must-unfold)
-  (let ((decoded (eword-decode-first-encoded-words string "\\([ \t(]\\|$\\)" must-unfold)))
+  (let ((decoded (eword-decode-first-encoded-words string
+                  eword-encoded-word-in-phrase-regexp
+                   eword-after-encoded-word-in-phrase-regexp
+                  must-unfold)))
     (if decoded
       (cons (cons 'atom (car decoded)) (cdr decoded)))))
 
@@ -566,12 +683,13 @@ It is like std11-lexical-analyze, but it decodes non us-ascii
 characters encoded as encoded-words or invalid \"raw\" format.
 \"Raw\" non us-ascii characters are regarded as variable
 `default-mime-charset'."
-  (let ((key (copy-sequence string))
-       ret)
-    (set-text-properties 0 (length key) nil key)
+  (let* ((str (copy-sequence string))
+        (key (cons str (cons default-mime-charset must-unfold)))
+        ret)
+    (set-text-properties 0 (length str) nil str)
     (if (setq ret (assoc key eword-lexical-analyze-cache))
        (cdr ret)
-      (setq ret (eword-lexical-analyze-internal key must-unfold))
+      (setq ret (eword-lexical-analyze-internal str must-unfold))
       (setq eword-lexical-analyze-cache
            (cons (cons key ret)
                  (last eword-lexical-analyze-cache