* eword-decode.el (eword-after-encoded-word-regexp): New constant.

author akr <akr>

Sun, 22 Mar 1998 19:37:33 +0000 (19:37 +0000)

committer akr <akr>

Sun, 22 Mar 1998 19:37:33 +0000 (19:37 +0000)
author akr <akr>
Sun, 22 Mar 1998 19:37:33 +0000 (19:37 +0000)
committer akr <akr>
Sun, 22 Mar 1998 19:37:33 +0000 (19:37 +0000)
diff --git a/ChangeLog b/ChangeLog

index 431fce0..30c0a2c 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,30 @@
+1998-03-22  Tanaka Akira  <akr@jaist.ac.jp>
+
+       * eword-decode.el (eword-after-encoded-word-regexp): New constant.
+       eword-encoded-text-in-phrase-regexp: New constant.
+       eword-encoded-word-in-phrase-regexp: New constant.
+       eword-after-encoded-word-in-phrase-regexp: New constant.
+       eword-encoded-text-in-comment-regexp: New constant.
+       eword-encoded-word-in-comment-regexp: New constant.
+       eword-after-encoded-word-in-comment-regexp: New constant.
+       eword-encoded-text-in-quoted-string-regexp: New constant.
+       eword-encoded-word-in-quoted-string-regexp: New constant.
+       eword-after-encoded-word-in-quoted-string-regexp: New constant.
+       eword-decode-sticked-encoded-word: Update DOC-STRING.
+       eword-decode-quoted-encoded-word: Update DOC-STRING.
+       eword-decode-first-encoded-words: Add argument eword-regexp.
+       eword-decode-comment-string: Use `eword-encoded-word-in-comment-regexp'
+       and `eword-after-encoded-word-in-comment-regexp'.
+       eword-decode-quoted-string: Use
+       `eword-encoded-word-in-quoted-string-regexp' and
+       `eword-after-encoded-word-in-quoted-string-regexp'.
+       eword-decode-unstructured-string: Use `eword-encoded-word-regexp' and
+       `eword-after-encoded-word-regexp'.
+       eword-analyze-encoded-word: Use `eword-encoded-word-in-phrase-regexp'
+       and `eword-after-encoded-word-in-phrase-regexp'.
+       eword-lexical-analyze: Add `default-mime-charset' and `must-unfold' to
+       key of cache.
+ 
  1998-03-21  Shuhei KOBAYASHI  <shuhei-k@jaist.ac.jp>
  
         * eword-decode.el (eword-lexical-analyze-internal): Fixed return
diff --git a/eword-decode.el b/eword-decode.el

index 83e2842..3b52aa4 100644 (file)
--- a/eword-decode.el
+++ b/eword-decode.el
@@ -59,6 +59,52 @@
           eword-encoded-text-regexp
           "\\)"
           (regexp-quote "?=")))
+(defconst eword-after-encoded-word-regexp "\\([ \t]\\|$\\)")
+
+(defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+")
+(defconst eword-encoded-word-in-phrase-regexp
+  (concat (regexp-quote "=?")
+         "\\("
+         mime-charset-regexp
+         "\\)"
+         (regexp-quote "?")
+         "\\(B\\|Q\\)"
+         (regexp-quote "?")
+         "\\("
+         eword-encoded-text-in-phrase-regexp
+         "\\)"
+         (regexp-quote "?=")))
+(defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t(]\\|$\\)")
+
+(defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+")
+(defconst eword-encoded-word-in-comment-regexp
+  (concat (regexp-quote "=?")
+         "\\("
+         mime-charset-regexp
+         "\\)"
+         (regexp-quote "?")
+         "\\(B\\|Q\\)"
+         (regexp-quote "?")
+         "\\("
+         eword-encoded-text-in-comment-regexp
+         "\\)"
+         (regexp-quote "?=")))
+(defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)")
+
+(defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+")
+(defconst eword-encoded-word-in-quoted-string-regexp
+  (concat (regexp-quote "=?")
+         "\\("
+         mime-charset-regexp
+         "\\)"
+         (regexp-quote "?")
+         "\\(B\\|Q\\)"
+         (regexp-quote "?")
+         "\\("
+         eword-encoded-text-in-quoted-string-regexp
+         "\\)"
+         (regexp-quote "?=")))
+(defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)")
  
  
  ;;; @@ Base64
@@ -102,19 +148,55 @@
  ;;;
  
  (defvar eword-decode-sticked-encoded-word nil
-  "*If non-nil, decode encoded-words sticked on atoms, other encoded-words, etc.
+  "*If non-nil, decode encoded-words sticked on atoms,
+other encoded-words, etc.
  however this behaviour violates RFC2047.")
  
  (defvar eword-decode-quoted-encoded-word nil
-  "*If non-nil, decode encoded-words in quoted-string 
+  "*If non-nil, decode encoded-words in quoted-string
  however this behaviour violates RFC2047.")
  
-(defun eword-decode-first-encoded-words (string after-regexp &optional must-unfold)
+(defun eword-decode-first-encoded-words (string
+                                        eword-regexp
+                                        after-regexp
+                                        &optional must-unfold)
+  "Decode MIME encoded-words in beginning of STRING.
+
+EWORD-REGEXP is the regexp that matches an encoded-word.
+Usual value is eword-encoded-word-regexp,
+eword-encoded-word-in-phrase-regexp,
+eword-encoded-word-in-comment-regexp or
+eword-encoded-word-in-quoted-string-regexp.
+
+AFTER-REGEXP is the regexp that matches the text after an encoded-word.
+Usual value is eword-after-encoded-word-regexp,
+eword-after-encoded-word-in-phrase-regexp,
+eword-after-encoded-word-in-comment-regexp or
+eword-after-encoded-word-in-quoted-string-regexp.
+
+If the beginning of STRING matches EWORD-REGEXP followed by AFTER-REGEXP,
+returns a cons cell of the decoded string (sequence of characters) and
+the rest (sequence of octets).
+
+If the beginning of STRING does not match EWORD-REGEXP followed by
+AFTER-REGEXP, returns nil.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is returned in decoded part
+as encoded-word form.
+
+If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
    (if eword-decode-sticked-encoded-word (setq after-regexp ""))
-  (let ((between-ewords-regexp (if eword-decode-sticked-encoded-word "\\(\n?[ \t]\\)*" "\\(\n?[ \t]\\)+"))
+  (let ((between-ewords-regexp
+         (if eword-decode-sticked-encoded-word
+           "\\(\n?[ \t]\\)*"
+           "\\(\n?[ \t]\\)+"))
         (src string)    ; sequence of octets.
         (dst ""))       ; sequence of characters.
-    (if (string-match (concat "\\`\\(" eword-encoded-word-regexp "\\)" after-regexp) src)
+    (if (string-match
+         (concat "\\`\\(" eword-regexp "\\)" after-regexp) src)
        (let* (p
              (q (match-end 1))
              (ew (substring src 0 q))
@@ -126,7 +208,9 @@ however this behaviour violates RFC2047.")
             (while
               (and
                 (string-match
-                 (concat "\\`\\(" between-ewords-regexp "\\)\\(" eword-encoded-word-regexp "\\)" after-regexp)
+                 (concat "\\`\\(" between-ewords-regexp "\\)"
+                            "\\(" eword-regexp "\\)"
+                            after-regexp)
                   src)
                 (progn
                   (setq p (match-end 1)
@@ -154,14 +238,23 @@ however this behaviour violates RFC2047.")
             (decoded (and
                         flag-ew
                         (eword-decode-first-encoded-words src
-                         "\\([ \t()\\\\]\\|$\\)" must-unfold))))
+                         eword-encoded-word-in-comment-regexp
+                         eword-after-encoded-word-in-comment-regexp
+                         must-unfold))))
         (if (and (not (string= buf ""))
                  (or decoded (eq ch ?\() (eq ch ?\))))
-         (setq dst (concat dst (std11-wrap-as-quoted-pairs (decode-mime-charset-string buf default-mime-charset) '(?\( ?\))))
+         (setq dst (concat dst
+                     (std11-wrap-as-quoted-pairs
+                       (decode-mime-charset-string buf
+                         default-mime-charset)
+                       '(?\( ?\))))
                 buf ""))
         (cond
           (decoded
-           (setq dst (concat dst (std11-wrap-as-quoted-pairs (car decoded) '(?( ?))))
+           (setq dst (concat dst
+                       (std11-wrap-as-quoted-pairs
+                         (car decoded)
+                         '(?( ?))))
                   src (cdr decoded)))
           ((or (eq ch ?\() (eq ch ?\)))
             (setq dst (concat dst (list ch))
@@ -181,7 +274,11 @@ however this behaviour violates RFC2047.")
                   flag-ew eword-decode-sticked-encoded-word))
           (t (error "something wrong")))))
      (if (not (string= buf ""))
-      (setq dst (concat dst (std11-wrap-as-quoted-pairs (decode-mime-charset-string buf default-mime-charset) '(?\( ?\))))))
+      (setq dst (concat dst
+                 (std11-wrap-as-quoted-pairs
+                   (decode-mime-charset-string buf
+                     default-mime-charset)
+                   '(?\( ?\))))))
      dst))
  
  (defun eword-decode-quoted-string (string &optional must-unfold)
@@ -195,18 +292,23 @@ however this behaviour violates RFC2047.")
                         eword-decode-quoted-encoded-word
                         flag-ew
                         (eword-decode-first-encoded-words src
-                         "\\([ \t\"\\\\]\\|$\\)" must-unfold))))
+                         eword-encoded-word-in-quoted-string-regexp
+                         eword-after-encoded-word-in-quoted-string-regexp
+                         must-unfold))))
         (if (and (not (string= buf ""))
                  (or decoded (eq ch ?\")))
           (setq dst (concat dst
                       (std11-wrap-as-quoted-pairs
-                       (decode-mime-charset-string buf default-mime-charset)
+                       (decode-mime-charset-string buf
+                       default-mime-charset)
                         '(?\")))
                 buf ""))
         (cond
           (decoded
             (setq dst (concat dst
-                       (std11-wrap-as-quoted-pairs (car decoded) '(?\")))
+                       (std11-wrap-as-quoted-pairs
+                         (car decoded)
+                         '(?\")))
                   src (cdr decoded)))
           ((or (eq ch ?\"))
             (setq dst (concat dst (list ch))
@@ -228,7 +330,8 @@ however this behaviour violates RFC2047.")
      (if (not (string= buf ""))
        (setq dst (concat dst
                   (std11-wrap-as-quoted-pairs
-                   (decode-mime-charset-string buf default-mime-charset)
+                   (decode-mime-charset-string buf
+                     default-mime-charset)
                     '(?\")))))
      dst))
  
@@ -239,10 +342,15 @@ however this behaviour violates RFC2047.")
         (flag-ew t))
      (while (< 0 (length src))
        (let ((ch (aref src 0))
-           (decoded (and flag-ew (eword-decode-first-encoded-words src "\\([ \t]\\|$\\)" must-unfold))))
+           (decoded (and flag-ew (eword-decode-first-encoded-words src
+                                   eword-encoded-word-regexp
+                                   eword-after-encoded-word-regexp
+                                   must-unfold))))
         (if (and (not (string= buf ""))
                  decoded)
-         (setq dst (concat dst (decode-mime-charset-string buf default-mime-charset))
+         (setq dst (concat dst
+                     (decode-mime-charset-string buf
+                       default-mime-charset))
                 buf ""))
         (cond
           (decoded
@@ -258,7 +366,9 @@ however this behaviour violates RFC2047.")
                   flag-ew eword-decode-sticked-encoded-word))
           (t (error "something wrong")))))
      (if (not (string= buf ""))
-      (setq dst (concat dst (decode-mime-charset-string buf default-mime-charset))))
+      (setq dst (concat dst
+                 (decode-mime-charset-string buf
+                   default-mime-charset))))
      dst))
  
  (defun eword-decode-string (string &optional must-unfold)
@@ -272,7 +382,9 @@ decode the charset included in it, it is not decoded.
  If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
  if there are in decoded encoded-words (generated by bad manner MUA
  such as a version of Net$cape)."
-  (eword-decode-unstructured-string (std11-unfold-string string) must-unfold))
+  (eword-decode-unstructured-string
+    (std11-unfold-string string)
+    must-unfold))
  
  
  ;;; @ for region
@@ -293,7 +405,9 @@ such as a version of Net$cape)."
        (if unfolding
           (eword-decode-unfold)
         )
-      (let ((str (eword-decode-unstructured-string (buffer-substring (point-min) (point-max)) must-unfold)))
+      (let ((str (eword-decode-unstructured-string
+                  (buffer-substring (point-min) (point-max))
+                  must-unfold)))
         (delete-region (point-min) (point-max))
         (insert str)))))
  
@@ -529,7 +643,10 @@ be the result."
    (std11-analyze-special string))
  
  (defun eword-analyze-encoded-word (string &optional must-unfold)
-  (let ((decoded (eword-decode-first-encoded-words string "\\([ \t(]\\|$\\)" must-unfold)))
+  (let ((decoded (eword-decode-first-encoded-words string
+                  eword-encoded-word-in-phrase-regexp
+                   eword-after-encoded-word-in-phrase-regexp
+                  must-unfold)))
      (if decoded
        (cons (cons 'atom (car decoded)) (cdr decoded)))))
  
@@ -566,12 +683,13 @@ It is like std11-lexical-analyze, but it decodes non us-ascii
  characters encoded as encoded-words or invalid \"raw\" format.
  \"Raw\" non us-ascii characters are regarded as variable
  `default-mime-charset'."
-  (let ((key (copy-sequence string))
-       ret)
-    (set-text-properties 0 (length key) nil key)
+  (let* ((str (copy-sequence string))
+        (key (cons str (cons default-mime-charset must-unfold)))
+        ret)
+    (set-text-properties 0 (length str) nil str)
      (if (setq ret (assoc key eword-lexical-analyze-cache))
         (cdr ret)
-      (setq ret (eword-lexical-analyze-internal key must-unfold))
+      (setq ret (eword-lexical-analyze-internal str must-unfold))
        (setq eword-lexical-analyze-cache
             (cons (cons key ret)
                   (last eword-lexical-analyze-cache
author	akr <akr>
	Sun, 22 Mar 1998 19:37:33 +0000 (19:37 +0000)
committer	akr <akr>
	Sun, 22 Mar 1998 19:37:33 +0000 (19:37 +0000)
ChangeLog		patch \| blob \| history
eword-decode.el		patch \| blob \| history