From: akr <akr>
Date: Sun, 22 Mar 1998 19:37:33 +0000 (+0000)
Subject: * eword-decode.el (eword-after-encoded-word-regexp): New constant.
X-Git-Tag: akr-199811302358~4
X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=5a3f15341b335283f27ba514dafc529c72460cb3;p=elisp%2Fsemi.git

* eword-decode.el (eword-after-encoded-word-regexp): New constant.
eword-encoded-text-in-phrase-regexp: New constant.
eword-encoded-word-in-phrase-regexp: New constant.
eword-after-encoded-word-in-phrase-regexp: New constant.
eword-encoded-text-in-comment-regexp: New constant.
eword-encoded-word-in-comment-regexp: New constant.
eword-after-encoded-word-in-comment-regexp: New constant.
eword-encoded-text-in-quoted-string-regexp: New constant.
eword-encoded-word-in-quoted-string-regexp: New constant.
eword-after-encoded-word-in-quoted-string-regexp: New constant.
eword-decode-sticked-encoded-word: Update DOC-STRING.
eword-decode-quoted-encoded-word: Update DOC-STRING.
eword-decode-first-encoded-words: Add argument eword-regexp.
eword-decode-comment-string: Use `eword-encoded-word-in-comment-regexp'
and `eword-after-encoded-word-in-comment-regexp'.
eword-decode-quoted-string : Use
`eword-encoded-word-in-quoted-string-regexp' and
`eword-after-encoded-word-in-quoted-string-regexp'.
eword-decode-unstructured-string: Use `eword-encoded-word-regexp' and
`eword-after-encoded-word-regexp'.
eword-analyze-encoded-word: Use `eword-encoded-word-in-phrase-regexp'
and `eword-after-encoded-word-in-phrase-regexp'
eword-lexical-analyze: Add `default-mime-charset' and `must-unfold' to
key of cache.
---

diff --git a/ChangeLog b/ChangeLog
index 431fce0..30c0a2c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,30 @@
+1998-03-22  Tanaka Akira  <akr@jaist.ac.jp>
+
+	* eword-decode.el (eword-after-encoded-word-regexp): New constant.
+	eword-encoded-text-in-phrase-regexp: New constant.
+	eword-encoded-word-in-phrase-regexp: New constant.
+	eword-after-encoded-word-in-phrase-regexp: New constant.
+	eword-encoded-text-in-comment-regexp: New constant.
+	eword-encoded-word-in-comment-regexp: New constant.
+	eword-after-encoded-word-in-comment-regexp: New constant.
+	eword-encoded-text-in-quoted-string-regexp: New constant.
+	eword-encoded-word-in-quoted-string-regexp: New constant.
+	eword-after-encoded-word-in-quoted-string-regexp: New constant.
+	eword-decode-sticked-encoded-word: Update DOC-STRING.
+	eword-decode-quoted-encoded-word: Update DOC-STRING.
+	eword-decode-first-encoded-words: Add argument eword-regexp.
+	eword-decode-comment-string: Use `eword-encoded-word-in-comment-regexp'
+	and `eword-after-encoded-word-in-comment-regexp'.
+	eword-decode-quoted-string : Use
+	`eword-encoded-word-in-quoted-string-regexp' and
+	`eword-after-encoded-word-in-quoted-string-regexp'.
+	eword-decode-unstructured-string: Use `eword-encoded-word-regexp' and
+	`eword-after-encoded-word-regexp'.
+	eword-analyze-encoded-word: Use `eword-encoded-word-in-phrase-regexp'
+	and `eword-after-encoded-word-in-phrase-regexp'
+	eword-lexical-analyze: Add `default-mime-charset' and `must-unfold' to
+	key of cache.
+ 
 1998-03-21  Shuhei KOBAYASHI  <shuhei-k@jaist.ac.jp>
 
 	* eword-decode.el (eword-lexical-analyze-internal): Fixed return
diff --git a/eword-decode.el b/eword-decode.el
index 83e2842..3b52aa4 100644
--- a/eword-decode.el
+++ b/eword-decode.el
@@ -59,6 +59,52 @@
 	  eword-encoded-text-regexp
 	  "\\)"
 	  (regexp-quote "?=")))
+(defconst eword-after-encoded-word-regexp "\\([ \t]\\|$\\)")
+
+(defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+")
+(defconst eword-encoded-word-in-phrase-regexp
+  (concat (regexp-quote "=?")
+	  "\\("
+	  mime-charset-regexp
+	  "\\)"
+	  (regexp-quote "?")
+	  "\\(B\\|Q\\)"
+	  (regexp-quote "?")
+	  "\\("
+	  eword-encoded-text-in-phrase-regexp
+	  "\\)"
+	  (regexp-quote "?=")))
+(defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t(]\\|$\\)")
+
+(defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+")
+(defconst eword-encoded-word-in-comment-regexp
+  (concat (regexp-quote "=?")
+	  "\\("
+	  mime-charset-regexp
+	  "\\)"
+	  (regexp-quote "?")
+	  "\\(B\\|Q\\)"
+	  (regexp-quote "?")
+	  "\\("
+	  eword-encoded-text-in-comment-regexp
+	  "\\)"
+	  (regexp-quote "?=")))
+(defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)")
+
+(defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+")
+(defconst eword-encoded-word-in-quoted-string-regexp
+  (concat (regexp-quote "=?")
+	  "\\("
+	  mime-charset-regexp
+	  "\\)"
+	  (regexp-quote "?")
+	  "\\(B\\|Q\\)"
+	  (regexp-quote "?")
+	  "\\("
+	  eword-encoded-text-in-quoted-string-regexp
+	  "\\)"
+	  (regexp-quote "?=")))
+(defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)")
 
 
 ;;; @@ Base64
@@ -102,19 +148,55 @@
 ;;;
 
 (defvar eword-decode-sticked-encoded-word nil
-  "*If non-nil, decode encoded-words sticked on atoms, other encoded-words, etc.
+  "*If non-nil, decode encoded-words sticked on atoms,
+other encoded-words, etc.
 however this behaviour violates RFC2047.")
 
 (defvar eword-decode-quoted-encoded-word nil
-  "*If non-nil, decode encoded-words in quoted-string 
+  "*If non-nil, decode encoded-words in quoted-string
 however this behaviour violates RFC2047.")
 
-(defun eword-decode-first-encoded-words (string after-regexp &optional must-unfold)
+(defun eword-decode-first-encoded-words (string
+					 eword-regexp
+					 after-regexp
+					 &optional must-unfold)
+  "Decode MIME encoded-words in beginning of STRING.
+
+EWORD-REGEXP is the regexp that matches an encoded-word.
+Usual value is eword-encoded-word-regexp,
+eword-encoded-word-in-phrase-regexp,
+eword-encoded-word-in-comment-regexp or
+eword-encoded-word-in-quoted-string-regexp.
+
+AFTER-REGEXP is the regexp that matches the text after an encoded-word.
+Usual value is eword-after-encoded-word-regexp,
+eword-after-encoded-word-in-phrase-regexp,
+eword-after-encoded-word-in-comment-regexp or
+eword-after-encoded-word-in-quoted-string-regexp.
+
+If beginning of STRING matches EWORD-REGEXP and AFTER-REGEXP,
+returns a cons cell of decoded string (sequence of characters) and
+the rest (sequence of octets).
+
+If beginning of STRING does not match EWORD-REGEXP and AFTER-REGEXP,
+returns nil.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is returned in decoded part
+as encoded-word form.
+
+If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
   (if eword-decode-sticked-encoded-word (setq after-regexp ""))
-  (let ((between-ewords-regexp (if eword-decode-sticked-encoded-word "\\(\n?[ \t]\\)*" "\\(\n?[ \t]\\)+"))
+  (let ((between-ewords-regexp
+  	  (if eword-decode-sticked-encoded-word
+	    "\\(\n?[ \t]\\)*"
+	    "\\(\n?[ \t]\\)+"))
   	(src string)	; sequence of octets.
   	(dst ""))	; sequence of characters.
-    (if (string-match (concat "\\`\\(" eword-encoded-word-regexp "\\)" after-regexp) src)
+    (if (string-match
+    	  (concat "\\`\\(" eword-regexp "\\)" after-regexp) src)
       (let* (p
       	     (q (match-end 1))
       	     (ew (substring src 0 q))
@@ -126,7 +208,9 @@ however this behaviour violates RFC2047.")
 	    (while
 	      (and
 	        (string-match
-		  (concat "\\`\\(" between-ewords-regexp "\\)\\(" eword-encoded-word-regexp "\\)" after-regexp)
+		  (concat "\\`\\(" between-ewords-regexp "\\)"
+			     "\\(" eword-regexp "\\)"
+			     after-regexp)
 		  src)
 		(progn
 		  (setq p (match-end 1)
@@ -154,14 +238,23 @@ however this behaviour violates RFC2047.")
       	    (decoded (and
 	    		flag-ew
 			(eword-decode-first-encoded-words src
-			  "\\([ \t()\\\\]\\|$\\)" must-unfold))))
+			  eword-encoded-word-in-comment-regexp
+			  eword-after-encoded-word-in-comment-regexp
+			  must-unfold))))
 	(if (and (not (string= buf ""))
 		 (or decoded (eq ch ?\() (eq ch ?\))))
-	  (setq dst (concat dst (std11-wrap-as-quoted-pairs (decode-mime-charset-string buf default-mime-charset) '(?\( ?\))))
+	  (setq dst (concat dst
+	  	      (std11-wrap-as-quoted-pairs
+		        (decode-mime-charset-string buf
+			  default-mime-charset)
+			'(?\( ?\))))
 		buf ""))
 	(cond
 	  (decoded
-	    (setq dst (concat dst (std11-wrap-as-quoted-pairs (car decoded) '(?( ?))))
+	    (setq dst (concat dst
+	    		(std11-wrap-as-quoted-pairs
+			  (car decoded)
+			  '(?( ?))))
 		  src (cdr decoded)))
 	  ((or (eq ch ?\() (eq ch ?\)))
 	    (setq dst (concat dst (list ch))
@@ -181,7 +274,11 @@ however this behaviour violates RFC2047.")
 		  flag-ew eword-decode-sticked-encoded-word))
 	  (t (error "something wrong")))))
     (if (not (string= buf ""))
-      (setq dst (concat dst (std11-wrap-as-quoted-pairs (decode-mime-charset-string buf default-mime-charset) '(?\( ?\))))))
+      (setq dst (concat dst
+      		  (std11-wrap-as-quoted-pairs
+		    (decode-mime-charset-string buf
+		      default-mime-charset)
+		    '(?\( ?\))))))
     dst))
 
 (defun eword-decode-quoted-string (string &optional must-unfold)
@@ -195,18 +292,23 @@ however this behaviour violates RFC2047.")
 	    		eword-decode-quoted-encoded-word
 	    		flag-ew
 			(eword-decode-first-encoded-words src
-			  "\\([ \t\"\\\\]\\|$\\)" must-unfold))))
+			  eword-encoded-word-in-quoted-string-regexp
+			  eword-after-encoded-word-in-quoted-string-regexp
+			  must-unfold))))
 	(if (and (not (string= buf ""))
 		 (or decoded (eq ch ?\")))
 	  (setq dst (concat dst
 		      (std11-wrap-as-quoted-pairs
-		        (decode-mime-charset-string buf default-mime-charset)
+		        (decode-mime-charset-string buf
+			default-mime-charset)
 			'(?\")))
 		buf ""))
 	(cond
 	  (decoded
 	    (setq dst (concat dst
-	    		(std11-wrap-as-quoted-pairs (car decoded) '(?\")))
+	    		(std11-wrap-as-quoted-pairs
+			  (car decoded)
+			  '(?\")))
 		  src (cdr decoded)))
 	  ((or (eq ch ?\"))
 	    (setq dst (concat dst (list ch))
@@ -228,7 +330,8 @@ however this behaviour violates RFC2047.")
     (if (not (string= buf ""))
       (setq dst (concat dst
       		  (std11-wrap-as-quoted-pairs
-		    (decode-mime-charset-string buf default-mime-charset)
+		    (decode-mime-charset-string buf
+		      default-mime-charset)
 		    '(?\")))))
     dst))
 
@@ -239,10 +342,15 @@ however this behaviour violates RFC2047.")
 	(flag-ew t))
     (while (< 0 (length src))
       (let ((ch (aref src 0))
-      	    (decoded (and flag-ew (eword-decode-first-encoded-words src "\\([ \t]\\|$\\)" must-unfold))))
+      	    (decoded (and flag-ew (eword-decode-first-encoded-words src
+				    eword-encoded-word-regexp
+	    			    eword-after-encoded-word-regexp
+				    must-unfold))))
 	(if (and (not (string= buf ""))
 		 decoded)
-	  (setq dst (concat dst (decode-mime-charset-string buf default-mime-charset))
+	  (setq dst (concat dst
+		      (decode-mime-charset-string buf
+		        default-mime-charset))
 		buf ""))
 	(cond
 	  (decoded
@@ -258,7 +366,9 @@ however this behaviour violates RFC2047.")
 		  flag-ew eword-decode-sticked-encoded-word))
 	  (t (error "something wrong")))))
     (if (not (string= buf ""))
-      (setq dst (concat dst (decode-mime-charset-string buf default-mime-charset))))
+      (setq dst (concat dst
+      		  (decode-mime-charset-string buf
+		    default-mime-charset))))
     dst))
 
 (defun eword-decode-string (string &optional must-unfold)
@@ -272,7 +382,9 @@ decode the charset included in it, it is not decoded.
 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 if there are in decoded encoded-words (generated by bad manner MUA
 such as a version of Net$cape)."
-  (eword-decode-unstructured-string (std11-unfold-string string) must-unfold))
+  (eword-decode-unstructured-string
+    (std11-unfold-string string)
+    must-unfold))
 
 
 ;;; @ for region
@@ -293,7 +405,9 @@ such as a version of Net$cape)."
       (if unfolding
 	  (eword-decode-unfold)
 	)
-      (let ((str (eword-decode-unstructured-string (buffer-substring (point-min) (point-max)) must-unfold)))
+      (let ((str (eword-decode-unstructured-string
+      		   (buffer-substring (point-min) (point-max))
+		   must-unfold)))
 	(delete-region (point-min) (point-max))
 	(insert str)))))
 
@@ -529,7 +643,10 @@ be the result."
   (std11-analyze-special string))
 
 (defun eword-analyze-encoded-word (string &optional must-unfold)
-  (let ((decoded (eword-decode-first-encoded-words string "\\([ \t(]\\|$\\)" must-unfold)))
+  (let ((decoded (eword-decode-first-encoded-words string
+		   eword-encoded-word-in-phrase-regexp
+                   eword-after-encoded-word-in-phrase-regexp
+		   must-unfold)))
     (if decoded
       (cons (cons 'atom (car decoded)) (cdr decoded)))))
 
@@ -566,12 +683,13 @@ It is like std11-lexical-analyze, but it decodes non us-ascii
 characters encoded as encoded-words or invalid \"raw\" format.
 \"Raw\" non us-ascii characters are regarded as variable
 `default-mime-charset'."
-  (let ((key (copy-sequence string))
-	ret)
-    (set-text-properties 0 (length key) nil key)
+  (let* ((str (copy-sequence string))
+  	 (key (cons str (cons default-mime-charset must-unfold)))
+	 ret)
+    (set-text-properties 0 (length str) nil str)
     (if (setq ret (assoc key eword-lexical-analyze-cache))
 	(cdr ret)
-      (setq ret (eword-lexical-analyze-internal key must-unfold))
+      (setq ret (eword-lexical-analyze-internal str must-unfold))
       (setq eword-lexical-analyze-cache
 	    (cons (cons key ret)
 		  (last eword-lexical-analyze-cache