From: akr <akr>
Date: Sat, 21 Mar 1998 00:08:30 +0000 (+0000)
Subject: encoded-word decoding routine rewrittened.
X-Git-Tag: akr-199811302358~8
X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=0838ce711bbace1ac71fba80cab33538da38a97c;p=elisp%2Fsemi.git

encoded-word decoding routine rewrittened.
---

diff --git a/ChangeLog b/ChangeLog
index 2d2d420..f8cb815 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,30 @@
 1998-03-20  Tanaka Akira  <akr@jaist.ac.jp>
 
+	* eword-decode.el (eword-decode-before-ewords-regexp): Delete.
+	(eword-decode-between-ewords-regexp): Delete.
+	(eword-decode-after-ewords-regexp): Delete.
+	(eword-decode-sticked-encoded-word): New variable.
+	(eword-decode-first-encoded-words): New function.
+	(eword-decode-comment-string): New function.
+	(eword-decode-unstructured-string): New function. It treats
+	`default-mime-charset' for parts other than encoded-words.
+	(eword-decode-string): Now, it is stub to
+	`eword-decode-unstructured-string'.
+	(eword-decode-region): Now, it is stub to
+	`eword-decode-unstructured-string'.
+	(eword-decode-header): Adapt to new `eword-decode-string'.
+	(eword-analyze-quoted-string): Call
+	`std11-wrap-as-quoted-string' first.
+	(eword-analyze-comment): Call `eword-decode-comment-string'.
+	(eword-analyze-encoded-word): Now, it is stub to
+	`eword-decode-first-encoded-words'.
+	(eword-decode-token): Adapt to new `eword-analyze-quoted-string'
+	and `eword-analyze-comment'.
+	(eword-decode-unstructured-field-body): Adapt to new
+	`eword-decode-string'
+
+1998-03-20  Tanaka Akira  <akr@jaist.ac.jp>
+
 	* eword-decode.el (eword-decode-string): Treat undecodable
 	encoded-words.
 
diff --git a/eword-decode.el b/eword-decode.el
index a7dd200..98f8427 100644
--- a/eword-decode.el
+++ b/eword-decode.el
@@ -98,33 +98,113 @@
 ;;   (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp))
 
 
-;; @ encoded-word's neighbor
-;;
-
-(defvar eword-decode-before-ewords-regexp "^\\|[ \t]"
-  "Regexp that matches before encoded words.
-This value must not contain grouping construct.
-Default value is \"\\\\`\\\\|^\\\\|[ \\t]\".
-Another useful (but not RFC2047 compliant) value is \"\".")
-
-(defvar eword-decode-between-ewords-regexp "\\(\n?[ \t]\\)+"
-  "Regexp that matches between encoded words.
-This value must contain exactly one grouping construct.
-Default value is \"\\\\(\\n?[ \\t]\\\\)+\".
-Another useful (but not RFC2047 compliant) value is \"\\\\(\\n?[ \\t]\\\\)*\".")
-
-(defvar eword-decode-after-ewords-regexp "[ \t]\\|$"
-  "Regexp that matches after encoded words.
-Default value is \"[ \\t]\\\\|$\".
-Another useful (but not RFC2047 compliant) value is \"\".")
-
-; (setq eword-decode-before-ewords-regexp "")
-; (setq eword-decode-between-ewords-regexp "\\(\n?[ \t]\\)*")
-; (setq eword-decode-after-ewords-regexp "")
-
 ;;; @ for string
 ;;;
 
+(defvar eword-decode-sticked-encoded-word nil
+  "*If non-nil, decode encoded-words sticked on encoded-words, atoms, etc.")
+
+(defun eword-decode-first-encoded-words (string after-regexp &optional must-unfold)
+  (if eword-decode-sticked-encoded-word (setq after-regexp ""))
+  (let ((between-ewords-regexp (if eword-decode-sticked-encoded-word "\\(\n?[ \t]\\)*" "\\(\n?[ \t]\\)+"))
+  	(src string)	; sequence of octets.
+  	(dst ""))	; sequence of characters.
+    (if (string-match (concat "\\`\\(" eword-encoded-word-regexp "\\)" after-regexp) src)
+      (let* (p
+      	     (q (match-end 1))
+      	     (ew (substring src 0 q))
+      	     (dw (eword-decode-encoded-word ew must-unfold)))
+        (setq dst (concat dst dw)
+	      src (substring src q))
+	(if (not (string= ew dw))
+	  (progn
+	    (while
+	      (and
+	        (string-match
+		  (concat "\\`\\(" between-ewords-regexp "\\)\\(" eword-encoded-word-regexp "\\)" after-regexp)
+		  src)
+		(progn
+		  (setq p (match-end 1)
+		  	q (match-end 3)
+		        ew (substring src p q)
+		        dw (eword-decode-encoded-word ew must-unfold))
+		  (if (string= ew dw)
+		    (progn
+		      (setq dst (concat dst (substring src 0 q))
+			    src (substring src q))
+		      nil)
+		    t)))
+	      (setq dst (concat dst dw)
+	            src (substring src q)))))
+	(cons dst src))
+      nil)))
+
+(defun eword-decode-comment-string (string &optional must-unfold)
+  (let ((src string)
+	(buf "")
+  	(dst "")
+	(flag-ew t))
+    (while (< 0 (length src))
+      (let ((ch (aref src 0))
+      	    (decoded (and flag-ew (eword-decode-first-encoded-words src "\\([ \t()\\\\]\\|$\\)" must-unfold))))
+	(if (and (not (string= buf ""))
+		 (or decoded (eq ch ?\() (eq ch ?\))))
+	  (setq dst (concat dst (std11-wrap-as-quoted-pairs (decode-mime-charset-string buf default-mime-charset) '(?( ?))))
+		buf ""))
+	(cond
+	  (decoded
+	    (setq dst (concat dst (std11-wrap-as-quoted-pairs (car decoded) '(?( ?))))
+		  src (cdr decoded)))
+	  ((or (eq ch ?\() (eq ch ?\)))
+	    (setq dst (concat dst (list ch))
+		  src (substring src 1)
+		  flag-ew t))
+	  ((eq ch ?\\)
+	    (setq buf (concat buf (list (aref src 1)))
+		  src (substring src 2)
+		  flag-ew t))
+	  ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n))
+	    (setq buf (concat buf (list ch))
+		  src (substring src 1)
+		  flag-ew t))
+	  ((string-match "\\`=?[^ \t\n()\\\\=]*" src)
+	    (setq buf (concat buf (substring src 0 (match-end 0)))
+		  src (substring src (match-end 0))
+		  flag-ew eword-decode-sticked-encoded-word))
+	  (t (error "something wrong")))))
+    (if (not (string= buf ""))
+      (setq dst (concat dst (std11-wrap-as-quoted-pairs (decode-mime-charset-string buf default-mime-charset) '(?( ?))))))
+    dst))
+
+(defun eword-decode-unstructured-string (string &optional must-unfold)
+  (let ((src string)
+	(buf "")
+  	(dst "")
+	(flag-ew t))
+    (while (< 0 (length src))
+      (let ((ch (aref src 0))
+      	    (decoded (and flag-ew (eword-decode-first-encoded-words src "\\([ \t]\\|$\\)" must-unfold))))
+	(if (and (not (string= buf ""))
+		 decoded)
+	  (setq dst (concat dst (decode-mime-charset-string buf default-mime-charset))
+		buf ""))
+	(cond
+	  (decoded
+	    (setq dst (concat dst (car decoded))
+		  src (cdr decoded)))
+	  ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n))
+	    (setq buf (concat buf (list ch))
+		  src (substring src 1)
+		  flag-ew t))
+	  ((string-match "\\`=?[^ \t\n=]*" src)
+	    (setq buf (concat buf (substring src 0 (match-end 0)))
+		  src (substring src (match-end 0))
+		  flag-ew eword-decode-sticked-encoded-word))
+	  (t (error "something wrong")))))
+    (if (not (string= buf ""))
+      (setq dst (concat dst (decode-mime-charset-string buf default-mime-charset))))
+    dst))
+
 (defun eword-decode-string (string &optional must-unfold)
   "Decode MIME encoded-words in STRING.
 
@@ -136,38 +216,7 @@ decode the charset included in it, it is not decoded.
 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 if there are in decoded encoded-words (generated by bad manner MUA
 such as a version of Net$cape)."
-  (let ((src (std11-unfold-string string))
-  	(dst "")
-	b e ew dw)
-    (while
-	(string-match
-	 (concat "\\(" eword-decode-before-ewords-regexp "\\)"
-		 "\\(" eword-encoded-word-regexp "\\)"
-		 "\\(" eword-decode-after-ewords-regexp "\\)")
-	 src)
-      (setq b (match-beginning 2))
-      (setq e (match-end 2))
-      (setq ew (substring src b e))
-      (setq dw (eword-decode-encoded-word ew must-unfold))
-      (setq dst (concat dst (substring src 0 b) dw))
-      (setq src (substring src e))
-      (if (not (string= ew dw))
-	  (while
-	      (and
-	       (string-match
-		(concat "\\`"
-			"\\(" eword-decode-between-ewords-regexp "\\)"
-			"\\(" eword-encoded-word-regexp "\\)"
-			"\\(" eword-decode-after-ewords-regexp "\\)")
-		src)
-	       (progn
-		 (setq e (match-end 3))
-		 (setq ew (substring src (match-beginning 3) e))
-		 (setq dw (eword-decode-encoded-word ew must-unfold))
-		 (not (string= ew dw))))
-	    (setq dst (concat dst dw))
-	    (setq src (substring src e)))))
-    (concat dst src)))
+  (eword-decode-unstructured-string (std11-unfold-string string) must-unfold))
 
 
 ;;; @ for region
@@ -188,40 +237,9 @@ such as a version of Net$cape)."
       (if unfolding
 	  (eword-decode-unfold)
 	)
-      (goto-char (point-min))
-      (let (b e ew dw)
-	(while 
-	    (progn
-	      (narrow-to-region (point) (point-max))
-	      (re-search-forward
-		(concat "\\(" eword-decode-before-ewords-regexp "\\)"
-			"\\(" eword-encoded-word-regexp "\\)"
-			"\\(" eword-decode-after-ewords-regexp "\\)") nil t))
-	  (setq b (match-beginning 2))
-	  (setq e (match-end 2))
-	  (setq ew (buffer-substring b e))
-	  (setq dw (eword-decode-encoded-word ew must-unfold))
-	  (if (not (string= ew dw))
-	      (progn
-		(goto-char e)
-		(delete-region b e)
-		(insert dw)
-		(while
-		    (and
-		     (looking-at
-		       (concat "\\(" eword-decode-between-ewords-regexp "\\)"
-			       "\\(" eword-encoded-word-regexp "\\)"
-			       "\\(" eword-decode-after-ewords-regexp "\\)"))
-		     (progn
-		       (setq b (match-beginning 0))
-		       (setq e (match-end 3))
-		       (setq ew (buffer-substring (match-beginning 3) e))
-		       (setq dw (eword-decode-encoded-word ew must-unfold))
-		       (not (string= ew dw))))
-		  (goto-char e)
-		  (delete-region b e)
-		  (insert dw))))))
-      )))
+      (let ((str (eword-decode-unstructured-string (buffer-substring (point-min) (point-max)) must-unfold)))
+	(delete-region (point-min) (point-max))
+	(insert str)))))
 
 
 ;;; @ for message header
@@ -285,11 +303,9 @@ If SEPARATOR is not nil, it is used as header separator."
 		       ;; Decode as unstructured field
 		       (save-restriction
 			 (narrow-to-region beg (1+ end))
-			 (decode-mime-charset-region p end default-charset)
 			 (goto-char p)
-			 (if (re-search-forward eword-encoded-word-regexp
-						nil t)
-			     (eword-decode-region beg (point-max) 'unfold))
+			 (eword-decode-region beg (point-max) 'unfold)
+			 (goto-char (point-max))
 			 )))))
 	  (eword-decode-region (point-min) (point-max) t)
 	  )))))
@@ -435,9 +451,10 @@ be the result."
   (let ((p (std11-check-enclosure string ?\" ?\")))
     (if p
 	(cons (cons 'quoted-string
-		    (decode-mime-charset-string
-		     (std11-strip-quoted-pair (substring string 1 (1- p)))
-		     default-mime-charset))
+		    (std11-wrap-as-quoted-string
+		     (decode-mime-charset-string
+		      (std11-strip-quoted-pair (substring string 1 (1- p)))
+		      default-mime-charset)))
 	      (substring string p))
       )))
 
@@ -447,12 +464,7 @@ be the result."
 (defun eword-analyze-comment (string &optional must-unfold)
   (let ((p (std11-check-enclosure string ?\( ?\) t)))
     (if p
-	(cons (cons 'comment
-		    (eword-decode-string
-		     (decode-mime-charset-string
-		      (std11-strip-quoted-pair (substring string 1 (1- p)))
-		      default-mime-charset)
-		     must-unfold))
+	(cons (cons 'comment (eword-decode-comment-string (substring string 0 p)))
 	      (substring string p))
       )))
 
@@ -463,26 +475,9 @@ be the result."
   (std11-analyze-special string))
 
 (defun eword-analyze-encoded-word (string &optional must-unfold)
-  (if (eq (string-match eword-encoded-word-regexp string) 0)
-      (let ((end (match-end 0))
-	    (dest (eword-decode-encoded-word (match-string 0 string)
-					     must-unfold))
-	    )
-	(setq string (substring string end))
-	(while (eq (string-match `,(concat "[ \t\n]*\\("
-					   eword-encoded-word-regexp
-					   "\\)")
-				 string)
-		   0)
-	  (setq end (match-end 0))
-	  (setq dest
-		(concat dest
-			(eword-decode-encoded-word (match-string 1 string)
-						   must-unfold))
-		string (substring string end))
-	  )
-	(cons (cons 'atom dest) string)
-	)))
+  (let ((decoded (eword-decode-first-encoded-words string "\\([ \t(]\\|$\\)" must-unfold)))
+    (if decoded
+      (cons (cons 'atom (car decoded)) (cdr decoded)))))
 
 (defun eword-analyze-atom (string &optional must-unfold)
   (if (string-match std11-atom-regexp string)
@@ -530,13 +525,7 @@ characters encoded as encoded-words or invalid \"raw\" format.
       ret)))
 
 (defun eword-decode-token (token)
-  (let ((type (car token))
-	(value (cdr token)))
-    (cond ((eq type 'quoted-string)
-	   (std11-wrap-as-quoted-string value))
-	  ((eq type 'comment)
-	   (concat "(" (std11-wrap-as-quoted-pairs value '(?( ?))) ")"))
-	  (t value))))
+  (cdr token))
 
 (defun eword-decode-and-fold-structured-field
   (string start-column &optional max-column must-unfold)
@@ -642,9 +631,7 @@ decode the charset included in it, it is not decoded.
 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 if there are in decoded encoded-words (generated by bad manner MUA
 such as a version of Net$cape)."
-  (eword-decode-string
-   (decode-mime-charset-string string default-mime-charset)
-   must-unfold))
+  (eword-decode-string string must-unfold))
 
 (defun eword-extract-address-components (string)
   "Extract full name and canonical address from STRING.