From: morioka <morioka>
Date: Mon, 16 Jun 1997 16:00:13 +0000 (+0000)
Subject: (eword-lexical-analyze-cache): New variable.
X-Git-Tag: semi-0_88~6
X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=6fd721133142103600ae753dcaf343bc3d32dacc;p=elisp%2Fsemi.git

(eword-lexical-analyze-cache): New variable.
(eword-lexical-analyze-cache-max): New variable.
(eword-analyze-quoted-string): New function.
(eword-analyze-comment): New function.
(eword-analyze-encoded-word): New function.
(eword-lexical-analyze-internal): New function.
(eword-lexical-analyze): New function.
(eword-decode-structured-field-body): New function.
(eword-decode-unstructured-field-body): New function.
(eword-extract-address-components): New function.
---

diff --git a/eword-decode.el b/eword-decode.el
index 3fca9ed..0289314 100644
--- a/eword-decode.el
+++ b/eword-decode.el
@@ -10,7 +10,7 @@
 ;;	Renamed: 1993/06/03 to tiny-mime.el
 ;;	Renamed: 1995/10/03 from tiny-mime.el (split off encoder)
 ;;	Renamed: 1997/02/22 from tm-ew-d.el
-;; Version: $Revision: 0.14 $
+;; Version: $Revision: 0.15 $
 ;; Keywords: encoded-word, MIME, multilingual, header, mail, news
 
 ;; This file is part of SEMI (SEMI is Emacs MIME Interfaces).
@@ -32,7 +32,7 @@
 
 ;;; Code:
 
-(require 'std11)
+(require 'std11-parse)
 (require 'mel)
 (require 'mime-def)
 
@@ -41,7 +41,7 @@
 ;;;
 
 (defconst eword-decode-RCS-ID
-  "$Id: eword-decode.el,v 0.14 1997-02-27 08:56:45 tmorioka Exp $")
+  "$Id: eword-decode.el,v 0.15 1997-06-16 16:00:13 morioka Exp $")
 (defconst eword-decode-version (get-version-string eword-decode-RCS-ID))
 
 
@@ -296,6 +296,164 @@ as a version of Net$cape)."
 		))))))
 
 
+;;; @ lexical analyze
+;;;
+
+(defvar eword-lexical-analyze-cache nil)
+(defvar eword-lexical-analyze-cache-max 299
+  "*Max position of eword-lexical-analyze-cache.
+It is max size of eword-lexical-analyze-cache - 1.")
+
+(defun eword-analyze-quoted-string (string)
+  (let ((p (std11-check-enclosure string ?\" ?\")))
+    (if p
+	(cons (cons 'quoted-string
+		    (decode-mime-charset-string
+		     (std11-strip-quoted-pair (substring string 1 (1- p)))
+		     default-mime-charset))
+	      (substring string p))
+      )))
+
+(defun eword-analyze-comment (string &optional must-unfold)
+  (let ((p (std11-check-enclosure string ?\( ?\) t)))
+    (if p
+	(cons (cons 'comment
+		    (eword-decode-string
+		     (decode-mime-charset-string
+		      (std11-strip-quoted-pair (substring string 1 (1- p)))
+		      default-mime-charset)
+		     must-unfold))
+	      (substring string p))
+      )))
+
+(defun eword-analyze-encoded-word (string &optional must-unfold)
+  (if (eq (string-match eword-encoded-word-regexp string) 0)
+      (let ((end (match-end 0))
+	    (dest (eword-decode-encoded-word (match-string 0 string)
+					     must-unfold))
+	    )
+	(setq string (substring string end))
+	(while (eq (string-match `,(concat "[ \t\n]*\\("
+					   eword-encoded-word-regexp
+					   "\\)")
+				 string)
+		   0)
+	  (setq end (match-end 0))
+	  (setq dest
+		(concat dest
+			(eword-decode-encoded-word (match-string 1 string)
+						   must-unfold))
+		string (substring string end))
+	  )
+	(cons (cons 'atom dest)
+	      (if (string= string "")
+		  nil
+		string))
+	)))
+
+(defun eword-lexical-analyze-internal (string must-unfold)
+  (let (dest ret)
+    (while (not (string-equal string ""))
+      (setq ret
+	    (or (eword-analyze-quoted-string string)
+		(std11-analyze-domain-literal string)
+		(eword-analyze-comment string must-unfold)
+		(std11-analyze-spaces string)
+		(std11-analyze-special string)
+		(eword-analyze-encoded-word string must-unfold)
+		(std11-analyze-atom string)
+		'((error) . "")
+		))
+      (setq dest (cons (car ret) dest))
+      (setq string (cdr ret))
+      )
+    (nreverse dest)
+    ))
+
+(defun eword-lexical-analyze (string &optional must-unfold)
+  "Return lexical analyzed list corresponding STRING.
+It is like std11-lexical-analyze, but it decodes non us-ascii
+characters encoded as encoded-words or invalid \"raw\" format.
+\"Raw\" non us-ascii characters are regarded as variable
+`default-mime-charset'."
+  (let ((key (copy-sequence string))
+	ret)
+    (set-text-properties 0 (length key) nil key)
+    (if (setq ret (assoc key eword-lexical-analyze-cache))
+	(cdr ret)
+      (setq ret (eword-lexical-analyze-internal key must-unfold))
+      (setq eword-lexical-analyze-cache
+	    (cons (cons key ret)
+		  (last eword-lexical-analyze-cache
+			eword-lexical-analyze-cache-max)))
+      ret)))
+
+(defun eword-decode-structured-field-body (string &optional must-unfold)
+  "Decode non us-ascii characters in STRING as structured field body.
+STRING is unfolded before decoding.
+
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded.
+
+If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+  (mapconcat (function
+	      (lambda (token)
+		(let ((type (car token))
+		      (value (cdr token)))
+		  (cond ((eq type 'quoted-string)
+			 (std11-wrap-as-quoted-string value)
+			 )
+			((eq type 'comment)
+			 (concat "\""
+				 (std11-wrap-as-quoted-pairs value)
+				 "\"")
+			 )
+			(t
+			 value)))))
+	     (eword-lexical-analyze string must-unfold)
+	     ""))
+
+(defun eword-decode-unstructured-field-body (string &optional must-unfold)
+  "Decode non us-ascii characters in STRING as unstructured field body.
+STRING is unfolded before decoding.
+
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded.
+
+If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+  (eword-decode-string
+   (decode-mime-charset-string string default-mime-charset)
+   must-unfold))
+
+(defun eword-extract-address-components (string)
+  "Extract full name and canonical address from STRING.
+Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
+If no name can be extracted, FULL-NAME will be nil.
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'."
+  (let* ((structure (car (std11-parse-address
+			  (eword-lexical-analyze
+			   (std11-unfold-string string) 'must-unfold))))
+         (phrase  (std11-full-name-string structure))
+         (address (std11-address-string structure))
+         )
+    (list phrase address)
+    ))
+
+
 ;;; @ end
 ;;;