;; Renamed: 1993/06/03 to tiny-mime.el
;; Renamed: 1995/10/03 from tiny-mime.el (split off encoder)
;; Renamed: 1997/02/22 from tm-ew-d.el
-;; Version: $Revision: 0.14 $
+;; Version: $Revision: 0.15 $
;; Keywords: encoded-word, MIME, multilingual, header, mail, news
;; This file is part of SEMI (SEMI is Emacs MIME Interfaces).
;;; Code:
-(require 'std11)
+(require 'std11-parse)
(require 'mel)
(require 'mime-def)
;;;
(defconst eword-decode-RCS-ID
- "$Id: eword-decode.el,v 0.14 1997-02-27 08:56:45 tmorioka Exp $")
+ "$Id: eword-decode.el,v 0.15 1997-06-16 16:00:13 morioka Exp $")
(defconst eword-decode-version (get-version-string eword-decode-RCS-ID))
))))))
+;;; @ lexical analyze
+;;;
+
+(defvar eword-lexical-analyze-cache nil)
+(defvar eword-lexical-analyze-cache-max 299
+ "*Max position of eword-lexical-analyze-cache.
+It is max size of eword-lexical-analyze-cache - 1.")
+
+(defun eword-analyze-quoted-string (string)
+ (let ((p (std11-check-enclosure string ?\" ?\")))
+ (if p
+ (cons (cons 'quoted-string
+ (decode-mime-charset-string
+ (std11-strip-quoted-pair (substring string 1 (1- p)))
+ default-mime-charset))
+ (substring string p))
+ )))
+
+(defun eword-analyze-comment (string &optional must-unfold)
+ (let ((p (std11-check-enclosure string ?\( ?\) t)))
+ (if p
+ (cons (cons 'comment
+ (eword-decode-string
+ (decode-mime-charset-string
+ (std11-strip-quoted-pair (substring string 1 (1- p)))
+ default-mime-charset)
+ must-unfold))
+ (substring string p))
+ )))
+
+(defun eword-analyze-encoded-word (string &optional must-unfold)
+ (if (eq (string-match eword-encoded-word-regexp string) 0)
+ (let ((end (match-end 0))
+ (dest (eword-decode-encoded-word (match-string 0 string)
+ must-unfold))
+ )
+ (setq string (substring string end))
+ (while (eq (string-match `,(concat "[ \t\n]*\\("
+ eword-encoded-word-regexp
+ "\\)")
+ string)
+ 0)
+ (setq end (match-end 0))
+ (setq dest
+ (concat dest
+ (eword-decode-encoded-word (match-string 1 string)
+ must-unfold))
+ string (substring string end))
+ )
+ (cons (cons 'atom dest)
+ (if (string= string "")
+ nil
+ string))
+ )))
+
+(defun eword-lexical-analyze-internal (string must-unfold)
+ (let (dest ret)
+ (while (not (string-equal string ""))
+ (setq ret
+ (or (eword-analyze-quoted-string string)
+ (std11-analyze-domain-literal string)
+ (eword-analyze-comment string must-unfold)
+ (std11-analyze-spaces string)
+ (std11-analyze-special string)
+ (eword-analyze-encoded-word string must-unfold)
+ (std11-analyze-atom string)
+ '((error) . "")
+ ))
+ (setq dest (cons (car ret) dest))
+ (setq string (cdr ret))
+ )
+ (nreverse dest)
+ ))
+
+(defun eword-lexical-analyze (string &optional must-unfold)
+ "Return lexical analyzed list corresponding STRING.
+It is like std11-lexical-analyze, but it decodes non us-ascii
+characters encoded as encoded-words or invalid \"raw\" format.
+\"Raw\" non us-ascii characters are regarded as variable
+`default-mime-charset'."
+ (let ((key (copy-sequence string))
+ ret)
+ (set-text-properties 0 (length key) nil key)
+ (if (setq ret (assoc key eword-lexical-analyze-cache))
+ (cdr ret)
+ (setq ret (eword-lexical-analyze-internal key must-unfold))
+ (setq eword-lexical-analyze-cache
+ (cons (cons key ret)
+ (last eword-lexical-analyze-cache
+ eword-lexical-analyze-cache-max)))
+ ret)))
+
+(defun eword-decode-structured-field-body (string &optional must-unfold)
+ "Decode non us-ascii characters in STRING as structured field body.
+STRING is unfolded before decoding.
+
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded.
+
+If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+ (mapconcat (function
+ (lambda (token)
+ (let ((type (car token))
+ (value (cdr token)))
+ (cond ((eq type 'quoted-string)
+ (std11-wrap-as-quoted-string value)
+ )
+ ((eq type 'comment)
+ (concat "\""
+ (std11-wrap-as-quoted-pairs value)
+ "\"")
+ )
+ (t
+ value)))))
+ (eword-lexical-analyze string must-unfold)
+ ""))
+
+(defun eword-decode-unstructured-field-body (string &optional must-unfold)
+ "Decode non us-ascii characters in STRING as unstructured field body.
+STRING is unfolded before decoding.
+
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'.
+
+If an encoded-word is broken or your emacs implementation can not
+decode the charset included in it, it is not decoded.
+
+If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
+if there are in decoded encoded-words (generated by bad manner MUA
+such as a version of Net$cape)."
+ (eword-decode-string
+ (decode-mime-charset-string string default-mime-charset)
+ must-unfold))
+
+(defun eword-extract-address-components (string)
+ "Extract full name and canonical address from STRING.
+Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
+If no name can be extracted, FULL-NAME will be nil.
+It decodes non us-ascii characters in FULL-NAME encoded as
+encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
+characters are regarded as variable `default-mime-charset'."
+ (let* ((structure (car (std11-parse-address
+ (eword-lexical-analyze
+ (std11-unfold-string string) 'must-unfold))))
+ (phrase (std11-full-name-string structure))
+ (address (std11-address-string structure))
+ )
+ (list phrase address)
+ ))
+
+
;;; @ end
;;;