From 6fd721133142103600ae753dcaf343bc3d32dacc Mon Sep 17 00:00:00 2001 From: morioka Date: Mon, 16 Jun 1997 16:00:13 +0000 Subject: [PATCH] (eword-lexical-analyze-cache): New variable. (eword-lexical-analyze-cache-max): New variable. (eword-analyze-quoted-string): New function. (eword-analyze-comment): New function. (eword-analyze-encoded-word): New function. (eword-lexical-analyze-internal): New function. (eword-lexical-analyze): New function. (eword-decode-structured-field-body): New function. (eword-decode-unstructured-field-body): New function. (eword-extract-address-components): New function. --- eword-decode.el | 164 ++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 161 insertions(+), 3 deletions(-) diff --git a/eword-decode.el b/eword-decode.el index 3fca9ed..0289314 100644 --- a/eword-decode.el +++ b/eword-decode.el @@ -10,7 +10,7 @@ ;; Renamed: 1993/06/03 to tiny-mime.el ;; Renamed: 1995/10/03 from tiny-mime.el (split off encoder) ;; Renamed: 1997/02/22 from tm-ew-d.el -;; Version: $Revision: 0.14 $ +;; Version: $Revision: 0.15 $ ;; Keywords: encoded-word, MIME, multilingual, header, mail, news ;; This file is part of SEMI (SEMI is Emacs MIME Interfaces). @@ -32,7 +32,7 @@ ;;; Code: -(require 'std11) +(require 'std11-parse) (require 'mel) (require 'mime-def) @@ -41,7 +41,7 @@ ;;; (defconst eword-decode-RCS-ID - "$Id: eword-decode.el,v 0.14 1997-02-27 08:56:45 tmorioka Exp $") + "$Id: eword-decode.el,v 0.15 1997-06-16 16:00:13 morioka Exp $") (defconst eword-decode-version (get-version-string eword-decode-RCS-ID)) @@ -296,6 +296,164 @@ as a version of Net$cape)." )))))) +;;; @ lexical analyze +;;; + +(defvar eword-lexical-analyze-cache nil) +(defvar eword-lexical-analyze-cache-max 299 + "*Max position of eword-lexical-analyze-cache. +It is max size of eword-lexical-analyze-cache - 1.") + +(defun eword-analyze-quoted-string (string) + (let ((p (std11-check-enclosure string ?\" ?\"))) + (if p + (cons (cons 'quoted-string + (decode-mime-charset-string + (std11-strip-quoted-pair (substring string 1 (1- p))) + default-mime-charset)) + (substring string p)) + ))) + +(defun eword-analyze-comment (string &optional must-unfold) + (let ((p (std11-check-enclosure string ?\( ?\) t))) + (if p + (cons (cons 'comment + (eword-decode-string + (decode-mime-charset-string + (std11-strip-quoted-pair (substring string 1 (1- p))) + default-mime-charset) + must-unfold)) + (substring string p)) + ))) + +(defun eword-analyze-encoded-word (string &optional must-unfold) + (if (eq (string-match eword-encoded-word-regexp string) 0) + (let ((end (match-end 0)) + (dest (eword-decode-encoded-word (match-string 0 string) + must-unfold)) + ) + (setq string (substring string end)) + (while (eq (string-match `,(concat "[ \t\n]*\\(" + eword-encoded-word-regexp + "\\)") + string) + 0) + (setq end (match-end 0)) + (setq dest + (concat dest + (eword-decode-encoded-word (match-string 1 string) + must-unfold)) + string (substring string end)) + ) + (cons (cons 'atom dest) + (if (string= string "") + nil + string)) + ))) + +(defun eword-lexical-analyze-internal (string must-unfold) + (let (dest ret) + (while (not (string-equal string "")) + (setq ret + (or (eword-analyze-quoted-string string) + (std11-analyze-domain-literal string) + (eword-analyze-comment string must-unfold) + (std11-analyze-spaces string) + (std11-analyze-special string) + (eword-analyze-encoded-word string must-unfold) + (std11-analyze-atom string) + '((error) . "") + )) + (setq dest (cons (car ret) dest)) + (setq string (cdr ret)) + ) + (nreverse dest) + )) + +(defun eword-lexical-analyze (string &optional must-unfold) + "Return lexical analyzed list corresponding STRING. +It is like std11-lexical-analyze, but it decodes non us-ascii +characters encoded as encoded-words or invalid \"raw\" format. +\"Raw\" non us-ascii characters are regarded as variable +`default-mime-charset'." + (let ((key (copy-sequence string)) + ret) + (set-text-properties 0 (length key) nil key) + (if (setq ret (assoc key eword-lexical-analyze-cache)) + (cdr ret) + (setq ret (eword-lexical-analyze-internal key must-unfold)) + (setq eword-lexical-analyze-cache + (cons (cons key ret) + (last eword-lexical-analyze-cache + eword-lexical-analyze-cache-max))) + ret))) + +(defun eword-decode-structured-field-body (string &optional must-unfold) + "Decode non us-ascii characters in STRING as structured field body. +STRING is unfolded before decoding. + +It decodes non us-ascii characters in FULL-NAME encoded as +encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii +characters are regarded as variable `default-mime-charset'. + +If an encoded-word is broken or your emacs implementation can not +decode the charset included in it, it is not decoded. + +If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even +if there are in decoded encoded-words (generated by bad manner MUA +such as a version of Net$cape)." + (mapconcat (function + (lambda (token) + (let ((type (car token)) + (value (cdr token))) + (cond ((eq type 'quoted-string) + (std11-wrap-as-quoted-string value) + ) + ((eq type 'comment) + (concat "\"" + (std11-wrap-as-quoted-pairs value) + "\"") + ) + (t + value))))) + (eword-lexical-analyze string must-unfold) + "")) + +(defun eword-decode-unstructured-field-body (string &optional must-unfold) + "Decode non us-ascii characters in STRING as unstructured field body. +STRING is unfolded before decoding. + +It decodes non us-ascii characters in FULL-NAME encoded as +encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii +characters are regarded as variable `default-mime-charset'. + +If an encoded-word is broken or your emacs implementation can not +decode the charset included in it, it is not decoded. + +If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even +if there are in decoded encoded-words (generated by bad manner MUA +such as a version of Net$cape)." + (eword-decode-string + (decode-mime-charset-string string default-mime-charset) + must-unfold)) + +(defun eword-extract-address-components (string) + "Extract full name and canonical address from STRING. +Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). +If no name can be extracted, FULL-NAME will be nil. +It decodes non us-ascii characters in FULL-NAME encoded as +encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii +characters are regarded as variable `default-mime-charset'." + (let* ((structure (car (std11-parse-address + (eword-lexical-analyze + (std11-unfold-string string) 'must-unfold)))) + (phrase (std11-full-name-string structure)) + (address (std11-address-string structure)) + ) + (list phrase address) + )) + + ;;; @ end ;;; -- 1.7.10.4