1 ;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs
3 ;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc.
5 ;; Author: ENAMI Tsugutomo <enami@sys.ptg.sony.co.jp>
6 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
7 ;; Tanaka Akira <akr@jaist.ac.jp>
8 ;; Maintainer: Tanaka Akira <akr@jaist.ac.jp>
10 ;; Original: 1992/07/20 ENAMI Tsugutomo's `mime.el'.
11 ;; Renamed: 1993/06/03 to tiny-mime.el
12 ;; Renamed: 1995/10/03 from tiny-mime.el (split off encoder)
13 ;; Renamed: 1997/02/22 from tm-ew-d.el
14 ;; Keywords: encoded-word, MIME, multilingual, header, mail, news
16 ;; This file is part of FLAM (Faithful Library About MIME).
18 ;; This program is free software; you can redistribute it and/or
19 ;; modify it under the terms of the GNU General Public License as
20 ;; published by the Free Software Foundation; either version 2, or (at
21 ;; your option) any later version.
23 ;; This program is distributed in the hope that it will be useful, but
24 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
25 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 ;; General Public License for more details.
28 ;; You should have received a copy of the GNU General Public License
29 ;; along with GNU Emacs; see the file COPYING. If not, write to the
30 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
31 ;; Boston, MA 02111-1307, USA.
41 (defgroup eword-decode nil
42 "Encoded-word decoding"
47 (defvar rotate-memo nil)
48 (defmacro rotate-memo (var val)
50 (unless (boundp ',var) (setq ,var ()))
51 (setq ,var (cons ,val ,var))
52 (let ((tmp (last ,var (- (length ,var) 100))))
53 (when tmp (setcdr tmp nil)))
59 (defcustom eword-decode-sticked-encoded-word nil
60 "*If non-nil, decode encoded-words sticked on atoms,
61 other encoded-words, etc.
62 however this behaviour violates RFC2047."
66 (defcustom eword-decode-quoted-encoded-word nil
67 "*If non-nil, decode encoded-words in quoted-string
68 however this behaviour violates RFC2047."
72 (defcustom eword-max-size-to-decode 1000
73 "*Max size to decode header field."
75 :type '(choice (integer :tag "Limit (bytes)")
76 (const :tag "Don't limit" nil)))
79 ;;; @ MIME encoded-word definition
82 (defconst eword-encoded-word-prefix-regexp
83 (concat (regexp-quote "=?")
84 "\\(" mime-charset-regexp "\\)"
88 (defconst eword-encoded-word-suffix-regexp
91 (defconst eword-encoded-text-in-unstructured-regexp "[!->@-~]+")
92 (defconst eword-encoded-word-in-unstructured-regexp
93 (concat eword-encoded-word-prefix-regexp
94 "\\(" eword-encoded-text-in-unstructured-regexp "\\)"
95 eword-encoded-word-suffix-regexp))
96 (defconst eword-after-encoded-word-in-unstructured-regexp "\\([ \t]\\|$\\)")
98 (defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+")
99 (defconst eword-encoded-word-in-phrase-regexp
100 (concat eword-encoded-word-prefix-regexp
101 "\\(" eword-encoded-text-in-phrase-regexp "\\)"
102 eword-encoded-word-suffix-regexp))
103 (defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t]\\|$\\)")
105 (defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+")
106 (defconst eword-encoded-word-in-comment-regexp
107 (concat eword-encoded-word-prefix-regexp
108 "\\(" eword-encoded-text-in-comment-regexp "\\)"
109 eword-encoded-word-suffix-regexp))
110 (defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)")
112 (defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+")
113 (defconst eword-encoded-word-in-quoted-string-regexp
114 (concat eword-encoded-word-prefix-regexp
115 "\\(" eword-encoded-text-in-quoted-string-regexp "\\)"
116 eword-encoded-word-suffix-regexp))
117 (defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)")
120 (defconst eword-encoded-text-regexp eword-encoded-text-in-unstructured-regexp)
121 (defconst eword-encoded-word-regexp eword-encoded-word-in-unstructured-regexp)
124 ;;; @ internal utilities
127 (defun eword-decode-first-encoded-words (string
130 &optional must-unfold)
131 "Decode MIME encoded-words in beginning of STRING.
133 EWORD-REGEXP is the regexp that matches a encoded-word.
135 eword-encoded-word-in-unstructured-regexp,
136 eword-encoded-text-in-phrase-regexp,
137 eword-encoded-word-in-comment-regexp or
138 eword-encoded-word-in-quoted-string-regexp.
140 AFTER-REGEXP is the regexp that matches a after encoded-word.
142 eword-after-encoded-word-in-unstructured-regexp,
143 eword-after-encoded-text-in-phrase-regexp,
144 eword-after-encoded-word-in-comment-regexp or
145 eword-after-encoded-word-in-quoted-string-regexp.
147 If beginning of STRING matches EWORD-REGEXP with AFTER-REGEXP,
148 returns a cons cell of decoded string(sequence of characters) and
149 the rest(sequence of octets).
151 If beginning of STRING does not matches EWORD-REGEXP and AFTER-REGEXP,
154 If an encoded-word is broken or your emacs implementation can not
155 decode the charset included in it, it is returned in decoded part
156 as encoded-word form.
158 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
159 if there are in decoded encoded-words (generated by bad manner MUA
160 such as a version of Net$cape)."
161 (if eword-decode-sticked-encoded-word (setq after-regexp ""))
162 (let* ((between-ewords-regexp
163 (if eword-decode-sticked-encoded-word
166 (between-ewords-eword-after-regexp
167 (concat "\\`\\(" between-ewords-regexp "\\)"
168 "\\(" eword-regexp "\\)"
171 (concat "\\`\\(" eword-regexp "\\)" after-regexp))
172 (src string) ; sequence of octets.
173 (dst "")) ; sequence of characters.
174 (if (string-match eword-after-regexp src)
177 (ew (substring src 0 q))
178 (dw (eword-decode-encoded-word ew must-unfold)))
179 (setq dst (concat dst dw)
180 src (substring src q))
181 (if (not (string= ew dw))
185 (string-match between-ewords-eword-after-regexp src)
187 (setq p (match-end 1)
189 ew (substring src p q)
190 dw (eword-decode-encoded-word ew must-unfold))
193 (setq dst (concat dst (substring src 0 q))
194 src (substring src q))
197 (setq dst (concat dst dw)
198 src (substring src q)))))
202 (defun eword-decode-entire-string (string
207 delimiters ; list of chars.
211 (if (and code-conversion
212 (not (mime-charset-to-coding-system code-conversion)))
213 (setq code-conversion default-mime-charset))
214 (let ((equal-safe-regexp (concat "\\`=?" safe-regexp))
219 (while (< 0 (length src))
220 (let ((ch (aref src 0))
223 (eword-decode-first-encoded-words src
224 eword-regexp after-regexp must-unfold))))
225 (if (and (not (string= buf ""))
226 (or decoded (memq ch delimiters)))
227 (setq dst (concat dst
228 (std11-wrap-as-quoted-pairs
229 (decode-mime-charset-string buf code-conversion)
230 chars-must-be-quote))
234 (setq dst (concat dst
235 (std11-wrap-as-quoted-pairs
237 chars-must-be-quote))
239 ((memq ch delimiters)
240 (setq dst (concat dst (list ch))
241 src (substring src 1)
244 (setq buf (concat buf (list (aref src 1)))
245 src (substring src 2)
247 ((string-match "\\`[ \t\n]+" src)
248 (setq buf (concat buf (substring src 0 (match-end 0)))
249 src (substring src (match-end 0))
251 ((and (string-match equal-safe-regexp src)
253 (setq buf (concat buf (substring src 0 (match-end 0)))
254 src (substring src (match-end 0))
255 ew-enable eword-decode-sticked-encoded-word))
256 (t (error "something wrong")))))
257 (if (not (string= buf ""))
258 (setq dst (concat dst
259 (std11-wrap-as-quoted-pairs
260 (decode-mime-charset-string buf code-conversion)
261 chars-must-be-quote))))
268 (defun eword-decode-unstructured (string code-conversion &optional must-unfold)
269 (eword-decode-entire-string
271 eword-encoded-word-in-unstructured-regexp
272 eword-after-encoded-word-in-unstructured-regexp
280 (defun eword-decode-comment (string code-conversion &optional must-unfold)
281 (eword-decode-entire-string
283 eword-encoded-word-in-comment-regexp
284 eword-after-encoded-word-in-comment-regexp
288 '(?\( ?\) ?\\ ?\r ?\n)
292 (defun eword-decode-quoted-string (string code-conversion &optional must-unfold)
293 (eword-decode-entire-string
295 eword-encoded-word-in-quoted-string-regexp
296 eword-after-encoded-word-in-quoted-string-regexp
304 (defun eword-decode-string (string &optional must-unfold code-conversion)
305 "Decode MIME encoded-words in STRING.
307 STRING is unfolded before decoding.
309 If an encoded-word is broken or your emacs implementation can not
310 decode the charset included in it, it is not decoded.
312 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
313 if there are in decoded encoded-words (generated by bad manner MUA
314 such as a version of Net$cape).
316 If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
317 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
318 Otherwise it decodes non-ASCII bit patterns as the
319 default-mime-charset."
320 (eword-decode-unstructured
321 (std11-unfold-string string)
325 (defun eword-decode-and-fold-structured-field
326 (string start-column &optional max-column must-unfold)
327 "Decode and fold (fill) STRING as structured field body.
328 It decodes non us-ascii characters in FULL-NAME encoded as
329 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
330 characters are regarded as variable `default-mime-charset'.
332 If an encoded-word is broken or your emacs implementation can not
333 decode the charset included in it, it is not decoded.
335 If MAX-COLUMN is omitted, `fill-column' is used.
337 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
338 if there are in decoded encoded-words (generated by bad manner MUA
339 such as a version of Net$cape)."
340 (rotate-memo args-eword-decode-and-fold-structured-field
341 (list string start-column max-column must-unfold))
343 (setq max-column fill-column))
344 (let* ((field-name (make-string (1- start-column) ?X))
345 (field-body (ew-lf-crlf-to-crlf string))
346 (ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
347 (decoded (ew-decode-field field-name field-body)))
348 (unless (equal field-body decoded)
349 (setq decoded (ew-crlf-refold decoded start-column max-column)))
350 (ew-crlf-to-lf decoded)))
352 (defun eword-decode-and-unfold-structured-field (string)
353 "Decode and unfold STRING as structured field body.
354 It decodes non us-ascii characters in FULL-NAME encoded as
355 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
356 characters are regarded as variable `default-mime-charset'.
358 If an encoded-word is broken or your emacs implementation can not
359 decode the charset included in it, it is not decoded."
360 (rotate-memo args-eword-decode-and-unfold-structured-field (list string))
361 (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
362 (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
363 (ew-crlf-to-lf (ew-crlf-unfold decoded))))
365 (defun eword-decode-structured-field-body (string &optional must-unfold
366 start-column max-column)
367 "Decode non us-ascii characters in STRING as structured field body.
368 STRING is unfolded before decoding.
370 It decodes non us-ascii characters in FULL-NAME encoded as
371 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
372 characters are regarded as variable `default-mime-charset'.
374 If an encoded-word is broken or your emacs implementation can not
375 decode the charset included in it, it is not decoded.
377 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
378 if there are in decoded encoded-words (generated by bad manner MUA
379 such as a version of Net$cape)."
380 (rotate-memo args-eword-decode-structured-field-body
381 (list string must-unfold start-column max-column))
383 ;; fold with max-column
384 (eword-decode-and-fold-structured-field
385 string start-column max-column must-unfold)
387 (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
388 (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
389 (ew-crlf-to-lf decoded))))
391 (defun eword-decode-unstructured-field-body (string &optional must-unfold)
392 "Decode non us-ascii characters in STRING as unstructured field body.
393 STRING is unfolded before decoding.
395 It decodes non us-ascii characters in FULL-NAME encoded as
396 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
397 characters are regarded as variable `default-mime-charset'.
399 If an encoded-word is broken or your emacs implementation can not
400 decode the charset included in it, it is not decoded.
402 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
403 if there are in decoded encoded-words (generated by bad manner MUA
404 such as a version of Net$cape)."
405 (rotate-memo args-eword-decode-unstructured-field-body
406 (list string must-unfold))
407 (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
408 (ew-crlf-to-lf (ew-crlf-unfold decoded))))
410 (defun eword-decode-and-unfold-unstructured-field (string)
411 "Decode and unfold STRING as unstructured field body.
412 It decodes non us-ascii characters in FULL-NAME encoded as
413 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
414 characters are regarded as variable `default-mime-charset'.
416 If an encoded-word is broken or your emacs implementation can not
417 decode the charset included in it, it is not decoded."
418 (rotate-memo args-eword-decode-and-unfold-unstructured-field
420 (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
421 (ew-crlf-to-lf (ew-crlf-unfold decoded))))
427 (defun eword-decode-region (start end &optional unfolding must-unfold
429 "Decode MIME encoded-words in region between START and END.
431 If UNFOLDING is not nil, it unfolds before decoding.
433 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
434 if there are in decoded encoded-words (generated by bad manner MUA
435 such as a version of Net$cape).
437 If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
438 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
439 Otherwise it decodes non-ASCII bit patterns as the
440 default-mime-charset."
442 (rotate-memo args-eword-decode-region
443 (list start end (buffer-substring start end) unfolding must-unfold code-conversion))
446 (narrow-to-region start end)
448 (eword-decode-unfold)
450 (let ((str (eword-decode-unstructured
451 (buffer-substring (point-min) (point-max))
454 (delete-region (point-min) (point-max))
458 ;;; @ for message header
461 (defcustom eword-decode-ignored-field-list
462 '(Newsgroups Path Lines Nntp-Posting-Host Received Message-Id Date)
463 "*List of field-names to be ignored when decoding.
464 Each field name must be symbol."
466 :type '(repeat symbol))
468 (defcustom eword-decode-structured-field-list
469 '(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender
470 To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc
472 Mime-Version Content-Type Content-Transfer-Encoding
473 Content-Disposition User-Agent)
474 "*List of field-names to decode as structured field.
475 Each field name must be symbol."
477 :type '(repeat symbol))
479 (defun eword-decode-field-body
480 (field-body field-name &optional unfolded max-column)
481 "Decode FIELD-BODY as FIELD-NAME, and return the result.
483 If UNFOLDED is non-nil, it is assumed that FIELD-BODY is
486 If MAX-COLUMN is non-nil, the result is folded with MAX-COLUMN
487 or `fill-column' if MAX-COLUMN is t.
488 Otherwise, the result is unfolded.
490 MIME encoded-word in FIELD-BODY is recognized according to
491 `eword-decode-ignored-field-list',
492 `eword-decode-structured-field-list' and FIELD-NAME.
494 Non MIME encoded-word part in FILED-BODY is decoded with
495 `default-mime-charset'."
496 (if (symbolp field-name) (setq field-name (symbol-name field-name)))
499 (let ((ew-ignore-76bytes-limit t))
501 field-name (ew-lf-crlf-to-crlf field-body)))
503 field-name (ew-lf-crlf-to-crlf field-body)))))
505 (setq decoded (ew-crlf-refold
507 (1+ (string-width field-name))
508 (if (eq max-column t) fill-column max-column)))
509 (setq decoded (ew-crlf-unfold decoded)))
510 (ew-crlf-to-lf decoded)))
512 (defun eword-decode-header (&optional code-conversion separator)
513 "Decode MIME encoded-words in header fields.
514 If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
515 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
516 Otherwise it decodes non-ASCII bit patterns as the
517 default-mime-charset.
518 If SEPARATOR is not nil, it is used as header separator."
520 (rotate-memo args-eword-decode-header (list code-conversion))
521 (unless code-conversion
522 (message "eword-decode-header is called without code-conversion")
524 (if (and code-conversion
525 (not (mime-charset-to-coding-system code-conversion)))
526 (setq code-conversion default-mime-charset))
529 (std11-narrow-to-header separator)
530 (rotate-memo args-h-eword-decode-header (buffer-substring (point-min) (point-max)))
532 (let (beg p end field-name field-body decoded)
533 (goto-char (point-min))
534 (while (re-search-forward std11-field-head-regexp nil t)
535 (setq beg (match-beginning 0)
537 field-name (buffer-substring beg (1- p))
538 end (std11-field-end)
539 field-body (ew-lf-crlf-to-crlf
540 (buffer-substring p end))
541 decoded (ew-decode-field
542 field-name field-body))
543 (unless (equal field-body decoded)
544 (setq decoded (ew-crlf-refold
546 (1+ (string-width field-name))
548 (delete-region p end)
549 (insert (ew-crlf-to-lf decoded))
550 (add-text-properties beg (min (1+ (point)) (point-max))
551 (list 'original-field-name field-name
552 'original-field-body field-body))
554 (eword-decode-region (point-min) (point-max) t nil nil)
557 (defun eword-decode-unfold ()
558 (goto-char (point-min))
560 (while (re-search-forward std11-field-head-regexp nil t)
561 (setq beg (match-beginning 0)
562 end (std11-field-end))
563 (setq field (buffer-substring beg end))
564 (if (string-match eword-encoded-word-regexp field)
566 (narrow-to-region (goto-char beg) end)
567 (while (re-search-forward "\n\\([ \t]\\)" nil t)
568 (replace-match (match-string 1))
570 (goto-char (point-max))
575 ;;; @ encoded-word decoder
578 (defvar eword-decode-encoded-word-error-handler
579 'eword-decode-encoded-word-default-error-handler)
581 (defvar eword-warning-face nil
582 "Face used for invalid encoded-word.")
584 (defun eword-decode-encoded-word-default-error-handler (word signal)
585 (and (add-text-properties 0 (length word)
586 (and eword-warning-face
587 (list 'face eword-warning-face))
591 (defun eword-decode-encoded-word (word &optional must-unfold)
592 "Decode WORD if it is an encoded-word.
594 If your emacs implementation can not decode the charset of WORD, it
595 returns WORD. Similarly the encoded-word is broken, it returns WORD.
597 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
598 if there are in decoded encoded-word (generated by bad manner MUA such
599 as a version of Net$cape)."
600 (or (if (string-match eword-encoded-word-regexp word)
602 (substring word (match-beginning 1) (match-end 1))
606 (substring word (match-beginning 2) (match-end 2))
609 (substring word (match-beginning 3) (match-end 3))
612 (eword-decode-encoded-text charset encoding text must-unfold)
614 (funcall eword-decode-encoded-word-error-handler word err)
620 ;;; @ encoded-text decoder
623 (defun eword-decode-encoded-text (charset encoding string
624 &optional must-unfold)
625 "Decode STRING as an encoded-text.
627 If your emacs implementation can not decode CHARSET, it returns nil.
629 If ENCODING is not \"B\" or \"Q\", it occurs error.
630 So you should write error-handling code if you don't want break by errors.
632 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
633 if there are in decoded encoded-text (generated by bad manner MUA such
634 as a version of Net$cape)."
635 (let ((cs (mime-charset-to-coding-system charset)))
637 (let ((dest (encoded-text-decode-string string encoding)))
639 (setq dest (decode-mime-charset-string dest charset))
643 (cond ((eq chr ?\n) "")
645 (t (char-to-string chr)))
647 (std11-unfold-string dest)
652 ;;; @ lexical analyze
655 (defvar eword-lexical-analyze-cache nil)
656 (defvar eword-lexical-analyze-cache-max 299
657 "*Max position of eword-lexical-analyze-cache.
658 It is max size of eword-lexical-analyze-cache - 1.")
660 (defcustom eword-lexical-analyzers
661 '(eword-analyze-quoted-string
662 eword-analyze-domain-literal
663 eword-analyze-comment
665 eword-analyze-special
666 eword-analyze-encoded-word
668 "*List of functions to return result of lexical analyze.
669 Each function must have two arguments: STRING and MUST-UNFOLD.
670 STRING is the target string to be analyzed.
671 If MUST-UNFOLD is not nil, each function must unfold and eliminate
672 bare-CR and bare-LF from the result even if they are included in
673 content of the encoded-word.
674 Each function must return nil if it can not analyze STRING as its
677 Previous function is preferred to next function. If a function
678 returns nil, next function is used. Otherwise the return value will
681 :type '(repeat function))
683 (defun eword-analyze-quoted-string (string &optional must-unfold)
684 (let ((p (std11-check-enclosure string ?\" ?\")))
686 (cons (cons 'quoted-string
687 (if eword-decode-quoted-encoded-word
688 (eword-decode-quoted-string
689 (substring string 0 p)
690 default-mime-charset)
691 (std11-wrap-as-quoted-string
692 (decode-mime-charset-string
693 (std11-strip-quoted-pair (substring string 1 (1- p)))
694 default-mime-charset))))
695 (substring string p)))
698 (defun eword-analyze-domain-literal (string &optional must-unfold)
699 (std11-analyze-domain-literal string))
701 (defun eword-analyze-comment (string &optional must-unfold)
702 (let ((len (length string)))
703 (if (and (< 0 len) (eq (aref string 0) ?\())
705 (while (and p (< p len) (eq (aref string p) ?\())
706 (setq p (std11-check-enclosure string ?\( ?\) t p)))
709 (eword-decode-comment
710 (std11-unfold-string (substring string 0 p))
711 default-mime-charset))
712 (substring string p)))
715 (defun eword-analyze-spaces (string &optional must-unfold)
716 (std11-analyze-spaces string))
718 (defun eword-analyze-special (string &optional must-unfold)
719 (std11-analyze-special string))
721 (defun eword-analyze-encoded-word (string &optional must-unfold)
722 (let ((decoded (eword-decode-first-encoded-words
724 eword-encoded-word-in-phrase-regexp
725 eword-after-encoded-word-in-phrase-regexp
728 (let ((s (car decoded)))
729 (while (or (string-match std11-atom-regexp s)
730 (string-match std11-spaces-regexp s))
731 (setq s (substring s (match-end 0))))
733 (cons (cons 'atom (car decoded)) (cdr decoded))
734 (cons (cons 'quoted-string
735 (std11-wrap-as-quoted-string (car decoded)))
738 (defun eword-analyze-atom (string &optional must-unfold)
739 (if (string-match std11-atom-regexp (string-as-unibyte string))
740 (let ((end (match-end 0)))
741 (if (and eword-decode-sticked-encoded-word
742 (string-match eword-encoded-word-in-phrase-regexp
743 (substring string 0 end))
744 (< 0 (match-beginning 0)))
745 (setq end (match-beginning 0)))
746 (cons (cons 'atom (decode-mime-charset-string
747 (substring string 0 end)
748 default-mime-charset))
749 (substring string end)
752 (defun eword-lexical-analyze-internal (string must-unfold)
753 (let ((last 'eword-analyze-spaces)
755 (while (not (string-equal string ""))
757 (let ((rest eword-lexical-analyzers)
759 (while (and (setq func (car rest))
762 (not eword-decode-sticked-encoded-word)
763 (not (eq last 'eword-analyze-spaces))
764 (eq func 'eword-analyze-encoded-word))
765 (null (setq r (funcall func string must-unfold))))
767 (setq rest (cdr rest)))
769 (or r `((error . ,string) . ""))
771 (setq dest (cons (car ret) dest))
772 (setq string (cdr ret))
777 (defun eword-lexical-analyze (string &optional must-unfold)
778 "Return lexical analyzed list corresponding STRING.
779 It is like std11-lexical-analyze, but it decodes non us-ascii
780 characters encoded as encoded-words or invalid \"raw\" format.
781 \"Raw\" non us-ascii characters are regarded as variable
782 `default-mime-charset'."
783 (let* ((str (copy-sequence string))
784 (key (cons str (cons default-mime-charset must-unfold)))
786 (set-text-properties 0 (length str) nil str)
787 (if (setq ret (assoc key eword-lexical-analyze-cache))
789 (setq ret (eword-lexical-analyze-internal str must-unfold))
790 (setq eword-lexical-analyze-cache
792 (last eword-lexical-analyze-cache
793 eword-lexical-analyze-cache-max)))
796 (defun eword-decode-token (token)
799 (defun eword-extract-address-components (string)
800 "Extract full name and canonical address from STRING.
801 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
802 If no name can be extracted, FULL-NAME will be nil.
803 It decodes non us-ascii characters in FULL-NAME encoded as
804 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
805 characters are regarded as variable `default-mime-charset'."
806 (rotate-memo args-eword-extract-address-components (list string))
807 (let* ((structure (car (std11-parse-address
808 (eword-lexical-analyze
809 (std11-unfold-string string) 'must-unfold))))
810 (phrase (std11-full-name-string structure))
811 (address (std11-address-string structure))
813 (list phrase address)
820 (provide 'eword-decode)
822 ;;; eword-decode.el ends here