1 ;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs
3 ;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc.
5 ;; Author: ENAMI Tsugutomo <enami@sys.ptg.sony.co.jp>
6 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
7 ;; Tanaka Akira <akr@jaist.ac.jp>
8 ;; Maintainer: Tanaka Akira <akr@jaist.ac.jp>
10 ;; Original: 1992/07/20 ENAMI Tsugutomo's `mime.el'.
11 ;; Renamed: 1993/06/03 to tiny-mime.el
12 ;; Renamed: 1995/10/03 from tiny-mime.el (split off encoder)
13 ;; Renamed: 1997/02/22 from tm-ew-d.el
14 ;; Keywords: encoded-word, MIME, multilingual, header, mail, news
16 ;; This file is part of FLAM (Faithful Library About MIME).
18 ;; This program is free software; you can redistribute it and/or
19 ;; modify it under the terms of the GNU General Public License as
20 ;; published by the Free Software Foundation; either version 2, or (at
21 ;; your option) any later version.
23 ;; This program is distributed in the hope that it will be useful, but
24 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
25 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 ;; General Public License for more details.
28 ;; You should have received a copy of the GNU General Public License
29 ;; along with GNU Emacs; see the file COPYING. If not, write to the
30 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
31 ;; Boston, MA 02111-1307, USA.
;; Customization group for this library.
;; NOTE(review): only the group name and its description string are visible
;; in this excerpt.
41 (defgroup eword-decode nil
42 "Encoded-word decoding"
;; (rotate-memo VAR VAL): record VAL at the front of the list stored in VAR.
;; It is invoked at the top of several decoding entry points in this file
;; with a list of their arguments, apparently as a debugging aid.
47 (defmacro rotate-memo (var val)
;; Start VAR as an empty list if it has no value yet.
49 (unless (boundp ',var) (setq ,var ()))
50 (setq ,var (cons ,val ,var))
;; Bound the history: TMP is the tail picked by `last' with a count of
;; (length - 100); when it is non-nil the list is cut after TMP's first cell.
51 (let ((tmp (last ,var (- (length ,var) 100))))
52 (when tmp (setcdr tmp nil)))
;; User option read by eword-decode-first-encoded-words,
;; eword-decode-entire-string, eword-analyze-atom and
;; eword-lexical-analyze-internal.
58 (defcustom eword-decode-sticked-encoded-word nil
59 "*If non-nil, decode encoded-words sticked on atoms,
60 other encoded-words, etc.
61 however this behaviour violates RFC2047."
;; User option read by eword-analyze-quoted-string: when non-nil the
;; quoted-string is handed to eword-decode-quoted-string, otherwise its
;; content is only charset-decoded and re-quoted.
65 (defcustom eword-decode-quoted-encoded-word nil
66 "*If non-nil, decode encoded-words in quoted-string
67 however this behaviour violates RFC2047."
72 ;;; @ MIME encoded-word definition
;; Leading part shared by every eword-encoded-word-in-*-regexp in this file:
;; the literal "=?" followed by a capture group wrapping
;; `mime-charset-regexp'.
;; NOTE(review): the rest of the expression is not visible in this excerpt.
75 (defconst eword-encoded-word-prefix-regexp
76 (concat (regexp-quote "=?")
77 "\\(" mime-charset-regexp "\\)"
;; Trailing part appended to every eword-encoded-word-in-*-regexp in this
;; file.  NOTE(review): its value is not visible in this excerpt.
81 (defconst eword-encoded-word-suffix-regexp
;; Encoded-word syntax used for unstructured text.
;; Encoded-text characters: "!" through ">" and "@" through "~", i.e.
;; printable ASCII other than "?" (space is not included either).
84 (defconst eword-encoded-text-in-unstructured-regexp "[!->@-~]+")
;; Whole encoded-word: prefix, the encoded-text as a capture group, suffix.
85 (defconst eword-encoded-word-in-unstructured-regexp
86 (concat eword-encoded-word-prefix-regexp
87 "\\(" eword-encoded-text-in-unstructured-regexp "\\)"
88 eword-encoded-word-suffix-regexp))
;; What has to follow the encoded-word: a space, a tab, or the end of line.
89 (defconst eword-after-encoded-word-in-unstructured-regexp "\\([ \t]\\|$\\)")
;; Encoded-word syntax used inside a phrase.
;; Encoded-text characters: ASCII letters, digits and - ! * + / = _
91 (defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+")
;; Whole encoded-word: prefix, the encoded-text as a capture group, suffix.
92 (defconst eword-encoded-word-in-phrase-regexp
93 (concat eword-encoded-word-prefix-regexp
94 "\\(" eword-encoded-text-in-phrase-regexp "\\)"
95 eword-encoded-word-suffix-regexp))
;; What has to follow the encoded-word: a space, a tab, or the end of line.
96 (defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t]\\|$\\)")
;; Encoded-word syntax used inside a comment.
;; Encoded-text characters: printable ASCII other than "(", ")", "?" and
;; backslash ("]" is listed first so that it is taken literally).
98 (defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+")
;; Whole encoded-word: prefix, the encoded-text as a capture group, suffix.
99 (defconst eword-encoded-word-in-comment-regexp
100 (concat eword-encoded-word-prefix-regexp
101 "\\(" eword-encoded-text-in-comment-regexp "\\)"
102 eword-encoded-word-suffix-regexp))
;; What has to follow the encoded-word: a space, a tab, "(", ")", a
;; backslash, or the end of line.
103 (defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)")
;; Encoded-word syntax used inside a quoted-string.
;; Encoded-text characters: printable ASCII other than the double quote,
;; "?" and backslash ("]" is listed first so that it is taken literally).
105 (defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+")
;; Whole encoded-word: prefix, the encoded-text as a capture group, suffix.
106 (defconst eword-encoded-word-in-quoted-string-regexp
107 (concat eword-encoded-word-prefix-regexp
108 "\\(" eword-encoded-text-in-quoted-string-regexp "\\)"
109 eword-encoded-word-suffix-regexp))
;; What has to follow the encoded-word: a space, a tab, a double quote, a
;; backslash, or the end of line.
110 (defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)")
;; Context-free names: both are set to the unstructured variants.
;; `eword-encoded-word-regexp' is what eword-decode-unfold and
;; eword-decode-encoded-word match against.
113 (defconst eword-encoded-text-regexp eword-encoded-text-in-unstructured-regexp)
114 (defconst eword-encoded-word-regexp eword-encoded-word-in-unstructured-regexp)
;; One character of the base64 alphabet (letters, digits, "+" and "/").
120 (defconst base64-token-regexp "[A-Za-z0-9+/]")
;; Same set plus the "=" padding character.
121 (defconst base64-token-padding-regexp "[A-Za-z0-9+/=]")
;; Regexp for the encoded-text of a "B" encoded-word.
;; eword-decode-encoded-text accepts the text only when this regexp matches
;; all of it.
;; NOTE(review): most of the expression is not visible in this excerpt; only
;; two uses of `base64-token-padding-regexp' can be seen.
123 (defconst eword-B-encoded-text-regexp
132 base64-token-padding-regexp
133 base64-token-padding-regexp
136 ;; (defconst eword-B-encoding-and-encoded-text-regexp
137 ;; (concat "\\(B\\)\\?" eword-B-encoded-text-regexp))
140 ;;; @@ Quoted-Printable
;; Regexp for the encoded-text of a "Q" encoded-word: one or more items,
;; each either a single character other than "=" and "?" or a match of
;; `quoted-printable-octet-regexp'.
;; eword-decode-encoded-text accepts the text only when this regexp matches
;; all of it.
143 (defconst eword-Q-encoded-text-regexp
144 (concat "\\([^=?]\\|" quoted-printable-octet-regexp "\\)+"))
145 ;; (defconst eword-Q-encoding-and-encoded-text-regexp
146 ;; (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp))
149 ;;; @ internal utilities
;; Decode the run of encoded-words found at the very start of STRING.
;; Callers visible in this file: eword-decode-entire-string and
;; eword-analyze-encoded-word.
;; NOTE(review): the docstring lists `eword-encoded-text-in-phrase-regexp'
;; and `eword-after-encoded-text-in-phrase-regexp'.  The constants defined in
;; this file, and the ones eword-analyze-encoded-word actually passes, are
;; `eword-encoded-word-in-phrase-regexp' and
;; `eword-after-encoded-word-in-phrase-regexp'.
152 (defun eword-decode-first-encoded-words (string
155 &optional must-unfold)
156 "Decode MIME encoded-words in beginning of STRING.
158 EWORD-REGEXP is the regexp that matches a encoded-word.
160 eword-encoded-word-in-unstructured-regexp,
161 eword-encoded-text-in-phrase-regexp,
162 eword-encoded-word-in-comment-regexp or
163 eword-encoded-word-in-quoted-string-regexp.
165 AFTER-REGEXP is the regexp that matches a after encoded-word.
167 eword-after-encoded-word-in-unstructured-regexp,
168 eword-after-encoded-text-in-phrase-regexp,
169 eword-after-encoded-word-in-comment-regexp or
170 eword-after-encoded-word-in-quoted-string-regexp.
172 If beginning of STRING matches EWORD-REGEXP with AFTER-REGEXP,
173 returns a cons cell of decoded string(sequence of characters) and
174 the rest(sequence of octets).
176 If beginning of STRING does not matches EWORD-REGEXP and AFTER-REGEXP,
179 If an encoded-word is broken or your emacs implementation can not
180 decode the charset included in it, it is returned in decoded part
181 as encoded-word form.
183 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
184 if there are in decoded encoded-words (generated by bad manner MUA
185 such as a version of Net$cape)."
;; In "sticked" mode nothing is required after an encoded-word.
186 (if eword-decode-sticked-encoded-word (setq after-regexp ""))
187 (let* ((between-ewords-regexp
188 (if eword-decode-sticked-encoded-word
191 (between-ewords-eword-after-regexp
192 (concat "\\`\\(" between-ewords-regexp "\\)"
193 "\\(" eword-regexp "\\)"
196 (concat "\\`\\(" eword-regexp "\\)" after-regexp))
197 (src string) ; sequence of octets.
198 (dst "")) ; sequence of characters.
;; Does the input start with an encoded-word followed by AFTER-REGEXP?
199 (if (string-match eword-after-regexp src)
;; EW is the raw encoded-word, DW what eword-decode-encoded-word made of it.
202 (ew (substring src 0 q))
203 (dw (eword-decode-encoded-word ew must-unfold)))
204 (setq dst (concat dst dw)
205 src (substring src q))
;; EW equal to DW means the word came back undecoded.
206 (if (not (string= ew dw))
;; Look for a further encoded-word after the between-words separator.
210 (string-match between-ewords-eword-after-regexp src)
;; P is the end of the separator (group 1), so EW is the next word alone.
212 (setq p (match-end 1)
214 ew (substring src p q)
215 dw (eword-decode-encoded-word ew must-unfold))
;; Copy the text up to Q unchanged.
218 (setq dst (concat dst (substring src 0 q))
219 src (substring src q))
;; Append the decoded word and move past it.
222 (setq dst (concat dst dw)
223 src (substring src q)))))
;; Worker shared by eword-decode-unstructured, eword-decode-comment and
;; eword-decode-quoted-string.  It consumes SRC from the left: BUF gathers
;; raw text that still needs charset decoding, DST gathers finished output.
;; NOTE(review): part of the parameter list and several clause heads are not
;; visible in this excerpt.
227 (defun eword-decode-entire-string (string
232 delimiters ; list of chars.
;; A non-nil CODE-CONVERSION without a coding system falls back to
;; `default-mime-charset'.
236 (if (and code-conversion
237 (not (mime-charset-to-coding-system code-conversion)))
238 (setq code-conversion default-mime-charset))
;; Anchored regexp: an optional "=" followed by SAFE-REGEXP.
239 (let ((equal-safe-regexp (concat "\\`=?" safe-regexp))
244 (while (< 0 (length src))
245 (let ((ch (aref src 0))
248 (eword-decode-first-encoded-words src
249 eword-regexp after-regexp must-unfold))))
;; Flush BUF (charset-decode, then re-quote) once something was decoded or a
;; delimiter character is reached.
250 (if (and (not (string= buf ""))
251 (or decoded (memq ch delimiters)))
252 (setq dst (concat dst
253 (std11-wrap-as-quoted-pairs
254 (decode-mime-charset-string buf code-conversion)
255 chars-must-be-quote))
259 (setq dst (concat dst
260 (std11-wrap-as-quoted-pairs
262 chars-must-be-quote))
;; A delimiter character goes to DST as it is.
264 ((memq ch delimiters)
265 (setq dst (concat dst (list ch))
266 src (substring src 1)
;; Take the character after the current one into BUF and skip both
;; (presumably the quoted-pair case; the clause head is not visible).
269 (setq buf (concat buf (list (aref src 1)))
270 src (substring src 2)
;; A run of spaces, tabs and newlines is appended to BUF.
272 ((string-match "\\`[ \t\n]+" src)
273 (setq buf (concat buf (substring src 0 (match-end 0)))
274 src (substring src (match-end 0))
276 ((and (string-match equal-safe-regexp src)
278 (setq buf (concat buf (substring src 0 (match-end 0)))
279 src (substring src (match-end 0))
280 ew-enable eword-decode-sticked-encoded-word))
;; No clause consumed any input.
281 (t (error "something wrong")))))
;; Flush whatever is left in BUF.
282 (if (not (string= buf ""))
283 (setq dst (concat dst
284 (std11-wrap-as-quoted-pairs
285 (decode-mime-charset-string buf code-conversion)
286 chars-must-be-quote))))
;; Decode STRING as unstructured text: calls eword-decode-entire-string with
;; the unstructured encoded-word regexps.
;; NOTE(review): the remaining arguments are not visible in this excerpt.
293 (defun eword-decode-unstructured (string code-conversion &optional must-unfold)
294 (eword-decode-entire-string
296 eword-encoded-word-in-unstructured-regexp
297 eword-after-encoded-word-in-unstructured-regexp
;; Decode STRING as a comment: calls eword-decode-entire-string with the
;; comment encoded-word regexps.
;; The quoted list on the last visible line holds "(", ")", backslash, CR
;; and LF.  NOTE(review): which parameter it feeds is not visible here.
305 (defun eword-decode-comment (string code-conversion &optional must-unfold)
306 (eword-decode-entire-string
308 eword-encoded-word-in-comment-regexp
309 eword-after-encoded-word-in-comment-regexp
313 '(?\( ?\) ?\\ ?\r ?\n)
;; Decode STRING as a quoted-string: calls eword-decode-entire-string with
;; the quoted-string encoded-word regexps.
;; NOTE(review): the remaining arguments are not visible in this excerpt.
317 (defun eword-decode-quoted-string (string code-conversion &optional must-unfold)
318 (eword-decode-entire-string
320 eword-encoded-word-in-quoted-string-regexp
321 eword-after-encoded-word-in-quoted-string-regexp
;; String-level entry point: unfolds STRING with `std11-unfold-string' and
;; hands the result to eword-decode-unstructured.
329 (defun eword-decode-string (string &optional must-unfold code-conversion)
330 "Decode MIME encoded-words in STRING.
332 STRING is unfolded before decoding.
334 If an encoded-word is broken or your emacs implementation can not
335 decode the charset included in it, it is not decoded.
337 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
338 if there are in decoded encoded-words (generated by bad manner MUA
339 such as a version of Net$cape).
341 If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
342 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
343 Otherwise it decodes non-ASCII bit patterns as the
344 default-mime-charset."
345 (eword-decode-unstructured
346 (std11-unfold-string string)
;; Buffer-level entry point: decodes the text between START and END with
;; eword-decode-unstructured.
;; NOTE(review): the end of the parameter list and the step that puts the
;; decoded text back are not visible in this excerpt.
354 (defun eword-decode-region (start end &optional unfolding must-unfold
356 "Decode MIME encoded-words in region between START and END.
358 If UNFOLDING is not nil, it unfolds before decoding.
360 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
361 if there are in decoded encoded-words (generated by bad manner MUA
362 such as a version of Net$cape).
364 If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
365 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
366 Otherwise it decodes non-ASCII bit patterns as the
367 default-mime-charset."
;; Restrict all following buffer operations to START..END.
371 (narrow-to-region start end)
;; Join folded lines of fields that contain encoded-words (see UNFOLDING in
;; the docstring).
373 (eword-decode-unfold)
;; Decode the whole narrowed text as unstructured text.
375 (let ((str (eword-decode-unstructured
376 (buffer-substring (point-min) (point-max))
;; Remove the original text from the buffer.
379 (delete-region (point-min) (point-max))
383 ;;; @ for message header
;; User option holding field names as symbols.
;; NOTE(review): no reader of this list is visible in this excerpt.
386 (defcustom eword-decode-ignored-field-list
387 '(Newsgroups Path Lines Nntp-Posting-Host Received Message-Id Date)
388 "*List of field-names to be ignored when decoding.
389 Each field name must be symbol."
391 :type '(repeat symbol))
;; User option holding field names as symbols.
;; NOTE(review): no reader of this list is visible in this excerpt, and the
;; end of the default list is not shown.
393 (defcustom eword-decode-structured-field-list
394 '(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender
395 To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc
396 Mime-Version Content-Type Content-Transfer-Encoding
398 "*List of field-names to decode as structured field.
399 Each field name must be symbol."
401 :type '(repeat symbol))
;; Decode the header of the current buffer field by field: each field body
;; is deleted and replaced by the result of `ew-decode-field'.
403 (defun eword-decode-header (&optional code-conversion separator)
404 "Decode MIME encoded-words in header fields.
405 If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
406 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
407 Otherwise it decodes non-ASCII bit patterns as the
408 default-mime-charset.
409 If SEPARATOR is not nil, it is used as header separator."
;; Remember the argument of this call (see `rotate-memo').
411 (rotate-memo args-eword-decode-header
412 (list code-conversion))
;; Report calls made without CODE-CONVERSION in the echo area.
413 (unless code-conversion
414 (message "eword-decode-header is called with no code-conversion"))
;; A non-nil CODE-CONVERSION without a coding system falls back to
;; `default-mime-charset'.
415 (if (and code-conversion
416 (not (mime-charset-to-coding-system code-conversion)))
417 (setq code-conversion default-mime-charset))
;; Work on the header part only.
420 (std11-narrow-to-header separator)
422 (let (beg p end field-name field-body len)
423 (goto-char (point-min))
;; One iteration per field head found.
424 (while (re-search-forward std11-field-head-regexp nil t)
425 (setq beg (match-beginning 0)
;; The field name is the text from BEG up to one character before P.
427 field-name (buffer-substring beg (1- p))
428 end (std11-field-end)
429 field-body (buffer-substring p end))
;; Replace the raw body with its decoded form.
430 (delete-region p end)
431 (insert (ew-decode-field field-name (ew-lf-crlf-to-crlf field-body)))
;; NOTE(review): the conditional enclosing this whole-header call is not
;; visible; confirm when it runs.
433 (eword-decode-region (point-min) (point-max) t nil nil)
;; Unfold the fields of the current buffer that contain an encoded-word:
;; inside such a field every newline followed by a space or tab is replaced
;; by that space or tab alone.
436 (defun eword-decode-unfold ()
437 (goto-char (point-min))
439 (while (re-search-forward std11-field-head-regexp nil t)
440 (setq beg (match-beginning 0)
441 end (std11-field-end))
442 (setq field (buffer-substring beg end))
;; Only fields with at least one encoded-word are touched.
443 (if (string-match eword-encoded-word-regexp field)
445 (narrow-to-region (goto-char beg) end)
;; Group 1 is the whitespace character that follows the line break.
446 (while (re-search-forward "\n\\([ \t]\\)" nil t)
447 (replace-match (match-string 1))
449 (goto-char (point-max))
454 ;;; @ encoded-word decoder
;; Function that eword-decode-encoded-word calls with two arguments, the
;; word and the error value (presumably from its error-handling clause, which
;; is not fully visible in this excerpt).
457 (defvar eword-decode-encoded-word-error-handler
458 'eword-decode-encoded-word-default-error-handler)
;; Read by eword-decode-encoded-word-default-error-handler; when nil, no
;; face property is built.
460 (defvar eword-warning-face nil
461 "Face used for invalid encoded-word.")
;; Default value of `eword-decode-encoded-word-error-handler'.
;; Applies text properties to all of WORD: a `face' property built from
;; `eword-warning-face' when that variable is non-nil.
;; NOTE(review): SIGNAL is not used in the visible lines, and the end of the
;; function is not shown in this excerpt.
463 (defun eword-decode-encoded-word-default-error-handler (word signal)
464 (and (add-text-properties 0 (length word)
465 (and eword-warning-face
466 (list 'face eword-warning-face))
;; Decode one encoded-word: match it against `eword-encoded-word-regexp',
;; take the three captured substrings and pass them on to
;; eword-decode-encoded-text.
470 (defun eword-decode-encoded-word (word &optional must-unfold)
471 "Decode WORD if it is an encoded-word.
473 If your emacs implementation can not decode the charset of WORD, it
474 returns WORD. Similarly the encoded-word is broken, it returns WORD.
476 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
477 if there are in decoded encoded-word (generated by bad manner MUA such
478 as a version of Net$cape)."
479 (or (if (string-match eword-encoded-word-regexp word)
;; Groups 1, 2 and 3 presumably supply CHARSET, ENCODING and TEXT in that
;; order; the binding names are not visible in this excerpt.
481 (substring word (match-beginning 1) (match-end 1))
485 (substring word (match-beginning 2) (match-end 2))
488 (substring word (match-beginning 3) (match-end 3))
491 (eword-decode-encoded-text charset encoding text must-unfold)
;; Hand failures to the configurable handler.
493 (funcall eword-decode-encoded-word-error-handler word err)
499 ;;; @ encoded-text decoder
;; Decode the encoded-text STRING: undo the "B" or "Q" transfer encoding,
;; then convert the result from CHARSET with `decode-mime-charset-string'.
502 (defun eword-decode-encoded-text (charset encoding string
503 &optional must-unfold)
504 "Decode STRING as an encoded-text.
506 If your emacs implementation can not decode CHARSET, it returns nil.
508 If ENCODING is not \"B\" or \"Q\", it occurs error.
509 So you should write error-handling code if you don't want break by errors.
511 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
512 if there are in decoded encoded-text (generated by bad manner MUA such
513 as a version of Net$cape)."
;; CS is the coding system for CHARSET as reported by
;; `mime-charset-to-coding-system'.
514 (let ((cs (mime-charset-to-coding-system charset)))
;; "B": the regexp has to match the whole of STRING before it is decoded.
518 ((string-equal "B" encoding)
519 (if (and (string-match eword-B-encoded-text-regexp string)
520 (string-equal string (match-string 0 string)))
521 (base64-decode-string string)
522 (error "Invalid encoded-text %s" string)))
;; "Q": same whole-string check, different decoder.
523 ((string-equal "Q" encoding)
524 (if (and (string-match eword-Q-encoded-text-regexp string)
525 (string-equal string (match-string 0 string)))
526 (q-encoding-decode-string string)
527 (error "Invalid encoded-text %s" string)))
;; Any other ENCODING is an error.
529 (error "Invalid encoding %s" encoding)
;; Convert the decoded octets from CHARSET.
533 (setq dest (decode-mime-charset-string dest charset))
;; Per-character mapping in which a newline becomes the empty string
;; (see MUST-UNFOLD in the docstring).
537 (cond ((eq chr ?\n) "")
539 (t (char-to-string chr)))
541 (std11-unfold-string dest)
546 ;;; @ lexical analyze
;; Cache consulted by eword-lexical-analyze with `assoc'; the lookup key is
;; built from the string, `default-mime-charset' and MUST-UNFOLD.
549 (defvar eword-lexical-analyze-cache nil)
;; Passed as the count to `last' when eword-lexical-analyze updates the
;; cache.
550 (defvar eword-lexical-analyze-cache-max 299
551 "*Max position of eword-lexical-analyze-cache.
552 It is max size of eword-lexical-analyze-cache - 1.")
;; Analyzer functions that eword-lexical-analyze-internal tries, in list
;; order, on the remaining input.
;; NOTE(review): parts of the default list and of the docstring are not
;; visible in this excerpt.
554 (defcustom eword-lexical-analyzers
555 '(eword-analyze-quoted-string
556 eword-analyze-domain-literal
557 eword-analyze-comment
559 eword-analyze-special
560 eword-analyze-encoded-word
562 "*List of functions to return result of lexical analyze.
563 Each function must have two arguments: STRING and MUST-UNFOLD.
564 STRING is the target string to be analyzed.
565 If MUST-UNFOLD is not nil, each function must unfold and eliminate
566 bare-CR and bare-LF from the result even if they are included in
567 content of the encoded-word.
568 Each function must return nil if it can not analyze STRING as its
571 Previous function is preferred to next function. If a function
572 returns nil, next function is used. Otherwise the return value will
575 :type '(repeat function))
;; Analyzer for a quoted-string at the start of STRING.
;; The result is ((quoted-string . TEXT) . REST), where REST is STRING from
;; index P onwards and P comes from `std11-check-enclosure'.
577 (defun eword-analyze-quoted-string (string &optional must-unfold)
578 (let ((p (std11-check-enclosure string ?\" ?\")))
580 (cons (cons 'quoted-string
;; Option on: decode encoded-words inside the quotes as well.
581 (if eword-decode-quoted-encoded-word
582 (eword-decode-quoted-string
583 (substring string 0 p)
584 default-mime-charset)
;; Option off: take the text between the quotes, strip quoted-pairs, decode
;; it as `default-mime-charset' and wrap it as a quoted-string again.
585 (std11-wrap-as-quoted-string
586 (decode-mime-charset-string
587 (std11-strip-quoted-pair (substring string 1 (1- p)))
588 default-mime-charset))))
589 (substring string p)))
;; Analyzer for a domain-literal: delegates to
;; `std11-analyze-domain-literal'.  MUST-UNFOLD is accepted because every
;; analyzer takes STRING and MUST-UNFOLD, but it is not used here.
592 (defun eword-analyze-domain-literal (string &optional must-unfold)
593 (std11-analyze-domain-literal string))
;; Analyzer for a comment at the start of STRING.  It only applies when
;; STRING is non-empty and begins with "(".
595 (defun eword-analyze-comment (string &optional must-unfold)
596 (let ((len (length string)))
597 (if (and (< 0 len) (eq (aref string 0) ?\())
;; Keep advancing P with `std11-check-enclosure' while the character at P
;; is another "(".
599 (while (and p (< p len) (eq (aref string p) ?\())
600 (setq p (std11-check-enclosure string ?\( ?\) t p)))
;; Decode the unfolded text up to P as a comment; the text from P onwards
;; is the unconsumed rest.
603 (eword-decode-comment
604 (std11-unfold-string (substring string 0 p))
605 default-mime-charset))
606 (substring string p)))
;; Analyzer for spaces: delegates to `std11-analyze-spaces'.  MUST-UNFOLD is
;; accepted because every analyzer takes STRING and MUST-UNFOLD, but it is
;; not used here.
609 (defun eword-analyze-spaces (string &optional must-unfold)
610 (std11-analyze-spaces string))
;; Analyzer for special characters: delegates to `std11-analyze-special'.
;; MUST-UNFOLD is accepted because every analyzer takes STRING and
;; MUST-UNFOLD, but it is not used here.
612 (defun eword-analyze-special (string &optional must-unfold)
613 (std11-analyze-special string))
;; Analyzer for encoded-words at the start of STRING, using the phrase
;; variants of the encoded-word regexps.
615 (defun eword-analyze-encoded-word (string &optional must-unfold)
616 (let ((decoded (eword-decode-first-encoded-words
618 eword-encoded-word-in-phrase-regexp
619 eword-after-encoded-word-in-phrase-regexp
;; Strip atoms and spaces off a copy of the decoded text for as long as
;; either regexp matches.
622 (let ((s (car decoded)))
623 (while (or (string-match std11-atom-regexp s)
624 (string-match std11-spaces-regexp s))
625 (setq s (substring s (match-end 0))))
;; The decoded text is returned either as an atom token or, wrapped in
;; quotes, as a quoted-string token (presumably depending on whether S was
;; consumed completely; the test itself is not visible in this excerpt).
627 (cons (cons 'atom (car decoded)) (cdr decoded))
628 (cons (cons 'quoted-string
629 (std11-wrap-as-quoted-string (car decoded)))
;; Analyzer for an atom at the start of STRING.  The result is
;; ((atom . TEXT) . REST), with TEXT decoded as `default-mime-charset'.
632 (defun eword-analyze-atom (string &optional must-unfold)
;; The atom regexp is matched with multibyte characters disabled.
633 (if (let ((enable-multibyte-characters nil))
634 (string-match std11-atom-regexp string))
635 (let ((end (match-end 0)))
;; In "sticked" mode an encoded-word that begins inside the atom (not at
;; position 0) ends the atom early.
636 (if (and eword-decode-sticked-encoded-word
637 (string-match eword-encoded-word-in-phrase-regexp
638 (substring string 0 end))
639 (< 0 (match-beginning 0)))
640 (setq end (match-beginning 0)))
641 (cons (cons 'atom (decode-mime-charset-string
642 (substring string 0 end)
643 default-mime-charset))
644 (substring string end)
;; Tokenizer loop: while STRING is non-empty, try the functions in
;; `eword-lexical-analyzers' until one returns a result, collect the token
;; and go on with the rest of the input.
647 (defun eword-lexical-analyze-internal (string must-unfold)
648 (let ((last 'eword-analyze-spaces)
650 (while (not (string-equal string ""))
652 (let ((rest eword-lexical-analyzers)
654 (while (and (setq func (car rest))
;; Outside "sticked" mode, eword-analyze-encoded-word is presumably skipped
;; unless the previous token came from eword-analyze-spaces; the head of
;; this condition is not visible in this excerpt.
657 (not eword-decode-sticked-encoded-word)
658 (not (eq last 'eword-analyze-spaces))
659 (eq func 'eword-analyze-encoded-word))
660 (null (setq r (funcall func string must-unfold))))
662 (setq rest (cdr rest)))
;; No analyzer matched: everything left becomes one `error' token and the
;; rest is the empty string.
664 (or r `((error . ,string) . ""))
;; Collect the token and continue with the unconsumed input.
666 (setq dest (cons (car ret) dest))
667 (setq string (cdr ret))
;; Cached front end of eword-lexical-analyze-internal.
672 (defun eword-lexical-analyze (string &optional must-unfold)
673 "Return lexical analyzed list corresponding STRING.
674 It is like std11-lexical-analyze, but it decodes non us-ascii
675 characters encoded as encoded-words or invalid \"raw\" format.
676 \"Raw\" non us-ascii characters are regarded as variable
677 `default-mime-charset'."
;; Work on a copy so that removing text properties does not touch the
;; caller's string.
678 (let* ((str (copy-sequence string))
;; Cache key: the string, the current default charset and MUST-UNFOLD.
679 (key (cons str (cons default-mime-charset must-unfold)))
681 (set-text-properties 0 (length str) nil str)
682 (if (setq ret (assoc key eword-lexical-analyze-cache))
;; Cache miss: analyze and store the result.
684 (setq ret (eword-lexical-analyze-internal str must-unfold))
685 (setq eword-lexical-analyze-cache
;; NOTE(review): `last' is called with the cache and
;; `eword-lexical-analyze-cache-max'; how its result is used is not visible
;; in this excerpt.
687 (last eword-lexical-analyze-cache
688 eword-lexical-analyze-cache-max)))
;; Called by the fold/unfold field decoders on each token; they concatenate
;; the result and pass it to `string-width', so it presumably returns the
;; token's text as a string.
;; NOTE(review): the body is not visible in this excerpt.
691 (defun eword-decode-token (token)
;; Decode STRING as a structured field body and re-fold it so that lines
;; stay below MAX-COLUMN, starting the column count at START-COLUMN.
694 (defun eword-decode-and-fold-structured-field
695 (string start-column &optional max-column must-unfold)
696 "Decode and fold (fill) STRING as structured field body.
697 It decodes non us-ascii characters in FULL-NAME encoded as
698 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
699 characters are regarded as variable `default-mime-charset'.
701 If an encoded-word is broken or your emacs implementation can not
702 decode the charset included in it, it is not decoded.
704 If MAX-COLUMN is omitted, `fill-column' is used.
706 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
707 if there are in decoded encoded-words (generated by bad manner MUA
708 such as a version of Net$cape)."
710 (setq max-column fill-column))
;; C tracks the current output column.
711 (let ((c start-column)
712 (tokens (eword-lexical-analyze string must-unfold))
;; The loop runs while a token is available and more tokens follow it, so
;; the final token is left for the `concat' after the loop.
715 (while (and (setq token (car tokens))
716 (setq tokens (cdr tokens)))
717 (let* ((type (car token)))
;; At a spaces token, look at the token after it and decide whether it
;; still fits on the current line.
718 (if (eq type 'spaces)
719 (let* ((next-token (car tokens))
720 (next-str (eword-decode-token next-token))
721 (next-len (string-width next-str))
722 (next-c (+ c next-len 1)))
;; Fits: join with one space.  Otherwise: start a continuation line.
723 (if (< next-c max-column)
724 (setq result (concat result " " next-str)
726 (setq result (concat result "\n " next-str)
;; The following token has been emitted already, so skip it.
728 (setq tokens (cdr tokens))
;; Any other token is appended as it is and advances the column.
730 (let* ((str (eword-decode-token token)))
731 (setq result (concat result str)
732 c (+ c (string-width str)))
735 (concat result (eword-decode-token token))
;; Decode STRING as a structured field body; the tokens are obtained from
;; eword-lexical-analyze with MUST-UNFOLD enabled.
738 (defun eword-decode-and-unfold-structured-field (string)
739 "Decode and unfold STRING as structured field body.
740 It decodes non us-ascii characters in FULL-NAME encoded as
741 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
742 characters are regarded as variable `default-mime-charset'.
744 If an encoded-word is broken or your emacs implementation can not
745 decode the charset included in it, it is not decoded."
;; Remember the argument of this call (see `rotate-memo').
746 (rotate-memo args-eword-decode-and-unfold-structured-field
748 (let ((tokens (eword-lexical-analyze string 'must-unfold))
751 (let* ((token (car tokens))
753 (setq tokens (cdr tokens))
;; Spaces tokens get separate treatment; NOTE(review): that branch is not
;; visible in this excerpt.
755 (if (eq type 'spaces)
757 (concat result (eword-decode-token token))
;; Decode STRING as a structured field body through `ew-decode-field'.
;; Two variants of the call are visible: one that passes a placeholder field
;; name made of (1- START-COLUMN) "X" characters, and one that passes an
;; empty field name.
;; NOTE(review): the test choosing between them is not visible in this
;; excerpt.
761 (defun eword-decode-structured-field-body (string &optional must-unfold
762 start-column max-column)
763 "Decode non us-ascii characters in STRING as structured field body.
764 STRING is unfolded before decoding.
766 It decodes non us-ascii characters in FULL-NAME encoded as
767 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
768 characters are regarded as variable `default-mime-charset'.
770 If an encoded-word is broken or your emacs implementation can not
771 decode the charset included in it, it is not decoded.
773 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
774 if there are in decoded encoded-words (generated by bad manner MUA
775 such as a version of Net$cape)."
;; Remember the arguments of this call (see `rotate-memo').
776 (rotate-memo args-eword-decode-structured-field-body
777 (list string must-unfold start-column max-column))
779 ;; fold with max-column (folding is not implemented.)
;; `ew-decode-field-default-syntax' is bound to the unibyte STD 11 scanner
;; for the duration of the call.
780 (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
781 (decoded (ew-decode-field (make-string (1- start-column) ?X)
782 (ew-lf-crlf-to-crlf string)
783 (if must-unfold 'ew-cut-cr-lf))))
;; With MUST-UNFOLD the result is additionally passed through
;; `ew-cut-cr-lf'.
784 (if must-unfold (ew-cut-cr-lf decoded) decoded))
786 (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
787 (decoded (ew-decode-field ""
788 (ew-lf-crlf-to-crlf string)
789 (if must-unfold 'ew-cut-cr-lf))))
790 (if must-unfold (ew-cut-cr-lf decoded) decoded))))
;; Decode STRING as an unstructured field body through `ew-decode-field',
;; called with an empty field name.
792 (defun eword-decode-unstructured-field-body (string &optional must-unfold)
793 "Decode non us-ascii characters in STRING as unstructured field body.
794 STRING is unfolded before decoding.
796 It decodes non us-ascii characters in FULL-NAME encoded as
797 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
798 characters are regarded as variable `default-mime-charset'.
800 If an encoded-word is broken or your emacs implementation can not
801 decode the charset included in it, it is not decoded.
803 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
804 if there are in decoded encoded-words (generated by bad manner MUA
805 such as a version of Net$cape)."
;; Remember the arguments of this call (see `rotate-memo').
806 (rotate-memo args-eword-decode-unstructured-field-body
807 (list string must-unfold))
;; With MUST-UNFOLD, `ew-cut-cr-lf' is passed as the third argument.
808 (let ((decoded (ew-decode-field ""
809 (ew-lf-crlf-to-crlf string)
810 (if must-unfold 'ew-cut-cr-lf))))
;; NOTE(review): the condition guarding this call is not visible in this
;; excerpt.
812 (ew-cut-cr-lf decoded)
;; Parse the first address in STRING and return its display name and
;; address as a two-element list.
815 (defun eword-extract-address-components (string)
816 "Extract full name and canonical address from STRING.
817 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
818 If no name can be extracted, FULL-NAME will be nil.
819 It decodes non us-ascii characters in FULL-NAME encoded as
820 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
821 characters are regarded as variable `default-mime-charset'."
;; Unfold, tokenize with MUST-UNFOLD enabled, parse, and keep the first
;; element of what `std11-parse-address' returns.
822 (let* ((structure (car (std11-parse-address
823 (eword-lexical-analyze
824 (std11-unfold-string string) 'must-unfold))))
825 (phrase (std11-full-name-string structure))
826 (address (std11-address-string structure))
828 (list phrase address)
835 (provide 'eword-decode)
837 ;;; eword-decode.el ends here