1 ;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs
3 ;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc.
5 ;; Author: ENAMI Tsugutomo <enami@sys.ptg.sony.co.jp>
6 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
7 ;; Tanaka Akira <akr@jaist.ac.jp>
8 ;; Maintainer: Tanaka Akira <akr@jaist.ac.jp>
10 ;; Original: 1992/07/20 ENAMI Tsugutomo's `mime.el'.
11 ;; Renamed: 1993/06/03 to tiny-mime.el
12 ;; Renamed: 1995/10/03 from tiny-mime.el (split off encoder)
13 ;; Renamed: 1997/02/22 from tm-ew-d.el
14 ;; Keywords: encoded-word, MIME, multilingual, header, mail, news
16 ;; This file is part of FLAM (Faithful Library About MIME).
18 ;; This program is free software; you can redistribute it and/or
19 ;; modify it under the terms of the GNU General Public License as
20 ;; published by the Free Software Foundation; either version 2, or (at
21 ;; your option) any later version.
23 ;; This program is distributed in the hope that it will be useful, but
24 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
25 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 ;; General Public License for more details.
28 ;; You should have received a copy of the GNU General Public License
29 ;; along with GNU Emacs; see the file COPYING. If not, write to the
30 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
31 ;; Boston, MA 02111-1307, USA.
41 (defgroup eword-decode nil
42 "Encoded-word decoding"
47 (defvar rotate-memo nil)
48 (defmacro rotate-memo (var val)
50 (unless (boundp ',var) (setq ,var ()))
51 (setq ,var (cons ,val ,var))
52 (let ((tmp (last ,var (- (length ,var) 100))))
53 (when tmp (setcdr tmp nil)))
59 (defcustom eword-decode-sticked-encoded-word nil
60 "*If non-nil, decode encoded-words sticked on atoms,
61 other encoded-words, etc.
62 however this behaviour violates RFC2047."
66 (defcustom eword-decode-quoted-encoded-word nil
67 "*If non-nil, decode encoded-words in quoted-string
68 however this behaviour violates RFC2047."
73 ;;; @ MIME encoded-word definition
76 (defconst eword-encoded-word-prefix-regexp
77 (concat (regexp-quote "=?")
78 "\\(" mime-charset-regexp "\\)"
82 (defconst eword-encoded-word-suffix-regexp
85 (defconst eword-encoded-text-in-unstructured-regexp "[!->@-~]+")
86 (defconst eword-encoded-word-in-unstructured-regexp
87 (concat eword-encoded-word-prefix-regexp
88 "\\(" eword-encoded-text-in-unstructured-regexp "\\)"
89 eword-encoded-word-suffix-regexp))
90 (defconst eword-after-encoded-word-in-unstructured-regexp "\\([ \t]\\|$\\)")
92 (defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+")
93 (defconst eword-encoded-word-in-phrase-regexp
94 (concat eword-encoded-word-prefix-regexp
95 "\\(" eword-encoded-text-in-phrase-regexp "\\)"
96 eword-encoded-word-suffix-regexp))
97 (defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t]\\|$\\)")
99 (defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+")
100 (defconst eword-encoded-word-in-comment-regexp
101 (concat eword-encoded-word-prefix-regexp
102 "\\(" eword-encoded-text-in-comment-regexp "\\)"
103 eword-encoded-word-suffix-regexp))
104 (defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)")
106 (defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+")
107 (defconst eword-encoded-word-in-quoted-string-regexp
108 (concat eword-encoded-word-prefix-regexp
109 "\\(" eword-encoded-text-in-quoted-string-regexp "\\)"
110 eword-encoded-word-suffix-regexp))
111 (defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)")
114 (defconst eword-encoded-text-regexp eword-encoded-text-in-unstructured-regexp)
115 (defconst eword-encoded-word-regexp eword-encoded-word-in-unstructured-regexp)
121 (defconst base64-token-regexp "[A-Za-z0-9+/]")
122 (defconst base64-token-padding-regexp "[A-Za-z0-9+/=]")
124 (defconst eword-B-encoded-text-regexp
133 base64-token-padding-regexp
134 base64-token-padding-regexp
137 ;; (defconst eword-B-encoding-and-encoded-text-regexp
138 ;; (concat "\\(B\\)\\?" eword-B-encoded-text-regexp))
141 ;;; @@ Quoted-Printable
144 (defconst eword-Q-encoded-text-regexp
145 (concat "\\([^=?]\\|" quoted-printable-octet-regexp "\\)+"))
146 ;; (defconst eword-Q-encoding-and-encoded-text-regexp
147 ;; (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp))
150 ;;; @ internal utilities
153 (defun eword-decode-first-encoded-words (string
156 &optional must-unfold)
157 "Decode MIME encoded-words in beginning of STRING.
159 EWORD-REGEXP is the regexp that matches a encoded-word.
161 eword-encoded-word-in-unstructured-regexp,
162 eword-encoded-text-in-phrase-regexp,
163 eword-encoded-word-in-comment-regexp or
164 eword-encoded-word-in-quoted-string-regexp.
166 AFTER-REGEXP is the regexp that matches a after encoded-word.
168 eword-after-encoded-word-in-unstructured-regexp,
169 eword-after-encoded-text-in-phrase-regexp,
170 eword-after-encoded-word-in-comment-regexp or
171 eword-after-encoded-word-in-quoted-string-regexp.
173 If beginning of STRING matches EWORD-REGEXP with AFTER-REGEXP,
174 returns a cons cell of decoded string(sequence of characters) and
175 the rest(sequence of octets).
177 If beginning of STRING does not matches EWORD-REGEXP and AFTER-REGEXP,
180 If an encoded-word is broken or your emacs implementation can not
181 decode the charset included in it, it is returned in decoded part
182 as encoded-word form.
184 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
185 if there are in decoded encoded-words (generated by bad manner MUA
186 such as a version of Net$cape)."
187 (if eword-decode-sticked-encoded-word (setq after-regexp ""))
188 (let* ((between-ewords-regexp
189 (if eword-decode-sticked-encoded-word
192 (between-ewords-eword-after-regexp
193 (concat "\\`\\(" between-ewords-regexp "\\)"
194 "\\(" eword-regexp "\\)"
197 (concat "\\`\\(" eword-regexp "\\)" after-regexp))
198 (src string) ; sequence of octets.
199 (dst "")) ; sequence of characters.
200 (if (string-match eword-after-regexp src)
203 (ew (substring src 0 q))
204 (dw (eword-decode-encoded-word ew must-unfold)))
205 (setq dst (concat dst dw)
206 src (substring src q))
207 (if (not (string= ew dw))
211 (string-match between-ewords-eword-after-regexp src)
213 (setq p (match-end 1)
215 ew (substring src p q)
216 dw (eword-decode-encoded-word ew must-unfold))
219 (setq dst (concat dst (substring src 0 q))
220 src (substring src q))
223 (setq dst (concat dst dw)
224 src (substring src q)))))
228 (defun eword-decode-entire-string (string
233 delimiters ; list of chars.
237 (if (and code-conversion
238 (not (mime-charset-to-coding-system code-conversion)))
239 (setq code-conversion default-mime-charset))
240 (let ((equal-safe-regexp (concat "\\`=?" safe-regexp))
245 (while (< 0 (length src))
246 (let ((ch (aref src 0))
249 (eword-decode-first-encoded-words src
250 eword-regexp after-regexp must-unfold))))
251 (if (and (not (string= buf ""))
252 (or decoded (memq ch delimiters)))
253 (setq dst (concat dst
254 (std11-wrap-as-quoted-pairs
255 (decode-mime-charset-string buf code-conversion)
256 chars-must-be-quote))
260 (setq dst (concat dst
261 (std11-wrap-as-quoted-pairs
263 chars-must-be-quote))
265 ((memq ch delimiters)
266 (setq dst (concat dst (list ch))
267 src (substring src 1)
270 (setq buf (concat buf (list (aref src 1)))
271 src (substring src 2)
273 ((string-match "\\`[ \t\n]+" src)
274 (setq buf (concat buf (substring src 0 (match-end 0)))
275 src (substring src (match-end 0))
277 ((and (string-match equal-safe-regexp src)
279 (setq buf (concat buf (substring src 0 (match-end 0)))
280 src (substring src (match-end 0))
281 ew-enable eword-decode-sticked-encoded-word))
282 (t (error "something wrong")))))
283 (if (not (string= buf ""))
284 (setq dst (concat dst
285 (std11-wrap-as-quoted-pairs
286 (decode-mime-charset-string buf code-conversion)
287 chars-must-be-quote))))
294 (defun eword-decode-unstructured (string code-conversion &optional must-unfold)
295 (eword-decode-entire-string
297 eword-encoded-word-in-unstructured-regexp
298 eword-after-encoded-word-in-unstructured-regexp
306 (defun eword-decode-comment (string code-conversion &optional must-unfold)
307 (eword-decode-entire-string
309 eword-encoded-word-in-comment-regexp
310 eword-after-encoded-word-in-comment-regexp
314 '(?\( ?\) ?\\ ?\r ?\n)
318 (defun eword-decode-quoted-string (string code-conversion &optional must-unfold)
319 (eword-decode-entire-string
321 eword-encoded-word-in-quoted-string-regexp
322 eword-after-encoded-word-in-quoted-string-regexp
330 (defun eword-decode-string (string &optional must-unfold code-conversion)
331 "Decode MIME encoded-words in STRING.
333 STRING is unfolded before decoding.
335 If an encoded-word is broken or your emacs implementation can not
336 decode the charset included in it, it is not decoded.
338 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
339 if there are in decoded encoded-words (generated by bad manner MUA
340 such as a version of Net$cape).
342 If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
343 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
344 Otherwise it decodes non-ASCII bit patterns as the
345 default-mime-charset."
346 (eword-decode-unstructured
347 (std11-unfold-string string)
355 (defun eword-decode-region (start end &optional unfolding must-unfold
357 "Decode MIME encoded-words in region between START and END.
359 If UNFOLDING is not nil, it unfolds before decoding.
361 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
362 if there are in decoded encoded-words (generated by bad manner MUA
363 such as a version of Net$cape).
365 If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
366 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
367 Otherwise it decodes non-ASCII bit patterns as the
368 default-mime-charset."
370 (rotate-memo args-eword-decode-region
371 (list start end (buffer-substring start end) unfolding must-unfold code-conversion))
374 (narrow-to-region start end)
376 (eword-decode-unfold)
378 (let ((str (eword-decode-unstructured
379 (buffer-substring (point-min) (point-max))
382 (delete-region (point-min) (point-max))
386 ;;; @ for message header
389 (defcustom eword-decode-ignored-field-list
390 '(Newsgroups Path Lines Nntp-Posting-Host Received Message-Id Date)
391 "*List of field-names to be ignored when decoding.
392 Each field name must be symbol."
394 :type '(repeat symbol))
396 (defcustom eword-decode-structured-field-list
397 '(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender
398 To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc
399 Mime-Version Content-Type Content-Transfer-Encoding
401 "*List of field-names to decode as structured field.
402 Each field name must be symbol."
404 :type '(repeat symbol))
406 (defun eword-decode-header (&optional code-conversion separator)
407 "Decode MIME encoded-words in header fields.
408 If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
409 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
410 Otherwise it decodes non-ASCII bit patterns as the
411 default-mime-charset.
412 If SEPARATOR is not nil, it is used as header separator."
414 (rotate-memo args-eword-decode-header (list code-conversion))
415 (unless code-conversion
416 (message "eword-decode-header is called with no code-conversion")
418 (if (and code-conversion
419 (not (mime-charset-to-coding-system code-conversion)))
420 (setq code-conversion default-mime-charset))
423 (std11-narrow-to-header separator)
424 (rotate-memo args-h-eword-decode-header (buffer-substring (point-min) (point-max)))
426 (let (beg p end field-name field-body decoded)
427 (goto-char (point-min))
428 (while (re-search-forward std11-field-head-regexp nil t)
429 (setq beg (match-beginning 0)
431 field-name (buffer-substring beg (1- p))
432 end (std11-field-end)
433 field-body (buffer-substring p end)
434 decoded (ew-decode-field
436 (ew-lf-crlf-to-crlf field-body)))
437 (unless (equal field-body decoded)
438 (setq decoded (ew-crlf-refold
440 (1+ (string-width field-name))
442 (delete-region p end)
443 (insert (ew-crlf-to-lf decoded))))
444 (eword-decode-region (point-min) (point-max) t nil nil)
447 (defun eword-decode-unfold ()
448 (goto-char (point-min))
450 (while (re-search-forward std11-field-head-regexp nil t)
451 (setq beg (match-beginning 0)
452 end (std11-field-end))
453 (setq field (buffer-substring beg end))
454 (if (string-match eword-encoded-word-regexp field)
456 (narrow-to-region (goto-char beg) end)
457 (while (re-search-forward "\n\\([ \t]\\)" nil t)
458 (replace-match (match-string 1))
460 (goto-char (point-max))
465 ;;; @ encoded-word decoder
468 (defvar eword-decode-encoded-word-error-handler
469 'eword-decode-encoded-word-default-error-handler)
471 (defvar eword-warning-face nil
472 "Face used for invalid encoded-word.")
474 (defun eword-decode-encoded-word-default-error-handler (word signal)
475 (and (add-text-properties 0 (length word)
476 (and eword-warning-face
477 (list 'face eword-warning-face))
481 (defun eword-decode-encoded-word (word &optional must-unfold)
482 "Decode WORD if it is an encoded-word.
484 If your emacs implementation can not decode the charset of WORD, it
485 returns WORD. Similarly the encoded-word is broken, it returns WORD.
487 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
488 if there are in decoded encoded-word (generated by bad manner MUA such
489 as a version of Net$cape)."
490 (or (if (string-match eword-encoded-word-regexp word)
492 (substring word (match-beginning 1) (match-end 1))
496 (substring word (match-beginning 2) (match-end 2))
499 (substring word (match-beginning 3) (match-end 3))
502 (eword-decode-encoded-text charset encoding text must-unfold)
504 (funcall eword-decode-encoded-word-error-handler word err)
510 ;;; @ encoded-text decoder
513 (defun eword-decode-encoded-text (charset encoding string
514 &optional must-unfold)
515 "Decode STRING as an encoded-text.
517 If your emacs implementation can not decode CHARSET, it returns nil.
519 If ENCODING is not \"B\" or \"Q\", it occurs error.
520 So you should write error-handling code if you don't want break by errors.
522 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
523 if there are in decoded encoded-text (generated by bad manner MUA such
524 as a version of Net$cape)."
525 (let ((cs (mime-charset-to-coding-system charset)))
529 ((string-equal "B" encoding)
530 (if (and (string-match eword-B-encoded-text-regexp string)
531 (string-equal string (match-string 0 string)))
532 (base64-decode-string string)
533 (error "Invalid encoded-text %s" string)))
534 ((string-equal "Q" encoding)
535 (if (and (string-match eword-Q-encoded-text-regexp string)
536 (string-equal string (match-string 0 string)))
537 (q-encoding-decode-string string)
538 (error "Invalid encoded-text %s" string)))
540 (error "Invalid encoding %s" encoding)
544 (setq dest (decode-mime-charset-string dest charset))
548 (cond ((eq chr ?\n) "")
550 (t (char-to-string chr)))
552 (std11-unfold-string dest)
557 ;;; @ lexical analyze
560 (defvar eword-lexical-analyze-cache nil)
561 (defvar eword-lexical-analyze-cache-max 299
562 "*Max position of eword-lexical-analyze-cache.
563 It is max size of eword-lexical-analyze-cache - 1.")
565 (defcustom eword-lexical-analyzers
566 '(eword-analyze-quoted-string
567 eword-analyze-domain-literal
568 eword-analyze-comment
570 eword-analyze-special
571 eword-analyze-encoded-word
573 "*List of functions to return result of lexical analyze.
574 Each function must have two arguments: STRING and MUST-UNFOLD.
575 STRING is the target string to be analyzed.
576 If MUST-UNFOLD is not nil, each function must unfold and eliminate
577 bare-CR and bare-LF from the result even if they are included in
578 content of the encoded-word.
579 Each function must return nil if it can not analyze STRING as its
582 Previous function is preferred to next function. If a function
583 returns nil, next function is used. Otherwise the return value will
586 :type '(repeat function))
588 (defun eword-analyze-quoted-string (string &optional must-unfold)
589 (let ((p (std11-check-enclosure string ?\" ?\")))
591 (cons (cons 'quoted-string
592 (if eword-decode-quoted-encoded-word
593 (eword-decode-quoted-string
594 (substring string 0 p)
595 default-mime-charset)
596 (std11-wrap-as-quoted-string
597 (decode-mime-charset-string
598 (std11-strip-quoted-pair (substring string 1 (1- p)))
599 default-mime-charset))))
600 (substring string p)))
603 (defun eword-analyze-domain-literal (string &optional must-unfold)
604 (std11-analyze-domain-literal string))
606 (defun eword-analyze-comment (string &optional must-unfold)
607 (let ((len (length string)))
608 (if (and (< 0 len) (eq (aref string 0) ?\())
610 (while (and p (< p len) (eq (aref string p) ?\())
611 (setq p (std11-check-enclosure string ?\( ?\) t p)))
614 (eword-decode-comment
615 (std11-unfold-string (substring string 0 p))
616 default-mime-charset))
617 (substring string p)))
620 (defun eword-analyze-spaces (string &optional must-unfold)
621 (std11-analyze-spaces string))
623 (defun eword-analyze-special (string &optional must-unfold)
624 (std11-analyze-special string))
626 (defun eword-analyze-encoded-word (string &optional must-unfold)
627 (let ((decoded (eword-decode-first-encoded-words
629 eword-encoded-word-in-phrase-regexp
630 eword-after-encoded-word-in-phrase-regexp
633 (let ((s (car decoded)))
634 (while (or (string-match std11-atom-regexp s)
635 (string-match std11-spaces-regexp s))
636 (setq s (substring s (match-end 0))))
638 (cons (cons 'atom (car decoded)) (cdr decoded))
639 (cons (cons 'quoted-string
640 (std11-wrap-as-quoted-string (car decoded)))
643 (defun eword-analyze-atom (string &optional must-unfold)
644 (if (let ((enable-multibyte-characters nil))
645 (string-match std11-atom-regexp string))
646 (let ((end (match-end 0)))
647 (if (and eword-decode-sticked-encoded-word
648 (string-match eword-encoded-word-in-phrase-regexp
649 (substring string 0 end))
650 (< 0 (match-beginning 0)))
651 (setq end (match-beginning 0)))
652 (cons (cons 'atom (decode-mime-charset-string
653 (substring string 0 end)
654 default-mime-charset))
655 (substring string end)
658 (defun eword-lexical-analyze-internal (string must-unfold)
659 (let ((last 'eword-analyze-spaces)
661 (while (not (string-equal string ""))
663 (let ((rest eword-lexical-analyzers)
665 (while (and (setq func (car rest))
668 (not eword-decode-sticked-encoded-word)
669 (not (eq last 'eword-analyze-spaces))
670 (eq func 'eword-analyze-encoded-word))
671 (null (setq r (funcall func string must-unfold))))
673 (setq rest (cdr rest)))
675 (or r `((error . ,string) . ""))
677 (setq dest (cons (car ret) dest))
678 (setq string (cdr ret))
683 (defun eword-lexical-analyze (string &optional must-unfold)
684 "Return lexical analyzed list corresponding STRING.
685 It is like std11-lexical-analyze, but it decodes non us-ascii
686 characters encoded as encoded-words or invalid \"raw\" format.
687 \"Raw\" non us-ascii characters are regarded as variable
688 `default-mime-charset'."
689 (let* ((str (copy-sequence string))
690 (key (cons str (cons default-mime-charset must-unfold)))
692 (set-text-properties 0 (length str) nil str)
693 (if (setq ret (assoc key eword-lexical-analyze-cache))
695 (setq ret (eword-lexical-analyze-internal str must-unfold))
696 (setq eword-lexical-analyze-cache
698 (last eword-lexical-analyze-cache
699 eword-lexical-analyze-cache-max)))
702 (defun eword-decode-token (token)
705 (defun eword-decode-and-fold-structured-field
706 (string start-column &optional max-column must-unfold)
707 "Decode and fold (fill) STRING as structured field body.
708 It decodes non us-ascii characters in FULL-NAME encoded as
709 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
710 characters are regarded as variable `default-mime-charset'.
712 If an encoded-word is broken or your emacs implementation can not
713 decode the charset included in it, it is not decoded.
715 If MAX-COLUMN is omitted, `fill-column' is used.
717 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
718 if there are in decoded encoded-words (generated by bad manner MUA
719 such as a version of Net$cape)."
720 (rotate-memo args-eword-decode-and-fold-structured-field
721 (list string start-column max-column must-unfold))
723 (setq max-column fill-column))
724 (let* ((field-name (make-string (1- start-column) ?X))
725 (field-body (ew-lf-crlf-to-crlf string))
726 (ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
727 (decoded (ew-decode-field field-name field-body)))
728 (unless (equal field-body decoded)
729 (setq decoded (ew-crlf-refold decoded start-column max-column)))
730 (ew-crlf-to-lf decoded)))
732 (defun eword-decode-and-unfold-structured-field (string)
733 "Decode and unfold STRING as structured field body.
734 It decodes non us-ascii characters in FULL-NAME encoded as
735 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
736 characters are regarded as variable `default-mime-charset'.
738 If an encoded-word is broken or your emacs implementation can not
739 decode the charset included in it, it is not decoded."
740 (rotate-memo args-eword-decode-and-unfold-structured-field (list string))
741 (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
742 (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
743 (ew-crlf-to-lf (ew-crlf-unfold decoded))))
745 (defun eword-decode-structured-field-body (string &optional must-unfold
746 start-column max-column)
747 "Decode non us-ascii characters in STRING as structured field body.
748 STRING is unfolded before decoding.
750 It decodes non us-ascii characters in FULL-NAME encoded as
751 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
752 characters are regarded as variable `default-mime-charset'.
754 If an encoded-word is broken or your emacs implementation can not
755 decode the charset included in it, it is not decoded.
757 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
758 if there are in decoded encoded-words (generated by bad manner MUA
759 such as a version of Net$cape)."
760 (rotate-memo args-eword-decode-structured-field-body
761 (list string must-unfold start-column max-column))
763 ;; fold with max-column
764 (eword-decode-and-fold-structured-field
765 string start-column max-column must-unfold)
767 (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
768 (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
769 (ew-crlf-to-lf decoded))))
771 (defun eword-decode-unstructured-field-body (string &optional must-unfold)
772 "Decode non us-ascii characters in STRING as unstructured field body.
773 STRING is unfolded before decoding.
775 It decodes non us-ascii characters in FULL-NAME encoded as
776 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
777 characters are regarded as variable `default-mime-charset'.
779 If an encoded-word is broken or your emacs implementation can not
780 decode the charset included in it, it is not decoded.
782 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
783 if there are in decoded encoded-words (generated by bad manner MUA
784 such as a version of Net$cape)."
785 (rotate-memo args-eword-decode-unstructured-field-body
786 (list string must-unfold))
787 (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
788 (ew-crlf-to-lf (ew-crlf-unfold decoded))))
790 (defun eword-extract-address-components (string)
791 "Extract full name and canonical address from STRING.
792 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
793 If no name can be extracted, FULL-NAME will be nil.
794 It decodes non us-ascii characters in FULL-NAME encoded as
795 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
796 characters are regarded as variable `default-mime-charset'."
797 (rotate-memo args-eword-extract-address-components (list string))
798 (let* ((structure (car (std11-parse-address
799 (eword-lexical-analyze
800 (std11-unfold-string string) 'must-unfold))))
801 (phrase (std11-full-name-string structure))
802 (address (std11-address-string structure))
804 (list phrase address)
811 (provide 'eword-decode)
813 ;;; eword-decode.el ends here