1 ;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs
3 ;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc.
5 ;; Author: ENAMI Tsugutomo <enami@sys.ptg.sony.co.jp>
6 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
7 ;; Tanaka Akira <akr@jaist.ac.jp>
8 ;; Maintainer: Tanaka Akira <akr@jaist.ac.jp>
10 ;; Original: 1992/07/20 ENAMI Tsugutomo's `mime.el'.
11 ;; Renamed: 1993/06/03 to tiny-mime.el
12 ;; Renamed: 1995/10/03 from tiny-mime.el (split off encoder)
13 ;; Renamed: 1997/02/22 from tm-ew-d.el
14 ;; Keywords: encoded-word, MIME, multilingual, header, mail, news
16 ;; This file is part of FLAM (Faithful Library About MIME).
18 ;; This program is free software; you can redistribute it and/or
19 ;; modify it under the terms of the GNU General Public License as
20 ;; published by the Free Software Foundation; either version 2, or (at
21 ;; your option) any later version.
23 ;; This program is distributed in the hope that it will be useful, but
24 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
25 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 ;; General Public License for more details.
28 ;; You should have received a copy of the GNU General Public License
29 ;; along with GNU Emacs; see the file COPYING. If not, write to the
30 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
31 ;; Boston, MA 02111-1307, USA.
42 (eval-when-compile (require 'cl))
44 (defgroup eword-decode nil
45 "Encoded-word decoding"
50 (defvar rotate-memo nil)
51 (defmacro rotate-memo (var val)
53 (unless (boundp ',var) (setq ,var ()))
54 (setq ,var (cons ,val ,var))
55 (let ((tmp (last ,var (- (length ,var) 100))))
56 (when tmp (setcdr tmp nil)))
62 (defcustom eword-decode-sticked-encoded-word nil
63 "*If non-nil, decode encoded-words sticked on atoms,
64 other encoded-words, etc.
65 however this behaviour violates RFC2047."
69 (defcustom eword-decode-quoted-encoded-word nil
70 "*If non-nil, decode encoded-words in quoted-string
71 however this behaviour violates RFC2047."
75 (defcustom eword-max-size-to-decode 1000
76 "*Max size to decode header field."
78 :type '(choice (integer :tag "Limit (bytes)")
79 (const :tag "Don't limit" nil)))
82 ;;; @ MIME encoded-word definition
85 (defconst eword-encoded-word-prefix-regexp
86 (concat (regexp-quote "=?")
87 "\\(" mime-charset-regexp "\\)"
91 (defconst eword-encoded-word-suffix-regexp
94 (defconst eword-encoded-text-in-unstructured-regexp "[!->@-~]+")
95 (defconst eword-encoded-word-in-unstructured-regexp
96 (concat eword-encoded-word-prefix-regexp
97 "\\(" eword-encoded-text-in-unstructured-regexp "\\)"
98 eword-encoded-word-suffix-regexp))
99 (defconst eword-after-encoded-word-in-unstructured-regexp "\\([ \t]\\|$\\)")
101 (defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+")
102 (defconst eword-encoded-word-in-phrase-regexp
103 (concat eword-encoded-word-prefix-regexp
104 "\\(" eword-encoded-text-in-phrase-regexp "\\)"
105 eword-encoded-word-suffix-regexp))
106 (defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t]\\|$\\)")
108 (defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+")
109 (defconst eword-encoded-word-in-comment-regexp
110 (concat eword-encoded-word-prefix-regexp
111 "\\(" eword-encoded-text-in-comment-regexp "\\)"
112 eword-encoded-word-suffix-regexp))
113 (defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)")
115 (defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+")
116 (defconst eword-encoded-word-in-quoted-string-regexp
117 (concat eword-encoded-word-prefix-regexp
118 "\\(" eword-encoded-text-in-quoted-string-regexp "\\)"
119 eword-encoded-word-suffix-regexp))
120 (defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)")
123 (defconst eword-encoded-text-regexp eword-encoded-text-in-unstructured-regexp)
124 (defconst eword-encoded-word-regexp eword-encoded-word-in-unstructured-regexp)
127 ;;; @ internal utilities
130 (defun eword-decode-first-encoded-words (string
133 &optional must-unfold)
134 "Decode MIME encoded-words in beginning of STRING.
136 EWORD-REGEXP is the regexp that matches a encoded-word.
138 eword-encoded-word-in-unstructured-regexp,
139 eword-encoded-text-in-phrase-regexp,
140 eword-encoded-word-in-comment-regexp or
141 eword-encoded-word-in-quoted-string-regexp.
143 AFTER-REGEXP is the regexp that matches a after encoded-word.
145 eword-after-encoded-word-in-unstructured-regexp,
146 eword-after-encoded-text-in-phrase-regexp,
147 eword-after-encoded-word-in-comment-regexp or
148 eword-after-encoded-word-in-quoted-string-regexp.
150 If beginning of STRING matches EWORD-REGEXP with AFTER-REGEXP,
151 returns a cons cell of decoded string(sequence of characters) and
152 the rest(sequence of octets).
154 If beginning of STRING does not matches EWORD-REGEXP and AFTER-REGEXP,
157 If an encoded-word is broken or your emacs implementation can not
158 decode the charset included in it, it is returned in decoded part
159 as encoded-word form.
161 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
162 if there are in decoded encoded-words (generated by bad manner MUA
163 such as a version of Net$cape)."
164 (if eword-decode-sticked-encoded-word (setq after-regexp ""))
165 (let* ((between-ewords-regexp
166 (if eword-decode-sticked-encoded-word
169 (between-ewords-eword-after-regexp
170 (concat "\\`\\(" between-ewords-regexp "\\)"
171 "\\(" eword-regexp "\\)"
174 (concat "\\`\\(" eword-regexp "\\)" after-regexp))
175 (src string) ; sequence of octets.
176 (dst "")) ; sequence of characters.
177 (if (string-match eword-after-regexp src)
180 (ew (substring src 0 q))
181 (dw (eword-decode-encoded-word ew must-unfold)))
182 (setq dst (concat dst dw)
183 src (substring src q))
184 (if (not (string= ew dw))
188 (string-match between-ewords-eword-after-regexp src)
190 (setq p (match-end 1)
192 ew (substring src p q)
193 dw (eword-decode-encoded-word ew must-unfold))
196 (setq dst (concat dst (substring src 0 q))
197 src (substring src q))
200 (setq dst (concat dst dw)
201 src (substring src q)))))
205 (defun eword-decode-entire-string (string
210 delimiters ; list of chars.
214 (if (and code-conversion
215 (not (mime-charset-to-coding-system code-conversion)))
216 (setq code-conversion default-mime-charset))
217 (let ((equal-safe-regexp (concat "\\`=?" safe-regexp))
222 (while (< 0 (length src))
223 (let ((ch (aref src 0))
226 (eword-decode-first-encoded-words src
227 eword-regexp after-regexp must-unfold))))
228 (if (and (not (string= buf ""))
229 (or decoded (memq ch delimiters)))
230 (setq dst (concat dst
231 (std11-wrap-as-quoted-pairs
232 (decode-mime-charset-string buf code-conversion)
233 chars-must-be-quote))
237 (setq dst (concat dst
238 (std11-wrap-as-quoted-pairs
240 chars-must-be-quote))
242 ((memq ch delimiters)
243 (setq dst (concat dst (list ch))
244 src (substring src 1)
247 (setq buf (concat buf (list (aref src 1)))
248 src (substring src 2)
250 ((string-match "\\`[ \t\n]+" src)
251 (setq buf (concat buf (substring src 0 (match-end 0)))
252 src (substring src (match-end 0))
254 ((and (string-match equal-safe-regexp src)
256 (setq buf (concat buf (substring src 0 (match-end 0)))
257 src (substring src (match-end 0))
258 ew-enable eword-decode-sticked-encoded-word))
259 (t (error "something wrong")))))
260 (if (not (string= buf ""))
261 (setq dst (concat dst
262 (std11-wrap-as-quoted-pairs
263 (decode-mime-charset-string buf code-conversion)
264 chars-must-be-quote))))
271 (defun eword-decode-unstructured (string code-conversion &optional must-unfold)
272 (eword-decode-entire-string
274 eword-encoded-word-in-unstructured-regexp
275 eword-after-encoded-word-in-unstructured-regexp
283 (defun eword-decode-comment (string code-conversion &optional must-unfold)
284 (eword-decode-entire-string
286 eword-encoded-word-in-comment-regexp
287 eword-after-encoded-word-in-comment-regexp
291 '(?\( ?\) ?\\ ?\r ?\n)
295 (defun eword-decode-quoted-string (string code-conversion &optional must-unfold)
296 (eword-decode-entire-string
298 eword-encoded-word-in-quoted-string-regexp
299 eword-after-encoded-word-in-quoted-string-regexp
307 (defun eword-decode-string (string &optional must-unfold code-conversion)
308 "Decode MIME encoded-words in STRING.
310 STRING is unfolded before decoding.
312 If an encoded-word is broken or your emacs implementation can not
313 decode the charset included in it, it is not decoded.
315 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
316 if there are in decoded encoded-words (generated by bad manner MUA
317 such as a version of Net$cape).
319 If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
320 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
321 Otherwise it decodes non-ASCII bit patterns as the
322 default-mime-charset."
323 (eword-decode-unstructured
324 (std11-unfold-string string)
328 (defun eword-decode-structured-field-body (string
330 start-column max-column)
331 (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
332 (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
333 (ew-crlf-to-lf decoded)))
335 (defun eword-decode-and-unfold-structured-field-body (string
339 "Decode and unfold STRING as structured field body.
340 It decodes non us-ascii characters in FULL-NAME encoded as
341 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
342 characters are regarded as variable `default-mime-charset'.
344 If an encoded-word is broken or your emacs implementation can not
345 decode the charset included in it, it is not decoded."
346 (let* ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
347 (ew-crlf-to-lf (ew-crlf-unfold decoded))))
349 (defun eword-decode-and-fold-structured-field-body (string
351 &optional max-column)
353 (setq max-column fill-column))
354 (let* ((field-name (make-string (1- start-column) ?X))
355 (field-body (ew-lf-crlf-to-crlf string))
356 (ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
357 (decoded (ew-decode-field field-name field-body)))
358 (unless (equal field-body decoded)
359 (setq decoded (ew-crlf-refold decoded start-column max-column)))
360 (ew-crlf-to-lf decoded)))
362 (defun eword-decode-unstructured-field-body (string &optional start-column
364 (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
365 (ew-crlf-to-lf decoded)))
367 (defun eword-decode-and-unfold-unstructured-field-body (string
368 &optional start-column
370 (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
371 (ew-crlf-to-lf (ew-crlf-unfold decoded))))
373 (defun eword-decode-unfolded-unstructured-field-body (string
374 &optional start-column
376 (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
377 (ew-crlf-to-lf decoded)))
383 (defun eword-decode-region (start end &optional unfolding must-unfold
385 "Decode MIME encoded-words in region between START and END.
387 If UNFOLDING is not nil, it unfolds before decoding.
389 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
390 if there are in decoded encoded-words (generated by bad manner MUA
391 such as a version of Net$cape).
393 If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
394 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
395 Otherwise it decodes non-ASCII bit patterns as the
396 default-mime-charset."
398 (rotate-memo args-eword-decode-region
399 (list start end (buffer-substring start end) unfolding must-unfold code-conversion))
402 (narrow-to-region start end)
404 (eword-decode-unfold)
406 (let ((str (eword-decode-unstructured
407 (buffer-substring (point-min) (point-max))
410 (delete-region (point-min) (point-max))
413 (defun eword-decode-unfold ()
414 (goto-char (point-min))
416 (while (re-search-forward std11-field-head-regexp nil t)
417 (setq beg (match-beginning 0)
418 end (std11-field-end))
419 (setq field (buffer-substring beg end))
420 (if (string-match eword-encoded-word-regexp field)
422 (narrow-to-region (goto-char beg) end)
423 (while (re-search-forward "\n\\([ \t]\\)" nil t)
424 (replace-match (match-string 1))
426 (goto-char (point-max))
430 ;;; @ for message header
433 (defvar mime-field-decoder-alist nil)
435 (defvar mime-field-decoder-cache nil)
437 (defvar mime-update-field-decoder-cache 'ew-mime-update-field-decoder-cache
438 "*Field decoder cache update function.")
441 (defun mime-set-field-decoder (field &rest specs)
442 "Set decoder of FILED.
443 SPECS must be like `MODE1 DECODER1 MODE2 DECODER2 ...'.
444 Each mode must be `nil', `plain', `wide', `summary' or `nov'.
445 If mode is `nil', corresponding decoder is set up for every modes."
447 (let ((mode (pop specs))
448 (function (pop specs)))
451 (let ((cell (assq mode mime-field-decoder-alist)))
453 (setcdr cell (put-alist field function (cdr cell)))
454 (setq mime-field-decoder-alist
455 (cons (cons mode (list (cons field function)))
456 mime-field-decoder-alist))
458 (apply (function mime-set-field-decoder) field specs)
460 (mime-set-field-decoder field
468 (defmacro mime-find-field-presentation-method (name)
469 "Return field-presentation-method from NAME.
470 NAME must be `plain', `wide', `summary' or `nov'."
472 `(or (assq 'summary mime-field-decoder-cache)
478 (symbolp (car (cdr name)))
479 (null (cdr (cdr name))))
480 `(or (assq ,name mime-field-decoder-cache)
484 `(or (assq (or ,name 'summary) mime-field-decoder-cache)
485 (cons (or ,name 'summary) nil))
488 (defun mime-find-field-decoder-internal (field &optional mode)
489 "Return function to decode field-body of FIELD in MODE.
490 Optional argument MODE must be object of field-presentation-method."
491 (cdr (or (assq field (cdr mode))
493 (funcall mime-update-field-decoder-cache
496 (cdr (assq (car mode) mime-field-decoder-cache)))
500 (defun mime-find-field-decoder (field &optional mode)
501 "Return function to decode field-body of FIELD in MODE.
502 Optional argument MODE must be object or name of
503 field-presentation-method. Name of field-presentation-method must be
504 `plain', `wide', `summary' or `nov'.
505 Default value of MODE is `summary'."
507 (let ((p (cdr (mime-find-field-presentation-method mode))))
508 (if (and p (setq p (assq field p)))
510 (cdr (funcall mime-update-field-decoder-cache
511 field (or mode 'summary)))))
512 (inline (mime-find-field-decoder-internal field mode))
516 (defun mime-update-field-decoder-cache (field mode &optional function)
517 "Update field decoder cache `mime-field-decoder-cache'."
518 (cond ((eq function 'identity)
523 (cdr (assq (or mode 'summary) mime-field-decoder-alist))))
524 (setq function (cdr (or (assq field decoder-alist)
525 (assq t decoder-alist)))))
527 (let ((cell (assq mode mime-field-decoder-cache))
530 (if (setq ret (assq field (cdr cell)))
531 (setcdr ret function)
532 (setcdr cell (cons (setq ret (cons field function)) (cdr cell))))
533 (setq mime-field-decoder-cache
534 (cons (cons mode (list (setq ret (cons field function))))
535 mime-field-decoder-cache)))
539 (mime-set-field-decoder 'Archive nil nil)
540 (mime-set-field-decoder 'Content-Md5 nil nil)
541 (mime-set-field-decoder 'Control nil nil)
542 (mime-set-field-decoder 'Date nil nil)
543 (mime-set-field-decoder 'Distribution nil nil)
544 (mime-set-field-decoder 'Followup-Host nil nil)
545 (mime-set-field-decoder 'Followup-To nil nil)
546 (mime-set-field-decoder 'Lines nil nil)
547 (mime-set-field-decoder 'Message-Id nil nil)
548 (mime-set-field-decoder 'Newsgroups nil nil)
549 (mime-set-field-decoder 'Nntp-Posting-Host nil nil)
550 (mime-set-field-decoder 'Path nil nil)
551 (mime-set-field-decoder 'Posted-And-Mailed nil nil)
552 (mime-set-field-decoder 'Received nil nil)
553 (mime-set-field-decoder 'Status nil nil)
554 (mime-set-field-decoder 'X-Face nil nil)
555 (mime-set-field-decoder 'X-Face-Version nil nil)
556 (mime-set-field-decoder 'X-Info nil nil)
557 (mime-set-field-decoder 'X-Pgp-Key-Info nil nil)
558 (mime-set-field-decoder 'X-Pgp-Sig nil nil)
559 (mime-set-field-decoder 'X-Pgp-Sig-Version nil nil)
560 (mime-set-field-decoder 'Xref nil nil)
564 '(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender
565 To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc
567 Mime-Version Content-Type Content-Transfer-Encoding
568 Content-Disposition User-Agent))
571 (setq field (pop fields))
572 (mime-set-field-decoder
574 'plain #'eword-decode-structured-field-body
575 'wide #'eword-decode-and-fold-structured-field-body
576 'summary #'eword-decode-and-unfold-structured-field-body
577 'nov #'eword-decode-and-unfold-structured-field-body)
580 ;; unstructured fields (default)
581 (mime-set-field-decoder
583 'plain #'eword-decode-unstructured-field-body
584 'wide #'eword-decode-unstructured-field-body
585 'summary #'eword-decode-and-unfold-unstructured-field-body
586 'nov #'eword-decode-unfolded-unstructured-field-body)
589 (defun ew-mime-update-field-decoder-cache (field mode)
592 (lexical-let ((field-name (symbol-name field)))
593 (lambda (field-body &optional start-column max-column must-unfold)
594 (setq field-body (ew-lf-to-crlf field-body))
595 (let ((res (ew-crlf-to-lf
596 (ew-decode-field field-name field-body))))
599 (list 'original-field-name field-name
600 'original-field-body field-body)
604 (lexical-let ((field-name (symbol-name field)))
605 (lambda (field-body &optional start-column max-column must-unfold)
606 (setq field-body (ew-lf-to-crlf field-body))
607 (let* ((res (ew-decode-field field-name field-body))
608 (res (if (string= res field-body)
612 (or max-column fill-column))))
613 (res (ew-crlf-to-lf res)))
616 (list 'original-field-name field-name
617 'original-field-body field-body)
621 (lexical-let ((field-name (symbol-name field)))
622 (lambda (field-body &optional start-column max-column must-unfold)
623 (setq field-body (ew-lf-to-crlf field-body))
624 (let ((res (ew-crlf-to-lf
626 (ew-decode-field field-name field-body)))))
629 (list 'original-field-name field-name
630 'original-field-body field-body)
634 (lexical-let ((field-name (symbol-name field)))
635 (lambda (field-body &optional start-column max-column must-unfold)
636 (setq field-body (ew-lf-to-crlf field-body))
638 (let ((ew-ignore-76bytes-limit t))
639 (let ((res (ew-crlf-to-lf
641 (ew-decode-field field-name field-body)))))
644 (list 'original-field-name field-name
645 'original-field-body field-body)
650 (mime-update-field-decoder-cache field mode fun)))
653 (defun mime-decode-field-body (field-body field-name
654 &optional mode max-column)
655 "Decode FIELD-BODY as FIELD-NAME in MODE, and return the result.
656 Optional argument MODE must be `plain', `wide', `summary' or `nov'.
657 Default mode is `summary'.
659 If MODE is `wide' and MAX-COLUMN is non-nil, the result is folded with
662 Non MIME encoded-word part in FILED-BODY is decoded with
663 `default-mime-charset'."
664 (unless mode (setq mode 'summary))
665 (if (symbolp field-name) (setq field-name (symbol-name field-name)))
668 (let ((ew-ignore-76bytes-limit t))
670 field-name (ew-lf-crlf-to-crlf field-body)))
672 field-name (ew-lf-crlf-to-crlf field-body)))))
673 (if (and (eq mode 'wide) max-column)
674 (setq decoded (ew-crlf-refold
676 (1+ (string-width field-name))
678 (if (not (eq mode 'plain))
679 (setq decoded (ew-crlf-unfold decoded))))
680 (setq decoded (ew-crlf-to-lf decoded))
681 (add-text-properties 0 (length decoded)
682 (list 'original-field-name field-name
683 'original-field-body field-body)
688 (defun mime-decode-header-in-region (start end
689 &optional code-conversion)
690 "Decode MIME encoded-words in region between START and END.
691 If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
692 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
693 Otherwise it decodes non-ASCII bit patterns as the
694 default-mime-charset."
698 (narrow-to-region start end)
699 (let ((default-charset
701 (if (mime-charset-to-coding-system code-conversion)
703 default-mime-charset))))
705 (let ((mode-obj (mime-find-field-presentation-method 'wide))
706 beg p end len field-decoder
707 field-name field-body)
708 (goto-char (point-min))
709 (while (re-search-forward std11-field-head-regexp nil t)
710 (setq beg (match-beginning 0)
712 field-name (buffer-substring beg (1- p))
713 len (string-width field-name)
714 field-decoder (inline
715 (mime-find-field-decoder-internal
716 (intern (capitalize field-name))
719 (setq end (std11-field-end)
720 field-body (buffer-substring p end))
721 (let ((default-mime-charset default-charset))
722 (delete-region p end)
723 (insert (funcall field-decoder field-body (1+ len)))
725 (add-text-properties beg (min (1+ (point)) (point-max))
726 (list 'original-field-name field-name
727 'original-field-body field-body))
729 (eword-decode-region (point-min) (point-max) t)
733 (defun mime-decode-header-in-buffer (&optional code-conversion separator)
734 "Decode MIME encoded-words in header fields.
735 If CODE-CONVERSION is nil, it decodes only encoded-words. If it is
736 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
737 Otherwise it decodes non-ASCII bit patterns as the
738 default-mime-charset.
739 If SEPARATOR is not nil, it is used as header separator."
741 (mime-decode-header-in-region
744 (goto-char (point-min))
745 (if (re-search-forward
746 (concat "^\\(" (regexp-quote (or separator "")) "\\)?$")
753 (define-obsolete-function-alias 'eword-decode-header
754 'mime-decode-header-in-buffer)
757 ;;; @ encoded-word decoder
760 (defvar eword-decode-encoded-word-error-handler
761 'eword-decode-encoded-word-default-error-handler)
763 (defvar eword-warning-face nil
764 "Face used for invalid encoded-word.")
766 (defun eword-decode-encoded-word-default-error-handler (word signal)
767 (and (add-text-properties 0 (length word)
768 (and eword-warning-face
769 (list 'face eword-warning-face))
773 (defun eword-decode-encoded-word (word &optional must-unfold)
774 "Decode WORD if it is an encoded-word.
776 If your emacs implementation can not decode the charset of WORD, it
777 returns WORD. Similarly the encoded-word is broken, it returns WORD.
779 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
780 if there are in decoded encoded-word (generated by bad manner MUA such
781 as a version of Net$cape)."
782 (or (if (string-match eword-encoded-word-regexp word)
784 (substring word (match-beginning 1) (match-end 1))
788 (substring word (match-beginning 2) (match-end 2))
791 (substring word (match-beginning 3) (match-end 3))
794 (eword-decode-encoded-text charset encoding text must-unfold)
796 (funcall eword-decode-encoded-word-error-handler word err)
802 ;;; @ encoded-text decoder
805 (defun eword-decode-encoded-text (charset encoding string
806 &optional must-unfold)
807 "Decode STRING as an encoded-text.
809 If your emacs implementation can not decode CHARSET, it returns nil.
811 If ENCODING is not \"B\" or \"Q\", it occurs error.
812 So you should write error-handling code if you don't want break by errors.
814 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
815 if there are in decoded encoded-text (generated by bad manner MUA such
816 as a version of Net$cape)."
817 (let ((cs (mime-charset-to-coding-system charset)))
819 (let ((dest (encoded-text-decode-string string encoding)))
821 (setq dest (decode-mime-charset-string dest charset))
825 (cond ((eq chr ?\n) "")
827 (t (char-to-string chr)))
829 (std11-unfold-string dest)
834 ;;; @ lexical analyze
837 (defvar eword-lexical-analyze-cache nil)
838 (defvar eword-lexical-analyze-cache-max 299
839 "*Max position of eword-lexical-analyze-cache.
840 It is max size of eword-lexical-analyze-cache - 1.")
842 (defcustom eword-lexical-analyzers
843 '(eword-analyze-quoted-string
844 eword-analyze-domain-literal
845 eword-analyze-comment
847 eword-analyze-special
848 eword-analyze-encoded-word
850 "*List of functions to return result of lexical analyze.
851 Each function must have two arguments: STRING and MUST-UNFOLD.
852 STRING is the target string to be analyzed.
853 If MUST-UNFOLD is not nil, each function must unfold and eliminate
854 bare-CR and bare-LF from the result even if they are included in
855 content of the encoded-word.
856 Each function must return nil if it can not analyze STRING as its
859 Previous function is preferred to next function. If a function
860 returns nil, next function is used. Otherwise the return value will
863 :type '(repeat function))
865 (defun eword-analyze-quoted-string (string &optional must-unfold)
866 (let ((p (std11-check-enclosure string ?\" ?\")))
868 (cons (cons 'quoted-string
869 (if eword-decode-quoted-encoded-word
870 (eword-decode-quoted-string
871 (substring string 0 p)
872 default-mime-charset)
873 (std11-wrap-as-quoted-string
874 (decode-mime-charset-string
875 (std11-strip-quoted-pair (substring string 1 (1- p)))
876 default-mime-charset))))
877 (substring string p)))
880 (defun eword-analyze-domain-literal (string &optional must-unfold)
881 (std11-analyze-domain-literal string))
883 (defun eword-analyze-comment (string &optional must-unfold)
884 (let ((len (length string)))
885 (if (and (< 0 len) (eq (aref string 0) ?\())
887 (while (and p (< p len) (eq (aref string p) ?\())
888 (setq p (std11-check-enclosure string ?\( ?\) t p)))
891 (eword-decode-comment
892 (std11-unfold-string (substring string 0 p))
893 default-mime-charset))
894 (substring string p)))
898 (defun eword-analyze-spaces (string &optional must-unfold)
899 (std11-analyze-spaces string))
901 (defun eword-analyze-special (string &optional must-unfold)
902 (std11-analyze-special string))
904 (defun eword-analyze-encoded-word (string &optional must-unfold)
905 (let ((decoded (eword-decode-first-encoded-words
907 eword-encoded-word-in-phrase-regexp
908 eword-after-encoded-word-in-phrase-regexp
911 (let ((s (car decoded)))
912 (while (or (string-match std11-atom-regexp s)
913 (string-match std11-spaces-regexp s))
914 (setq s (substring s (match-end 0))))
916 (cons (cons 'atom (car decoded)) (cdr decoded))
917 (cons (cons 'quoted-string
918 (std11-wrap-as-quoted-string (car decoded)))
921 (defun eword-analyze-atom (string &optional must-unfold)
922 (if (string-match std11-atom-regexp (string-as-unibyte string))
923 (let ((end (match-end 0)))
924 (if (and eword-decode-sticked-encoded-word
925 (string-match eword-encoded-word-in-phrase-regexp
926 (substring string 0 end))
927 (< 0 (match-beginning 0)))
928 (setq end (match-beginning 0)))
929 (cons (cons 'atom (decode-mime-charset-string
930 (substring string 0 end)
931 default-mime-charset))
932 (substring string end)
935 (defun eword-lexical-analyze-internal (string must-unfold)
936 (let ((last 'eword-analyze-spaces)
938 (while (not (string-equal string ""))
940 (let ((rest eword-lexical-analyzers)
942 (while (and (setq func (car rest))
945 (not eword-decode-sticked-encoded-word)
946 (not (eq last 'eword-analyze-spaces))
947 (eq func 'eword-analyze-encoded-word))
948 (null (setq r (funcall func string must-unfold))))
950 (setq rest (cdr rest)))
952 (or r `((error . ,string) . ""))
954 (setq dest (cons (car ret) dest))
955 (setq string (cdr ret))
960 (defun eword-lexical-analyze (string &optional must-unfold)
961 "Return lexical analyzed list corresponding STRING.
962 It is like std11-lexical-analyze, but it decodes non us-ascii
963 characters encoded as encoded-words or invalid \"raw\" format.
964 \"Raw\" non us-ascii characters are regarded as variable
965 `default-mime-charset'."
966 (let* ((str (copy-sequence string))
967 (key (cons str (cons default-mime-charset must-unfold)))
969 (set-text-properties 0 (length str) nil str)
970 (if (setq ret (assoc key eword-lexical-analyze-cache))
972 (setq ret (eword-lexical-analyze-internal str must-unfold))
973 (setq eword-lexical-analyze-cache
975 (last eword-lexical-analyze-cache
976 eword-lexical-analyze-cache-max)))
979 (defun eword-decode-token (token)
982 (defun eword-extract-address-components (string)
983 "Extract full name and canonical address from STRING.
984 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
985 If no name can be extracted, FULL-NAME will be nil.
986 It decodes non us-ascii characters in FULL-NAME encoded as
987 encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii
988 characters are regarded as variable `default-mime-charset'."
989 (rotate-memo args-eword-extract-address-components (list string))
990 (let* ((structure (car (std11-parse-address
991 (eword-lexical-analyze
992 (std11-unfold-string string) 'must-unfold))))
993 (phrase (std11-full-name-string structure))
994 (address (std11-address-string structure))
996 (list phrase address)
1003 (provide 'eword-decode)
1005 ;;; eword-decode.el ends here