9 ;;; user customizable variable.
11 (defvar ew-decode-quoted-encoded-word nil)
12 (defvar ew-ignore-75bytes-limit nil)
13 (defvar ew-ignore-76bytes-limit nil)
14 (defvar ew-permit-sticked-comment nil)
15 (defvar ew-permit-sticked-special nil)
17 ;; anonymous function to decode ground string.
18 ;; NOTE: STR is CRLF-form and it should return as CRLF-form.
19 (defvar ew-decode-us-ascii (lambda (str) (decode-coding-string str 'iso-latin-1-unix)))
22 (defvar ew-decode-field-syntax-alist
23 '(("from" ew-scan-unibyte-std11 . ew:tag-mailbox+-tok)
24 ("sender" ew-scan-unibyte-std11 . ew:tag-mailbox-tok)
25 ("to" ew-scan-unibyte-std11 . ew:tag-address+-tok)
26 ("resent-to" ew-scan-unibyte-std11 . ew:tag-address+-tok)
27 ("cc" ew-scan-unibyte-std11 . ew:tag-address+-tok)
28 ("resent-cc" ew-scan-unibyte-std11 . ew:tag-address+-tok)
29 ("bcc" ew-scan-unibyte-std11 . ew:tag-address*-tok)
30 ("resent-bcc" ew-scan-unibyte-std11 . ew:tag-address*-tok)
31 ("message-id" ew-scan-unibyte-std11)
32 ("resent-message-id" ew-scan-unibyte-std11)
33 ("in-reply-to" ew-scan-unibyte-std11 . ew:tag-phrase-msg-id*-tok)
34 ("references" ew-scan-unibyte-std11 . ew:tag-phrase-msg-id*-tok)
35 ("keywords" ew-scan-unibyte-std11 . ew:tag-phrase*-tok)
36 ("subject" ew-scan-unibyte-unstructured)
37 ("comments" ew-scan-unibyte-unstructured)
38 ("encrypted" ew-scan-unibyte-std11)
39 ("date" ew-scan-unibyte-std11)
40 ("reply-to" ew-scan-unibyte-std11 . ew:tag-address+-tok)
41 ("received" ew-scan-unibyte-std11)
42 ("resent-reply-to" ew-scan-unibyte-std11 . ew:tag-address+-tok)
43 ("resent-from" ew-scan-unibyte-std11 . ew:tag-mailbox+-tok)
44 ("resent-sender" ew-scan-unibyte-std11 . ew:tag-mailbox-tok)
45 ("resent-date" ew-scan-unibyte-std11)
46 ("return-path" ew-scan-unibyte-std11)
47 ("mime-version" ew-scan-unibyte-std11)
48 ("content-type" ew-scan-unibyte-mime)
49 ("content-transfer-encoding" ew-scan-unibyte-mime)
50 ("content-id" ew-scan-unibyte-mime)
51 ("content-description" ew-scan-unibyte-unstructured)
53 (defvar ew-decode-field-default-syntax '(ew-scan-unibyte-unstructured))
55 (defun ew-decode-field (field-name field-body &optional eword-filter)
56 "Decode MIME RFC2047 encoded-words in a field.
57 FIELD-NAME is a name of the field such as \"To\", \"Subject\" etc. and
58 used to selecting syntax of body of the field and deciding first
59 column of body of the field.
60 FIELD-BODY is a body of the field.
62 If FIELD-BODY has multiple lines, each line is separated by CRLF as
63 pure network representation. Also if the result has multiple lines,
64 each line is separated by CRLF.
66 If EWORD-FILTER is non-nil, it should be closure. it is called for
67 each successful decoded encoded-word with decoded string as a
68 argument. The return value of EWORD-FILTER is used as decoding result
69 instead of its argument."
70 (let ((tmp (assoc (downcase field-name) ew-decode-field-syntax-alist))
71 frag-anchor frag1 frag2 decode)
74 (setq tmp ew-decode-field-default-syntax))
75 (setq frag-anchor (funcall (car tmp) (1+ (length field-name)) field-body))
76 ;;(setq zzz frag-anchor)
78 (ew-mark (cdr tmp) frag-anchor))
79 (setq frag1 (get frag-anchor 'next-frag))
80 (while (not (eq frag1 frag-anchor))
81 (setq decode (get frag1 'decode))
82 (setq frag2 (get frag1 'next-frag))
83 (while (and (not (eq frag2 frag-anchor))
84 (eq decode (get frag2 'decode)))
85 (setq frag2 (get frag2 'next-frag)))
86 (funcall decode frag-anchor frag1 frag2 eword-filter)
88 (mapconcat (lambda (frag) (or (get frag 'result) (symbol-name frag)))
89 (ew-frag-list frag-anchor) "")))
91 (defun ew-mark (tag anchor)
92 (let ((tlist (cons (list (symbol-value tag)) (ew-pair-list anchor))))
93 ;;(insert (format "%s" tlist))
95 (lambda () (if (null tlist) '(0)
96 (prog1 (car tlist) (setq tlist (cdr tlist)))))
97 (lambda (msg tok) (setq zzz-anchor anchor) (message "parse error: %s%s : %s" msg tok anchor)))))
99 (defun ew-decode-none (anchor frag end eword-filter)
100 (while (not (eq frag end))
101 (put frag 'result (funcall ew-decode-us-ascii (symbol-name frag)))
102 (setq frag (get frag 'next-frag))))
104 (defun ew-decode-generic (anchor start end
109 (let ((frag start) result buff type f)
110 (while (not (eq frag end))
111 (setq type (get frag 'type))
113 ((and (memq type eword)
114 (ew-proper-eword-p frag))
116 (setq result (ew-rappend result
117 (funcall decode-others
120 (let ((first frag) (ewords (list frag)))
122 (setq f (get frag 'next-frag))
123 (while (and (not (eq f end))
124 (memq (get f 'type) gap))
125 (setq f (get f 'next-frag)))
126 (and (not (eq f end))
127 (ew-proper-eword-p f)))
128 (setq ewords (ew-rcons* ewords f)
130 (while (not (eq first frag))
131 (put first 'result "")
132 (setq first (get first 'next-frag)))
133 (put frag 'result "")
134 (setq result (ew-rappend result
135 (funcall decode-ewords
139 (setq buff (cons frag buff))
140 (put frag 'result ""))
142 (error "unexpected token: %s (%s)" frag type)))
143 (setq frag (get frag 'next-frag)))
145 (setq result (ew-rappend result (funcall decode-others (nreverse buff)))))
147 (apply 'ew-quote-concat (nreverse result)))
150 (defun ew-decode-generic-others (frags puncts quotes targets)
151 (let (result buff frag type tmp)
153 (setq frag (car frags)
154 type (get frag 'type)
159 (setq buff (nreverse buff)
160 tmp (funcall ew-decode-us-ascii
161 (mapconcat 'car buff "")))
162 (if (ew-contain-non-ascii-p tmp)
163 (setq result (ew-rcons* result tmp))
164 (setq result (ew-rcons*
166 (funcall ew-decode-us-ascii
167 (mapconcat 'cdr buff "")))))
169 (setq result (ew-rcons*
171 (symbol-name frag))))
173 (setq buff (ew-rcons*
175 (cons (substring (symbol-name frag) 1)
176 (symbol-name frag)))))
178 (setq buff (ew-rcons*
180 (cons (symbol-name frag)
181 (symbol-name frag)))))
183 (error "something wrong: unexpected token: %s (%s)" frag type))))
185 (setq buff (nreverse buff)
186 tmp (funcall ew-decode-us-ascii
187 (mapconcat 'car buff "")))
188 (if (ew-contain-non-ascii-p tmp)
189 (setq result (ew-rcons* result tmp))
190 (setq result (ew-rcons*
192 (funcall ew-decode-us-ascii
193 (mapconcat 'cdr buff "")))))
197 (defun ew-decode-unstructured-ewords (ewords eword-filter)
200 (setq result (ew-rcons*
202 (list (ew-decode-eword (symbol-name (car ewords))
205 ewords (cdr ewords)))
208 (defun ew-decode-unstructured-others (frags)
211 (setq result (ew-rcons*
213 (symbol-name (car frags)))
215 (list (funcall ew-decode-us-ascii
216 (apply 'concat (nreverse result))))))
218 (defun ew-decode-unstructured (anchor start end eword-filter)
221 'ew-decode-unstructured-ewords
222 'ew-decode-unstructured-others
223 '(ew:raw-us-texts-tok)
226 '(ew:raw-us-texts-tok
231 (defun ew-decode-phrase-ewords (ewords eword-filter)
232 (let ((qs (eq (get (car ewords) 'type) 'ew:raw-qs-texts-tok))
236 (setq result (ew-rcons*
238 (list (ew-decode-eword (symbol-name (car ewords))
241 require-quoting (or require-quoting
242 (string-match "[][()<>@,;:\\\".\000-\037]"
244 ewords (cdr ewords)))
247 (funcall (if qs 'ew-embed-in-quoted-string 'ew-embed-in-phrase)
248 (apply 'ew-quote-concat
252 (defun ew-decode-phrase-others (frags)
253 (ew-decode-generic-others
255 '(ew:raw-qs-begin-tok
257 '(ew:raw-qs-qfold-tok
264 ew:raw-qs-fold-tok)))
266 (defun ew-decode-phrase (anchor start end eword-filter)
269 'ew-decode-phrase-ewords
270 'ew-decode-phrase-others
271 (if ew-decode-quoted-encoded-word
272 '(ew:raw-atom-tok ew:raw-qs-texts-tok)
288 (defun ew-decode-comment-ewords (ewords eword-filter)
289 (let (require-quoting
292 (setq result (ew-rcons*
294 (list (ew-decode-eword (symbol-name (car ewords))
297 require-quoting (or require-quoting
298 (string-match "[()\\\\]" (caar result)))
299 ewords (cdr ewords)))
303 (apply 'ew-quote-concat
307 (defun ew-decode-comment-others (frags)
308 (ew-decode-generic-others
311 '(ew:raw-cm-qfold-tok
313 '(ew:raw-cm-texts-tok
315 ew:raw-cm-fold-tok)))
317 (defun ew-decode-comment (anchor start end eword-filter)
320 'ew-decode-comment-ewords
321 'ew-decode-comment-others
322 '(ew:raw-cm-texts-tok)
325 '(ew:raw-cm-texts-tok
334 (defun ew-embed-in-phrase (str)
335 (concat "\"" (ew-embed-in-quoted-string str) "\""))
337 (defun ew-embed-in-quoted-string (str)
338 (ew-quote-as-quoted-pair str '(?\\ ?\")))
340 (defun ew-embed-in-comment (str)
341 (ew-quote-as-quoted-pair str '(?\\ ?\( ?\))))
343 (defun ew-quote-as-quoted-pair (str specials)
344 (let ((i 0) (j 0) (l (length str)) result)
346 (when (member (aref str j) specials)
347 (setq result (ew-rcons*
354 (setq result (ew-rcons*
357 (apply 'concat (nreverse result))))
361 (defun ew-proper-eword-p (frag)
363 (or ew-ignore-75bytes-limit
364 (<= (length (symbol-name frag)) 75))
365 (or ew-ignore-76bytes-limit
366 (<= (get frag 'line-length) 76))
368 ((eq (get frag 'type) 'ew:raw-cm-texts-tok)
369 (ew-eword-p (symbol-name frag)))
370 ((eq (get frag 'type) 'ew:raw-qs-texts-tok)
371 (ew-eword-p (symbol-name frag)))
372 ((eq (get frag 'type) 'ew:raw-atom-tok)
374 (or ew-permit-sticked-comment
376 (not (ew-comment-frag-p (get frag 'prev-frag)))
377 (not (ew-comment-frag-p (get frag 'next-frag)))))
378 (or ew-permit-sticked-special
380 (or (ew-comment-frag-p (get frag 'prev-frag))
381 (not (ew-special-frag-p (get frag 'prev-frag))))
382 (or (ew-comment-frag-p (get frag 'next-frag))
383 (not (ew-special-frag-p (get frag 'next-frag))))))
384 (ew-eword-p (symbol-name frag))))
385 ((eq (get frag 'type) 'ew:raw-us-texts-tok)
387 (or ew-permit-sticked-special
388 (not (ew-special-frag-p (get frag 'prev-frag))))
389 (ew-eword-p (symbol-name frag))))
393 (defun ew-contain-non-ascii-p (str)
394 (not (eq (charsets-to-mime-charset (find-charset-string str)) 'us-ascii)))
398 (ew-decode-field "To" " =?US-ASCII?Q?phrase?= <akr@jaist.ac.jp>")
399 (ew-decode-field "To" " =?US-ASCII?Q?phrase?= < =?US-ASCII?Q?akr?= @jaist.ac.jp>")
400 (ew-decode-field "To" " =?US-ASCII?Q?akr?= @jaist.ac.jp")
401 (ew-decode-field "Subject" " =?ISO-2022-JP?B?GyRCJCIbKEI=?=")
402 (ew-decode-field "Content-Type" " text/vnd.latex-z(=?US-ASCII?Q?What=3F?=);charset=ISO-2022-JP")
404 (ew-decode-field "To" " =?US-ASCII?Q?A=22B=5CC?= <akr@jaist.ac.jp>")
405 (let ((ew-decode-quoted-encoded-word t))
406 (ew-decode-field "To" " \"=?US-ASCII?Q?A=22B=5CC?=\" <akr@jaist.ac.jp>"))
408 (ew-decode-field "To" " akr@jaist.ac.jp (=?US-ASCII?Q?=28A=29B=5C?=)")
410 (ew-decode-field "To" "\"A\\BC\e$B\\\"\\\\\e(B\" <foo@bar>")
411 (ew-decode-field "To" "\"A\\BC\" <foo@bar>")
412 (ew-decode-field "To" "\"\e\\$\\B\\$\\\"\e\\(\\B\" <foo@bar>")