10 (defvar ew-decode-field-cache-buf '())
11 (defvar ew-decode-field-cache-num 300)
13 (defun ew-decode-field (field-name field-body)
14 "Decode MIME RFC2047 encoded-words in a field.
15 FIELD-NAME is a name of the field such as \"To\", \"Subject\" etc. and
16 used to selecting syntax of body of the field and deciding first
17 column of body of the field.
18 FIELD-BODY is a body of the field.
20 If FIELD-BODY has multiple lines, each line is separated by CRLF as
21 pure network representation. Also if the result has multiple lines,
22 each line is separated by CRLF."
23 (let* ((key (ew-cons* field-name field-body
24 (ew-dynamic-options)))
25 (tmp (assoc key ew-decode-field-cache-buf)))
29 (setq tmp (nthcdr ew-decode-field-cache-num
30 ew-decode-field-cache-buf))
33 (setcdr (cdr tmp) ew-decode-field-cache-buf)
34 (setq ew-decode-field-cache-buf (cdr tmp))
36 (setq ew-decode-field-cache-buf
38 ew-decode-field-cache-buf)))
39 (setcar (car ew-decode-field-cache-buf) key)
40 (setcdr (car ew-decode-field-cache-buf)
41 (ew-decode-field-no-cache
42 field-name field-body))
43 (cdar ew-decode-field-cache-buf)))))
45 (defun ew-analyze-field-to-decode (field-name field-body)
46 "Analyze FIELD-BODY to decode."
47 (let ((tmp (assq (intern (downcase field-name)) ew-decode-field-syntax-alist))
51 (setq tmp ew-decode-field-default-syntax))
52 (setq anchor (funcall (car tmp) (1+ (length field-name)) field-body))
53 (put anchor 'field-name field-name)
54 (put anchor 'scanner (car tmp))
55 (put anchor 'marker (cdr tmp))
58 (defun ew-decode-analyzed-field (anchor)
59 "Decode analyzed field."
60 (or (get anchor 'decoded)
61 (let (tmp frag1 frag2 decode)
62 (when ew-decode-sticked-encoded-word
64 (get anchor 'next-frag)
66 (if (eq (get anchor 'scanner) 'ew-scan-unibyte-unstructured)
69 (when (get anchor 'marker)
70 (ew-mark (get anchor 'marker) anchor))
71 (setq frag1 (get anchor 'next-frag))
72 (while (not (eq frag1 anchor))
73 (setq decode (get frag1 'decode))
74 (setq frag2 (get frag1 'next-frag))
75 (while (and (not (eq frag2 anchor))
76 (eq decode (get frag2 'decode)))
77 (setq frag2 (get frag2 'next-frag)))
78 (funcall decode anchor frag1 frag2)
80 (setq frag1 (get anchor 'prev-frag)
82 (while (not (eq frag1 anchor))
83 (setq tmp (cons (or (get frag1 'decoded) (symbol-name frag1)) tmp)
84 frag1 (get frag1 'prev-frag)))
85 (put anchor 'decoded (apply 'concat tmp)))))
87 (defun ew-decode-field-no-cache (field-name field-body)
88 "No caching version of ew-decode-field."
89 (ew-decode-analyzed-field
90 (ew-analyze-field-to-decode field-name field-body)))
92 (defun ew-mark (tag anchor)
93 (let ((tlist (cons (list (symbol-value tag)) (ew-pair-list anchor))))
94 ;;(insert (format "%s" tlist))
99 (prog1 (car tlist) (setq tlist (cdr tlist)))))
101 (message "%s%s : %s" msg tok anchor)
102 (when (< 0 ew-parse-error-sit-for-seconds)
103 (sit-for ew-parse-error-sit-for-seconds))))))
105 (defsubst ew-decode-us-ascii (str)
106 (decode-mime-charset-string str ew-default-mime-charset 'LF))
108 (defun ew-decode-none (anchor frag end)
109 (while (not (eq frag end))
110 (put frag 'decoded (ew-decode-us-ascii (symbol-name frag)))
111 (setq frag (get frag 'next-frag))))
113 (defsubst ew-proper-eword-p (frag)
115 (or ew-ignore-75bytes-limit
116 (<= (length (symbol-name frag)) 75))
117 (or ew-ignore-76bytes-limit
118 (<= (get frag 'line-length) 76))
120 ((eq (get frag 'type) 'ew:cm-texts)
121 (ew-eword-p (symbol-name frag)))
122 ((eq (get frag 'type) 'ew:qs-texts)
123 (ew-eword-p (symbol-name frag)))
124 ((eq (get frag 'type) 'ew:atom)
126 (or ew-permit-sticked-comment
128 (not (ew-comment-frag-p (get frag 'prev-frag)))
129 (not (ew-comment-frag-p (get frag 'next-frag)))))
130 (or ew-permit-sticked-special
132 (or (ew-comment-frag-p (get frag 'prev-frag))
133 (not (ew-special-frag-p (get frag 'prev-frag))))
134 (or (ew-comment-frag-p (get frag 'next-frag))
135 (not (ew-special-frag-p (get frag 'next-frag))))))
136 (ew-eword-p (symbol-name frag))))
137 ((eq (get frag 'type) 'ew:us-texts)
139 (or ew-permit-sticked-special
140 (not (ew-special-frag-p (get frag 'prev-frag))))
141 (ew-eword-p (symbol-name frag))))
145 (defun ew-decode-generic (anchor start end
149 (let ((frag start) (start-others start) type f)
150 (while (not (eq frag end))
151 (setq type (get frag 'type))
153 ((and (memq type eword)
154 (ew-proper-eword-p frag))
155 (when (not (eq start-others frag))
156 (funcall decode-others start-others frag))
157 (let ((first frag) (ewords (list frag)))
159 (setq f (get frag 'next-frag))
160 (while (and (not (eq f end))
161 (memq (get f 'type) gap))
162 (setq f (get f 'next-frag)))
163 (and (not (eq f end))
164 (ew-proper-eword-p f)))
165 (setq frag (get frag 'next-frag))
166 (while (not (eq frag f))
167 (put frag 'decoded "")
168 (setq frag (get frag 'next-frag)))
169 (setq ewords (ew-rcons* ewords f)
171 (funcall decode-ewords
173 (setq start-others (get frag 'next-frag)))
177 (error "unexpected token: %s (%s)" frag type)))
178 (setq frag (get frag 'next-frag)))
179 (when (not (eq start-others end))
180 (funcall decode-others start-others end))))
182 (defun ew-decode-generic-others (start end puncts quotes targets)
183 (let ((frag start) (start-nonpunct start) type buff tmp)
184 (while (not (eq frag end))
185 (setq type (get frag 'type))
189 (setq buff (apply 'concat (nreverse buff))
190 tmp (ew-decode-us-ascii buff))
192 (while (not (eq start-nonpunct frag))
193 (put start-nonpunct 'decoded (symbol-name start-nonpunct))
194 (setq start-nonpunct (get start-nonpunct 'next-frag)))
196 (put start-nonpunct 'decoded tmp)
197 (setq start-nonpunct (get start-nonpunct 'next-frag))
198 (while (not (eq start-nonpunct frag))
199 (put start-nonpunct 'decoded "")
200 (setq start-nonpunct (get start-nonpunct 'next-frag)))))
202 (put frag 'decoded (symbol-name frag))
203 (setq start-nonpunct (get frag 'next-frag)))
205 (setq buff (ew-rcons* buff
206 (substring (symbol-name frag) 1))))
208 (setq buff (ew-rcons* buff
209 (symbol-name frag))))
210 (t (error "something wrong: unexpected token: %s (%s)" frag type)))
211 (setq frag (get frag 'next-frag)))
213 (setq buff (apply 'concat (nreverse buff))
214 tmp (ew-decode-us-ascii buff))
216 (while (not (eq start-nonpunct frag))
217 (put start-nonpunct 'decoded (symbol-name start-nonpunct))
218 (setq start-nonpunct (get start-nonpunct 'next-frag)))
220 (put start-nonpunct 'decoded tmp)
221 (setq start-nonpunct (get start-nonpunct 'next-frag))
222 (while (not (eq start-nonpunct frag))
223 (put start-nonpunct 'decoded "")
224 (setq start-nonpunct (get start-nonpunct 'next-frag))))))))
226 (defun ew-decode-unstructured-ewords (ewords)
230 (list (ew-decode-eword (symbol-name (car ewords)))))
231 (setq ewords (cdr ewords))))
233 (defun ew-decode-unstructured-others (start end)
235 (while (not (eq start end))
236 (put start 'decoded "")
237 (setq strs (ew-rcons* strs
239 start (get start 'next-frag)))
240 (put (get end 'prev-frag)
243 (apply 'concat (nreverse strs))))))
245 (defun ew-decode-unstructured (anchor start end)
248 'ew-decode-unstructured-ewords
249 'ew-decode-unstructured-others
256 (let ((frag end) tmp)
257 (while (not (eq frag start))
258 (setq frag (get frag 'prev-frag)
259 tmp (cons (get frag 'decoded) tmp))
260 (put frag 'decoded ""))
261 (put start 'decoded (ew-encode-crlf (apply 'ew-quote-concat tmp)))))
263 (defun ew-decode-phrase-ewords (ewords)
264 (let* ((qs (eq (get (car ewords) 'type) 'ew:qs-texts))
265 (regexp (if qs "[\\\\\\\"]" "[][()<>@,;:\\\\\\\".\000-\037]"))
272 (list (setq decoded (ew-decode-eword (symbol-name (car tmp))))))
274 has-dangerous-char (or has-dangerous-char
275 (string-match regexp decoded))))
276 (when has-dangerous-char
279 (setq decoded (get (car tmp) 'decoded))
280 (setcar decoded (ew-embed-in-quoted-string (car decoded)))
281 (setq tmp (cdr tmp)))
283 (setq decoded (get (car ewords) 'decoded))
284 (setcar decoded (concat "\"" (car decoded)))
285 (setq decoded (get (car (last ewords)) 'decoded))
286 (setcar decoded (concat (car decoded) "\""))))))
288 (defun ew-decode-phrase-others (start end)
289 (ew-decode-generic-others
302 (defmacro ew-rotate (var val len)
303 (let ((tmp (make-symbol "tmp")))
304 `(let ((,tmp (nthcdr ,(- len 2) ,var)))
307 (setcdr (cdr ,tmp) ,var)
308 (setq ,var (cdr ,tmp))
310 (setq ,var (cons nil ,var)))
311 (setcar ,var ,val))))
313 (defun ew-decode-phrase (anchor start end)
316 'ew-decode-phrase-ewords
317 'ew-decode-phrase-others
318 (if ew-decode-quoted-encoded-word
319 '(ew:atom ew:qs-texts)
335 (let ((frag start) decoded str len idx char
338 (while (not (eq frag end))
339 (setq decoded (get frag 'decoded)
340 str (or (car-safe decoded) decoded)
344 (setq char (sref str idx))
345 (ew-rotate chars char 3)
346 (ew-rotate frags frag 3)
347 (when (and (not (memq char '(?\t ?\ )))
348 (equal (cdr chars) '(?\n ?\r))
349 (eq (get (setq tmp (nth 2 frags)) 'type) 'ew:qs-qpair)
350 (eq (symbol-name tmp) (get tmp 'decoded)))
351 (put tmp 'decoded "\r"))
352 (setq idx (char-next-index char idx)))
353 (setq frag (get frag 'next-frag)))
356 (while (not (eq frag start))
357 (setq frag (get frag 'prev-frag)
358 tmp (cons (get frag 'decoded) tmp))
359 (put frag 'decoded ""))
360 (put start 'decoded (ew-encode-crlf (apply 'ew-quote-concat tmp)))))
362 (defun ew-decode-comment-ewords (ewords)
363 (let* ((regexp "[()\\\\]")
370 (list (setq decoded (ew-decode-eword (symbol-name (car tmp))))))
372 has-dangerous-char (or has-dangerous-char
373 (string-match regexp decoded))))
374 (when has-dangerous-char
377 (setq decoded (get (car tmp) 'decoded))
378 (setcar decoded (ew-embed-in-comment (car decoded)))
379 (setq tmp (cdr tmp))))))
381 (defun ew-decode-comment-others (start end)
382 (ew-decode-generic-others
391 (defun ew-decode-comment (anchor start end)
394 'ew-decode-comment-ewords
395 'ew-decode-comment-others
404 (let ((frag start) decoded str len idx char
406 (while (not (eq frag end))
407 (setq decoded (get frag 'decoded)
408 str (or (car-safe decoded) decoded)
412 (setq char (sref str idx))
413 (ew-rotate chars char 3)
414 (ew-rotate frags frag 3)
415 (when (and (not (memq char '(?\t ?\ )))
416 (equal (cdr chars) '(?\n ?\r))
417 (eq (get (setq tmp (nth 2 frags)) 'type) 'ew:cm-qpair)
418 (eq (symbol-name tmp) (get tmp 'decoded)))
419 (put tmp 'decoded "\r"))
420 (setq idx (char-next-index char idx)))
421 (setq frag (get frag 'next-frag)))
424 (while (not (eq frag start))
425 (setq frag (get frag 'prev-frag)
426 tmp (cons (get frag 'decoded) tmp))
427 (put frag 'decoded ""))
428 (put start 'decoded (ew-encode-crlf (apply 'ew-quote-concat tmp)))))
432 (defun ew-embed-in-phrase (str)
433 (concat "\"" (ew-embed-in-quoted-string str) "\""))
435 (defun ew-embed-in-quoted-string (str)
436 (ew-quote-as-quoted-pair str '(?\\ ?\")))
438 (defun ew-embed-in-comment (str)
439 (ew-quote-as-quoted-pair str '(?\\ ?\( ?\))))
441 (defun ew-quote-as-quoted-pair (str specials)
442 (let ((i 0) (j 0) (l (length str)) result)
444 (when (member (aref str j) specials)
445 (setq result (ew-rcons*
452 (setq result (ew-rcons*
455 (apply 'concat (nreverse result))))
459 (defun ew-contain-non-ascii-p (str)
460 (not (eq (charsets-to-mime-charset (find-charset-string str)) 'us-ascii)))
464 (ew-decode-field "To" " =?US-ASCII?Q?phrase?= <akr@jaist.ac.jp>")
465 (ew-decode-field "To" " =?US-ASCII?Q?phrase?= < =?US-ASCII?Q?akr?= @jaist.ac.jp>")
466 (ew-decode-field "To" " =?US-ASCII?Q?akr?= @jaist.ac.jp")
467 (ew-decode-field "Subject" " =?ISO-2022-JP?B?GyRCJCIbKEI=?=")
468 (ew-decode-field "Content-Type" " text/vnd.latex-z(=?US-ASCII?Q?What=3F?=);charset=ISO-2022-JP")
470 (ew-decode-field "To" " =?US-ASCII?Q?A=22B=5CC?= <akr@jaist.ac.jp>")
471 (let ((ew-decode-quoted-encoded-word t))
472 (ew-decode-field "To" " \"=?US-ASCII?Q?A=22B=5CC?=\" <akr@jaist.ac.jp>"))
474 (ew-decode-field "To" " akr@jaist.ac.jp (=?US-ASCII?Q?=28A=29B=5C?=)")
476 (ew-decode-field "To" "\"A\\BC\e$B\\\"\\\\\e(B\" <foo@bar>")
477 (ew-decode-field "To" "\"A\\BC\" <foo@bar>")
478 (ew-decode-field "To" "\"\e\\$\\B\\$\\\"\e\\(\\B\" <foo@bar>")