11 (defvar ew-decode-field-cache-buf '())
12 (defvar ew-decode-field-cache-num 300)
14 (defun ew-decode-field (field-name field-body)
15 "Decode MIME RFC2047 encoded-words in a field.
16 FIELD-NAME is a name of the field such as \"To\", \"Subject\" etc. and
17 used to selecting syntax of body of the field and deciding first
18 column of body of the field.
19 FIELD-BODY is a body of the field.
21 If FIELD-BODY has multiple lines, each line is separated by CRLF as
22 pure network representation. Also if the result has multiple lines,
23 each line is separated by CRLF."
24 (let* ((key (ew-cons* field-name field-body
25 (ew-dynamic-options)))
26 (tmp (assoc key ew-decode-field-cache-buf)))
30 (setq tmp (nthcdr ew-decode-field-cache-num
31 ew-decode-field-cache-buf))
34 (setcdr (cdr tmp) ew-decode-field-cache-buf)
35 (setq ew-decode-field-cache-buf (cdr tmp))
37 (setq ew-decode-field-cache-buf
39 ew-decode-field-cache-buf)))
40 (setcar (car ew-decode-field-cache-buf) key)
41 (setcdr (car ew-decode-field-cache-buf)
42 (ew-decode-field-no-cache
43 field-name field-body))
44 (cdar ew-decode-field-cache-buf)))))
46 (defun ew-analyze-field-to-decode (field-name field-body)
47 "Analyze FIELD-BODY to decode."
48 (let ((tmp (assq (intern (downcase field-name)) ew-decode-field-syntax-alist))
52 (setq tmp ew-decode-field-default-syntax))
53 (setq anchor (funcall (car tmp) (1+ (length field-name)) field-body))
54 (put anchor 'field-name field-name)
55 (put anchor 'scanner (car tmp))
56 (put anchor 'marker (cdr tmp))
59 (defun ew-decode-analyzed-field (anchor)
60 "Decode analyzed field."
61 (or (get anchor 'decoded)
62 (let (tmp frag1 frag2 decode)
63 (when ew-decode-sticked-encoded-word
65 (get anchor 'next-frag)
67 (if (eq (get anchor 'scanner) 'ew-scan-unibyte-unstructured)
70 (when (get anchor 'marker)
71 (ew-mark (get anchor 'marker) anchor))
72 (setq frag1 (get anchor 'next-frag))
73 (while (not (eq frag1 anchor))
74 (setq decode (get frag1 'decode))
75 (setq frag2 (get frag1 'next-frag))
76 (while (and (not (eq frag2 anchor))
77 (eq decode (get frag2 'decode)))
78 (setq frag2 (get frag2 'next-frag)))
79 (funcall decode anchor frag1 frag2)
81 (setq frag1 (get anchor 'prev-frag)
83 (while (not (eq frag1 anchor))
84 (setq tmp (cons (or (get frag1 'decoded) (symbol-name frag1)) tmp)
85 frag1 (get frag1 'prev-frag)))
86 (put anchor 'decoded (apply 'concat tmp)))))
88 (defun ew-decode-field-no-cache (field-name field-body)
89 "No caching version of ew-decode-field."
90 (ew-decode-analyzed-field
91 (ew-analyze-field-to-decode field-name field-body)))
93 (defun ew-mark (tag anchor)
94 (let ((tlist (cons (list (symbol-value tag)) (ew-pair-list anchor))))
95 ;;(insert (format "%s" tlist))
100 (prog1 (car tlist) (setq tlist (cdr tlist)))))
102 (message "%s%s : %s" msg tok anchor)
103 (when (< 0 ew-parse-error-sit-for-seconds)
104 (sit-for ew-parse-error-sit-for-seconds))))))
106 (defsubst ew-decode-us-ascii (str)
107 (decode-mime-charset-string str ew-default-mime-charset 'LF))
109 (defun ew-decode-none (anchor frag end)
110 (while (not (eq frag end))
111 (put frag 'decoded (ew-decode-us-ascii (symbol-name frag)))
112 (setq frag (get frag 'next-frag))))
114 (defsubst ew-proper-eword-p (frag)
116 (or ew-ignore-75bytes-limit
117 (<= (length (symbol-name frag)) 75))
118 (or ew-ignore-76bytes-limit
119 (<= (get frag 'line-length) 76))
121 ((eq (get frag 'type) 'ew:cm-texts)
122 (ew-eword-p (symbol-name frag)))
123 ((eq (get frag 'type) 'ew:qs-texts)
124 (ew-eword-p (symbol-name frag)))
125 ((eq (get frag 'type) 'ew:atom)
127 (or ew-permit-sticked-comment
129 (not (ew-comment-frag-p (get frag 'prev-frag)))
130 (not (ew-comment-frag-p (get frag 'next-frag)))))
131 (or ew-permit-sticked-special
133 (or (ew-comment-frag-p (get frag 'prev-frag))
134 (not (ew-special-frag-p (get frag 'prev-frag))))
135 (or (ew-comment-frag-p (get frag 'next-frag))
136 (not (ew-special-frag-p (get frag 'next-frag))))))
137 (ew-eword-p (symbol-name frag))))
138 ((eq (get frag 'type) 'ew:us-texts)
140 (or ew-permit-sticked-special
141 (not (ew-special-frag-p (get frag 'prev-frag))))
142 (ew-eword-p (symbol-name frag))))
146 (defun ew-decode-generic (anchor start end
150 (let ((frag start) (start-others start) type f)
151 (while (not (eq frag end))
152 (setq type (get frag 'type))
154 ((and (memq type eword)
155 (ew-proper-eword-p frag))
156 (when (not (eq start-others frag))
157 (funcall decode-others start-others frag))
158 (let ((first frag) (ewords (list frag)))
160 (setq f (get frag 'next-frag))
161 (while (and (not (eq f end))
162 (memq (get f 'type) gap))
163 (setq f (get f 'next-frag)))
164 (and (not (eq f end))
165 (ew-proper-eword-p f)))
166 (setq frag (get frag 'next-frag))
167 (while (not (eq frag f))
168 (put frag 'decoded "")
169 (setq frag (get frag 'next-frag)))
170 (setq ewords (ew-rcons* ewords f)
172 (funcall decode-ewords
174 (setq start-others (get frag 'next-frag)))
178 (error "unexpected token: %s (%s)" frag type)))
179 (setq frag (get frag 'next-frag)))
180 (when (not (eq start-others end))
181 (funcall decode-others start-others end))))
183 (defun ew-decode-generic-others (start end puncts quotes targets)
184 (let ((frag start) (start-nonpunct start) type buff tmp)
185 (while (not (eq frag end))
186 (setq type (get frag 'type))
190 (setq buff (apply 'concat (nreverse buff))
191 tmp (ew-decode-us-ascii buff))
193 (while (not (eq start-nonpunct frag))
194 (put start-nonpunct 'decoded (symbol-name start-nonpunct))
195 (setq start-nonpunct (get start-nonpunct 'next-frag)))
197 (put start-nonpunct 'decoded tmp)
198 (setq start-nonpunct (get start-nonpunct 'next-frag))
199 (while (not (eq start-nonpunct frag))
200 (put start-nonpunct 'decoded "")
201 (setq start-nonpunct (get start-nonpunct 'next-frag)))))
203 (put frag 'decoded (symbol-name frag))
204 (setq start-nonpunct (get frag 'next-frag)))
206 (setq buff (ew-rcons* buff
207 (substring (symbol-name frag) 1))))
209 (setq buff (ew-rcons* buff
210 (symbol-name frag))))
211 (t (error "something wrong: unexpected token: %s (%s)" frag type)))
212 (setq frag (get frag 'next-frag)))
214 (setq buff (apply 'concat (nreverse buff))
215 tmp (ew-decode-us-ascii buff))
217 (while (not (eq start-nonpunct frag))
218 (put start-nonpunct 'decoded (symbol-name start-nonpunct))
219 (setq start-nonpunct (get start-nonpunct 'next-frag)))
221 (put start-nonpunct 'decoded tmp)
222 (setq start-nonpunct (get start-nonpunct 'next-frag))
223 (while (not (eq start-nonpunct frag))
224 (put start-nonpunct 'decoded "")
225 (setq start-nonpunct (get start-nonpunct 'next-frag))))))))
227 (defun ew-decode-unstructured-ewords (ewords)
231 (list (ew-decode-eword (symbol-name (car ewords)))))
232 (setq ewords (cdr ewords))))
234 (defun ew-decode-unstructured-others (start end)
236 (while (not (eq start end))
237 (put start 'decoded "")
238 (setq strs (ew-rcons* strs
240 start (get start 'next-frag)))
241 (put (get end 'prev-frag)
244 (apply 'concat (nreverse strs))))))
246 (defun ew-decode-unstructured (anchor start end)
249 'ew-decode-unstructured-ewords
250 'ew-decode-unstructured-others
257 (let ((frag end) tmp)
258 (while (not (eq frag start))
259 (setq frag (get frag 'prev-frag)
260 tmp (cons (get frag 'decoded) tmp))
261 (put frag 'decoded ""))
262 (put start 'decoded (ew-encode-crlf (apply 'ew-quote-concat tmp)))))
264 (defun ew-decode-phrase-ewords (ewords)
265 (let* ((qs (eq (get (car ewords) 'type) 'ew:qs-texts))
266 (regexp (if qs "[\\\\\\\"]" "[][()<>@,;:\\\\\\\".\000-\037]"))
273 (list (setq decoded (ew-decode-eword (symbol-name (car tmp))))))
275 has-dangerous-char (or has-dangerous-char
276 (string-match regexp decoded))))
277 (when has-dangerous-char
280 (setq decoded (get (car tmp) 'decoded))
281 (setcar decoded (ew-embed-in-quoted-string (car decoded)))
282 (setq tmp (cdr tmp)))
284 (setq decoded (get (car ewords) 'decoded))
285 (setcar decoded (concat "\"" (car decoded)))
286 (setq decoded (get (car (last ewords)) 'decoded))
287 (setcar decoded (concat (car decoded) "\""))))))
289 (defun ew-decode-phrase-others (start end)
290 (ew-decode-generic-others
303 (defmacro ew-rotate (var val len)
304 (let ((tmp (make-symbol "tmp")))
305 `(let ((,tmp (nthcdr ,(- len 2) ,var)))
308 (setcdr (cdr ,tmp) ,var)
309 (setq ,var (cdr ,tmp))
311 (setq ,var (cons nil ,var)))
312 (setcar ,var ,val))))
314 (defun ew-decode-phrase (anchor start end)
317 'ew-decode-phrase-ewords
318 'ew-decode-phrase-others
319 (if ew-decode-quoted-encoded-word
320 '(ew:atom ew:qs-texts)
336 (let ((frag start) decoded str len idx char
339 (while (not (eq frag end))
340 (setq decoded (get frag 'decoded)
341 str (or (car-safe decoded) decoded)
345 (setq char (sref str idx))
346 (ew-rotate chars char 3)
347 (ew-rotate frags frag 3)
348 (when (and (not (memq char '(?\t ?\ )))
349 (equal (cdr chars) '(?\n ?\r))
350 (eq (get (setq tmp (nth 2 frags)) 'type) 'ew:qs-qpair)
351 (eq (symbol-name tmp) (get tmp 'decoded)))
352 (put tmp 'decoded "\r"))
353 (setq idx (char-next-index char idx)))
354 (setq frag (get frag 'next-frag)))
357 (while (not (eq frag start))
358 (setq frag (get frag 'prev-frag)
359 tmp (cons (get frag 'decoded) tmp))
360 (put frag 'decoded ""))
361 (put start 'decoded (ew-encode-crlf (apply 'ew-quote-concat tmp)))))
363 (defun ew-decode-comment-ewords (ewords)
364 (let* ((regexp "[()\\\\]")
371 (list (setq decoded (ew-decode-eword (symbol-name (car tmp))))))
373 has-dangerous-char (or has-dangerous-char
374 (string-match regexp decoded))))
375 (when has-dangerous-char
378 (setq decoded (get (car tmp) 'decoded))
379 (setcar decoded (ew-embed-in-comment (car decoded)))
380 (setq tmp (cdr tmp))))))
382 (defun ew-decode-comment-others (start end)
383 (ew-decode-generic-others
392 (defun ew-decode-comment (anchor start end)
395 'ew-decode-comment-ewords
396 'ew-decode-comment-others
405 (let ((frag start) decoded str len idx char
407 (while (not (eq frag end))
408 (setq decoded (get frag 'decoded)
409 str (or (car-safe decoded) decoded)
413 (setq char (sref str idx))
414 (ew-rotate chars char 3)
415 (ew-rotate frags frag 3)
416 (when (and (not (memq char '(?\t ?\ )))
417 (equal (cdr chars) '(?\n ?\r))
418 (eq (get (setq tmp (nth 2 frags)) 'type) 'ew:cm-qpair)
419 (eq (symbol-name tmp) (get tmp 'decoded)))
420 (put tmp 'decoded "\r"))
421 (setq idx (char-next-index char idx)))
422 (setq frag (get frag 'next-frag)))
425 (while (not (eq frag start))
426 (setq frag (get frag 'prev-frag)
427 tmp (cons (get frag 'decoded) tmp))
428 (put frag 'decoded ""))
429 (put start 'decoded (ew-encode-crlf (apply 'ew-quote-concat tmp)))))
433 (defun ew-embed-in-phrase (str)
434 (concat "\"" (ew-embed-in-quoted-string str) "\""))
436 (defun ew-embed-in-quoted-string (str)
437 (ew-quote-as-quoted-pair str '(?\\ ?\")))
439 (defun ew-embed-in-comment (str)
440 (ew-quote-as-quoted-pair str '(?\\ ?\( ?\))))
442 (defun ew-quote-as-quoted-pair (str specials)
443 (let ((i 0) (j 0) (l (length str)) result)
445 (when (member (aref str j) specials)
446 (setq result (ew-rcons*
453 (setq result (ew-rcons*
456 (apply 'concat (nreverse result))))
460 (defun ew-contain-non-ascii-p (str)
461 (not (eq (charsets-to-mime-charset (find-charset-string str)) 'us-ascii)))
464 (defun ew-decode-field-test (field-name field-body)
467 (read-string "field-name:" (or (get-text-property (point) 'original-field-name)
471 (re-search-backward "^\\([!-9;-~]+\\):" nil t)
474 (read-string "field-body:" (or (get-text-property (point) 'original-field-body)
478 (re-search-backward "^\\([!-9;-~]+\\):" nil t)
480 (goto-char (match-end 0))
481 (looking-at ".*\\(\n[ \t].*\\)*")
482 (ew-lf-crlf-to-crlf (match-string 0)))))
484 (with-output-to-temp-buffer "*DOODLE*"
486 (set-buffer standard-output)
487 (let ((ew-decode-sticked-encoded-word nil)
488 (ew-decode-quoted-encoded-word nil)
489 (ew-ignore-75bytes-limit nil)
490 (ew-ignore-76bytes-limit nil)
491 (ew-permit-sticked-comment nil)
492 (ew-permit-sticked-special nil)
493 (ew-permit-null-encoded-text nil)
495 '(ew-ignore-76bytes-limit
496 ew-ignore-75bytes-limit
497 ew-permit-sticked-special
498 ew-permit-sticked-comment
499 ew-permit-null-encoded-text
500 ew-decode-sticked-encoded-word
501 ew-decode-quoted-encoded-word
504 (setq d1 (ew-decode-field-no-cache field-name field-body))
505 (insert field-name ":" field-body "\n"
506 (make-string 76 ?-) "\n"
507 field-name ":" d1 "\n")
509 (set (car options) t)
510 (insert (format "-- %s -> t\n" (car options)))
511 (setq d2 (ew-decode-field-no-cache field-name field-body))
512 (unless (equal d1 d2)
513 (insert field-name ":" d2 "\n")
515 (setq options (cdr options)))))))
521 (ew-decode-field "To" " =?US-ASCII?Q?phrase?= <akr@jaist.ac.jp>")
522 (ew-decode-field "To" " =?US-ASCII?Q?phrase?= < =?US-ASCII?Q?akr?= @jaist.ac.jp>")
523 (ew-decode-field "To" " =?US-ASCII?Q?akr?= @jaist.ac.jp")
524 (ew-decode-field "Subject" " =?ISO-2022-JP?B?GyRCJCIbKEI=?=")
525 (ew-decode-field "Content-Type" " text/vnd.latex-z(=?US-ASCII?Q?What=3F?=);charset=ISO-2022-JP")
527 (ew-decode-field "To" " =?US-ASCII?Q?A=22B=5CC?= <akr@jaist.ac.jp>")
528 (let ((ew-decode-quoted-encoded-word t))
529 (ew-decode-field "To" " \"=?US-ASCII?Q?A=22B=5CC?=\" <akr@jaist.ac.jp>"))
531 (ew-decode-field "To" " akr@jaist.ac.jp (=?US-ASCII?Q?=28A=29B=5C?=)")
533 (ew-decode-field "To" "\"A\\BC\e$B\\\"\\\\\e(B\" <foo@bar>")
534 (ew-decode-field "To" "\"A\\BC\" <foo@bar>")
535 (ew-decode-field "To" "\"\e\\$\\B\\$\\\"\e\\(\\B\" <foo@bar>")