2 ;;; A multilingual MIME message header encoder/decoder.
3 ;;; by Morioka Tomohiko (morioka@jaist.ac.jp)
5 ;;; original MIME decoder is
6 ;;; mime.el,v 1.5 1992/07/18 07:52:08 by Enami Tsugutomo
21 "$Id: tm-eword.el,v 5.12 1995/05/21 16:06:27 morioka Exp morioka $")
23 (defconst mime/tiny-mime-version (get-version-string mime/RCS-ID))
26 ;;; @ MIME encoded-word definition
29 (defconst mime/charset-regexp "[A-Za-z0-9!#$%&'*+---^_`{}|~]")
30 (defconst mime/encoded-text-regexp "[!->@-~]+")
32 (defconst mime/Base64-token-regexp "[A-Za-z0-9+/=]")
33 (defconst mime/Base64-encoded-text-regexp
35 mime/Base64-token-regexp
36 mime/Base64-token-regexp
37 mime/Base64-token-regexp
38 mime/Base64-token-regexp
40 (defconst mime/Base64-encoding-and-encoded-text-regexp
41 (concat "\\(B\\)\\?" mime/Base64-encoded-text-regexp))
43 (defconst mime/Quoted-Printable-hex-char-regexp "[0123456789ABCDEF]")
44 (defconst mime/Quoted-Printable-octet-regexp
46 mime/Quoted-Printable-hex-char-regexp
47 mime/Quoted-Printable-hex-char-regexp))
48 (defconst mime/Quoted-Printable-encoded-text-regexp
49 (concat "\\([^=?]\\|" mime/Quoted-Printable-octet-regexp "\\)+"))
50 (defconst mime/Quoted-Printable-encoding-and-encoded-text-regexp
51 (concat "\\(Q\\)\\?" mime/Quoted-Printable-encoded-text-regexp))
53 (defconst mime/encoded-word-regexp (concat (regexp-quote "=?")
61 mime/encoded-text-regexp
65 (defun mime/nth-string (s n)
67 (substring s (match-beginning n) (match-end n))
68 (buffer-substring (match-beginning n) (match-end n))))
70 (defun mime/encoded-word-charset (str)
71 (mime/nth-string str 1))
73 (defun mime/encoded-word-encoding (str)
74 (mime/nth-string str 2))
76 (defun mime/encoded-word-encoded-text (str)
77 (mime/nth-string str 3))
79 (defun mime/rest-of-string (str)
81 (substring str (match-end 0))
82 (buffer-substring (match-end 0)(point-max))
89 (defvar mime/no-encoding-header-fields '("X-Nsubject"))
91 (defvar mime/use-X-Nsubject nil)
94 ;;; @ compatible module among Mule, NEmacs and NEpoch
96 (cond ((boundp 'MULE) (require 'tm-mule))
97 ((boundp 'NEMACS)(require 'tm-nemacs))
98 (t (require 'tm-orig))
102 ;;; @ Application Interface
105 ;;; @@ MIME header decoders
109 (defun mime/decode-encoded-word (word)
110 (if (string-match mime/encoded-word-regexp word)
111 (let ((charset (upcase (mime/encoded-word-charset word)))
112 (encoding (mime/encoded-word-encoding word))
113 (text (mime/encoded-word-encoded-text word)))
114 (mime/decode-encoded-text charset encoding text))
117 (defun mime/decode-region (beg end)
121 (narrow-to-region beg end)
122 (goto-char (point-min))
123 (let (charset encoding text)
124 (while (re-search-forward mime/encoded-word-regexp nil t)
125 (insert (mime/decode-encoded-word
127 (buffer-substring (match-beginning 0) (match-end 0))
128 (delete-region (match-beginning 0) (match-end 0))
134 (defun mime/decode-message-header ()
138 (narrow-to-region (goto-char (point-min))
139 (progn (re-search-forward "^$" nil t) (point)))
140 (mime/prepare-decode-message-header)
141 (mime/decode-region (point-min) (point-max))
144 (defun mime/decode-string (str)
145 (let ((dest "")(ew nil)
147 (while (setq beg (string-match mime/encoded-word-regexp str))
149 (if (not (and (eq ew t) (string= (substring str 0 beg) " ")))
150 (setq dest (concat dest (substring str 0 beg)
154 (setq end (match-end 0))
155 (setq dest (concat dest (mime/decode-encoded-word (substring str beg end))
157 (setq str (substring str end))
163 ;;; @@ MIME header encoders
166 (defun mime/encode-string (string encoding &optional mode)
167 (cond ((equal encoding "B")
168 (base64-encode-string string)
170 ((equal encoding "Q")
171 (quoted-printable-encode-string string (or mode 'phrase))
175 (defun mime/encode-field (str)
176 (setq str (rfc822/unfolding-string str))
177 (let ((ret (message/divide-field str))
178 field-name field-body)
179 (setq field-name (car ret))
180 (setq field-body (nth 1 ret))
181 (concat field-name " "
182 (cond ((string= field-body "") "")
183 ((or (string-match "^Reply-To:$" field-name)
184 (string-match "^From:$" field-name)
185 (string-match "^Sender:$" field-name)
186 (string-match "^Resent-Reply-To:$" field-name)
187 (string-match "^Resent-From:$" field-name)
188 (string-match "^Resent-Sender:$" field-name)
189 (string-match "^To:$" field-name)
190 (string-match "^Resent-To:$" field-name)
191 (string-match "^cc:$" field-name)
192 (string-match "^Resent-cc:$" field-name)
193 (string-match "^bcc:$" field-name)
194 (string-match "^Resent-bcc:$" field-name)
196 (mime/encode-address-list
197 (+ (length field-name) 1) field-body)
201 (let ((r mime/no-encoding-header-fields) fn)
204 (if (string-match (concat "^" fn ":$") field-name)
205 (throw 'tag field-body)
209 (nth 1 (mime/encode-header-string
210 (+ (length field-name) 1) field-body))
215 (defun mime/encode-message-header ()
219 (narrow-to-region (goto-char (point-min))
222 (concat "^" (regexp-quote mail-header-separator) "$")
226 (goto-char (point-min))
228 (while (re-search-forward "^.+:.*\\(\n\\s +.*\\)*" nil t)
229 (setq beg (match-beginning 0))
230 (setq end (match-end 0))
231 (setq field (buffer-substring beg end))
232 (insert (mime/encode-field
234 (buffer-substring beg end)
235 (delete-region beg end)
238 (if mime/use-X-Nsubject
240 (goto-char (point-min))
241 (if (re-search-forward "^Subject:.*\\(\n\\s +.*\\)*" nil t)
242 (let ((str (buffer-substring (match-beginning 0)(match-end 0))))
243 (if (string-match mime/encoded-word-regexp str)
246 (nth 1 (message/divide-field
248 (rfc822/unfolding-string str))
255 ;;; @ functions for message header encoding
258 (defun mime/encode-and-split-string (n string charset encoding)
260 (len (length string))
261 (js (mime/convert-string-from-emacs string charset))
262 (cesl (+ (length charset) (length encoding) 6 ))
264 (setq ewl (mime/encoded-word-length js encoding))
267 (setq m (+ n ewl cesl))
270 (while (and (< i len)
271 (setq js (mime/convert-string-from-emacs
272 (substring string 0 i) charset))
273 (setq m (+ n (mime/encoded-word-length js encoding) cesl))
276 (setq i (+ i (char-bytes (elt string i))))
278 (setq js (mime/convert-string-from-emacs
279 (substring string 0 j) charset))
280 (setq m (+ n (mime/encoded-word-length js encoding) cesl))
281 (setq rest (substring string j))
286 (list m (concat "=?" charset "?" encoding "?"
287 (mime/encode-string js encoding)
292 (defun mime/encode-header-word (n string charset encoding)
293 (let (dest str ret m)
294 (if (null (setq ret (mime/encode-and-split-string n string charset encoding)))
297 (setq dest (nth 1 ret))
299 (setq str (nth 2 ret))
300 (while (and (stringp str)
301 (setq ret (mime/encode-and-split-string 1 str charset encoding))
303 (setq dest (concat dest "\n " (nth 1 ret)))
305 (setq str (nth 2 ret))
311 (defun mime/encode-header-string (n string &optional mode)
312 (if (string= string "")
314 (let ((ssl (mime/separate-string-for-encoder string))
315 i len cell et w ew (dest "") b l)
316 (setq len (length ssl))
317 (setq cell (nth 0 ssl))
319 ;; string-width crashes when the argument is nil,
320 ;; so replace the argument
321 ;; (original modification by Kenji Rikitake 9-JAN-1995)
322 (setq w (or (cdr cell) ""))
325 (if (> (+ n (string-width w)) 76)
327 (setq dest (concat dest "\n "))
331 (setq dest (concat dest w))
332 (setq b (+ b (string-width w)))
335 (setq ew (mime/encode-header-word n (cdr cell) (car et) (cdr et)))
336 (setq dest (nth 1 ew))
341 (setq cell (nth i ssl))
344 (cond ((string-match "^[ \t]*$" w)
345 (setq b (+ b (string-width (cdr cell))))
346 (setq dest (concat dest (cdr cell)))
349 (if (> (+ b (string-width w)) 76)
351 (if (eq (elt dest (- (length dest) 1)) 32)
352 (setq dest (substring dest 0 (- (length dest) 1)))
354 (setq dest (concat dest "\n " w))
355 (setq b (+ (length w) 1))
357 (setq l (length dest))
359 (eq (elt dest (- l 2)) ?\?)
360 (eq (elt dest (- l 1)) ?=)
363 (setq dest (concat dest " "))
366 (setq dest (concat dest w))
367 (setq b (+ b (string-width w)))
370 (if (not (eq (elt dest (- (length dest) 1)) 32))
372 (setq dest (concat dest " "))
376 (mime/encode-header-word b (cdr cell) (car et) (cdr et)))
378 (if (string-match "^\n" (nth 1 ew))
379 (setq dest (concat (substring dest 0 (- (length dest) 1))
381 (setq dest (concat dest (nth 1 ew)))
389 (defun mime/encode-address-list (n str)
390 (let* ((ret (message/parse-addresses str))
391 (r ret) cell en-ret j cl (dest "") s)
394 (cond ((string= (nth 1 cell) "<")
395 (setq en-ret (mime/encode-header-string n (nth 0 cell) 'phrase))
396 (setq dest (concat dest (nth 1 en-ret)))
397 (setq n (car en-ret))
400 (mime/encode-header-string
401 n (concat (nth 1 cell)(nth 2 cell)(nth 3 cell) ", ")))
402 (setq en-ret (mime/encode-header-string
403 n (concat (nth 1 cell)(nth 2 cell)(nth 3 cell))))
405 (if (and (eq (elt (nth 1 en-ret) 0) ?\n)
406 (eq (elt dest (- (length dest) 1)) 32))
407 (setq dest (substring dest 0 (- (length dest) 1)))
409 (setq dest (concat dest (nth 1 en-ret)))
410 (setq n (car en-ret))
413 (setq en-ret (mime/encode-header-string n (nth 0 cell)))
414 (setq dest (concat dest (nth 1 en-ret)))
415 (setq n (car en-ret))
417 (setq en-ret (mime/encode-header-string (+ n 2) (nth 2 cell)
419 (if (eq (elt (nth 1 en-ret) 0) ?\n)
421 (setq dest (concat dest "\n ("))
422 (setq en-ret (mime/encode-header-string 2 (nth 2 cell)
426 (setq dest (concat dest " ("))
428 (setq dest (concat dest (nth 1 en-ret)))
429 (setq n (car en-ret))
432 (mime/encode-header-string n (concat (nth 3 cell) ", "))
434 (setq en-ret (mime/encode-header-string n (nth 3 cell)))
436 (setq dest (concat dest (nth 1 en-ret)))
437 (setq n (car en-ret))
442 (mime/encode-header-string n (concat (nth 0 cell) ", "))
444 (setq en-ret (mime/encode-header-string n (nth 0 cell)))
446 (setq dest (concat dest (nth 1 en-ret)))
447 (setq n (car en-ret))
454 ;;; @ utility for encoder
457 ;;; @@ encoded-word length
460 (defun mime/encoded-word-length (string encoding)
461 (cond ((equal encoding "B") (base64-encoded-length string))
462 ((equal encoding "Q") (quoted-printable-encoded-length string))
465 ;;; @@ separate by character set
469 (defconst LC-space 2)
471 ;; by mol. 1993/10/16
472 (defun mime/char-type (chr)
473 (if (or (= chr 32)(= chr ?\t))
478 (defun mime/separate-string-by-chartype (string)
479 (let ((len (length string))
484 (setq chr (elt string 0))
485 (setq pcs (mime/char-type chr))
486 (setq i (char-bytes chr))
487 (setq ds (substring string 0 i))
489 (setq chr (elt string i))
490 (setq cs (mime/char-type chr))
491 (setq j (+ i (char-bytes chr)))
492 (setq s (substring string i j))
495 (setq ds (concat ds s))
496 (progn (setq dest (append dest (list (cons pcs ds))))
501 (if (not (string= ds ""))
502 (setq dest (append dest (list (cons pcs ds)))))
506 (defun mime/separate-string-by-charset (str)
507 (let ((rl (mime/separate-string-by-chartype str))
508 (i 1) len (pcell nil) cell ncell dpcell (dest nil) LC)
509 (setq len (length rl))
510 (setq dpcell (list (nth 0 rl)))
511 (setq cell (nth 1 rl))
512 (setq ncell (nth 2 rl))
514 (setq LC (car (car dpcell)))
515 (cond ((and (not (eq LC lc-ascii))
516 (eq (car cell) LC-space)
517 (not (eq (car ncell) lc-ascii)))
518 (setq dpcell (list (cons LC
519 (concat (cdr (car dpcell)) (cdr cell))
522 ((and (not (eq LC lc-ascii))
524 (setq dpcell (list (cons LC
525 (concat (cdr (car dpcell)) (cdr cell))
528 ((and (eq LC lc-ascii)
529 (member (car cell) mime/latin-lc-list))
530 (setq dpcell (list (cons (car cell)
531 (concat (cdr (car dpcell)) (cdr cell))
534 ((and (member LC mime/latin-lc-list)
535 (eq (car cell) lc-ascii))
536 (setq dpcell (list (cons LC
537 (concat (cdr (car dpcell)) (cdr cell))
541 (setq dest (append dest dpcell))
542 (setq dpcell (list cell))
546 (setq ncell (nth (+ i 1) rl))
548 (setq dest (append dest dpcell))
551 (defun mime/separate-string-for-encoder (string)
553 (if (string-match "[ \t]+$" string)
555 (setq lastspace (substring string
558 (setq string (substring string 0 (match-beginning 0)))
560 (let ((rl (mime/separate-string-by-charset string))
561 (i 0) len cell0 cell1 cell2 (dest nil))
562 (setq len (length rl))
563 (setq cell0 (nth 0 rl))
564 (setq cell1 (nth 1 rl))
565 (setq cell2 (nth 2 rl))
567 (cond ((and (not (eq (car cell0) lc-ascii))
568 (eq (car cell1) LC-space)
569 (not (eq (car cell2) lc-ascii))
574 (cdr (assoc (car cell0)
575 mime/lc-charset-and-encoding-alist))
576 (concat (cdr cell0) (cdr cell1))
579 (setq cell0 (nth i rl))
580 (setq cell1 (nth (+ i 1) rl))
581 (setq cell2 (nth (+ i 2) rl))
587 (cdr (assoc (car cell0)
588 mime/lc-charset-and-encoding-alist))
593 (setq cell2 (nth (+ i 2) rl))
598 (list (cons nil lastspace))))
604 ;;; basic functions for MIME header decoder
607 ;;; @ utility for decoder
610 (defun mime/unfolding ()
611 (goto-char (point-min))
613 (while (re-search-forward message/field-regexp nil t)
614 (setq beg (match-beginning 0))
615 (setq end (match-end 0))
616 (setq field (buffer-substring beg end))
617 (if (string-match mime/encoded-word-regexp field)
621 (narrow-to-region (goto-char beg) end)
622 (while (re-search-forward "\n[ \t]+" nil t)
630 (defun mime/prepare-decode-message-header ()
632 (goto-char (point-min))
633 (while (re-search-forward
634 (concat (regexp-quote "?=")
638 (replace-match "?==?")
642 (setq mime-charset-list
643 (list (list "US-ASCII" lc-ascii)
644 (list "ISO-8859-1" lc-ascii lc-ltn1)
645 (list "ISO-8859-2" lc-ascii lc-ltn2)
646 (list "ISO-8859-3" lc-ascii lc-ltn3)
647 (list "ISO-8859-4" lc-ascii lc-ltn4)
648 (list "ISO-8859-5" lc-ascii lc-crl)
649 (list "ISO-8859-7" lc-ascii lc-grk)
650 (list "ISO-8859-9" lc-ascii lc-ltn5)
651 (list "ISO-2022-JP" lc-ascii lc-jp)
652 (list "ISO-2022-KR" lc-ascii lc-kr)
653 (list "ISO-2022-JP-2" lc-ascii lc-ltn1 lc-grk
654 lc-jp lc-cn lc-kr lc-jp2)
655 (list "ISO-2022-INT-1" lc-ascii lc-ltn1 lc-grk
656 lc-jp lc-cn lc-kr lc-jp2 lc-cns1 lc-cns2)
659 (setq eword-field-body-separator-regexp " / ")
661 (if (string-match eword-field-body-separator-regexp str)
662 (list (substring str 0 (match-beginning 0))
663 (substring str (match-beginning 0)(match-end 0))
664 (substring str (match-end 0))
667 (defun find-lc-set-string (str)
668 (let (dest (len (length str))(i 0) chr lc)
670 (setq chr (elt str i))
671 (setq lc (get-lc chr))
672 (if (not (memq lc dest))
673 (setq dest (cons lc dest))
675 (setq i (+ i (char-bytes chr)))
679 (defun mime/lc-set-to-charset (lc-set)
680 (let ((rest mime-charset-list) cell)
683 (setq cell (car rest))
684 (if (subsetp lc-set (cdr cell))
685 (throw 'tag (car cell))
687 (setq rest (cdr rest))
690 (run-hooks 'mime/tiny-mime-load-hook)
697 ;;; mode: outline-minor
698 ;;; outline-regexp: ";;; @+\\|(......"