1 ;;; mime-parse.el --- MIME message parser
3 ;; Copyright (C) 1994,95,96,97,98,99,2001 Free Software Foundation, Inc.
5 ;; Author: MORIOKA Tomohiko <morioka@jaist.ac.jp>
6 ;; Shuhei KOBAYASHI <shuhei@aqua.ocn.ne.jp>
7 ;; Keywords: parse, MIME, multimedia, mail, news
9 ;; This file is part of FLIM (Faithful Library about Internet Message).
11 ;; This program is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
16 ;; This program is distributed in the hope that it will be useful, but
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;; General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
32 (autoload 'mime-entity-body-buffer "mime")
33 (autoload 'mime-entity-body-start-point "mime")
34 (autoload 'mime-entity-body-end-point "mime")
37 ;;; @ lexical analyzer
40 (defcustom mime-lexical-analyzer
41 '(std11-analyze-quoted-string
42 std11-analyze-domain-literal
47 "*List of functions to return result of lexical analyze.
48 Each function must have two arguments: STRING and START.
49 STRING is the target string to be analyzed.
50 START is start position of STRING to analyze.
52 Previous function is preferred to next function. If a function
53 returns nil, next function is used. Otherwise the return value will
56 :type '(repeat function))
58 (defun mime-analyze-tspecial (string start)
59 (if (and (> (length string) start)
60 (memq (aref string start) mime-tspecial-char-list))
61 (cons (cons 'tspecials (substring string start (1+ start)))
64 (defun mime-analyze-token (string start)
65 (if (and (string-match mime-token-regexp string start)
66 (= (match-beginning 0) start))
67 (let ((end (match-end 0)))
68 (cons (cons 'mime-token (substring string start end))
71 (defun mime-lexical-analyze (string)
72 "Analyze STRING as lexical tokens of MIME."
73 (let ((ret (std11-lexical-analyze string mime-lexical-analyzer))
75 ;; skip leading linear-white-space.
76 (while (memq (car (car ret)) '(spaces comment))
80 ;; remove linear-white-space.
82 (if (memq (car (car tail)) '(spaces comment))
84 (setcdr prev (cdr tail))
85 (setq tail (cdr tail)))
91 ;;; @ parameter value decoder
94 (defun mime-decode-parameter-value (text charset language)
96 (set-buffer-multibyte nil)
98 (goto-char (point-min))
99 (while (re-search-forward "%[0-9A-Fa-f][0-9A-Fa-f]" nil t)
100 (insert (prog1 (string-to-int
101 (buffer-substring (point)(- (point) 2))
103 (delete-region (point)(- (point) 3)))))
104 (setq text (buffer-string))
106 ;; I believe that `decode-mime-charset-string' of mcs-e20.el should
107 ;; be independent of the value of `enable-multibyte-characters'.
109 (set-buffer-multibyte t)
110 (setq text (decode-mime-charset-string text charset)))
112 (put-text-property 0 (length text) 'mime-language language text))
115 (defun mime-decode-parameter-encode-segment (segment)
117 (set-buffer-multibyte nil)
119 (goto-char (point-min))
121 (when (looking-at (eval-when-compile
122 (concat mime-attribute-char-regexp "+")))
123 (goto-char (match-end 0)))
125 (insert (prog1 (format "%%%02X" (char-int (char-after)))
126 (delete-region (point)(1+ (point))))))
129 (defun mime-decode-parameters (params)
130 "Decode PARAMS as a property list of MIME parameter values.
131 Return value is an association list of MIME parameter values.
132 If parameter continuation is used, segments of values are concatenated.
133 If parameters contain charset information, values are decoded.
134 If parameters contain language information, it is set to `mime-language'
135 property of the decoded-value."
136 ;; (unless (zerop (% (length params) 2)) ...)
137 (let ((len (/ (length params) 2))
140 (if (and (string-match (eval-when-compile
141 (concat "^\\(" mime-attribute-char-regexp "+\\)"
142 "\\(\\*[0-9]+\\)?" ; continuation
143 "\\(\\*\\)?$")) ; charset/language
145 (> (match-end 0) (match-end 1)))
146 ;; parameter value extensions are used.
147 (let* ((attribute (downcase
148 (substring (car params) 0 (match-end 1))))
149 (section (if (match-beginning 2)
151 (substring (car params)
152 (1+ (match-beginning 2))
155 ;; EPARAM := (ATTRIBUTE VALUES CHARSET LANGUAGE)
156 ;; VALUES := [1*VALUE] ; vector of LEN elements.
157 (eparam (assoc attribute eparams))
159 (setq params (cdr params))
162 (setq eparam (cdr eparam))
163 (setq eparam (list (make-vector len nil) nil nil)
164 eparams (cons (cons attribute eparam) eparams)))
165 ;; if parameter name ends with "*", it is an extended-parameter.
166 (if (match-beginning 3)
168 ;; extended-initial-parameter.
169 (if (string-match (eval-when-compile
171 "^\\(" mime-charset-regexp "\\)?"
172 "'\\(" mime-language-regexp "\\)?"
173 "'\\(\\(" mime-attribute-char-regexp
174 "\\|%[0-9A-Fa-f][0-9A-Fa-f]\\)+\\)$"))
179 (substring value (match-beginning 3)))
180 (setq eparam (cdr eparam))
182 (when (match-beginning 1)
185 (substring value 0 (match-end 1)))))
186 (setq eparam (cdr eparam))
188 (when (match-beginning 2)
195 ;; invalid parameter value.
197 (mime-decode-parameter-encode-segment value)))
198 ;; extended-other-parameter.
199 (if (string-match (eval-when-compile
201 "^\\(\\(" mime-attribute-char-regexp
202 "\\|%[0-9A-Fa-f][0-9A-Fa-f]\\)+\\)$"))
204 (aset (car eparam) section value)
205 ;; invalid parameter value.
206 (aset (car eparam) section
207 (mime-decode-parameter-encode-segment value))))
208 ;; regular-parameter. parameter continuation only.
209 (aset (car eparam) section
210 (mime-decode-parameter-encode-segment value))))
211 ;; parameter value extensions are not used,
212 ;; or invalid parameter name (in RFC 2231, although valid in RFC 2045).
213 (setq dest (cons (cons (downcase (car params))
214 ;;; ;; decode (invalid!) encoded-words.
215 ;;; (eword-decode-string
216 ;;; (decode-mime-charset-string
217 ;;; (car (cdr params))
218 ;;; default-mime-charset)
222 params (cdr params)))
223 (setq params (cdr params)))
224 ;; concat and decode parameters.
226 (setq dest (cons (cons (car (car eparams)) ; attribute
227 (mime-decode-parameter-value
228 (mapconcat (function identity)
229 (nth 1 (car eparams)) ; values
231 (nth 2 (car eparams)) ; charset
232 (nth 3 (car eparams)) ; language
235 eparams (cdr eparams)))
238 ;;; for compatibility with flim-1_13-rfc2231 API.
239 (defalias 'mime-parse-parameters-from-list 'mime-decode-parameters)
240 (make-obsolete 'mime-parse-parameters-from-list 'mime-decode-parameters)
243 ;;; @ parameter value encoder
246 (defun mime-divide-extended-parameter (name value)
247 "Divide MIME parameter value \"NAME=VALUE\" into segments.
248 Each of \" NAME*n*=SEGMENT_n\;\" will be no more than 78 characters.
249 Return value is a list of string when division is performed, otherwise
250 return value is just a string."
251 ;; `limit' must be more than (length "CHARSET'LANGUAGE'%XX").
253 ;; Since MIME spec does not limit either length of CHARSET or length
254 ;; of LANGUAGE, we choose 30 for minimum `limit' based on the longest
255 ;; name of charset that Emacs supports ("ISO-2022-CN-EXT"; 15 chars).
257 ;; Anyway, if `name' is too long, we will ignore 78 chars limit.
258 (let ((limit (max (- 78 4 (length name)) 30))); (length " *=;") => 4
259 (if (> limit (length value))
263 (setq limit (max (- limit 2) 30)) ; (length "*n") => 2
265 (set-buffer-multibyte nil)
267 (while (> (point-max) limit)
268 (goto-char (- limit 3)) ; (length "%XX") => 3
270 ((eq (char-after) ?%)
274 (eq (char-after) ?%)))
277 (eq (char-after) ?%)))
280 (setq result (cons (prog1 (buffer-substring (point-min)(point))
281 (delete-region (point-min)(point)))
284 (when (zerop (% count 10))
285 (setq limit (max (1- limit) 30))))
287 (cons (buffer-substring (point-min)(point-max))
290 (defun mime-encode-extended-parameter (name value)
291 "Encode MIME parameter value \"NAME=VALUE\" as an extended-parameter.
292 If encoding is unnecessary, return nil.
293 If division is performed, return value is a list of string, otherwise
294 return value is just a string."
295 (let ((language (get-text-property 0 'mime-language value)))
297 (string-match "[^ -~]" value)) ; Nonmatching printable US-ASCII.
299 (let ((charset (find-mime-charset-by-charsets
300 (find-charset-string value))))
301 ;; I believe that `encode-mime-charset-string' of mcs-e20.el should
302 ;; be independent of the value of `enable-multibyte-characters'.
304 (set-buffer-multibyte t)
305 (setq value (encode-mime-charset-string value charset))
306 (set-buffer-multibyte nil)
308 (goto-char (point-min))
309 (insert (symbol-name charset)
311 (if language (symbol-name language) "")
313 (while (re-search-forward mime-non-attribute-char-regexp nil t)
314 (insert (prog1 (format "%%%02X" (char-int
315 (char-after (1- (point)))))
316 (delete-region (1- (point))(point)))))
317 (mime-divide-extended-parameter name (buffer-string)))))))
319 (defun mime-divide-regular-parameter (name value)
320 "Divide MIME parameter value \"NAME=VALUE\" into segments.
321 Each of \" NAME*n=SEGMENT_n\;\" will be no more than 78 characters.
322 Return value is a list of string when division is performed, otherwise
323 just a string is returned."
324 (let ((limit (max (- (eval-when-compile (- 78 (length " =\"\";")))
327 (if (> limit (length value))
328 (concat "\"" value "\"")
331 (setq limit (max (- limit 2) 30)) ; (length "*n") => 2
332 (setq limit (1- limit)) ; XXX
334 (set-buffer-multibyte nil)
336 (while (> (point-max) limit)
337 (goto-char (point-min))
338 (while (< (point) limit)
339 (when (eq (char-after) ?\\)
342 (setq result (cons (concat "\""
343 (prog1 (buffer-substring
350 (when (zerop (% count 10))
351 (setq limit (max (1- limit) 30))))
354 (buffer-substring (point-min)(point-max))
358 (defun mime-encode-regular-parameter (name value)
359 "Encode MIME parameter value \"NAME=VALUE\" as a regular-parameter.
360 If division is performed, return value is a list of string, otherwise
361 return value is just a string."
363 (set-buffer-multibyte nil)
365 (goto-char (point-min))
367 (when (memq (char-after) '(?\\ ?\"))
370 (mime-divide-regular-parameter name (buffer-string))))
372 (defun mime-encode-parameters (params)
373 "Encode PARAMS plist with MIME Parameter-Value Extensions.
374 Return value is an alist of MIME parameter values."
375 (let (name value encoded result)
377 (setq name (car params)
378 value (car (cdr params))
379 params (cdr (cdr params)))
381 ;; first two clauses are for backward compatibility,
382 ;; especially for "ftp.in" in the distribution.
383 ((not (string-match (eval-when-compile
384 (concat "^\\(" mime-attribute-char-regexp "+\\)"
385 "\\(\\*[0-9]+\\)?" ; continuation
386 "\\(\\*\\)?$")) ; charset/language
388 ;; invalid parameter name.
389 ;; XXX: Should we signal an error?
391 ((> (match-end 0) (match-end 1))
392 ;; this parameter value is already encoded.
393 (setq result (cons (cons name
394 (if (match-beginning 3)
395 ;; extended-parameter
398 (std11-wrap-as-quoted-string value)))
400 ((setq encoded (mime-encode-extended-parameter name value))
401 ;; extended-parameter
402 (if (stringp encoded)
403 (setq result (cons (cons (concat name "*") encoded) result))
407 (setq result (cons (cons (concat name
408 "*" (int-to-string section)
413 encoded(cdr encoded))))))
416 (setq encoded (mime-encode-regular-parameter name value))
417 (if (stringp encoded)
418 (setq result (cons (cons name encoded) result))
422 (setq result (cons (cons (concat name
423 "*" (int-to-string section))
427 encoded (cdr encoded))))))))
434 (defun mime-parse-parameters (tokens)
435 "Parse TOKENS as MIME parameter values.
436 Return a property list, which is a list of the form
437 \(PARAMETER-NAME1 VALUE1 PARAMETER-NAME2 VALUE2...)."
438 (let (params attribute)
440 (eq (car (car tokens)) 'tspecials)
441 (string= (cdr (car tokens)) ";")
442 (setq tokens (cdr tokens))
443 (eq (car (car tokens)) 'mime-token)
445 (setq attribute (cdr (car tokens)))
446 (setq tokens (cdr tokens)))
447 (eq (car (car tokens)) 'tspecials)
448 (string= (cdr (car tokens)) "=")
449 (setq tokens (cdr tokens))
450 (memq (car (car tokens)) '(mime-token quoted-string)))
451 (setq params (cons (if (eq (car (car tokens)) 'quoted-string)
452 (std11-strip-quoted-pair (cdr (car tokens)))
454 (cons attribute params))
455 tokens (cdr tokens)))
463 (defun mime-parse-Content-Type (field-body)
464 "Parse FIELD-BODY as a Content-Type field.
465 FIELD-BODY is a string.
466 Return value is a mime-content-type object.
467 If FIELD-BODY is not a valid Content-Type field, return nil."
468 (let ((tokens (mime-lexical-analyze field-body)))
469 (when (eq (car (car tokens)) 'mime-token)
470 (let ((primary-type (cdr (car tokens))))
471 (setq tokens (cdr tokens))
472 (when (and (eq (car (car tokens)) 'tspecials)
473 (string= (cdr (car tokens)) "/")
474 (setq tokens (cdr tokens))
475 (eq (car (car tokens)) 'mime-token))
476 (make-mime-content-type
477 (intern (downcase primary-type))
478 (intern (downcase (cdr (car tokens))))
479 (mime-decode-parameters
480 (mime-parse-parameters (cdr tokens)))))))))
483 (defun mime-read-Content-Type ()
484 "Parse field-body of Content-Type field of current buffer.
485 Return value is a mime-content-type object.
486 If Content-Type field is not found, return nil."
487 (let ((field-body (std11-field-body "Content-Type")))
489 (mime-parse-Content-Type field-body)
493 ;;; @@ Content-Disposition
497 (defun mime-parse-Content-Disposition (field-body)
498 "Parse FIELD-BODY as a Content-Disposition field.
499 FIELD-BODY is a string.
500 Return value is a mime-content-disposition object.
501 If FIELD-BODY is not a valid Content-Disposition field, return nil."
502 (let ((tokens (mime-lexical-analyze field-body)))
503 (when (eq (car (car tokens)) 'mime-token)
504 (make-mime-content-disposition
505 (intern (downcase (cdr (car tokens))))
506 (mime-decode-parameters
507 (mime-parse-parameters (cdr tokens)))))))
510 (defun mime-read-Content-Disposition ()
511 "Parse field-body of Content-Disposition field of current buffer.
512 Return value is a mime-content-disposition object.
513 If Content-Disposition field is not found, return nil."
514 (let ((field-body (std11-field-body "Content-Disposition")))
516 (mime-parse-Content-Disposition field-body)
520 ;;; @@ Content-Transfer-Encoding
524 (defun mime-parse-Content-Transfer-Encoding (field-body)
525 "Parse FIELD-BODY as a Content-Transfer-Encoding field.
526 FIELD-BODY is a string.
527 Return value is a string.
528 If FIELD-BODY is not a valid Content-Transfer-Encoding field, return nil."
529 (let ((tokens (mime-lexical-analyze field-body)))
530 (when (eq (car (car tokens)) 'mime-token)
531 (downcase (cdr (car tokens))))))
534 (defun mime-read-Content-Transfer-Encoding ()
535 "Parse field-body of Content-Transfer-Encoding field of current buffer.
536 Return value is a string.
537 If Content-Transfer-Encoding field is not found, return nil."
538 (let ((field-body (std11-field-body "Content-Transfer-Encoding")))
540 (mime-parse-Content-Transfer-Encoding field-body)
544 ;;; @@ Content-ID / Message-ID
548 (defun mime-parse-msg-id (tokens)
549 "Parse TOKENS as msg-id of Content-ID or Message-ID field."
550 (car (std11-parse-msg-id tokens)))
553 (defun mime-uri-parse-cid (string)
554 "Parse STRING as cid URI."
555 (mime-parse-msg-id (cons '(specials . "<")
557 (cdr (cdr (std11-lexical-analyze string)))
558 '((specials . ">"))))))
564 ;; (defun mime-parse-multipart (entity)
565 ;; (with-current-buffer (mime-entity-body-buffer entity)
566 ;; (let* ((representation-type
567 ;; (mime-entity-representation-type-internal entity))
568 ;; (content-type (mime-entity-content-type-internal entity))
571 ;; (mime-content-type-parameter content-type "boundary")))
572 ;; (delimiter (concat "\n" (regexp-quote dash-boundary)))
573 ;; (close-delimiter (concat delimiter "--[ \t]*$"))
574 ;; (rsep (concat delimiter "[ \t]*\n"))
576 ;; (if (eq (mime-content-type-subtype content-type) 'digest)
577 ;; (make-mime-content-type 'message 'rfc822)
578 ;; (make-mime-content-type 'text 'plain)
580 ;; (body-start (mime-entity-body-start-point entity))
581 ;; (body-end (mime-entity-body-end-point entity)))
583 ;; (goto-char body-end)
584 ;; (narrow-to-region body-start
585 ;; (if (re-search-backward close-delimiter nil t)
586 ;; (match-beginning 0)
588 ;; (goto-char body-start)
589 ;; (if (re-search-forward
590 ;; (concat "^" (regexp-quote dash-boundary) "[ \t]*\n")
592 ;; (let ((cb (match-end 0))
593 ;; ce ncb ret children
594 ;; (node-id (mime-entity-node-id-internal entity))
596 ;; (while (re-search-forward rsep nil t)
597 ;; (setq ce (match-beginning 0))
598 ;; (setq ncb (match-end 0))
600 ;; (narrow-to-region cb ce)
601 ;; (setq ret (mime-parse-message representation-type dc-ctl
602 ;; entity (cons i node-id)))
604 ;; (setq children (cons ret children))
605 ;; (goto-char (setq cb ncb))
608 ;; (setq ce (point-max))
610 ;; (narrow-to-region cb ce)
611 ;; (setq ret (mime-parse-message representation-type dc-ctl
612 ;; entity (cons i node-id)))
614 ;; (setq children (cons ret children))
615 ;; (mime-entity-set-children-internal entity (nreverse children))
617 ;; (mime-entity-set-content-type-internal
618 ;; entity (make-mime-content-type 'message 'x-broken))
622 ;; (defun mime-parse-encapsulated (entity)
623 ;; (mime-entity-set-children-internal
625 ;; (with-current-buffer (mime-entity-body-buffer entity)
627 ;; (narrow-to-region (mime-entity-body-start-point entity)
628 ;; (mime-entity-body-end-point entity))
629 ;; (list (mime-parse-message
630 ;; (mime-entity-representation-type-internal entity) nil
631 ;; entity (cons 0 (mime-entity-node-id-internal entity))))
634 ;; (defun mime-parse-external (entity)
635 ;; (require 'mmexternal)
636 ;; (mime-entity-set-children-internal
638 ;; (with-current-buffer (mime-entity-body-buffer entity)
640 ;; (narrow-to-region (mime-entity-body-start-point entity)
641 ;; (mime-entity-body-end-point entity))
642 ;; (list (mime-parse-message
643 ;; 'mime-external-entity nil
644 ;; entity (cons 0 (mime-entity-node-id-internal entity))))
645 ;; ;; [tomo] Should we unify with `mime-parse-encapsulated'?
648 (defun mime-parse-message (representation-type &optional default-ctl
650 (let ((header-start (point-min))
653 (body-end (point-max))
655 (goto-char header-start)
656 (if (re-search-forward "^$" nil t)
657 (setq header-end (match-end 0)
658 body-start (if (= header-end body-end)
661 (setq header-end (point-min)
662 body-start (point-min)))
664 (narrow-to-region header-start header-end)
665 (setq content-type (or (let ((str (std11-fetch-field "Content-Type")))
667 (mime-parse-Content-Type str)
671 (luna-make-entity representation-type
672 :location (current-buffer)
673 :content-type content-type
676 :buffer (current-buffer)
677 :header-start header-start
678 :header-end header-end
679 :body-start body-start
688 (defun mime-parse-buffer (&optional buffer representation-type)
689 "Parse BUFFER as a MIME message.
690 If buffer is omitted, it parses current-buffer."
693 (if buffer (set-buffer buffer))
694 (mime-parse-message (or representation-type
695 'mime-buffer-entity) nil)))
701 (provide 'mime-parse)
703 ;;; mime-parse.el ends here