1 ;;; mime-parse.el --- MIME message parser
3 ;; Copyright (C) 1994,95,96,97,98,99,2001 Free Software Foundation, Inc.
5 ;; Author: MORIOKA Tomohiko <morioka@jaist.ac.jp>
6 ;; Shuhei KOBAYASHI <shuhei@aqua.ocn.ne.jp>
7 ;; Keywords: parse, MIME, multimedia, mail, news
9 ;; This file is part of FLIM (Faithful Library about Internet Message).
11 ;; This program is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
16 ;; This program is distributed in the hope that it will be useful, but
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;; General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
32 (autoload 'mime-entity-body-buffer "mime")
33 (autoload 'mime-entity-body-start-point "mime")
34 (autoload 'mime-entity-body-end-point "mime")
37 ;;; @ lexical analyzer
40 (defcustom mime-lexical-analyzer
41 '(std11-analyze-quoted-string
42 std11-analyze-domain-literal
47 "*List of functions to return result of lexical analyze.
48 Each function must have two arguments: STRING and START.
49 STRING is the target string to be analyzed.
50 START is start position of STRING to analyze.
52 Previous function is preferred to next function. If a function
53 returns nil, next function is used. Otherwise the return value will
56 :type '(repeat function))
58 (defun mime-analyze-tspecial (string start)
59 (if (and (> (length string) start)
60 (memq (aref string start) mime-tspecial-char-list))
61 (cons (cons 'tspecials (substring string start (1+ start)))
64 (defun mime-analyze-token (string start)
65 (if (and (string-match mime-token-regexp string start)
66 (= (match-beginning 0) start))
67 (let ((end (match-end 0)))
68 (cons (cons 'mime-token (substring string start end))
71 (defun mime-lexical-analyze (string)
72 "Analyze STRING as lexical tokens of MIME."
73 (let ((ret (std11-lexical-analyze string mime-lexical-analyzer))
75 ;; skip leading linear-white-space.
76 (while (memq (car (car ret)) '(spaces comment))
80 ;; remove linear-white-space.
82 (if (memq (car (car tail)) '(spaces comment))
84 (setcdr prev (cdr tail))
85 (setq tail (cdr tail)))
94 (defun mime-decode-parameter-value (text charset language)
96 (set-buffer-multibyte nil)
98 (goto-char (point-min))
99 (while (re-search-forward "%[0-9A-Fa-f][0-9A-Fa-f]" nil t)
100 (insert (prog1 (string-to-int
101 (buffer-substring (point)(- (point) 2))
103 (delete-region (point)(- (point) 3)))))
104 (setq text (buffer-string))
106 ;; I believe that `decode-mime-charset-string' of mcs-e20.el should
107 ;; be independent of the value of `enable-multibyte-characters'.
109 (set-buffer-multibyte t)
110 (setq text (decode-mime-charset-string text charset)))
112 (put-text-property 0 (length text) 'mime-language language text))
115 (defun mime-decode-parameter-encode-segment (segment)
117 (set-buffer-multibyte nil)
119 (goto-char (point-min))
121 (when (looking-at (eval-when-compile
122 (concat mime-attribute-char-regexp "+")))
123 (goto-char (match-end 0)))
125 (insert (prog1 (format "%%%02X" (char-int (char-after)))
126 (delete-region (point)(1+ (point))))))
129 (defun mime-decode-parameters (params)
130 "Decode PARAMS as a property list of MIME parameter values.
131 Return value is an association list of MIME parameter values.
132 If parameter continuation is used, segments of values are concatenated.
133 If parameters contain charset information, values are decoded.
134 If parameters contain language information, it is set to `mime-language'
135 property of the decoded-value."
136 ;; (unless (zerop (% (length params) 2)) ...)
137 (let ((len (/ (length params) 2))
140 (if (and (string-match (eval-when-compile
141 (concat "^\\(" mime-attribute-char-regexp "+\\)"
142 "\\(\\*[0-9]+\\)?" ; continuation
143 "\\(\\*\\)?$")) ; charset/language
145 (> (match-end 0) (match-end 1)))
146 ;; parameter value extensions are used.
147 (let* ((attribute (downcase
148 (substring (car params) 0 (match-end 1))))
149 (section (if (match-beginning 2)
151 (substring (car params)
152 (1+ (match-beginning 2))
155 ;; EPARAM := (ATTRIBUTE VALUES CHARSET LANGUAGE)
156 ;; VALUES := [1*VALUE] ; vector of LEN elements.
157 (eparam (assoc attribute eparams))
159 (setq params (cdr params))
162 (setq eparam (cdr eparam))
163 (setq eparam (list (make-vector len nil) nil nil)
164 eparams (cons (cons attribute eparam) eparams)))
165 ;; if parameter-name ends with "*", it is an extended-parameter.
166 (if (match-beginning 3)
168 ;; extended-initial-parameter.
169 (if (string-match (eval-when-compile
171 "^\\(" mime-charset-regexp "\\)?"
172 "'\\(" mime-language-regexp "\\)?"
173 "'\\(\\(" mime-attribute-char-regexp
174 "\\|%[0-9A-Fa-f][0-9A-Fa-f]\\)+\\)$"))
179 (substring value (match-beginning 3)))
180 (setq eparam (cdr eparam))
182 (when (match-beginning 1)
185 (substring value 0 (match-end 1)))))
186 (setq eparam (cdr eparam))
188 (when (match-beginning 2)
195 ;; invalid parameter-value.
197 (mime-decode-parameter-encode-segment value)))
198 ;; extended-other-parameter.
199 (if (string-match (eval-when-compile
201 "^\\(\\(" mime-attribute-char-regexp
202 "\\|%[0-9A-Fa-f][0-9A-Fa-f]\\)+\\)$"))
204 (aset (car eparam) section value)
205 ;; invalid parameter-value.
206 (aset (car eparam) section
207 (mime-decode-parameter-encode-segment value))))
208 ;; regular-parameter. parameter continuation only.
209 (aset (car eparam) section
210 (mime-decode-parameter-encode-segment value))))
211 ;; parameter value extensions are not used,
212 ;; or invalid attribute-name (in RFC2231, although valid in RFC2045).
213 (setq dest (cons (cons (downcase (car params))
214 ;;; ;; decode (invalid!) encoded-words.
215 ;;; (eword-decode-string
216 ;;; (decode-mime-charset-string
217 ;;; (car (cdr params))
218 ;;; default-mime-charset)
222 params (cdr params)))
223 (setq params (cdr params)))
224 ;; concat and decode parameters.
226 (setq dest (cons (cons (car (car eparams)) ; attribute
227 (mime-decode-parameter-value
228 (mapconcat (function identity)
229 (nth 1 (car eparams)) ; values
231 (nth 2 (car eparams)) ; charset
232 (nth 3 (car eparams)) ; language
235 eparams (cdr eparams)))
238 ;;; for compatibility with flim-1_13-rfc2231 API.
239 (defalias 'mime-parse-parameters-from-list 'mime-decode-parameters)
240 (make-obsolete 'mime-parse-parameters-from-list 'mime-decode-parameters)
242 (defun mime-parse-parameters (tokens)
243 "Parse TOKENS as MIME parameter values.
244 Return a property list, which is a list of the form
245 \(PARAMETER-NAME1 VALUE1 PARAMETER-NAME2 VALUE2...)."
246 (let (params attribute)
248 (eq (car (car tokens)) 'tspecials)
249 (string= (cdr (car tokens)) ";")
250 (setq tokens (cdr tokens))
251 (eq (car (car tokens)) 'mime-token)
253 (setq attribute (cdr (car tokens)))
254 (setq tokens (cdr tokens)))
255 (eq (car (car tokens)) 'tspecials)
256 (string= (cdr (car tokens)) "=")
257 (setq tokens (cdr tokens))
258 (memq (car (car tokens)) '(mime-token quoted-string)))
259 (setq params (cons (if (eq (car (car tokens)) 'quoted-string)
260 (std11-strip-quoted-pair (cdr (car tokens)))
262 (cons attribute params))
263 tokens (cdr tokens)))
271 (defun mime-parse-Content-Type (field-body)
272 "Parse FIELD-BODY as a Content-Type field.
273 FIELD-BODY is a string.
274 Return value is a mime-content-type object.
275 If FIELD-BODY is not a valid Content-Type field, return nil."
276 (let ((tokens (mime-lexical-analyze field-body)))
277 (when (eq (car (car tokens)) 'mime-token)
278 (let ((primary-type (cdr (car tokens))))
279 (setq tokens (cdr tokens))
280 (when (and (eq (car (car tokens)) 'tspecials)
281 (string= (cdr (car tokens)) "/")
282 (setq tokens (cdr tokens))
283 (eq (car (car tokens)) 'mime-token))
284 (make-mime-content-type
285 (intern (downcase primary-type))
286 (intern (downcase (cdr (car tokens))))
287 (mime-decode-parameters
288 (mime-parse-parameters (cdr tokens)))))))))
291 (defun mime-read-Content-Type ()
292 "Parse field-body of Content-Type field of current-buffer.
293 Return value is a mime-content-type object.
294 If Content-Type field is not found, return nil."
295 (let ((field-body (std11-field-body "Content-Type")))
297 (mime-parse-Content-Type field-body)
301 ;;; @@ Content-Disposition
305 (defun mime-parse-Content-Disposition (field-body)
306 "Parse FIELD-BODY as a Content-Disposition field.
307 FIELD-BODY is a string.
308 Return value is a mime-content-disposition object.
309 If FIELD-BODY is not a valid Content-Disposition field, return nil."
310 (let ((tokens (mime-lexical-analyze field-body)))
311 (when (eq (car (car tokens)) 'mime-token)
312 (make-mime-content-disposition
313 (intern (downcase (cdr (car tokens))))
314 (mime-decode-parameters
315 (mime-parse-parameters (cdr tokens)))))))
318 (defun mime-read-Content-Disposition ()
319 "Parse field-body of Content-Disposition field of current-buffer.
320 Return value is a mime-content-disposition object.
321 If Content-Disposition field is not found, return nil."
322 (let ((field-body (std11-field-body "Content-Disposition")))
324 (mime-parse-Content-Disposition field-body)
328 ;;; @@ Content-Transfer-Encoding
332 (defun mime-parse-Content-Transfer-Encoding (field-body)
333 "Parse FIELD-BODY as a Content-Transfer-Encoding field.
334 FIELD-BODY is a string.
335 Return value is a string.
336 If FIELD-BODY is not a valid Content-Transfer-Encoding field, return nil."
337 (let ((tokens (mime-lexical-analyze field-body)))
338 (when (eq (car (car tokens)) 'mime-token)
339 (downcase (cdr (car tokens))))))
342 (defun mime-read-Content-Transfer-Encoding ()
343 "Parse field-body of Content-Transfer-Encoding field of current-buffer.
344 Return value is a string.
345 If Content-Transfer-Encoding field is not found, return nil."
346 (let ((field-body (std11-field-body "Content-Transfer-Encoding")))
348 (mime-parse-Content-Transfer-Encoding field-body)
352 ;;; @@ Content-ID / Message-ID
356 (defun mime-parse-msg-id (tokens)
357 "Parse TOKENS as msg-id of Content-ID or Message-ID field."
358 (car (std11-parse-msg-id tokens)))
361 (defun mime-uri-parse-cid (string)
362 "Parse STRING as cid URI."
363 (mime-parse-msg-id (cons '(specials . "<")
365 (cdr (cdr (std11-lexical-analyze string)))
366 '((specials . ">"))))))
372 ;; (defun mime-parse-multipart (entity)
373 ;; (with-current-buffer (mime-entity-body-buffer entity)
374 ;; (let* ((representation-type
375 ;; (mime-entity-representation-type-internal entity))
376 ;; (content-type (mime-entity-content-type-internal entity))
379 ;; (mime-content-type-parameter content-type "boundary")))
380 ;; (delimiter (concat "\n" (regexp-quote dash-boundary)))
381 ;; (close-delimiter (concat delimiter "--[ \t]*$"))
382 ;; (rsep (concat delimiter "[ \t]*\n"))
384 ;; (if (eq (mime-content-type-subtype content-type) 'digest)
385 ;; (make-mime-content-type 'message 'rfc822)
386 ;; (make-mime-content-type 'text 'plain)
388 ;; (body-start (mime-entity-body-start-point entity))
389 ;; (body-end (mime-entity-body-end-point entity)))
391 ;; (goto-char body-end)
392 ;; (narrow-to-region body-start
393 ;; (if (re-search-backward close-delimiter nil t)
394 ;; (match-beginning 0)
396 ;; (goto-char body-start)
397 ;; (if (re-search-forward
398 ;; (concat "^" (regexp-quote dash-boundary) "[ \t]*\n")
400 ;; (let ((cb (match-end 0))
401 ;; ce ncb ret children
402 ;; (node-id (mime-entity-node-id-internal entity))
404 ;; (while (re-search-forward rsep nil t)
405 ;; (setq ce (match-beginning 0))
406 ;; (setq ncb (match-end 0))
408 ;; (narrow-to-region cb ce)
409 ;; (setq ret (mime-parse-message representation-type dc-ctl
410 ;; entity (cons i node-id)))
412 ;; (setq children (cons ret children))
413 ;; (goto-char (setq cb ncb))
416 ;; (setq ce (point-max))
418 ;; (narrow-to-region cb ce)
419 ;; (setq ret (mime-parse-message representation-type dc-ctl
420 ;; entity (cons i node-id)))
422 ;; (setq children (cons ret children))
423 ;; (mime-entity-set-children-internal entity (nreverse children))
425 ;; (mime-entity-set-content-type-internal
426 ;; entity (make-mime-content-type 'message 'x-broken))
430 ;; (defun mime-parse-encapsulated (entity)
431 ;; (mime-entity-set-children-internal
433 ;; (with-current-buffer (mime-entity-body-buffer entity)
435 ;; (narrow-to-region (mime-entity-body-start-point entity)
436 ;; (mime-entity-body-end-point entity))
437 ;; (list (mime-parse-message
438 ;; (mime-entity-representation-type-internal entity) nil
439 ;; entity (cons 0 (mime-entity-node-id-internal entity))))
442 ;; (defun mime-parse-external (entity)
443 ;; (require 'mmexternal)
444 ;; (mime-entity-set-children-internal
446 ;; (with-current-buffer (mime-entity-body-buffer entity)
448 ;; (narrow-to-region (mime-entity-body-start-point entity)
449 ;; (mime-entity-body-end-point entity))
450 ;; (list (mime-parse-message
451 ;; 'mime-external-entity nil
452 ;; entity (cons 0 (mime-entity-node-id-internal entity))))
453 ;; ;; [tomo] Should we unify with `mime-parse-encapsulated'?
456 (defun mime-parse-message (representation-type &optional default-ctl
458 (let ((header-start (point-min))
461 (body-end (point-max))
463 (goto-char header-start)
464 (if (re-search-forward "^$" nil t)
465 (setq header-end (match-end 0)
466 body-start (if (= header-end body-end)
469 (setq header-end (point-min)
470 body-start (point-min)))
472 (narrow-to-region header-start header-end)
473 (setq content-type (or (let ((str (std11-fetch-field "Content-Type")))
475 (mime-parse-Content-Type str)
479 (luna-make-entity representation-type
480 :location (current-buffer)
481 :content-type content-type
484 :buffer (current-buffer)
485 :header-start header-start
486 :header-end header-end
487 :body-start body-start
496 (defun mime-parse-buffer (&optional buffer representation-type)
497 "Parse BUFFER as a MIME message.
498 If buffer is omitted, it parses current-buffer."
501 (if buffer (set-buffer buffer))
502 (mime-parse-message (or representation-type
503 'mime-buffer-entity) nil)))
509 (provide 'mime-parse)
511 ;;; mime-parse.el ends here