1 ;;; mime-parse.el --- MIME message parser
3 ;; Copyright (C) 1994,95,96,97,98,99,2001 Free Software Foundation, Inc.
5 ;; Author: MORIOKA Tomohiko <morioka@jaist.ac.jp>
6 ;; Shuhei KOBAYASHI <shuhei@aqua.ocn.ne.jp>
7 ;; Keywords: parse, MIME, multimedia, mail, news
9 ;; This file is part of FLIM (Faithful Library about Internet Message).
11 ;; This program is free software; you can redistribute it and/or
12 ;; modify it under the terms of the GNU General Public License as
13 ;; published by the Free Software Foundation; either version 2, or (at
14 ;; your option) any later version.
16 ;; This program is distributed in the hope that it will be useful, but
17 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 ;; General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
24 ;; Boston, MA 02110-1301, USA.
32 (autoload 'mime-entity-body-buffer "mime")
33 (autoload 'mime-entity-body-start-point "mime")
34 (autoload 'mime-entity-body-end-point "mime")
37 ;;; @ lexical analyzer
40 (defcustom mime-lexical-analyzer
41 '(std11-analyze-quoted-string
42 std11-analyze-domain-literal
47 "*List of functions to return result of lexical analyze.
48 Each function must have two arguments: STRING and START.
49 STRING is the target string to be analyzed.
50 START is start position of STRING to analyze.
52 Previous function is preferred to next function. If a function
53 returns nil, next function is used. Otherwise the return value will
56 :type '(repeat function))
58 (defun mime-analyze-tspecial (string start)
59 (if (and (> (length string) start)
60 (memq (aref string start) mime-tspecial-char-list))
61 (cons (cons 'tspecials (substring string start (1+ start)))
64 (defun mime-analyze-token (string start)
65 (if (and (string-match mime-token-regexp string start)
66 (= (match-beginning 0) start))
67 (let ((end (match-end 0)))
68 (cons (cons 'mime-token (substring string start end))
71 (defun mime-lexical-analyze (string)
72 "Analyze STRING as lexical tokens of MIME."
73 (let ((ret (std11-lexical-analyze string mime-lexical-analyzer))
75 ;; skip leading linear-white-space.
76 (while (memq (car (car ret)) '(spaces comment))
80 ;; remove linear-white-space.
82 (if (memq (car (car tail)) '(spaces comment))
84 (setcdr prev (cdr tail))
85 (setq tail (cdr tail)))
94 (defun mime-decode-parameter-value (text charset language)
96 (set-buffer-multibyte nil)
98 (goto-char (point-min))
99 (while (re-search-forward "%[0-9A-Fa-f][0-9A-Fa-f]" nil t)
100 (insert (prog1 (string-to-int
101 (buffer-substring (point)(- (point) 2))
103 (delete-region (point)(- (point) 3)))))
104 (setq text (buffer-string))
106 ;; I believe that `decode-mime-charset-string' of mcs-e20.el should
107 ;; be independent of the value of `enable-multibyte-characters'.
109 (set-buffer-multibyte t)
110 (setq text (decode-mime-charset-string text charset)))
112 (put-text-property 0 (length text) 'mime-language language text))
115 (defun mime-decode-parameter-encode-segment (segment)
117 (set-buffer-multibyte nil)
119 (goto-char (point-min))
121 (when (looking-at (eval-when-compile
122 (concat mime-attribute-char-regexp "+")))
123 (goto-char (match-end 0)))
125 (insert (prog1 (format "%%%02X" (char-int (char-after)))
126 (delete-region (point)(1+ (point))))))
129 (defun mime-decode-parameters (params)
130 "Decode PARAMS as a property list of MIME parameter values.
131 Return value is an association list of MIME parameter values.
132 If parameter continuation is used, segments of values are concatenated.
133 If parameters contain charset information, values are decoded.
134 If parameters contain language information, it is set to `mime-language'
135 property of the decoded-value."
136 ;; (unless (zerop (% (length params) 2)) ...)
137 (let ((len (/ (length params) 2))
140 (if (and (string-match (eval-when-compile
141 (concat "^\\(" mime-attribute-char-regexp "+\\)"
142 "\\(\\*[0-9]+\\)?" ; continuation
143 "\\(\\*\\)?$")) ; charset/language
145 (> (match-end 0) (match-end 1)))
146 ;; parameter value extensions are used.
147 (let* ((attribute (downcase
148 (substring (car params) 0 (match-end 1))))
149 (section (if (match-beginning 2)
151 (substring (car params)
152 (1+ (match-beginning 2))
155 ;; EPARAM := (ATTRIBUTE VALUES CHARSET LANGUAGE)
156 ;; VALUES := [1*VALUE] ; vector of LEN elements.
157 (eparam (assoc attribute eparams))
159 (setq params (cdr params))
162 (setq eparam (cdr eparam))
163 (setq eparam (list (make-vector len nil) nil nil)
164 eparams (cons (cons attribute eparam) eparams)))
165 ;; if parameter-name ends with "*", it is an extended-parameter.
166 (if (match-beginning 3)
168 ;; extended-initial-parameter.
169 (if (string-match (eval-when-compile
171 "^\\(" mime-charset-regexp "\\)?"
172 "'\\(" mime-language-regexp "\\)?"
173 "'\\(\\(" mime-attribute-char-regexp
174 "\\|%[0-9A-Fa-f][0-9A-Fa-f]\\)+\\)$"))
179 (substring value (match-beginning 3)))
180 (setq eparam (cdr eparam))
182 (when (match-beginning 1)
185 (substring value 0 (match-end 1)))))
186 (setq eparam (cdr eparam))
188 (when (match-beginning 2)
195 ;; invalid parameter-value.
197 (mime-decode-parameter-encode-segment value)))
198 ;; extended-other-parameter.
199 (if (string-match (eval-when-compile
201 "^\\(\\(" mime-attribute-char-regexp
202 "\\|%[0-9A-Fa-f][0-9A-Fa-f]\\)+\\)$"))
204 (aset (car eparam) section value)
205 ;; invalid parameter-value.
206 (aset (car eparam) section
207 (mime-decode-parameter-encode-segment value))))
208 ;; regular-parameter. parameter continuation only.
209 (aset (car eparam) section
210 (mime-decode-parameter-encode-segment value))))
211 ;; parameter value extensions are not used,
212 ;; or invalid attribute-name (in RFC2231, although valid in RFC2045).
213 (setq dest (cons (cons (downcase (car params))
214 ;;; ;; decode (invalid!) encoded-words.
215 ;;; (eword-decode-string
216 ;;; (decode-mime-charset-string
217 ;;; (car (cdr params))
218 ;;; default-mime-charset)
222 params (cdr params)))
223 (setq params (cdr params)))
224 ;; concat and decode parameters.
226 (setq dest (cons (cons (car (car eparams)) ; attribute
227 (mime-decode-parameter-value
228 (mapconcat (function identity)
229 (nth 1 (car eparams)) ; values
231 (nth 2 (car eparams)) ; charset
232 (nth 3 (car eparams)) ; language
235 eparams (cdr eparams)))
238 ;;; for compatibility with flim-1_13-rfc2231 API.
239 (defalias 'mime-parse-parameters-from-list 'mime-decode-parameters)
240 (make-obsolete 'mime-parse-parameters-from-list 'mime-decode-parameters)
242 (defun mime-parse-parameters (tokens)
243 "Parse TOKENS as MIME parameter values.
244 Return a property list, which is a list of the form
245 \(PARAMETER-NAME1 VALUE1 PARAMETER-NAME2 VALUE2...)."
246 (let (params attribute)
248 (eq (car (car tokens)) 'tspecials)
249 (string= (cdr (car tokens)) ";")
250 (setq tokens (cdr tokens))
251 (eq (car (car tokens)) 'mime-token)
253 (setq attribute (cdr (car tokens)))
254 (setq tokens (cdr tokens)))
255 (eq (car (car tokens)) 'tspecials)
256 (string= (cdr (car tokens)) "=")
257 (setq tokens (cdr tokens))
258 (memq (car (car tokens)) '(mime-token quoted-string)))
259 (setq params (cons (if (eq (car (car tokens)) 'quoted-string)
260 (std11-strip-quoted-pair (cdr (car tokens)))
262 (cons attribute params))
263 tokens (cdr tokens)))
266 ;; unlimited patch by simm-emacs@fan.gr.jp
267 ;; Mon, 10 Jan 2000 12:59:46 +0900
268 (defun mime-parse-parameter (string)
270 (and mime-decode-unlimited
271 (string-match "\033" str)
272 (setq str (decode-coding-string string 'iso-2022-7bit-ss2)))
274 `,(concat "^[ \t]*\;[ \t]*\\(" mime-token-regexp "\\)"
276 "\\(\"\\([^\"\\\r\n]\\|\\\\.\\)*\"\\|[^; \t\n]*\\)"
279 (let ((e (match-end 2)))
280 (if mime-decode-unlimited
283 (encode-coding-string
284 (substring str (match-beginning 1) (match-end 1))
286 (encode-coding-string
287 (std11-strip-quoted-string
288 (substring str (match-beginning 2) e))
290 (encode-coding-string (substring str e) 'iso-2022-7bit-ss2))
293 (downcase (substring str (match-beginning 1) (match-end 1)))
294 (std11-strip-quoted-string (substring sutr
295 (match-beginning 2) e)))
296 (substring str e)))))))
303 (defun mime-parse-Content-Type (string)
304 "Parse STRING as field-body of Content-Type field.
306 (PRIMARY-TYPE SUBTYPE (NAME1 . VALUE1)(NAME2 . VALUE2) ...)
307 or nil. PRIMARY-TYPE and SUBTYPE are symbol and NAME_n and VALUE_n
309 (setq string (std11-unfold-string string))
310 (if (string-match `,(concat "^\\(" mime-token-regexp
311 "\\)/\\(" mime-token-regexp "\\)") string)
312 (let* ((type (downcase
313 (substring string (match-beginning 1) (match-end 1))))
315 (substring string (match-beginning 2) (match-end 2))))
317 (setq string (substring string (match-end 0)))
318 (while (setq ret (mime-parse-parameter string))
319 (setq dest (cons (car ret) dest)
322 (make-mime-content-type (intern type)(intern subtype)
327 (defun mime-read-Content-Type ()
328 "Parse field-body of Content-Type field of current-buffer.
329 Return value is a mime-content-type object.
330 If Content-Type field is not found, return nil."
331 (let ((field-body (std11-field-body "Content-Type")))
333 (mime-parse-Content-Type field-body)
337 ;;; @@ Content-Disposition
341 (defun mime-parse-Content-Disposition (string)
342 "Parse STRING as field-body of Content-Disposition field."
343 (setq string (std11-unfold-string string))
344 (if (string-match `,(concat "^" mime-token-regexp) string)
345 (let* ((e (match-end 0))
346 (type (downcase (substring string 0 e)))
348 (setq string (substring string e))
349 (while (setq ret (mime-parse-parameter string))
350 (setq dest (cons (car ret) dest)
353 (cons (cons 'type (intern type))
358 (defun mime-read-Content-Disposition ()
359 "Parse field-body of Content-Disposition field of current-buffer.
360 Return value is a mime-content-disposition object.
361 If Content-Disposition field is not found, return nil."
362 (let ((field-body (std11-field-body "Content-Disposition")))
364 (mime-parse-Content-Disposition field-body)
368 ;;; @@ Content-Transfer-Encoding
372 (defun mime-parse-Content-Transfer-Encoding (string)
373 "Parse STRING as field-body of Content-Transfer-Encoding field."
374 (let ((tokens (std11-lexical-analyze string mime-lexical-analyzer))
377 (setq token (car tokens))
378 (std11-ignored-token-p token))
379 (setq tokens (cdr tokens)))
381 (if (eq (car token) 'mime-token)
382 (downcase (cdr token))
386 (defun mime-read-Content-Transfer-Encoding ()
387 "Parse field-body of Content-Transfer-Encoding field of current-buffer.
388 Return value is a string.
389 If Content-Transfer-Encoding field is not found, return nil."
390 (let ((field-body (std11-field-body "Content-Transfer-Encoding")))
392 (mime-parse-Content-Transfer-Encoding field-body)
396 ;;; @@ Content-ID / Message-ID
400 (defun mime-parse-msg-id (tokens)
401 "Parse TOKENS as msg-id of Content-ID or Message-ID field."
402 (car (std11-parse-msg-id tokens)))
405 (defun mime-uri-parse-cid (string)
406 "Parse STRING as cid URI."
407 (mime-parse-msg-id (cons '(specials . "<")
409 (cdr (cdr (std11-lexical-analyze string)))
410 '((specials . ">"))))))
416 ;; (defun mime-parse-multipart (entity)
417 ;; (with-current-buffer (mime-entity-body-buffer entity)
418 ;; (let* ((representation-type
419 ;; (mime-entity-representation-type-internal entity))
420 ;; (content-type (mime-entity-content-type-internal entity))
423 ;; (mime-content-type-parameter content-type "boundary")))
424 ;; (delimiter (concat "\n" (regexp-quote dash-boundary)))
425 ;; (close-delimiter (concat delimiter "--[ \t]*$"))
426 ;; (rsep (concat delimiter "[ \t]*\n"))
428 ;; (if (eq (mime-content-type-subtype content-type) 'digest)
429 ;; (make-mime-content-type 'message 'rfc822)
430 ;; (make-mime-content-type 'text 'plain)
432 ;; (body-start (mime-entity-body-start-point entity))
433 ;; (body-end (mime-entity-body-end-point entity)))
435 ;; (goto-char body-end)
436 ;; (narrow-to-region body-start
437 ;; (if (re-search-backward close-delimiter nil t)
438 ;; (match-beginning 0)
440 ;; (goto-char body-start)
441 ;; (if (re-search-forward
442 ;; (concat "^" (regexp-quote dash-boundary) "[ \t]*\n")
444 ;; (let ((cb (match-end 0))
445 ;; ce ncb ret children
446 ;; (node-id (mime-entity-node-id-internal entity))
448 ;; (while (re-search-forward rsep nil t)
449 ;; (setq ce (match-beginning 0))
450 ;; (setq ncb (match-end 0))
452 ;; (narrow-to-region cb ce)
453 ;; (setq ret (mime-parse-message representation-type dc-ctl
454 ;; entity (cons i node-id)))
456 ;; (setq children (cons ret children))
457 ;; (goto-char (setq cb ncb))
460 ;; (setq ce (point-max))
462 ;; (narrow-to-region cb ce)
463 ;; (setq ret (mime-parse-message representation-type dc-ctl
464 ;; entity (cons i node-id)))
466 ;; (setq children (cons ret children))
467 ;; (mime-entity-set-children-internal entity (nreverse children))
469 ;; (mime-entity-set-content-type-internal
470 ;; entity (make-mime-content-type 'message 'x-broken))
474 ;; (defun mime-parse-encapsulated (entity)
475 ;; (mime-entity-set-children-internal
477 ;; (with-current-buffer (mime-entity-body-buffer entity)
479 ;; (narrow-to-region (mime-entity-body-start-point entity)
480 ;; (mime-entity-body-end-point entity))
481 ;; (list (mime-parse-message
482 ;; (mime-entity-representation-type-internal entity) nil
483 ;; entity (cons 0 (mime-entity-node-id-internal entity))))
486 ;; (defun mime-parse-external (entity)
487 ;; (require 'mmexternal)
488 ;; (mime-entity-set-children-internal
490 ;; (with-current-buffer (mime-entity-body-buffer entity)
492 ;; (narrow-to-region (mime-entity-body-start-point entity)
493 ;; (mime-entity-body-end-point entity))
494 ;; (list (mime-parse-message
495 ;; 'mime-external-entity nil
496 ;; entity (cons 0 (mime-entity-node-id-internal entity))))
497 ;; ;; [tomo] Should we unify with `mime-parse-encapsulated'?
500 (defun mime-parse-message (representation-type &optional default-ctl
502 (let ((header-start (point-min))
505 (body-end (point-max))
507 (goto-char header-start)
508 (if (re-search-forward "^$" nil t)
509 (setq header-end (match-end 0)
510 body-start (if (= header-end body-end)
513 (setq header-end (point-min)
514 body-start (point-min)))
516 (narrow-to-region header-start header-end)
517 (setq content-type (or (let ((str (std11-fetch-field "Content-Type")))
519 (mime-parse-Content-Type str)
523 (luna-make-entity representation-type
524 :location (current-buffer)
525 :content-type content-type
528 :buffer (current-buffer)
529 :header-start header-start
530 :header-end header-end
531 :body-start body-start
540 (defun mime-parse-buffer (&optional buffer representation-type)
541 "Parse BUFFER as a MIME message.
542 If buffer is omitted, it parses current-buffer."
545 (if buffer (set-buffer buffer))
546 (mime-parse-message (or representation-type
547 'mime-buffer-entity) nil)))
553 (provide 'mime-parse)
555 ;;; mime-parse.el ends here