(eword-encoded-word-regexp): Match with RFC-2231.
[elisp/flim.git] / mime-parse.el
index 5fb7dd9..2522ce7 100644 (file)
@@ -1,6 +1,6 @@
 ;;; mime-parse.el --- MIME message parser
 
-;; Copyright (C) 1994,1995,1996,1997,1998 Free Software Foundation, Inc.
+;; Copyright (C) 1994,1995,1996,1997,1998,1999 Free Software Foundation, Inc.
 
 ;; Author: MORIOKA Tomohiko <morioka@jaist.ac.jp>
 ;; Keywords: parse, MIME, multimedia, mail, news
 
 ;;; Code:
 
-(require 'emu)
-(require 'std11)
 (require 'mime-def)
+(require 'std11)
+
+(autoload 'mime-entity-body-buffer "mime")
+(autoload 'mime-entity-body-start-point "mime")
+(autoload 'mime-entity-body-end-point "mime")
+
+
+;;; @ lexical analyzer
+;;;
+
+(defcustom mime-lexical-analyzer
+  '(std11-analyze-quoted-string
+    std11-analyze-domain-literal
+    std11-analyze-comment
+    std11-analyze-spaces
+    mime-analyze-tspecial
+    mime-analyze-token)
+  "*List of functions to return result of lexical analyze.
+Each function must have two arguments: STRING and START.
+STRING is the target string to be analyzed.
+START is start position of STRING to analyze.
+
+Previous function is preferred to next function.  If a function
+returns nil, next function is used.  Otherwise the return value will
+be the result."
+  :group 'mime
+  :type '(repeat function))
+
+(defun mime-analyze-tspecial (string start)
+  (if (and (> (length string) start)
+          (memq (aref string start) mime-tspecial-char-list))
+      (cons (cons 'tpecials (substring string start (1+ start)))
+           (1+ start))
+    ))
 
-(eval-when-compile (require 'cl))
+(defun mime-analyze-token (string start)
+  (if (and (string-match mime-token-regexp string start)
+          (= (match-beginning 0) start))
+      (let ((end (match-end 0)))
+       (cons (cons 'mime-token (substring string start end))
+             ;;(substring string end)
+             end)
+       )))
 
 
 ;;; @ field parser
 (defconst mime/content-parameter-value-regexp
   (concat "\\("
          std11-quoted-string-regexp
-         "\\|[^; \t\n]*\\)"))
+         "\\|[^; \t\n]*\\)")
+  )
+
+(defconst mime::attribute-char-regexp "[^][*'%()<>@,;:\\\"/?=\000- ]")
+(defconst mime::attribute-regexp (concat mime::attribute-char-regexp "+")
+  )
+
+(defconst mime::ext-octet-regexp "%[0-9a-f][0-9a-f]")
+(defconst mime::extended-other-values-regexp
+  (concat "\\(" mime::attribute-char-regexp "\\|"
+         mime::ext-octet-regexp "\\)+")
+  )
+(defconst mime::extended-initial-value-regexp
+  (concat "\\(" mime-charset-regexp "\\)'\\(" mime-charset-regexp "\\)'\\("
+         mime::extended-other-values-regexp "\\)")
+  )
 
 (defconst mime::parameter-regexp
-  (concat "^[ \t]*\;[ \t]*\\(" mime-token-regexp "\\)"
-         "[ \t]*=[ \t]*\\(" mime/content-parameter-value-regexp "\\)"))
-
-(defun mime-parse-parameter (str)
-  (if (string-match mime::parameter-regexp str)
-      (let ((e (match-end 2)))
-       (cons
-        (cons (downcase (substring str (match-beginning 1) (match-end 1)))
-              (std11-strip-quoted-string
-               (substring str (match-beginning 2) e))
-              )
-        (substring str e)
-        ))))
-
+  (concat "[ \t]*\;[ \t]*\\(" mime::attribute-regexp "\\)"
+         "\\(\\*\\([0-9]+\\)\\)?\\(\\*\\)?"
+         "[ \t]*=[ \t]*\\(" mime/content-parameter-value-regexp "\\)")
+  )
+
+(defun mime-parse-parameters (str)
+  (with-temp-buffer
+    (let (rest)
+      (insert str)
+      (goto-char (point-min))
+      (while (looking-at mime::parameter-regexp)
+       (let* ((name (buffer-substring (match-beginning 1) (match-end 1)))
+              (no (or (and (match-beginning 3)
+                           (string-to-int
+                            (buffer-substring (match-beginning 3)
+                                              (match-end 3)
+                                              )))
+                      0))
+              (encoded (and (match-beginning 4) t))
+              (next (match-end 0))
+              (parm (or (assoc name rest)
+                        (car (setq rest
+                                   (cons (make-mime-parameter name) rest)
+                                   )))))
+         (mime-parameter-append-raw-value
+          parm
+          no
+          encoded
+          (if encoded
+              (if (and (eq no 0)
+                       (progn
+                         (goto-char (match-beginning 5))
+                         (looking-at mime::extended-initial-value-regexp)
+                         ))
+                  (progn
+                    (mime-parameter-set-charset
+                     parm
+                     (intern (downcase
+                              (buffer-substring (match-beginning 1)
+                                                (match-end 1)
+                                                ))))
+                    (mime-parameter-set-language
+                     parm
+                     (intern (downcase
+                               (buffer-substring (match-beginning 2)
+                                                 (match-end 2)
+                                                 ))))
+                    (buffer-substring (match-beginning 3) (match-end 3))
+                    )
+                (buffer-substring (match-beginning 5) (match-end 5))
+                )
+            (std11-strip-quoted-string
+             (buffer-substring (match-beginning 5) (match-end 5))
+             )))
+         (goto-char next)
+         ))
+      rest)))
 
 ;;; @ Content-Type
 ;;;
 
+;;;###autoload
 (defun mime-parse-Content-Type (string)
-  "Parse STRING as field-body of Content-Type field.
-Return value is
-    (PRIMARY-TYPE SUBTYPE (NAME1 . VALUE1)(NAME2 . VALUE2) ...)
-or nil.  PRIMARY-TYPE and SUBTYPE are symbol and NAME_n and VALUE_n
-are string."
+  "Parse STRING as field-body of Content-Type field."
   (setq string (std11-unfold-string string))
   (if (string-match `,(concat "^\\(" mime-token-regexp
                              "\\)/\\(" mime-token-regexp "\\)") string)
@@ -73,18 +167,14 @@ are string."
                       (substring string (match-beginning 2) (match-end 2))))
             ret dest)
        (setq string (substring string (match-end 0)))
-       (while (setq ret (mime-parse-parameter string))
-         (setq dest (cons (car ret) dest)
-               string (cdr ret))
-         )
        (make-mime-content-type (intern type)(intern subtype)
-                               (nreverse dest))
-       )))
+                               (nreverse (mime-parse-parameters string))
+                               ))))
 
+;;;###autoload
 (defun mime-read-Content-Type ()
   "Read field-body of Content-Type field from current-buffer,
-and return parsed it.  Format of return value is as same as
-`mime-parse-Content-Type'."
+and return parsed it."
   (let ((str (std11-field-body "Content-Type")))
     (if str
        (mime-parse-Content-Type str)
@@ -94,24 +184,25 @@ and return parsed it.  Format of return value is as same as
 ;;; @ Content-Disposition
 ;;;
 
-(defconst mime-disposition-type-regexp mime-token-regexp)
+(eval-and-compile
+  (defconst mime-disposition-type-regexp mime-token-regexp)
+  )
 
+;;;###autoload
 (defun mime-parse-Content-Disposition (string)
   "Parse STRING as field-body of Content-Disposition field."
   (setq string (std11-unfold-string string))
-  (if (string-match `,(concat "^" mime-disposition-type-regexp) string)
+  (if (string-match (eval-when-compile
+                     (concat "^" mime-disposition-type-regexp)) string)
       (let* ((e (match-end 0))
             (type (downcase (substring string 0 e)))
             ret dest)
        (setq string (substring string e))
-       (while (setq ret (mime-parse-parameter string))
-         (setq dest (cons (car ret) dest)
-               string (cdr ret))
-         )
        (cons (cons 'type (intern type))
-             (nreverse dest))
+             (nreverse (mime-parse-parameters string)))
        )))
 
+;;;###autoload
 (defun mime-read-Content-Disposition ()
   "Read field-body of Content-Disposition field from current-buffer,
 and return parsed it."
@@ -120,33 +211,25 @@ and return parsed it."
        (mime-parse-Content-Disposition str)
       )))
 
-(defsubst mime-content-disposition-type (content-disposition)
-  "Return disposition-type of CONTENT-DISPOSITION."
-  (cdr (car content-disposition)))
-
-(defsubst mime-content-disposition-parameters (content-disposition)
-  "Return disposition-parameters of CONTENT-DISPOSITION."
-  (cdr content-disposition))
-
-(defsubst mime-content-disposition-parameter (content-disposition parameter)
-  "Return PARAMETER value of CONTENT-DISPOSITION."
-  (cdr (assoc parameter (cdr content-disposition))))
-
-(defsubst mime-content-disposition-filename (content-disposition)
-  "Return filename of CONTENT-DISPOSITION."
-  (mime-content-disposition-parameter content-disposition "filename"))
-
 
 ;;; @ Content-Transfer-Encoding
 ;;;
 
+;;;###autoload
 (defun mime-parse-Content-Transfer-Encoding (string)
   "Parse STRING as field-body of Content-Transfer-Encoding field."
-  (if (string-match "[ \t\n\r]+$" string)
-      (setq string (match-string 0 string))
-    )
-  (downcase string))
-
+  (let ((tokens (std11-lexical-analyze string mime-lexical-analyzer))
+       token)
+    (while (and tokens
+               (setq token (car tokens))
+               (std11-ignored-token-p token))
+      (setq tokens (cdr tokens)))
+    (if token
+       (if (eq (car token) 'mime-token)
+           (downcase (cdr token))
+         ))))
+
+;;;###autoload
 (defun mime-read-Content-Transfer-Encoding (&optional default-encoding)
   "Read field-body of Content-Transfer-Encoding field from
 current-buffer, and return it.
@@ -157,181 +240,148 @@ If is is not found, return DEFAULT-ENCODING."
       default-encoding)))
 
 
+;;; @ Content-Id / Message-Id
+;;;
+
+;;;###autoload
+(defun mime-parse-msg-id (tokens)
+  "Parse TOKENS as msg-id of Content-Id or Message-Id field."
+  (car (std11-parse-msg-id tokens)))
+
+;;;###autoload
+(defun mime-uri-parse-cid (string)
+  "Parse STRING as cid URI."
+  (inline
+    (mime-parse-msg-id (cons '(specials . "<")
+                            (nconc
+                             (cdr (cdr (std11-lexical-analyze string)))
+                             '((specials . ">")))))))
+
+
 ;;; @ message parser
 ;;;
 
-(defalias 'mime-entity-point-min 'mime-entity-header-start)
-(defalias 'mime-entity-point-max 'mime-entity-body-end)
-
-(defsubst mime-entity-media-type (entity)
-  (mime-content-type-primary-type (mime-entity-content-type entity)))
-(defsubst mime-entity-media-subtype (entity)
-  (mime-content-type-subtype (mime-entity-content-type entity)))
-(defsubst mime-entity-parameters (entity)
-  (mime-content-type-parameters (mime-entity-content-type entity)))
-
-(defsubst mime-entity-type/subtype (entity-info)
-  (mime-type/subtype-string (mime-entity-media-type entity-info)
-                           (mime-entity-media-subtype entity-info)))
-
-(defun mime-parse-multipart (header-start header-end body-start body-end
-                                         content-type content-disposition
-                                         encoding node-id)
-  (goto-char (point-min))
-  (let* ((dash-boundary
-         (concat "--"
-                 (std11-strip-quoted-string
-                  (mime-content-type-parameter content-type "boundary"))))
-        (delimiter       (concat "\n" (regexp-quote dash-boundary)))
-        (close-delimiter (concat delimiter "--[ \t]*$"))
-        (rsep (concat delimiter "[ \t]*\n"))
-        (dc-ctl
-         (if (eq (mime-content-type-subtype content-type) 'digest)
-             (make-mime-content-type 'message 'rfc822)
-           (make-mime-content-type 'text 'plain)
-           ))
-        cb ce ret ncb children (i 0))
-    (save-restriction
-      (goto-char body-end)
-      (narrow-to-region header-end
-                       (if (re-search-backward close-delimiter nil t)
-                           (match-beginning 0)
-                         body-end))
-      (goto-char header-end)
-      (re-search-forward rsep nil t)
-      (setq cb (match-end 0))
-      (while (re-search-forward rsep nil t)
-       (setq ce (match-beginning 0))
-       (setq ncb (match-end 0))
-       (save-restriction
-         (narrow-to-region cb ce)
-         (setq ret (mime-parse-message dc-ctl "7bit" (cons i node-id)))
-         )
-       (setq children (cons ret children))
-       (goto-char (setq cb ncb))
-       (setq i (1+ i))
-       )
-      (setq ce (point-max))
+(defun mime-parse-multipart (entity)
+  (with-current-buffer (mime-entity-body-buffer entity)
+    (let* ((representation-type
+           (mime-entity-representation-type-internal entity))
+          (content-type (mime-entity-content-type-internal entity))
+          (dash-boundary
+           (concat "--"
+                   (mime-content-type-parameter content-type "boundary")))
+          (delimiter       (concat "\n" (regexp-quote dash-boundary)))
+          (close-delimiter (concat delimiter "--[ \t]*$"))
+          (rsep (concat delimiter "[ \t]*\n"))
+          (dc-ctl
+           (if (eq (mime-content-type-subtype content-type) 'digest)
+               (make-mime-content-type 'message 'rfc822)
+             (make-mime-content-type 'text 'plain)
+             ))
+          (body-start (mime-entity-body-start-point entity))
+          (body-end (mime-entity-body-end-point entity)))
       (save-restriction
-       (narrow-to-region cb ce)
-       (setq ret (mime-parse-message dc-ctl "7bit" (cons i node-id)))
-       )
-      (setq children (cons ret children))
-      )
-    (make-mime-entity (current-buffer)
-                     header-start header-end body-start body-end
-                     node-id content-type content-disposition encoding
-                     (nreverse children))
-    ))
-
-(defun mime-parse-message (&optional default-ctl default-encoding node-id)
-  "Parse current-buffer as a MIME message.
-DEFAULT-CTL is used when an entity does not have valid Content-Type
-field.  Its format must be as same as return value of
-mime-{parse|read}-Content-Type."
+       (goto-char body-end)
+       (narrow-to-region body-start
+                         (if (re-search-backward close-delimiter nil t)
+                             (match-beginning 0)
+                           body-end))
+       (goto-char body-start)
+       (if (re-search-forward
+            (concat "^" (regexp-quote dash-boundary) "[ \t]*\n")
+            nil t)
+           (let ((cb (match-end 0))
+                 ce ncb ret children
+                 (node-id (mime-entity-node-id-internal entity))
+                 (i 0))
+             (while (re-search-forward rsep nil t)
+               (setq ce (match-beginning 0))
+               (setq ncb (match-end 0))
+               (save-restriction
+                 (narrow-to-region cb ce)
+                 (setq ret (mime-parse-message representation-type dc-ctl
+                                               entity (cons i node-id)))
+                 )
+               (setq children (cons ret children))
+               (goto-char (setq cb ncb))
+               (setq i (1+ i))
+               )
+             (setq ce (point-max))
+             (save-restriction
+               (narrow-to-region cb ce)
+               (setq ret (mime-parse-message representation-type dc-ctl
+                                             entity (cons i node-id)))
+               )
+             (setq children (cons ret children))
+             (mime-entity-set-children-internal entity (nreverse children))
+             )
+         (mime-entity-set-content-type-internal
+          entity (make-mime-content-type 'message 'x-broken))
+         nil)
+       ))))
+
+(defun mime-parse-encapsulated (entity)
+  (mime-entity-set-children-internal
+   entity
+   (with-current-buffer (mime-entity-body-buffer entity)
+     (save-restriction
+       (narrow-to-region (mime-entity-body-start-point entity)
+                        (mime-entity-body-end-point entity))
+       (list (mime-parse-message
+             (mime-entity-representation-type-internal entity) nil
+             entity (cons 0 (mime-entity-node-id-internal entity))))
+       ))))
+
+(defun mime-parse-message (representation-type &optional default-ctl 
+                                              parent node-id)
   (let ((header-start (point-min))
        header-end
        body-start
        (body-end (point-max))
-       content-type content-disposition encoding
-       primary-type)
+       content-type)
     (goto-char header-start)
     (if (re-search-forward "^$" nil t)
        (setq header-end (match-end 0)
-             body-start (1+ header-end))
+             body-start (if (= header-end body-end)
+                            body-end
+                          (1+ header-end)))
       (setq header-end (point-min)
-           body-start (point-min))
-      )
+           body-start (point-min)))
     (save-restriction
       (narrow-to-region header-start header-end)
       (setq content-type (or (let ((str (std11-fetch-field "Content-Type")))
                               (if str
                                   (mime-parse-Content-Type str)
                                 ))
-                            default-ctl)
-           content-disposition (let ((str (std11-fetch-field
-                                           "Content-Disposition")))
-                                 (if str
-                                     (mime-parse-Content-Disposition str)
-                                   ))
-           encoding (let ((str (std11-fetch-field
-                                "Content-Transfer-Encoding")))
-                      (if str
-                          (mime-parse-Content-Transfer-Encoding str)
-                        default-encoding))
-           primary-type (mime-content-type-primary-type content-type))
+                            default-ctl))
       )
-    (cond ((eq primary-type 'multipart)
-          (mime-parse-multipart header-start header-end
-                                body-start body-end
-                                content-type content-disposition encoding
-                                node-id)
-          )
-         ((and (eq primary-type 'message)
-               (memq (mime-content-type-subtype content-type)
-                     '(rfc822 news external-body)
-                     ))
-           (make-mime-entity (current-buffer)
-                            header-start header-end body-start body-end
-                            node-id
-                            content-type content-disposition encoding
-                            (save-restriction
-                              (narrow-to-region body-start body-end)
-                              (list (mime-parse-message
-                                     nil nil (cons 0 node-id)))
-                              ))
-          )
-         (t 
-           (make-mime-entity (current-buffer)
-                            header-start header-end body-start body-end
-                            node-id
-                            content-type content-disposition encoding nil)
-          ))
+    (luna-make-entity representation-type
+                     :location (current-buffer)
+                     :content-type content-type
+                     :parent parent
+                     :node-id node-id
+                     :buffer (current-buffer)
+                     :header-start header-start
+                     :header-end header-end
+                     :body-start body-start
+                     :body-end body-end)
     ))
 
 
 ;;; @ for buffer
 ;;;
 
-(defvar mime-message-structure nil
-  "Information about structure of message.
-Please use reference function `mime-entity-SLOT' to get value of SLOT.
-
-Following is a list of slots of the structure:
-
-buffer                 buffer includes this entity (buffer).
-node-id                        node-id (list of integers)
-header-start           minimum point of header in raw-buffer
-header-end             maximum point of header in raw-buffer
-body-start             minimum point of body in raw-buffer
-body-end               maximum point of body in raw-buffer
-content-type           content-type (content-type)
-content-disposition    content-disposition (content-disposition)
-encoding               Content-Transfer-Encoding (string or nil)
-children               entities included in this entity (list of entity)
-
-If an entity includes other entities in its body, such as multipart or
-message/rfc822, `mime-entity' structures of them are included in
-`children', so the `mime-entity' structure become a tree.")
-(make-variable-buffer-local 'mime-message-structure)
-
-(defun mime-parse-buffer (&optional buffer)
+;;;###autoload
+(defun mime-parse-buffer (&optional buffer representation-type)
   "Parse BUFFER as a MIME message.
 If buffer is omitted, it parses current-buffer."
   (save-excursion
     (if buffer (set-buffer buffer))
-    (setq mime-message-structure (mime-parse-message))
+    (setq mime-message-structure
+         (mime-parse-message (or representation-type
+                                 'mime-buffer-entity) nil))
     ))
 
 
-;;; @ utilities
-;;;
-
-(defsubst mime-root-entity-p (entity)
-  "Return t if ENTITY is root-entity (message)."
-  (null (mime-entity-node-id entity)))
-
-
 ;;; @ end
 ;;;