From: morioka Date: Fri, 10 Apr 1998 14:55:56 +0000 (+0000) Subject: Copied from MEL, SEMI (mime-def.el eword-decode.el eword-encode.el) X-Git-Tag: flim-1_0_0~9 X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=635edef501f7deb04cdf7e5c327e3c8afd67ca46;p=elisp%2Fflim.git Copied from MEL, SEMI (mime-def.el eword-decode.el eword-encode.el) and APEL (std11-parse.el std11.el). --- diff --git a/eword-decode.el b/eword-decode.el new file mode 100644 index 0000000..4365cd6 --- /dev/null +++ b/eword-decode.el @@ -0,0 +1,621 @@ +;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs + +;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc. + +;; Author: ENAMI Tsugutomo +;; MORIOKA Tomohiko +;; Maintainer: MORIOKA Tomohiko +;; Created: 1995/10/03 +;; Original: 1992/07/20 ENAMI Tsugutomo's `mime.el'. +;; Renamed: 1993/06/03 to tiny-mime.el +;; Renamed: 1995/10/03 from tiny-mime.el (split off encoder) +;; Renamed: 1997/02/22 from tm-ew-d.el +;; Keywords: encoded-word, MIME, multilingual, header, mail, news + +;; This file is part of SEMI (Spadework for Emacs MIME Interfaces). + +;; This program is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2, or (at +;; your option) any later version. + +;; This program is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Code: + +(require 'std11-parse) +(require 'mel) +(require 'mime-def) + +(defgroup eword-decode nil + "Encoded-word decoding" + :group 'mime) + +(defconst eword-decode-version "1.2.2") + + +;;; @ MIME encoded-word definition +;;; + +(defconst eword-encoded-text-regexp "[!->@-~]+") +(defconst eword-encoded-word-regexp + (concat (regexp-quote "=?") + "\\(" + mime-charset-regexp + "\\)" + (regexp-quote "?") + "\\(B\\|Q\\)" + (regexp-quote "?") + "\\(" + eword-encoded-text-regexp + "\\)" + (regexp-quote "?="))) + + +;;; @@ Base64 +;;; + +(defconst base64-token-regexp "[A-Za-z0-9+/]") +(defconst base64-token-padding-regexp "[A-Za-z0-9+/=]") + +(defconst eword-B-encoded-text-regexp + (concat "\\(\\(" + base64-token-regexp + base64-token-regexp + base64-token-regexp + base64-token-regexp + "\\)*" + base64-token-regexp + base64-token-regexp + base64-token-padding-regexp + base64-token-padding-regexp + "\\)")) + +;; (defconst eword-B-encoding-and-encoded-text-regexp +;; (concat "\\(B\\)\\?" eword-B-encoded-text-regexp)) + + +;;; @@ Quoted-Printable +;;; + +(defconst quoted-printable-hex-chars "0123456789ABCDEF") +(defconst quoted-printable-octet-regexp + (concat "=[" quoted-printable-hex-chars + "][" quoted-printable-hex-chars "]")) + +(defconst eword-Q-encoded-text-regexp + (concat "\\([^=?]\\|" quoted-printable-octet-regexp "\\)+")) +;; (defconst eword-Q-encoding-and-encoded-text-regexp +;; (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp)) + + +;;; @ for string +;;; + +(defun eword-decode-string (string &optional must-unfold) + "Decode MIME encoded-words in STRING. + +STRING is unfolded before decoding. + +If an encoded-word is broken or your emacs implementation can not +decode the charset included in it, it is not decoded. + +If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even +if there are in decoded encoded-words (generated by bad manner MUA +such as a version of Net$cape)." + (setq string (std11-unfold-string string)) + (let ((dest "")(ew nil) + beg end) + (while (and (string-match eword-encoded-word-regexp string) + (setq beg (match-beginning 0) + end (match-end 0)) + ) + (if (> beg 0) + (if (not + (and (eq ew t) + (string-match "^[ \t]+$" (substring string 0 beg)) + )) + (setq dest (concat dest (substring string 0 beg))) + ) + ) + (setq dest + (concat dest + (eword-decode-encoded-word + (substring string beg end) must-unfold) + )) + (setq string (substring string end)) + (setq ew t) + ) + (concat dest string) + )) + + +;;; @ for region +;;; + +(defun eword-decode-region (start end &optional unfolding must-unfold) + "Decode MIME encoded-words in region between START and END. + +If UNFOLDING is not nil, it unfolds before decoding. + +If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even +if there are in decoded encoded-words (generated by bad manner MUA +such as a version of Net$cape)." + (interactive "*r") + (save-excursion + (save-restriction + (narrow-to-region start end) + (if unfolding + (eword-decode-unfold) + ) + (goto-char (point-min)) + (while (re-search-forward (concat "\\(" eword-encoded-word-regexp "\\)" + "\\(\n?[ \t]\\)+" + "\\(" eword-encoded-word-regexp "\\)") + nil t) + (replace-match "\\1\\6") + (goto-char (point-min)) + ) + (while (re-search-forward eword-encoded-word-regexp nil t) + (insert (eword-decode-encoded-word + (prog1 + (buffer-substring (match-beginning 0) (match-end 0)) + (delete-region (match-beginning 0) (match-end 0)) + ) must-unfold)) + ) + ))) + + +;;; @ for message header +;;; + +(defcustom eword-decode-ignored-field-list + '(newsgroups path lines nntp-posting-host message-id date) + "*List of field-names to be ignored when decoding. +Each field name must be symbol." + :group 'eword-decode + :type '(repeat symbol)) + +(defcustom eword-decode-structured-field-list + '(reply-to resent-reply-to from resent-from sender resent-sender + to resent-to cc resent-cc bcc resent-bcc dcc + mime-version content-type content-transfer-encoding + content-disposition) + "*List of field-names to decode as structured field. +Each field name must be symbol." + :group 'eword-decode + :type '(repeat symbol)) + +(defun eword-decode-header (&optional code-conversion separator) + "Decode MIME encoded-words in header fields. +If CODE-CONVERSION is nil, it decodes only encoded-words. If it is +mime-charset, it decodes non-ASCII bit patterns as the mime-charset. +Otherwise it decodes non-ASCII bit patterns as the +default-mime-charset. +If SEPARATOR is not nil, it is used as header separator." + (interactive "*") + (save-excursion + (save-restriction + (std11-narrow-to-header separator) + (let ((default-charset + (if code-conversion + (if (mime-charset-to-coding-system code-conversion) + code-conversion + default-mime-charset)))) + (if default-charset + (let (beg p end field-name len) + (goto-char (point-min)) + (while (re-search-forward std11-field-head-regexp nil t) + (setq beg (match-beginning 0) + p (match-end 0) + field-name (buffer-substring beg (1- p)) + len (string-width field-name) + field-name (intern (downcase field-name)) + end (std11-field-end)) + (cond ((memq field-name eword-decode-ignored-field-list) + ;; Don't decode + ) + ((memq field-name eword-decode-structured-field-list) + ;; Decode as structured field + (let ((body (buffer-substring p end)) + (default-mime-charset default-charset)) + (delete-region p end) + (insert (eword-decode-and-fold-structured-field + body (1+ len))) + )) + (t + ;; Decode as unstructured field + (save-restriction + (narrow-to-region beg (1+ end)) + (decode-mime-charset-region p end default-charset) + (goto-char p) + (if (re-search-forward eword-encoded-word-regexp + nil t) + (eword-decode-region beg (point-max) 'unfold)) + ))))) + (eword-decode-region (point-min) (point-max) t) + ))))) + +(defun eword-decode-unfold () + (goto-char (point-min)) + (let (field beg end) + (while (re-search-forward std11-field-head-regexp nil t) + (setq beg (match-beginning 0) + end (std11-field-end)) + (setq field (buffer-substring beg end)) + (if (string-match eword-encoded-word-regexp field) + (save-restriction + (narrow-to-region (goto-char beg) end) + (while (re-search-forward "\n\\([ \t]\\)" nil t) + (replace-match (match-string 1)) + ) + (goto-char (point-max)) + )) + ))) + + +;;; @ encoded-word decoder +;;; + +(defvar eword-warning-face nil "Face used for invalid encoded-word.") + +(defun eword-decode-encoded-word (word &optional must-unfold) + "Decode WORD if it is an encoded-word. + +If your emacs implementation can not decode the charset of WORD, it +returns WORD. Similarly the encoded-word is broken, it returns WORD. + +If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even +if there are in decoded encoded-word (generated by bad manner MUA such +as a version of Net$cape)." + (or (if (string-match eword-encoded-word-regexp word) + (let ((charset + (substring word (match-beginning 1) (match-end 1)) + ) + (encoding + (upcase + (substring word (match-beginning 2) (match-end 2)) + )) + (text + (substring word (match-beginning 3) (match-end 3)) + )) + (condition-case err + (eword-decode-encoded-text charset encoding text must-unfold) + (error + (and + (add-text-properties 0 (length word) + (and eword-warning-face + (list 'face eword-warning-face)) + word) + word))) + )) + word)) + + +;;; @ encoded-text decoder +;;; + +(defun eword-decode-encoded-text (charset encoding string + &optional must-unfold) + "Decode STRING as an encoded-text. + +If your emacs implementation can not decode CHARSET, it returns nil. + +If ENCODING is not \"B\" or \"Q\", it occurs error. +So you should write error-handling code if you don't want break by errors. + +If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even +if there are in decoded encoded-text (generated by bad manner MUA such +as a version of Net$cape)." + (let ((cs (mime-charset-to-coding-system charset))) + (if cs + (let ((dest + (cond + ((string-equal "B" encoding) + (if (and (string-match eword-B-encoded-text-regexp string) + (string-equal string (match-string 0 string))) + (base64-decode-string string) + (error "Invalid encoded-text %s" string))) + ((string-equal "Q" encoding) + (if (and (string-match eword-Q-encoded-text-regexp string) + (string-equal string (match-string 0 string))) + (q-encoding-decode-string string) + (error "Invalid encoded-text %s" string))) + (t + (error "Invalid encoding %s" encoding) + ))) + ) + (if dest + (progn + (setq dest (decode-coding-string dest cs)) + (if must-unfold + (mapconcat (function + (lambda (chr) + (cond + ((eq chr ?\n) "") + ((eq chr ?\t) " ") + (t (char-to-string chr))) + )) + (std11-unfold-string dest) + "") + dest) + )))))) + + +;;; @ lexical analyze +;;; + +(defvar eword-lexical-analyze-cache nil) +(defvar eword-lexical-analyze-cache-max 299 + "*Max position of eword-lexical-analyze-cache. +It is max size of eword-lexical-analyze-cache - 1.") + +(defcustom eword-lexical-analyzers + '(eword-analyze-quoted-string + eword-analyze-domain-literal + eword-analyze-comment + eword-analyze-spaces + eword-analyze-special + eword-analyze-encoded-word + eword-analyze-atom) + "*List of functions to return result of lexical analyze. +Each function must have two arguments: STRING and MUST-UNFOLD. +STRING is the target string to be analyzed. +If MUST-UNFOLD is not nil, each function must unfold and eliminate +bare-CR and bare-LF from the result even if they are included in +content of the encoded-word. +Each function must return nil if it can not analyze STRING as its +format. + +Previous function is preferred to next function. If a function +returns nil, next function is used. Otherwise the return value will +be the result." + :group 'eword-decode + :type '(repeat function)) + +(defun eword-analyze-quoted-string (string &optional must-unfold) + (let ((p (std11-check-enclosure string ?\" ?\"))) + (if p + (cons (cons 'quoted-string + (decode-mime-charset-string + (std11-strip-quoted-pair (substring string 1 (1- p))) + default-mime-charset)) + (substring string p)) + ))) + +(defun eword-analyze-domain-literal (string &optional must-unfold) + (std11-analyze-domain-literal string)) + +(defun eword-analyze-comment (string &optional must-unfold) + (let ((p (std11-check-enclosure string ?\( ?\) t))) + (if p + (cons (cons 'comment + (eword-decode-string + (decode-mime-charset-string + (std11-strip-quoted-pair (substring string 1 (1- p))) + default-mime-charset) + must-unfold)) + (substring string p)) + ))) + +(defun eword-analyze-spaces (string &optional must-unfold) + (std11-analyze-spaces string)) + +(defun eword-analyze-special (string &optional must-unfold) + (std11-analyze-special string)) + +(defun eword-analyze-encoded-word (string &optional must-unfold) + (if (eq (string-match eword-encoded-word-regexp string) 0) + (let ((end (match-end 0)) + (dest (eword-decode-encoded-word (match-string 0 string) + must-unfold)) + ) + (setq string (substring string end)) + (while (eq (string-match `,(concat "[ \t\n]*\\(" + eword-encoded-word-regexp + "\\)") + string) + 0) + (setq end (match-end 0)) + (setq dest + (concat dest + (eword-decode-encoded-word (match-string 1 string) + must-unfold)) + string (substring string end)) + ) + (cons (cons 'atom dest) string) + ))) + +(defun eword-analyze-atom (string &optional must-unfold) + (if (string-match std11-atom-regexp string) + (let ((end (match-end 0))) + (cons (cons 'atom (decode-mime-charset-string + (substring string 0 end) + default-mime-charset)) + (substring string end) + )))) + +(defun eword-lexical-analyze-internal (string must-unfold) + (let (dest ret) + (while (not (string-equal string "")) + (setq ret + (let ((rest eword-lexical-analyzers) + func r) + (while (and (setq func (car rest)) + (null (setq r (funcall func string must-unfold))) + ) + (setq rest (cdr rest))) + (or r `((error . ,string) . "")) + )) + (setq dest (cons (car ret) dest)) + (setq string (cdr ret)) + ) + (nreverse dest) + )) + +(defun eword-lexical-analyze (string &optional must-unfold) + "Return lexical analyzed list corresponding STRING. +It is like std11-lexical-analyze, but it decodes non us-ascii +characters encoded as encoded-words or invalid \"raw\" format. +\"Raw\" non us-ascii characters are regarded as variable +`default-mime-charset'." + (let ((key (copy-sequence string)) + ret) + (set-text-properties 0 (length key) nil key) + (if (setq ret (assoc key eword-lexical-analyze-cache)) + (cdr ret) + (setq ret (eword-lexical-analyze-internal key must-unfold)) + (setq eword-lexical-analyze-cache + (cons (cons key ret) + (last eword-lexical-analyze-cache + eword-lexical-analyze-cache-max))) + ret))) + +(defun eword-decode-token (token) + (let ((type (car token)) + (value (cdr token))) + (cond ((eq type 'quoted-string) + (std11-wrap-as-quoted-string value)) + ((eq type 'comment) + (concat "(" (std11-wrap-as-quoted-pairs value '(?( ?))) ")")) + (t value)))) + +(defun eword-decode-and-fold-structured-field + (string start-column &optional max-column must-unfold) + "Decode and fold (fill) STRING as structured field body. +It decodes non us-ascii characters in FULL-NAME encoded as +encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii +characters are regarded as variable `default-mime-charset'. + +If an encoded-word is broken or your emacs implementation can not +decode the charset included in it, it is not decoded. + +If MAX-COLUMN is omitted, `fill-column' is used. + +If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even +if there are in decoded encoded-words (generated by bad manner MUA +such as a version of Net$cape)." + (or max-column + (setq max-column fill-column)) + (let ((c start-column) + (tokens (eword-lexical-analyze string must-unfold)) + (result "") + token) + (while (and (setq token (car tokens)) + (setq tokens (cdr tokens))) + (let* ((type (car token))) + (if (eq type 'spaces) + (let* ((next-token (car tokens)) + (next-str (eword-decode-token next-token)) + (next-len (string-width next-str)) + (next-c (+ c next-len 1))) + (if (< next-c max-column) + (setq result (concat result " " next-str) + c next-c) + (setq result (concat result "\n " next-str) + c (1+ next-len))) + (setq tokens (cdr tokens)) + ) + (let* ((str (eword-decode-token token))) + (setq result (concat result str) + c (+ c (string-width str))) + )))) + (if token + (concat result (eword-decode-token token)) + result))) + +(defun eword-decode-and-unfold-structured-field (string) + "Decode and unfold STRING as structured field body. +It decodes non us-ascii characters in FULL-NAME encoded as +encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii +characters are regarded as variable `default-mime-charset'. + +If an encoded-word is broken or your emacs implementation can not +decode the charset included in it, it is not decoded." + (let ((tokens (eword-lexical-analyze string 'must-unfold)) + (result "")) + (while tokens + (let* ((token (car tokens)) + (type (car token))) + (setq tokens (cdr tokens)) + (setq result + (if (eq type 'spaces) + (concat result " ") + (concat result (eword-decode-token token)) + )))) + result)) + +(defun eword-decode-structured-field-body (string &optional must-unfold + start-column max-column) + "Decode non us-ascii characters in STRING as structured field body. +STRING is unfolded before decoding. + +It decodes non us-ascii characters in FULL-NAME encoded as +encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii +characters are regarded as variable `default-mime-charset'. + +If an encoded-word is broken or your emacs implementation can not +decode the charset included in it, it is not decoded. + +If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even +if there are in decoded encoded-words (generated by bad manner MUA +such as a version of Net$cape)." + (if start-column + ;; fold with max-column + (eword-decode-and-fold-structured-field + string start-column max-column must-unfold) + ;; Don't fold + (mapconcat (function eword-decode-token) + (eword-lexical-analyze string must-unfold) + "") + )) + +(defun eword-decode-unstructured-field-body (string &optional must-unfold) + "Decode non us-ascii characters in STRING as unstructured field body. +STRING is unfolded before decoding. + +It decodes non us-ascii characters in FULL-NAME encoded as +encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii +characters are regarded as variable `default-mime-charset'. + +If an encoded-word is broken or your emacs implementation can not +decode the charset included in it, it is not decoded. + +If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even +if there are in decoded encoded-words (generated by bad manner MUA +such as a version of Net$cape)." + (eword-decode-string + (decode-mime-charset-string string default-mime-charset) + must-unfold)) + +(defun eword-extract-address-components (string) + "Extract full name and canonical address from STRING. +Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). +If no name can be extracted, FULL-NAME will be nil. +It decodes non us-ascii characters in FULL-NAME encoded as +encoded-words or invalid \"raw\" string. \"Raw\" non us-ascii +characters are regarded as variable `default-mime-charset'." + (let* ((structure (car (std11-parse-address + (eword-lexical-analyze + (std11-unfold-string string) 'must-unfold)))) + (phrase (std11-full-name-string structure)) + (address (std11-address-string structure)) + ) + (list phrase address) + )) + + +;;; @ end +;;; + +(provide 'eword-decode) + +;;; eword-decode.el ends here diff --git a/eword-encode.el b/eword-encode.el new file mode 100644 index 0000000..2748d71 --- /dev/null +++ b/eword-encode.el @@ -0,0 +1,642 @@ +;;; eword-encode.el --- RFC 2047 based encoded-word encoder for GNU Emacs + +;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc. + +;; Author: MORIOKA Tomohiko +;; Keywords: encoded-word, MIME, multilingual, header, mail, news + +;; This file is part of SEMI (Spadework for Emacs MIME Interfaces). + +;; This program is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2, or (at +;; your option) any later version. + +;; This program is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Code: + +(require 'emu) +(require 'mel) +(require 'std11) +(require 'mime-def) +(require 'eword-decode) + + +;;; @ version +;;; + +(defconst eword-encode-version "1.2") + + +;;; @ variables +;;; + +(defvar eword-field-encoding-method-alist + '(("X-Nsubject" . iso-2022-jp-2) + ("Newsgroups" . nil) + ("Message-ID" . nil) + (t . mime) + ) + "*Alist to specify field encoding method. +Its key is field-name, value is encoding method. + +If method is `mime', this field will be encoded into MIME format. + +If method is a MIME-charset, this field will be encoded as the charset +when it must be convert into network-code. + +If method is `default-mime-charset', this field will be encoded as +variable `default-mime-charset' when it must be convert into +network-code. + +If method is nil, this field will not be encoded.") + +(defvar eword-charset-encoding-alist + '((us-ascii . nil) + (iso-8859-1 . "Q") + (iso-8859-2 . "Q") + (iso-8859-3 . "Q") + (iso-8859-4 . "Q") + (iso-8859-5 . "Q") + (koi8-r . "Q") + (iso-8859-7 . "Q") + (iso-8859-8 . "Q") + (iso-8859-9 . "Q") + (iso-2022-jp . "B") + (iso-2022-kr . "B") + (gb2312 . "B") + (cn-gb . "B") + (cn-gb-2312 . "B") + (euc-kr . "B") + (iso-2022-jp-2 . "B") + (iso-2022-int-1 . "B") + )) + + +;;; @ encoded-text encoder +;;; + +(defun eword-encode-text (charset encoding string &optional mode) + "Encode STRING as an encoded-word, and return the result. +CHARSET is a symbol to indicate MIME charset of the encoded-word. +ENCODING allows \"B\" or \"Q\". +MODE is allows `text', `comment', `phrase' or nil. Default value is +`phrase'." + (let ((text + (cond ((string= encoding "B") + (base64-encode-string string)) + ((string= encoding "Q") + (q-encoding-encode-string string mode)) + ) + )) + (if text + (concat "=?" (upcase (symbol-name charset)) "?" + encoding "?" text "?=") + ))) + + +;;; @ charset word +;;; + +(defsubst eword-encode-char-type (character) + (if (or (eq character ? )(eq character ?\t)) + nil + (char-charset character) + )) + +(defun eword-encode-divide-into-charset-words (string) + (let ((len (length string)) + dest) + (while (> len 0) + (let* ((chr (sref string 0)) + (charset (eword-encode-char-type chr)) + (i (char-bytes chr)) + ) + (while (and (< i len) + (setq chr (sref string i)) + (eq charset (eword-encode-char-type chr)) + ) + (setq i (+ i (char-bytes chr))) + ) + (setq dest (cons (cons charset (substring string 0 i)) dest) + string (substring string i) + len (- len i) + ))) + (nreverse dest) + )) + + +;;; @ word +;;; + +(defun eword-encode-charset-words-to-words (charset-words) + (let (dest) + (while charset-words + (let* ((charset-word (car charset-words)) + (charset (car charset-word)) + ) + (if charset + (let ((charsets (list charset)) + (str (cdr charset-word)) + ) + (catch 'tag + (while (setq charset-words (cdr charset-words)) + (setq charset-word (car charset-words) + charset (car charset-word)) + (if (null charset) + (throw 'tag nil) + ) + (or (memq charset charsets) + (setq charsets (cons charset charsets)) + ) + (setq str (concat str (cdr charset-word))) + )) + (setq dest (cons (cons charsets str) dest)) + ) + (setq dest (cons charset-word dest) + charset-words (cdr charset-words) + )))) + (nreverse dest) + )) + + +;;; @ rule +;;; + +(defmacro tm-eword::make-rword (text charset encoding type) + (` (list (, text)(, charset)(, encoding)(, type)))) +(defmacro tm-eword::rword-text (rword) + (` (car (, rword)))) +(defmacro tm-eword::rword-charset (rword) + (` (car (cdr (, rword))))) +(defmacro tm-eword::rword-encoding (rword) + (` (car (cdr (cdr (, rword)))))) +(defmacro tm-eword::rword-type (rword) + (` (car (cdr (cdr (cdr (, rword))))))) + +(defun tm-eword::find-charset-rule (charsets) + (if charsets + (let* ((charset (charsets-to-mime-charset charsets)) + (encoding (cdr (assq charset eword-charset-encoding-alist))) + ) + (list charset encoding) + ))) + +(defun tm-eword::words-to-ruled-words (wl &optional mode) + (mapcar (function + (lambda (word) + (let ((ret (tm-eword::find-charset-rule (car word)))) + (tm-eword::make-rword (cdr word) (car ret)(nth 1 ret) mode) + ))) + wl)) + +(defun tm-eword::space-process (seq) + (let (prev a ac b c cc) + (while seq + (setq b (car seq)) + (setq seq (cdr seq)) + (setq c (car seq)) + (setq cc (tm-eword::rword-charset c)) + (if (null (tm-eword::rword-charset b)) + (progn + (setq a (car prev)) + (setq ac (tm-eword::rword-charset a)) + (if (and (tm-eword::rword-encoding a) + (tm-eword::rword-encoding c)) + (cond ((eq ac cc) + (setq prev (cons + (cons (concat (car a)(car b)(car c)) + (cdr a)) + (cdr prev) + )) + (setq seq (cdr seq)) + ) + (t + (setq prev (cons + (cons (concat (car a)(car b)) + (cdr a)) + (cdr prev) + )) + )) + (setq prev (cons b prev)) + )) + (setq prev (cons b prev)) + )) + (reverse prev) + )) + +(defun tm-eword::split-string (str &optional mode) + (tm-eword::space-process + (tm-eword::words-to-ruled-words + (eword-encode-charset-words-to-words + (eword-encode-divide-into-charset-words str)) + mode))) + + +;;; @ length +;;; + +(defun tm-eword::encoded-word-length (rword) + (let ((string (tm-eword::rword-text rword)) + (charset (tm-eword::rword-charset rword)) + (encoding (tm-eword::rword-encoding rword)) + ret) + (setq ret + (cond ((string-equal encoding "B") + (setq string (encode-mime-charset-string string charset)) + (base64-encoded-length string) + ) + ((string-equal encoding "Q") + (setq string (encode-mime-charset-string string charset)) + (q-encoding-encoded-length string + (tm-eword::rword-type rword)) + ))) + (if ret + (cons (+ 7 (length (symbol-name charset)) ret) string) + ))) + + +;;; @ encode-string +;;; + +(defun tm-eword::encode-string-1 (column rwl) + (let* ((rword (car rwl)) + (ret (tm-eword::encoded-word-length rword)) + string len) + (if (null ret) + (cond ((and (setq string (car rword)) + (or (<= (setq len (+ (length string) column)) 76) + (<= column 1)) + ) + (setq rwl (cdr rwl)) + ) + (t + (setq string "\n ") + (setq len 1) + )) + (cond ((and (setq len (car ret)) + (<= (+ column len) 76) + ) + (setq string + (eword-encode-text + (tm-eword::rword-charset rword) + (tm-eword::rword-encoding rword) + (cdr ret) + (tm-eword::rword-type rword) + )) + (setq len (+ (length string) column)) + (setq rwl (cdr rwl)) + ) + (t + (setq string (car rword)) + (let* ((p 0) np + (str "") nstr) + (while (and (< p len) + (progn + (setq np (+ p (char-bytes (sref string p)))) + (setq nstr (substring string 0 np)) + (setq ret (tm-eword::encoded-word-length + (cons nstr (cdr rword)) + )) + (setq nstr (cdr ret)) + (setq len (+ (car ret) column)) + (<= len 76) + )) + (setq str nstr + p np)) + (if (string-equal str "") + (setq string "\n " + len 1) + (setq rwl (cons (cons (substring string p) (cdr rword)) + (cdr rwl))) + (setq string + (eword-encode-text + (tm-eword::rword-charset rword) + (tm-eword::rword-encoding rword) + str + (tm-eword::rword-type rword))) + (setq len (+ (length string) column)) + ) + ))) + ) + (list string len rwl) + )) + +(defun tm-eword::encode-rwl (column rwl) + (let (ret dest ps special str ew-f pew-f) + (while rwl + (setq ew-f (nth 2 (car rwl))) + (if (and pew-f ew-f) + (setq rwl (cons '(" ") rwl) + pew-f nil) + (setq pew-f ew-f) + ) + (setq ret (tm-eword::encode-string-1 column rwl)) + (setq str (car ret)) + (if (eq (elt str 0) ?\n) + (if (eq special ?\() + (progn + (setq dest (concat dest "\n (")) + (setq ret (tm-eword::encode-string-1 2 rwl)) + (setq str (car ret)) + )) + (cond ((eq special ? ) + (if (string= str "(") + (setq ps t) + (setq dest (concat dest " ")) + (setq ps nil) + )) + ((eq special ?\() + (if ps + (progn + (setq dest (concat dest " (")) + (setq ps nil) + ) + (setq dest (concat dest "(")) + ) + ))) + (cond ((string= str " ") + (setq special ? ) + ) + ((string= str "(") + (setq special ?\() + ) + (t + (setq special nil) + (setq dest (concat dest str)) + )) + (setq column (nth 1 ret) + rwl (nth 2 ret)) + ) + (list dest column) + )) + +(defun tm-eword::encode-string (column str &optional mode) + (tm-eword::encode-rwl column (tm-eword::split-string str mode)) + ) + + +;;; @ converter +;;; + +(defun tm-eword::phrase-to-rwl (phrase) + (let (token type dest str) + (while phrase + (setq token (car phrase)) + (setq type (car token)) + (cond ((eq type 'quoted-string) + (setq str (concat "\"" (cdr token) "\"")) + (setq dest + (append dest + (list + (let ((ret (tm-eword::find-charset-rule + (find-non-ascii-charset-string str)))) + (tm-eword::make-rword + str (car ret)(nth 1 ret) 'phrase) + ) + ))) + ) + ((eq type 'comment) + (setq dest + (append dest + '(("(" nil nil)) + (tm-eword::words-to-ruled-words + (eword-encode-charset-words-to-words + (eword-encode-divide-into-charset-words + (cdr token))) + 'comment) + '((")" nil nil)) + )) + ) + (t + (setq dest + (append dest + (tm-eword::words-to-ruled-words + (eword-encode-charset-words-to-words + (eword-encode-divide-into-charset-words + (cdr token)) + ) 'phrase))) + )) + (setq phrase (cdr phrase)) + ) + (tm-eword::space-process dest) + )) + +(defun eword-addr-seq-to-rwl (seq) + (let (dest pname) + (while seq + (let* ((token (car seq)) + (name (car token)) + ) + (cond ((eq name 'spaces) + (setq dest (nconc dest (list (list (cdr token) nil nil)))) + ) + ((eq name 'comment) + (setq dest + (nconc + dest + (list (list "(" nil nil)) + (tm-eword::split-string (cdr token) 'comment) + (list (list ")" nil nil)) + )) + ) + ((eq name 'quoted-string) + (setq dest + (nconc + dest + (list + (list (concat "\"" (cdr token) "\"") nil nil) + ))) + ) + (t + (setq dest + (if (or (eq pname 'spaces) + (eq pname 'comment)) + (nconc dest (list (list (cdr token) nil nil))) + (nconc (butlast dest) + (list + (list (concat (car (car (last dest))) + (cdr token)) + nil nil))))) + )) + (setq seq (cdr seq) + pname name)) + ) + dest)) + +(defun eword-phrase-route-addr-to-rwl (phrase-route-addr) + (if (eq (car phrase-route-addr) 'phrase-route-addr) + (let ((phrase (nth 1 phrase-route-addr)) + (route (nth 2 phrase-route-addr)) + dest) + (if (eq (car (car phrase)) 'spaces) + (setq phrase (cdr phrase)) + ) + (setq dest (tm-eword::phrase-to-rwl phrase)) + (if dest + (setq dest (append dest '((" " nil nil)))) + ) + (append + dest + (eword-addr-seq-to-rwl + (append '((specials . "<")) + route + '((specials . ">")))) + )))) + +(defun eword-addr-spec-to-rwl (addr-spec) + (if (eq (car addr-spec) 'addr-spec) + (eword-addr-seq-to-rwl (cdr addr-spec)) + )) + +(defun tm-eword::mailbox-to-rwl (mbox) + (let ((addr (nth 1 mbox)) + (comment (nth 2 mbox)) + dest) + (setq dest (or (eword-phrase-route-addr-to-rwl addr) + (eword-addr-spec-to-rwl addr) + )) + (if comment + (setq dest + (append dest + '((" " nil nil) + ("(" nil nil)) + (tm-eword::split-string comment 'comment) + '((")" nil nil)) + ))) + dest)) + +(defun tm-eword::addresses-to-rwl (addresses) + (let ((dest (tm-eword::mailbox-to-rwl (car addresses)))) + (if dest + (while (setq addresses (cdr addresses)) + (setq dest (append dest + '(("," nil nil)) + '((" " nil nil)) + (tm-eword::mailbox-to-rwl (car addresses)) + )) + )) + dest)) + +(defun tm-eword::encode-address-list (column str) + (tm-eword::encode-rwl + column + (tm-eword::addresses-to-rwl (std11-parse-addresses-string str)) + )) + + +;;; @ application interfaces +;;; + +(defun eword-encode-field (string) + "Encode header field STRING, and return the result. +A lexical token includes non-ASCII character is encoded as MIME +encoded-word. ASCII token is not encoded." + (setq string (std11-unfold-string string)) + (let ((ret (string-match std11-field-head-regexp string))) + (or (if ret + (let ((field-name (substring string 0 (1- (match-end 0)))) + (field-body (eliminate-top-spaces + (substring string (match-end 0)))) + ) + (if (setq ret + (cond ((string-equal field-body "") "") + ((memq (intern (downcase field-name)) + '(reply-to + from sender + resent-reply-to resent-from + resent-sender to resent-to + cc resent-cc + bcc resent-bcc dcc + mime-version) + ) + (car (tm-eword::encode-address-list + (+ (length field-name) 2) field-body)) + ) + (t + (car (tm-eword::encode-string + (1+ (length field-name)) + field-body 'text)) + )) + ) + (concat field-name ": " ret) + ))) + (car (tm-eword::encode-string 0 string)) + ))) + +(defun eword-in-subject-p () + (let ((str (std11-field-body "Subject"))) + (if (and str (string-match eword-encoded-word-regexp str)) + str))) + +(defsubst eword-find-field-encoding-method (field-name) + (setq field-name (downcase field-name)) + (let ((alist eword-field-encoding-method-alist)) + (catch 'found + (while alist + (let* ((pair (car alist)) + (str (car pair))) + (if (and (stringp str) + (string= field-name (downcase str))) + (throw 'found (cdr pair)) + )) + (setq alist (cdr alist))) + (cdr (assq t eword-field-encoding-method-alist)) + ))) + +(defun eword-encode-header (&optional code-conversion) + "Encode header fields to network representation, such as MIME encoded-word. + +It refer variable `eword-field-encoding-method-alist'." + (interactive "*") + (save-excursion + (save-restriction + (std11-narrow-to-header mail-header-separator) + (goto-char (point-min)) + (let ((default-cs (mime-charset-to-coding-system default-mime-charset)) + beg end field-name) + (while (re-search-forward std11-field-head-regexp nil t) + (setq beg (match-beginning 0)) + (setq field-name (buffer-substring beg (1- (match-end 0)))) + (setq end (std11-field-end)) + (and (find-non-ascii-charset-region beg end) + (let ((method (eword-find-field-encoding-method + (downcase field-name)))) + (cond ((eq method 'mime) + (let ((field + (buffer-substring-no-properties beg end) + )) + (delete-region beg end) + (insert (eword-encode-field field)) + )) + (code-conversion + (let ((cs + (or (mime-charset-to-coding-system + method) + default-cs))) + (encode-coding-region beg end cs) + ))) + )) + )) + ))) + +(defun eword-encode-string (str &optional column mode) + (car (tm-eword::encode-rwl (or column 0) (tm-eword::split-string str mode))) + ) + + +;;; @ end +;;; + +(provide 'eword-encode) + +;;; eword-encode.el ends here diff --git a/mime-def.el b/mime-def.el new file mode 100644 index 0000000..dfbeece --- /dev/null +++ b/mime-def.el @@ -0,0 +1,76 @@ +;;; mime-def.el --- definition module for SEMI + +;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc. + +;; Author: MORIOKA Tomohiko +;; Keywords: definition, MIME, multimedia, mail, news + +;; This file is part of SEMI (Spadework for Emacs MIME Interfaces). + +;; This program is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2, or (at +;; your option) any later version. + +;; This program is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Code: + +(require 'custom) + +(defgroup mime nil + "Emacs MIME Interfaces" + :group 'news + :group 'mail) + +(custom-handle-keyword 'default-mime-charset :group 'mime + 'custom-variable) + +(unless (fboundp 'butlast) + (defun butlast (x &optional n) + "Returns a copy of LIST with the last N elements removed." + (if (and n (<= n 0)) x + (nbutlast (copy-sequence x) n))) + + (defun nbutlast (x &optional n) + "Modifies LIST to remove the last N elements." + (let ((m (length x))) + (or n (setq n 1)) + (and (< n m) + (progn + (if (> n 0) (setcdr (nthcdr (- (1- m) n) x) nil)) + x)))) + ) + +(defsubst eliminate-top-spaces (string) + "Eliminate top sequence of space or tab in STRING." + (if (string-match "^[ \t]+" string) + (substring string (match-end 0)) + string)) + + +;;; @ definitions about MIME +;;; + +(defconst mime-tspecials "][()<>@,\;:\\\"/?=") +(defconst mime-token-regexp (concat "[^" mime-tspecials "\000-\040]+")) +(defconst mime-charset-regexp mime-token-regexp) + +(defconst mime-media-type/subtype-regexp + (concat mime-token-regexp "/" mime-token-regexp)) + + +;;; @ end +;;; + +(provide 'mime-def) + +;;; mime-def.el ends here diff --git a/std11-parse.el b/std11-parse.el new file mode 100644 index 0000000..3abf0f1 --- /dev/null +++ b/std11-parse.el @@ -0,0 +1,461 @@ +;;; std11-parse.el --- STD 11 parser for GNU Emacs + +;; Copyright (C) 1995,1996,1997 Free Software Foundation, Inc. + +;; Author: MORIOKA Tomohiko +;; Keywords: mail, news, RFC 822, STD 11 +;; Version: $Id: std11-parse.el,v 1.1 1998-04-10 14:55:56 morioka Exp $ + +;; This file is part of MU (Message Utilities). + +;; This program is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2, or (at +;; your option) any later version. + +;; This program is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Code: + +(require 'std11) +(require 'emu) + + +;;; @ lexical analyze +;;; + +(defconst std11-space-chars " \t\n") +(defconst std11-spaces-regexp (` (, (concat "[" std11-space-chars "]+")))) +(defconst std11-special-char-list '(?\] ?\[ + ?\( ?\) ?< ?> ?@ + ?, ?\; ?: ?\\ ?\" + ?.)) +(defconst std11-atom-regexp + (` (, (concat "^[^" std11-special-char-list std11-space-chars "]+")))) + +(defun std11-analyze-spaces (string) + (if (and (string-match std11-spaces-regexp string) + (= (match-beginning 0) 0)) + (let ((end (match-end 0))) + (cons (cons 'spaces (substring string 0 end)) + (substring string end) + )))) + +(defun std11-analyze-special (str) + (if (and (> (length str) 0) + (memq (aref str 0) std11-special-char-list)) + (cons (cons 'specials (substring str 0 1)) + (substring str 1) + ))) + +(defun std11-analyze-atom (str) + (if (string-match std11-atom-regexp str) + (let ((end (match-end 0))) + (cons (cons 'atom (substring str 0 end)) + (substring str end) + )))) + +(defun std11-check-enclosure (str open close &optional recursive from) + (let ((len (length str)) + (i (or from 0)) + ) + (if (and (> len i) + (eq (aref str i) open)) + (let (p chr) + (setq i (1+ i)) + (catch 'tag + (while (< i len) + (setq chr (aref str i)) + (cond ((eq chr ?\\) + (setq i (1+ i)) + (if (>= i len) + (throw 'tag nil) + ) + (setq i (1+ i)) + ) + ((eq chr close) + (throw 'tag (1+ i)) + ) + ((eq chr open) + (if (and recursive + (setq p (std11-check-enclosure + str open close recursive i)) + ) + (setq i p) + (throw 'tag nil) + )) + (t + (setq i (1+ i)) + )) + )))))) + +(defun std11-analyze-quoted-string (str) + (let ((p (std11-check-enclosure str ?\" ?\"))) + (if p + (cons (cons 'quoted-string (substring str 1 (1- p))) + (substring str p)) + ))) + +(defun std11-analyze-domain-literal (str) + (let ((p (std11-check-enclosure str ?\[ ?\]))) + (if p + (cons (cons 'domain-literal (substring str 1 (1- p))) + (substring str p)) + ))) + +(defun std11-analyze-comment (str) + (let ((p (std11-check-enclosure str ?\( ?\) t))) + (if p + (cons (cons 'comment (substring str 1 (1- p))) + (substring str p)) + ))) + +(defun std11-lexical-analyze (str) + (let (dest ret) + (while (not (string-equal str "")) + (setq ret + (or (std11-analyze-quoted-string str) + (std11-analyze-domain-literal str) + (std11-analyze-comment str) + (std11-analyze-spaces str) + (std11-analyze-special str) + (std11-analyze-atom str) + '((error) . "") + )) + (setq dest (cons (car ret) dest)) + (setq str (cdr ret)) + ) + (nreverse dest) + )) + + +;;; @ parser +;;; + +(defun std11-ignored-token-p (token) + (let ((type (car token))) + (or (eq type 'spaces)(eq type 'comment)) + )) + +(defun std11-parse-token (lal) + (let (token itl) + (while (and lal + (progn + (setq token (car lal)) + (std11-ignored-token-p token) + )) + (setq lal (cdr lal)) + (setq itl (cons token itl)) + ) + (cons (nreverse (cons token itl)) + (cdr lal)) + )) + +(defun std11-parse-ascii-token (lal) + (let (token itl parsed token-value) + (while (and lal + (setq token (car lal)) + (or (std11-ignored-token-p token) + (if (and (setq token-value (cdr token)) + (find-non-ascii-charset-string token-value) + ) + (setq token nil) + ))) + (setq lal (cdr lal)) + (setq itl (cons token itl)) + ) + (if (and token + (setq parsed (nreverse (cons token itl))) + ) + (cons parsed (cdr lal)) + ))) + +(defun std11-parse-token-or-comment (lal) + (let (token itl) + (while (and lal + (progn + (setq token (car lal)) + (eq (car token) 'spaces) + )) + (setq lal (cdr lal)) + (setq itl (cons token itl)) + ) + (cons (nreverse (cons token itl)) + (cdr lal)) + )) + +(defun std11-parse-word (lal) + (let ((ret (std11-parse-ascii-token lal))) + (if ret + (let ((elt (car ret)) + (rest (cdr ret)) + ) + (if (or (assq 'atom elt) + (assq 'quoted-string elt)) + (cons (cons 'word elt) rest) + ))))) + +(defun std11-parse-word-or-comment (lal) + (let ((ret (std11-parse-token-or-comment lal))) + (if ret + (let ((elt (car ret)) + (rest (cdr ret)) + ) + (cond ((or (assq 'atom elt) + (assq 'quoted-string elt)) + (cons (cons 'word elt) rest) + ) + ((assq 'comment elt) + (cons (cons 'comment-word elt) rest) + )) + )))) + +(defun std11-parse-phrase (lal) + (let (ret phrase) + (while (setq ret (std11-parse-word-or-comment lal)) + (setq phrase (append phrase (cdr (car ret)))) + (setq lal (cdr ret)) + ) + (if phrase + (cons (cons 'phrase phrase) lal) + ))) + +(defun std11-parse-local-part (lal) + (let ((ret (std11-parse-word lal))) + (if ret + (let ((local-part (cdr (car ret))) dot) + (setq lal (cdr ret)) + (while (and (setq ret (std11-parse-ascii-token lal)) + (setq dot (car ret)) + (string-equal (cdr (assq 'specials dot)) ".") + (setq ret (std11-parse-word (cdr ret))) + (setq local-part + (append local-part dot (cdr (car ret))) + ) + (setq lal (cdr ret)) + )) + (cons (cons 'local-part local-part) lal) + )))) + +(defun std11-parse-sub-domain (lal) + (let ((ret (std11-parse-ascii-token lal))) + (if ret + (let ((sub-domain (car ret))) + (if (or (assq 'atom sub-domain) + (assq 'domain-literal sub-domain) + ) + (cons (cons 'sub-domain sub-domain) + (cdr ret) + ) + ))))) + +(defun std11-parse-domain (lal) + (let ((ret (std11-parse-sub-domain lal))) + (if ret + (let ((domain (cdr (car ret))) dot) + (setq lal (cdr ret)) + (while (and (setq ret (std11-parse-ascii-token lal)) + (setq dot (car ret)) + (string-equal (cdr (assq 'specials dot)) ".") + (setq ret (std11-parse-sub-domain (cdr ret))) + (setq domain + (append domain dot (cdr (car ret))) + ) + (setq lal (cdr ret)) + )) + (cons (cons 'domain domain) lal) + )))) + +(defun std11-parse-at-domain (lal) + (let ((ret (std11-parse-ascii-token lal)) at-sign) + (if (and ret + (setq at-sign (car ret)) + (string-equal (cdr (assq 'specials at-sign)) "@") + (setq ret (std11-parse-domain (cdr ret))) + ) + (cons (cons 'at-domain (append at-sign (cdr (car ret)))) + (cdr ret)) + ))) + +(defun std11-parse-addr-spec (lal) + (let ((ret (std11-parse-local-part lal)) + addr) + (if (and ret + (prog1 + (setq addr (cdr (car ret))) + (setq lal (cdr ret)) + (and (setq ret (std11-parse-at-domain lal)) + (setq addr (append addr (cdr (car ret)))) + (setq lal (cdr ret)) + ))) + (cons (cons 'addr-spec addr) lal) + ))) + +(defun std11-parse-route (lal) + (let ((ret (std11-parse-at-domain lal)) + route comma colon) + (if (and ret + (progn + (setq route (cdr (car ret))) + (setq lal (cdr ret)) + (while (and (setq ret (std11-parse-ascii-token lal)) + (setq comma (car ret)) + (string-equal (cdr (assq 'specials comma)) ",") + (setq ret (std11-parse-at-domain (cdr ret))) + ) + (setq route (append route comma (cdr (car ret)))) + (setq lal (cdr ret)) + ) + (and (setq ret (std11-parse-ascii-token lal)) + (setq colon (car ret)) + (string-equal (cdr (assq 'specials colon)) ":") + (setq route (append route colon)) + ) + )) + (cons (cons 'route route) + (cdr ret) + ) + ))) + +(defun std11-parse-route-addr (lal) + (let ((ret (std11-parse-ascii-token lal)) + < route addr-spec >) + (if (and ret + (setq < (car ret)) + (string-equal (cdr (assq 'specials <)) "<") + (setq lal (cdr ret)) + (progn (and (setq ret (std11-parse-route lal)) + (setq route (cdr (car ret))) + (setq lal (cdr ret)) + ) + (setq ret (std11-parse-addr-spec lal)) + ) + (setq addr-spec (cdr (car ret))) + (setq lal (cdr ret)) + (setq ret (std11-parse-ascii-token lal)) + (setq > (car ret)) + (string-equal (cdr (assq 'specials >)) ">") + ) + (cons (cons 'route-addr (append route addr-spec)) + (cdr ret) + ) + ))) + +(defun std11-parse-phrase-route-addr (lal) + (let ((ret (std11-parse-phrase lal)) phrase) + (if ret + (progn + (setq phrase (cdr (car ret))) + (setq lal (cdr ret)) + )) + (if (setq ret (std11-parse-route-addr lal)) + (cons (list 'phrase-route-addr + phrase + (cdr (car ret))) + (cdr ret)) + ))) + +(defun std11-parse-mailbox (lal) + (let ((ret (or (std11-parse-phrase-route-addr lal) + (std11-parse-addr-spec lal))) + mbox comment) + (if (and ret + (prog1 + (setq mbox (car ret)) + (setq lal (cdr ret)) + (if (and (setq ret (std11-parse-token-or-comment lal)) + (setq comment (cdr (assq 'comment (car ret)))) + ) + (setq lal (cdr ret)) + ))) + (cons (list 'mailbox mbox comment) + lal) + ))) + +(defun std11-parse-group (lal) + (let ((ret (std11-parse-phrase lal)) + phrase colon comma mbox semicolon) + (if (and ret + (setq phrase (cdr (car ret))) + (setq lal (cdr ret)) + (setq ret (std11-parse-ascii-token lal)) + (setq colon (car ret)) + (string-equal (cdr (assq 'specials colon)) ":") + (setq lal (cdr ret)) + (progn + (and (setq ret (std11-parse-mailbox lal)) + (setq mbox (list (car ret))) + (setq lal (cdr ret)) + (progn + (while (and (setq ret (std11-parse-ascii-token lal)) + (setq comma (car ret)) + (string-equal + (cdr (assq 'specials comma)) ",") + (setq lal (cdr ret)) + (setq ret (std11-parse-mailbox lal)) + (setq mbox (cons (car ret) mbox)) + (setq lal (cdr ret)) + ) + ))) + (and (setq ret (std11-parse-ascii-token lal)) + (setq semicolon (car ret)) + (string-equal (cdr (assq 'specials semicolon)) ";") + ))) + (cons (list 'group phrase (nreverse mbox)) + (cdr ret) + ) + ))) + +(defun std11-parse-address (lal) + (or (std11-parse-group lal) + (std11-parse-mailbox lal) + )) + +(defun std11-parse-addresses (lal) + (let ((ret (std11-parse-address lal))) + (if ret + (let ((dest (list (car ret)))) + (setq lal (cdr ret)) + (while (and (setq ret (std11-parse-ascii-token lal)) + (string-equal (cdr (assq 'specials (car ret))) ",") + (setq ret (std11-parse-address (cdr ret))) + ) + (setq dest (cons (car ret) dest)) + (setq lal (cdr ret)) + ) + (nreverse dest) + )))) + +(defun std11-parse-msg-id (lal) + (let ((ret (std11-parse-ascii-token lal)) + < addr-spec >) + (if (and ret + (setq < (car ret)) + (string-equal (cdr (assq 'specials <)) "<") + (setq lal (cdr ret)) + (setq ret (std11-parse-addr-spec lal)) + (setq addr-spec (car ret)) + (setq lal (cdr ret)) + (setq ret (std11-parse-ascii-token lal)) + (setq > (car ret)) + (string-equal (cdr (assq 'specials >)) ">") + ) + (cons (cons 'msg-id (cdr addr-spec)) + (cdr ret)) + ))) + + +;;; @ end +;;; + +(provide 'std11-parse) + +;;; std11-parse.el ends here diff --git a/std11.el b/std11.el new file mode 100644 index 0000000..1a70c5e --- /dev/null +++ b/std11.el @@ -0,0 +1,418 @@ +;;; std11.el --- STD 11 functions for GNU Emacs + +;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc. + +;; Author: MORIOKA Tomohiko +;; Keywords: mail, news, RFC 822, STD 11 + +;; This file is part of MU (Message Utilities). + +;; This program is free software; you can redistribute it and/or +;; modify it under the terms of the GNU General Public License as +;; published by the Free Software Foundation; either version 2, or (at +;; your option) any later version. + +;; This program is distributed in the hope that it will be useful, but +;; WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Code: + +(autoload 'buffer-substring-no-properties "emu") +(autoload 'member "emu") + +(eval-when-compile + (provide 'std11) + (require 'std11-parse)) + + +;;; @ field +;;; + +(defconst std11-field-name-regexp "[!-9;-~]+") +(defconst std11-field-head-regexp + (concat "^" std11-field-name-regexp ":")) +(defconst std11-next-field-head-regexp + (concat "\n" std11-field-name-regexp ":")) + +(defun std11-field-end () + "Move to end of field and return this point. [std11.el]" + (if (re-search-forward std11-next-field-head-regexp nil t) + (goto-char (match-beginning 0)) + (if (re-search-forward "^$" nil t) + (goto-char (1- (match-beginning 0))) + (end-of-line) + )) + (point) + ) + +(defun std11-field-body (name &optional boundary) + "Return body of field NAME. +If BOUNDARY is not nil, it is used as message header separator. +\[std11.el]" + (save-excursion + (save-restriction + (std11-narrow-to-header boundary) + (goto-char (point-min)) + (let ((case-fold-search t)) + (if (re-search-forward (concat "^" name ":[ \t]*") nil t) + (buffer-substring-no-properties (match-end 0) (std11-field-end)) + ))))) + +(defun std11-find-field-body (field-names &optional boundary) + "Return the first found field-body specified by FIELD-NAMES +of the message header in current buffer. If BOUNDARY is not nil, it is +used as message header separator. [std11.el]" + (save-excursion + (save-restriction + (std11-narrow-to-header boundary) + (let ((case-fold-search t) + field-name) + (catch 'tag + (while (setq field-name (car field-names)) + (goto-char (point-min)) + (if (re-search-forward (concat "^" field-name ":[ \t]*") nil t) + (throw 'tag + (buffer-substring-no-properties + (match-end 0) (std11-field-end))) + ) + (setq field-names (cdr field-names)) + )))))) + +(defun std11-field-bodies (field-names &optional default-value boundary) + "Return list of each field-bodies of FIELD-NAMES of the message header +in current buffer. If BOUNDARY is not nil, it is used as message +header separator. [std11.el]" + (save-excursion + (save-restriction + (std11-narrow-to-header boundary) + (let* ((case-fold-search t) + (dest (make-list (length field-names) default-value)) + (s-rest field-names) + (d-rest dest) + field-name) + (while (setq field-name (car s-rest)) + (goto-char (point-min)) + (if (re-search-forward (concat "^" field-name ":[ \t]*") nil t) + (setcar d-rest + (buffer-substring-no-properties + (match-end 0) (std11-field-end))) + ) + (setq s-rest (cdr s-rest) + d-rest (cdr d-rest)) + ) + dest)))) + + +;;; @ unfolding +;;; + +(defun std11-unfold-string (string) + "Unfold STRING as message header field. [std11.el]" + (let ((dest "")) + (while (string-match "\n\\([ \t]\\)" string) + (setq dest (concat dest + (substring string 0 (match-beginning 0)) + (match-string 1 string) + )) + (setq string (substring string (match-end 0))) + ) + (concat dest string) + )) + + +;;; @ header +;;; + +(defun std11-narrow-to-header (&optional boundary) + "Narrow to the message header. +If BOUNDARY is not nil, it is used as message header separator. +\[std11.el]" + (narrow-to-region + (goto-char (point-min)) + (if (re-search-forward + (concat "^\\(" (regexp-quote (or boundary "")) "\\)?$") + nil t) + (match-beginning 0) + (point-max) + ))) + +(defun std11-header-string (regexp &optional boundary) + "Return string of message header fields matched by REGEXP. +If BOUNDARY is not nil, it is used as message header separator. +\[std11.el]" + (let ((case-fold-search t)) + (save-excursion + (save-restriction + (std11-narrow-to-header boundary) + (goto-char (point-min)) + (let (field header) + (while (re-search-forward std11-field-head-regexp nil t) + (setq field + (buffer-substring (match-beginning 0) (std11-field-end))) + (if (string-match regexp field) + (setq header (concat header field "\n")) + )) + header) + )))) + +(defun std11-header-string-except (regexp &optional boundary) + "Return string of message header fields not matched by REGEXP. +If BOUNDARY is not nil, it is used as message header separator. +\[std11.el]" + (let ((case-fold-search t)) + (save-excursion + (save-restriction + (std11-narrow-to-header boundary) + (goto-char (point-min)) + (let (field header) + (while (re-search-forward std11-field-head-regexp nil t) + (setq field + (buffer-substring (match-beginning 0) (std11-field-end))) + (if (not (string-match regexp field)) + (setq header (concat header field "\n")) + )) + header) + )))) + +(defun std11-collect-field-names (&optional boundary) + "Return list of all field-names of the message header in current buffer. +If BOUNDARY is not nil, it is used as message header separator. +\[std11.el]" + (save-excursion + (save-restriction + (std11-narrow-to-header boundary) + (goto-char (point-min)) + (let (dest name) + (while (re-search-forward std11-field-head-regexp nil t) + (setq name (buffer-substring-no-properties + (match-beginning 0)(1- (match-end 0)))) + (or (member name dest) + (setq dest (cons name dest)) + ) + ) + dest)))) + + +;;; @ quoted-string +;;; + +(defun std11-wrap-as-quoted-pairs (string specials) + (let (dest + (i 0) + (b 0) + (len (length string)) + ) + (while (< i len) + (let ((chr (aref string i))) + (if (memq chr specials) + (setq dest (concat dest (substring string b i) "\\") + b i) + )) + (setq i (1+ i)) + ) + (concat dest (substring string b)) + )) + +(defconst std11-non-qtext-char-list '(?\" ?\\ ?\r ?\n)) + +(defun std11-wrap-as-quoted-string (string) + "Wrap STRING as RFC 822 quoted-string. [std11.el]" + (concat "\"" + (std11-wrap-as-quoted-pairs string std11-non-qtext-char-list) + "\"")) + +(defun std11-strip-quoted-pair (string) + "Strip quoted-pairs in STRING. [std11.el]" + (let (dest + (b 0) + (i 0) + (len (length string)) + ) + (while (< i len) + (let ((chr (aref string i))) + (if (eq chr ?\\) + (setq dest (concat dest (substring string b i)) + b (1+ i) + i (+ i 2)) + (setq i (1+ i)) + ))) + (concat dest (substring string b)) + )) + +(defun std11-strip-quoted-string (string) + "Strip quoted-string STRING. [std11.el]" + (let ((len (length string))) + (or (and (>= len 2) + (let ((max (1- len))) + (and (eq (aref string 0) ?\") + (eq (aref string max) ?\") + (std11-strip-quoted-pair (substring string 1 max)) + ))) + string))) + + +;;; @ composer +;;; + +(defun std11-addr-to-string (seq) + "Return string from lexical analyzed list SEQ +represents addr-spec of RFC 822. [std11.el]" + (mapconcat (function + (lambda (token) + (let ((name (car token))) + (cond + ((eq name 'spaces) "") + ((eq name 'comment) "") + ((eq name 'quoted-string) + (concat "\"" (cdr token) "\"")) + (t (cdr token))) + ))) + seq "") + ) + +(defun std11-address-string (address) + "Return string of address part from parsed ADDRESS of RFC 822. +\[std11.el]" + (cond ((eq (car address) 'group) + (mapconcat (function std11-address-string) + (car (cdr address)) + ", ") + ) + ((eq (car address) 'mailbox) + (let ((addr (nth 1 address))) + (std11-addr-to-string + (if (eq (car addr) 'phrase-route-addr) + (nth 2 addr) + (cdr addr) + ) + ))))) + +(defun std11-full-name-string (address) + "Return string of full-name part from parsed ADDRESS of RFC 822. +\[std11.el]" + (cond ((eq (car address) 'group) + (mapconcat (function + (lambda (token) + (cdr token) + )) + (nth 1 address) "") + ) + ((eq (car address) 'mailbox) + (let ((addr (nth 1 address)) + (comment (nth 2 address)) + phrase) + (if (eq (car addr) 'phrase-route-addr) + (setq phrase + (mapconcat + (function + (lambda (token) + (let ((type (car token))) + (cond ((eq type 'quoted-string) + (std11-strip-quoted-pair (cdr token)) + ) + ((eq type 'comment) + (concat + "(" + (std11-strip-quoted-pair (cdr token)) + ")") + ) + (t + (cdr token) + ))))) + (nth 1 addr) "")) + ) + (cond ((> (length phrase) 0) phrase) + (comment (std11-strip-quoted-pair comment)) + ) + )))) + +(defun std11-msg-id-string (msg-id) + "Return string from parsed MSG-ID of RFC 822." + (concat "<" (std11-addr-to-string (cdr msg-id)) ">") + ) + +(defun std11-fill-msg-id-list-string (string &optional column) + "Fill list of msg-id in STRING, and return the result." + (or column + (setq column 12)) + (let ((lal (std11-lexical-analyze string)) + dest) + (let ((ret (std11-parse-msg-id lal))) + (if ret + (let* ((str (std11-msg-id-string (car ret))) + (len (length str))) + (setq lal (cdr ret)) + (if (> (+ len column) 76) + (setq dest (concat dest "\n " str) + column (1+ len)) + (setq dest str + column (+ column len)) + )) + (setq dest (concat dest (cdr (car lal))) + lal (cdr lal)) + )) + (while lal + (let ((ret (std11-parse-msg-id lal))) + (if ret + (let* ((str (std11-msg-id-string (car ret))) + (len (1+ (length str)))) + (setq lal (cdr ret)) + (if (> (+ len column) 76) + (setq dest (concat dest "\n " str) + column len) + (setq dest (concat dest " " str) + column (+ column len)) + )) + (setq dest (concat dest (cdr (car lal))) + lal (cdr lal)) + ))) + dest)) + + +;;; @ parser +;;; + +(defun std11-parse-address-string (string) + "Parse STRING as mail address. [std11.el]" + (std11-parse-address (std11-lexical-analyze string)) + ) + +(defun std11-parse-addresses-string (string) + "Parse STRING as mail address list. [std11.el]" + (std11-parse-addresses (std11-lexical-analyze string)) + ) + +(defun std11-extract-address-components (string) + "Extract full name and canonical address from STRING. +Returns a list of the form (FULL-NAME CANONICAL-ADDRESS). +If no name can be extracted, FULL-NAME will be nil. [std11.el]" + (let* ((structure (car (std11-parse-address-string + (std11-unfold-string string)))) + (phrase (std11-full-name-string structure)) + (address (std11-address-string structure)) + ) + (list phrase address) + )) + +(provide 'std11) + +(mapcar (function + (lambda (func) + (autoload func "std11-parse") + )) + '(std11-lexical-analyze + std11-parse-address std11-parse-addresses + std11-parse-address-string)) + + +;;; @ end +;;; + +;;; std11.el ends here