2 ;;; tl-822.el --- RFC 822 parser for GNU Emacs
4 ;;; Copyright (C) 1995 Free Software Foundation, Inc.
5 ;;; Copyright (C) 1995,1996 MORIOKA Tomohiko
7 ;;; Author: MORIOKA Tomohiko <morioka@jaist.ac.jp>
8 ;;; Keywords: mail, news, RFC 822
10 ;;; This file is part of tl (Tiny Library).
12 ;;; This program is free software; you can redistribute it and/or
13 ;;; modify it under the terms of the GNU General Public License as
14 ;;; published by the Free Software Foundation; either version 2, or
15 ;;; (at your option) any later version.
17 ;;; This program is distributed in the hope that it will be useful,
18 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 ;;; General Public License for more details.
22 ;;; You should have received a copy of the GNU General Public License
23 ;;; along with this program. If not, write to the Free Software
24 ;;; Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
;; Version bookkeeping.
;; `rfc822/RCS-ID' holds the RCS keyword string of this file.
;; `rfc822/version' is the value returned by `get-version-string' for that
;; string; `get-version-string' is not defined in this excerpt.
32 (defconst rfc822/RCS-ID
33 "$Id: tl-822.el,v 7.5 1996-03-25 10:17:34 morioka Exp $")
34 (defconst rfc822/version (get-version-string rfc822/RCS-ID))
;; Regexps used to locate header fields.
;; `rfc822/field-name-regexp': one or more characters from the ranges
;;   `!'..`9' and `;'..`~', i.e. the ASCII characters from `!' to `~' with
;;   the colon left out.
;; `rfc822/field-top-regexp': a field name captured as group 1, followed
;;   by a colon.
;; `rfc822::next-field-top-regexp': a newline followed by a field top, so
;;   the match begins on the newline that precedes the field name.
40 (defconst rfc822/field-name-regexp "[!-9;-~]+")
42 (defconst rfc822/field-top-regexp
43 (concat "\\(" rfc822/field-name-regexp "\\):"))
45 (defconst rfc822::next-field-top-regexp (concat "\n" rfc822/field-top-regexp))
;; Move point forward from its current position in the current buffer.
;; First choice: search for `rfc822::next-field-top-regexp' and leave point
;; at the beginning of the match, i.e. on the newline in front of the next
;; field name.
;; Fallback: search for an empty line and put point one character before
;; the start of that match.
;; NOTE(review): the remainder of this form, including what happens when
;; neither search succeeds, is missing from this excerpt.
47 (defun rfc822/field-end ()
48 (if (re-search-forward rfc822::next-field-top-regexp nil t)
49 (goto-char (match-beginning 0))
50 (if (re-search-forward "^$" nil t)
51 (goto-char (1- (match-beginning 0)))
;; Look up header field NAME in the current buffer.
;; The search runs with `case-fold-search' bound to t, so the field name
;; is matched case-insensitively.
;; Visible steps: go to `point-min', look for the first empty line and take
;; the end of that match (how the value is used is on lines missing from
;; this excerpt -- presumably as the header limit, TODO confirm); go back to
;; `point-min'; search for NAME at the start of a line followed by a colon
;; and optional spaces or tabs; return text via
;; `buffer-substring-no-properties' (its bounds are not visible here).
;; NOTE(review): NAME is concatenated into the regexp without
;; `regexp-quote', so regexp metacharacters in NAME are interpreted.
57 (defun rfc822/get-field-body (name)
58 (let ((case-fold-search t))
62 (goto-char (point-min))
63 (or (and (re-search-forward "^$" nil t) (match-end 0))
66 (goto-char (point-min))
67 (if (re-search-forward (concat "^" name ":[ \t]*") nil t)
68 (buffer-substring-no-properties
;; Collect header fields of the current buffer whose text does not match
;; the regexp PAT.  Matching is case-insensitive (`case-fold-search' is
;; bound to t).
;; Visible steps: narrow to a region that starts at `point-min'; a regexp
;; matching a line that is either empty or consists of BOUNDARY (quoted with
;; `regexp-quote') is built, presumably to find the end of the region
;; (the call using it is not visible -- TODO confirm); then, starting again
;; from `point-min', every match of `rfc822/field-top-regexp' yields a
;; FIELD string beginning at the match start, and each FIELD that does not
;; match PAT is appended to HEADER followed by a newline.
;; NOTE(review): the end position of each FIELD and the return value are on
;; lines missing from this excerpt.
78 (defun rfc822/get-header-string-except (pat boundary)
79 (let ((case-fold-search t))
82 (narrow-to-region (goto-char (point-min))
85 (concat "^\\(" (regexp-quote boundary) "\\)?$")
89 (goto-char (point-min))
91 (while (re-search-forward rfc822/field-top-regexp nil t)
92 (setq field (buffer-substring (match-beginning 0)
95 (if (not (string-match pat field))
96 (setq header (concat header field "\n"))
;; Building blocks for quoted-string handling.
;; `rfc822/linear-white-space-regexp': one or more repetitions of an
;;   optional newline followed by a space or a tab.
;; `rfc822/quoted-pair-regexp': a backslash followed by one character
;;   matched by `.'.
;; `rfc822/non-qtext-char-list': the characters double quote, backslash,
;;   carriage return and newline.
;; `rfc822/qtext-regexp': a negated bracket expression built from that
;;   list plus space and tab.  `char-list-to-string' is not defined in this
;;   excerpt; presumably it turns the character list into a string --
;;   TODO confirm.
105 (defconst rfc822/linear-white-space-regexp "\\(\n?[ \t]\\)+")
106 (defconst rfc822/quoted-pair-regexp "\\\\.")
107 (defconst rfc822/non-qtext-char-list '(?\" ?\\ ?\r ?\n))
108 (defconst rfc822/qtext-regexp
109 (concat "[^" (char-list-to-string rfc822/non-qtext-char-list) " \t]"))
;; Regexp for a quoted-string.
;; Only part of the definition is visible: a group that starts with
;; optional linear white space, then an alternative of
;; `rfc822/qtext-regexp' and `rfc822/quoted-pair-regexp' combined by
;; `regexp-or' (not defined in this excerpt), and a further optional linear
;; white space.
;; NOTE(review): the opening of the `concat', any delimiters and the
;; repetition operators are on lines missing from this excerpt.
110 (defconst rfc822/quoted-string-regexp
114 "\\(" rfc822/linear-white-space-regexp "?"
115 (regexp-or rfc822/qtext-regexp rfc822/quoted-pair-regexp)
117 rfc822/linear-white-space-regexp "?"
;; Visible logic: a character CHR that is a member of
;; `rfc822/non-qtext-char-list' is replaced by a backslash followed by
;; that character.
;; NOTE(review): the iteration over STR, the handling of other characters
;; and the surrounding quotes are on lines missing from this excerpt.
120 (defun rfc822/wrap-as-quoted-string (str)
121 "Wrap string STR as RFC 822 quoted-string. [tl-822.el]"
125 (if (memq chr rfc822/non-qtext-char-list)
126 (concat "\\" (char-to-string chr))
;; Walk over STR by index.  For the character CHR at index I: when FLAG is
;; non-nil or CHR is not a backslash, CHR is appended to DEST.
;; NOTE(review): the loop itself, the branch taken for a backslash while
;; FLAG is nil (presumably it sets FLAG so the next character is copied
;; literally -- TODO confirm) and the return value are on lines missing
;; from this excerpt.
132 (defun rfc822/strip-quoted-pair (str)
138 (setq chr (elt str i))
139 (if (or flag (not (eq chr ?\\)))
141 (setq dest (concat dest (char-to-string chr)))
;; Return STR processed by `rfc822/strip-quoted-pair' after its enclosing
;; double quotes have been removed.
;; MAX is the index of the last character of STR.  When both the first
;; character and the character at MAX are double quotes, the text between
;; them is used.
;; NOTE(review): the else branch is on lines missing from this excerpt.
;; NOTE(review): no length check is visible before `(elt str 0)'; confirm
;; what happens for an empty STR.
150 (defun rfc822/strip-quoted-string (str)
151 (rfc822/strip-quoted-pair
152 (let ((max (- (length str) 1))
154 (if (and (eq (elt str 0) ?\")
155 (eq (elt str max) ?\")
157 (substring str 1 max)
;; Unfold STR: as long as STR contains a newline followed by one or more
;; characters of whitespace syntax, the text in front of that match plus a
;; single space is appended to DEST, and STR is shortened to the text after
;; the match.
;; NOTE(review): the initial binding of DEST and the final return value
;; are on lines missing from this excerpt.
165 (defun rfc822/unfolding-string (str)
167 (while (string-match "\n\\s +" str)
168 (setq dest (concat dest (substring str 0 (match-beginning 0)) " "))
169 (setq str (substring str (match-end 0)))
175 ;;; @ lexical analysis
;; Character sets used by the lexical analyzers.  Each value is a string
;; that is searched with `find'.
;; `rfc822/special-chars': the characters that form `specials' tokens.
;;   NOTE(review): the angle brackets appear twice in the literal; the
;;   repetition looks redundant for a membership test.
;; `rfc822/space-chars': space, tab and newline.
;; `rfc822/non-atom-chars': the specials and the space characters
;;   together; an atom stops at any of them.
;; `rfc822/non-dtext-chars': the two square brackets.
;; `rfc822/non-ctext-chars': the two parentheses.
178 (defconst rfc822/special-chars "][()<>@,;:\\<>.\"")
179 (defconst rfc822/space-chars " \t\n")
180 (defconst rfc822/non-atom-chars
181 (concat rfc822/special-chars rfc822/space-chars))
182 (defconst rfc822/non-dtext-chars "[]")
183 (defconst rfc822/non-ctext-chars "()")
;; Lexer step for white space at the start of STR.
;; I comes from `position-mismatched' (not defined in this excerpt) called
;; with a predicate that tests membership in `rfc822/space-chars';
;; presumably I is the index of the first character failing the predicate
;; -- TODO confirm.
;; The result is a cons whose car is the token `(spaces . TEXT)', TEXT
;; being the first I characters of STR.  The cdr and the guard on I are on
;; lines missing from this excerpt.
185 (defun rfc822/analyze-spaces (str)
186 (let ((i (position-mismatched
189 (find elt rfc822/space-chars)
193 (cons (cons 'spaces (substring str 0 i))
;; Lexer step for a single special character.
;; Applies when STR is non-empty and its first character occurs in
;; `rfc822/special-chars'.  The result is a cons whose car is the token
;; `(specials . TEXT)', TEXT being the first character of STR as a string.
;; NOTE(review): the cdr of the result is on a line missing from this
;; excerpt.
198 (defun rfc822/analyze-special (str)
199 (if (and (> (length str) 0)
200 (find (elt str 0) rfc822/special-chars)
202 (cons (cons 'specials (substring str 0 1))
;; Lexer step for an atom at the start of STR.
;; I comes from `position-mismatched' (not defined in this excerpt) called
;; with a predicate that accepts characters not found in
;; `rfc822/non-atom-chars'.
;; The result is a cons whose car is the token `(atom . TEXT)', TEXT being
;; the first I characters of STR.  The cdr and the guard on I are on lines
;; missing from this excerpt.
207 (defun rfc822/analyze-atom (str)
208 (let ((i (position-mismatched
211 (not (find elt rfc822/non-atom-chars))
215 (cons (cons 'atom (substring str 0 i))
;; Lexer step for a quoted-pair.
;; Requires STR to hold at least two characters; the token is
;; `(quoted-pair . TEXT)' with TEXT being the first two characters of STR.
;; NOTE(review): the test on the first character and the cdr of the result
;; are on lines missing from this excerpt.
220 (defun rfc822/analyze-quoted-pair (str)
221 (if (and (>= (length str) 2)
224 (cons (cons 'quoted-pair (substring str 0 2))
;; Lexer step for a quoted-string at the start of STR.
;; Visible steps: STR must be non-empty; STR is rebound to itself without
;; its first character; I comes from `position-mismatched' (not defined in
;; this excerpt) with a predicate accepting characters that are not in
;; `rfc822/non-qtext-char-list'; REST is STR from index I on, and its first
;; character must be a double quote.
;; The token is `(quoted-string . TEXT)' where TEXT is the first I
;; characters of the shortened STR, i.e. the text without the opening
;; quote and without the closing quote held in REST.
;; NOTE(review): the predicate rejects backslash as well, because backslash
;; is in `rfc822/non-qtext-char-list'; check whether quoted-pairs inside
;; the string are handled on the lines missing from this excerpt.
229 (defun rfc822/analyze-quoted-string (str)
230 (if (and (> (length str) 0)
233 (let* ((i (position-mismatched
236 (not (memq elt rfc822/non-qtext-char-list))
238 (setq str (substring str 1))
240 (rest (substring str i))
244 (eq (elt rest 0) ?\")
246 (cons (cons 'quoted-string (substring str 0 i))
;; Lexer step for a domain-literal at the start of STR.
;; Visible steps: STR must be non-empty; STR is rebound to itself without
;; its first character; I comes from `position-mismatched' (not defined in
;; this excerpt) with a predicate accepting characters not found in
;; `rfc822/non-dtext-chars'; REST is STR from index I on, and its first
;; character must be a closing square bracket.
;; The token is `(domain-literal . TEXT)' where TEXT is the first I
;; characters of the shortened STR.
;; NOTE(review): the test on the first character and the cdr of the result
;; are on lines missing from this excerpt.
251 (defun rfc822/analyze-domain-literal (str)
252 (if (and (> (length str) 0)
255 (let* ((i (position-mismatched
258 (not (find elt rfc822/non-dtext-chars))
260 (setq str (substring str 1))
262 (rest (substring str i))
266 (eq (elt rest 0) ?\])
268 (cons (cons 'domain-literal (substring str 0 i))
;; Lexer step for a comment at the start of STR; nested comments are
;; handled by recursion.
;; Visible steps: STR must be non-empty; its first character is dropped;
;; then, while STR is not empty, P is obtained from `position-mismatched'
;; (not defined in this excerpt) with a predicate accepting characters not
;; found in `rfc822/non-ctext-chars', the first P characters are appended
;; to DEST and removed from STR.  A `cond' branch calls this function
;; recursively on STR; on success the nested comment text is appended to
;; DEST wrapped in parentheses.
;; After the loop a non-empty STR is required and the token
;; `(comment . DEST)' is built.
;; NOTE(review): the test on the first character, the other `cond'
;; branches, the loop exit and the cdr of the result are on lines missing
;; from this excerpt.
273 (defun rfc822/analyze-comment (str)
274 (if (and (> (length str) 0)
279 (setq str (substring str 1))
281 (while (not (string-equal str ""))
282 (setq p (position-mismatched
285 (not (find elt rfc822/non-ctext-chars))
288 (setq dest (concat dest (substring str 0 p)))
289 (setq str (substring str p))
291 ((setq ret (rfc822/analyze-comment str))
292 (setq dest (concat dest "(" (cdr (car ret)) ")"))
298 (if (and (> (length str) 0)
301 (cons (cons 'comment dest)
;; Split STR into a list of tokens of the form (TYPE . TEXT).
;; While STR is not empty the analyzers are tried in this order, the first
;; one returning non-nil wins: quoted-string, domain-literal, comment,
;; spaces, special, atom.  The car of the winning result (the token) is
;; pushed onto DEST.
;; NOTE(review): how STR is advanced, what happens when no analyzer
;; matches, and the final value (presumably DEST reversed -- TODO confirm)
;; are on lines missing from this excerpt.  The bindings I and LEN are not
;; used on any visible line.
306 (defun rfc822/lexical-analyze (str)
308 (i 0)(len (length str))
310 (while (not (string-equal str ""))
312 (or (rfc822/analyze-quoted-string str)
313 (rfc822/analyze-domain-literal str)
314 (rfc822/analyze-comment str)
315 (rfc822/analyze-spaces str)
316 (rfc822/analyze-special str)
317 (rfc822/analyze-atom str)
320 (setq dest (cons (car ret) dest))
;; Return non-nil when TOKEN, a cons (TYPE . TEXT), has the type `spaces'
;; or `comment'.
330 (defun rfc822/ignored-token-p (token)
331 (let ((type (car token)))
332 (or (eq type 'spaces)(eq type 'comment))
;; Take the next significant token from the token list LAL.
;; TOKEN is the head of LAL; tokens for which `rfc822/ignored-token-p'
;; holds are collected on ITL (newest first).  The car of the result is
;; `(reverse (cons token itl))', i.e. the skipped tokens in their original
;; order followed by TOKEN.
;; NOTE(review): the loop construct and the cdr of the result are on lines
;; missing from this excerpt.
335 (defun rfc822/parse-token (lal)
339 (setq token (car lal))
340 (rfc822/ignored-token-p token)
343 (setq itl (cons token itl))
345 (cons (reverse (cons token itl))
;; Like a token fetch from LAL, with a check on the token text made by
;; `find-charset-string' (not defined in this excerpt).
;; The loop runs while LAL is non-empty, the head token has non-nil text,
;; and the `if' form yields non-nil: TOKEN is set to the head of LAL and
;; its text is passed to `find-charset-string'; when that returns nil the
;; loop condition is `rfc822/ignored-token-p' of TOKEN.  Tokens consumed by
;; the loop are collected on ITL (newest first).
;; PARSED is the collected tokens in original order followed by TOKEN, and
;; the result is (PARSED . remaining tokens after TOKEN).
;; NOTE(review): the then-branch of the `if' (taken when
;; `find-charset-string' returns non-nil), the advance of LAL and the guard
;; around the final `cons' are on lines missing from this excerpt.
349 (defun rfc822/parse-ascii-token (lal)
350 (let (token itl parsed)
351 (while (and lal (cdr (car lal))
352 (if (find-charset-string (cdr (setq token (car lal))))
354 (rfc822/ignored-token-p token)
357 (setq itl (cons token itl))
360 (setq parsed (reverse (cons token itl)))
362 (cons parsed (cdr lal))
;; Take the next token from LAL, skipping only tokens of type `spaces';
;; the visible test does not skip `comment' tokens.
;; Skipped tokens are collected on ITL (newest first); the car of the
;; result is the skipped tokens in original order followed by TOKEN.
;; NOTE(review): the loop construct and the cdr of the result are on lines
;; missing from this excerpt.
365 (defun rfc822/parse-token-or-comment (lal)
369 (setq token (car lal))
370 (eq (car token) 'spaces)
373 (setq itl (cons token itl))
375 (cons (reverse (cons token itl))
;; Parse a word from the token list LAL.
;; RET is the result of `rfc822/parse-ascii-token'; ELT is its car, a list
;; of tokens.  When ELT contains an `atom' or a `quoted-string' entry, the
;; result is ((word . ELT) . REST).
;; NOTE(review): the binding of REST and the guard on RET are on lines
;; missing from this excerpt.
379 (defun rfc822/parse-word (lal)
380 (let ((ret (rfc822/parse-ascii-token lal)))
382 (let ((elt (car ret))
385 (if (or (assq 'atom elt)
386 (assq 'quoted-string elt))
387 (cons (cons 'word elt) rest)
;; Parse a word or a comment from the token list LAL.
;; RET is the result of `rfc822/parse-token-or-comment'; ELT is its car, a
;; list of tokens.  When ELT contains an `atom' or a `quoted-string'
;; entry, the result is ((word . ELT) . REST); another `cond' branch yields
;; ((comment-word . ELT) . REST).
;; NOTE(review): the condition of the `comment-word' branch, the binding
;; of REST and the guard on RET are on lines missing from this excerpt.
390 (defun rfc822/parse-word-or-comment (lal)
391 (let ((ret (rfc822/parse-token-or-comment lal)))
393 (let ((elt (car ret))
396 (cond ((or (assq 'atom elt)
397 (assq 'quoted-string elt))
398 (cons (cons 'word elt) rest)
401 (cons (cons 'comment-word elt) rest)
;; Parse a phrase from the token list LAL.
;; As long as `rfc822/parse-word-or-comment' succeeds, the tokens of the
;; parsed element are appended to PHRASE.  The result is
;; ((phrase . PHRASE) . LAL).
;; NOTE(review): the update of LAL inside the loop and the guard around
;; the final `cons' are on lines missing from this excerpt.
405 (defun rfc822/parse-phrase (lal)
407 (while (setq ret (rfc822/parse-word-or-comment lal))
408 (setq phrase (append phrase (cdr (car ret))))
412 (cons (cons 'phrase phrase) lal)
;; Parse a local-part from the token list LAL.
;; A first word is parsed with `rfc822/parse-word'; LOCAL-PART starts as
;; its tokens.  The loop continues while the next ASCII token is the
;; special `.' and a further word follows it; the visible `append' joins
;; LOCAL-PART, the DOT tokens and the tokens of the new word.
;; The result is ((local-part . LOCAL-PART) . LAL).
;; NOTE(review): the assignment of DOT, the updates of LAL and LOCAL-PART
;; and the guard on the first word are on lines missing from this excerpt.
415 (defun rfc822/parse-local-part (lal)
416 (let ((ret (rfc822/parse-word lal)))
418 (let ((local-part (cdr (car ret))) dot)
420 (while (and (setq ret (rfc822/parse-ascii-token lal))
422 (string-equal (cdr (assq 'specials dot)) ".")
423 (setq ret (rfc822/parse-word (cdr ret)))
425 (append local-part dot (cdr (car ret)))
429 (cons (cons 'local-part local-part) lal)
;; Parse a sub-domain from the token list LAL.
;; SUB-DOMAIN is the token list returned as the car of
;; `rfc822/parse-ascii-token'.  When it contains an `atom' or a
;; `domain-literal' entry, the car of the result is
;; (sub-domain . SUB-DOMAIN).
;; NOTE(review): the cdr of the result and the guard on RET are on lines
;; missing from this excerpt.
432 (defun rfc822/parse-sub-domain (lal)
433 (let ((ret (rfc822/parse-ascii-token lal)))
435 (let ((sub-domain (car ret)))
436 (if (or (assq 'atom sub-domain)
437 (assq 'domain-literal sub-domain)
439 (cons (cons 'sub-domain sub-domain)
;; Parse a domain from the token list LAL.
;; A first sub-domain is parsed with `rfc822/parse-sub-domain'; DOMAIN
;; starts as its tokens.  The loop continues while the next ASCII token is
;; the special `.' and a further sub-domain follows it; the visible
;; `append' joins DOMAIN, the DOT tokens and the new sub-domain tokens.
;; The result is ((domain . DOMAIN) . LAL).
;; NOTE(review): the assignment of DOT, the updates of LAL and DOMAIN and
;; the guard on the first sub-domain are on lines missing from this
;; excerpt.
444 (defun rfc822/parse-domain (lal)
445 (let ((ret (rfc822/parse-sub-domain lal)))
447 (let ((domain (cdr (car ret))) dot)
449 (while (and (setq ret (rfc822/parse-ascii-token lal))
451 (string-equal (cdr (assq 'specials dot)) ".")
452 (setq ret (rfc822/parse-sub-domain (cdr ret)))
454 (append domain dot (cdr (car ret)))
458 (cons (cons 'domain domain) lal)
;; Parse an at-sign followed by a domain from the token list LAL.
;; AT-SIGN is the token list returned as the car of
;; `rfc822/parse-ascii-token'; its `specials' entry must be the string
;; `@'.  The domain is then parsed from the tokens after it.
;; The car of the result is (at-domain . TOKENS) where TOKENS is AT-SIGN
;; followed by the domain tokens.
;; NOTE(review): the enclosing conditional and the cdr of the result are
;; on lines missing from this excerpt.
461 (defun rfc822/parse-at-domain (lal)
462 (let ((ret (rfc822/parse-ascii-token lal)) at-sign)
464 (setq at-sign (car ret))
465 (string-equal (cdr (assq 'specials at-sign)) "@")
466 (setq ret (rfc822/parse-domain (cdr ret)))
468 (cons (cons 'at-domain (append at-sign (cdr (car ret))))
;; Parse an addr-spec from the token list LAL.
;; ADDR starts as the tokens of the local-part parsed by
;; `rfc822/parse-local-part'.  The `and' form extends ADDR with the tokens
;; of an at-domain when `rfc822/parse-at-domain' succeeds.
;; The result is ((addr-spec . ADDR) . LAL).
;; NOTE(review): the updates of LAL and the guard on the local-part are on
;; lines missing from this excerpt.
472 (defun rfc822/parse-addr-spec (lal)
473 (let ((ret (rfc822/parse-local-part lal))
477 (setq addr (cdr (car ret)))
479 (and (setq ret (rfc822/parse-at-domain lal))
480 (setq addr (append addr (cdr (car ret))))
483 (cons (cons 'addr-spec addr) lal)
;; Parse a route from the token list LAL.
;; ROUTE starts as the tokens of a first at-domain.  The loop continues
;; while the next ASCII token is the special `,' and another at-domain
;; follows; each round appends the COMMA tokens and the at-domain tokens to
;; ROUTE.  Afterwards the next ASCII token must be the special `:', whose
;; tokens are appended to ROUTE as well.
;; The car of the result is (route . ROUTE).
;; NOTE(review): the updates of LAL, the guard on the first at-domain and
;; the cdr of the result are on lines missing from this excerpt.
486 (defun rfc822/parse-route (lal)
487 (let ((ret (rfc822/parse-at-domain lal))
491 (setq route (cdr (car ret)))
493 (while (and (setq ret (rfc822/parse-ascii-token lal))
494 (setq comma (car ret))
495 (string-equal (cdr (assq 'specials comma)) ",")
496 (setq ret (rfc822/parse-at-domain (cdr ret)))
498 (setq route (append route comma (cdr (car ret))))
501 (and (setq ret (rfc822/parse-ascii-token lal))
502 (setq colon (car ret))
503 (string-equal (cdr (assq 'specials colon)) ":")
504 (setq route (append route colon))
507 (cons (cons 'route route)
;; Parse a route-addr, i.e. an address in angle brackets, from the token
;; list LAL.
;; The local variables named `<' and `>' hold the token lists of the
;; opening and the closing bracket; their `specials' entries are compared
;; with the strings `<' and `>'.
;; Between the brackets a route is parsed when one is present
;; (`rfc822/parse-route'), then an addr-spec (`rfc822/parse-addr-spec').
;; The car of the result is (route-addr . TOKENS) where TOKENS is ROUTE
;; followed by ADDR-SPEC; the bracket tokens are not part of the visible
;; `append'.
;; NOTE(review): the assignments of `<' and `>', the updates of LAL and the
;; cdr of the result are on lines missing from this excerpt.
512 (defun rfc822/parse-route-addr (lal)
513 (let ((ret (rfc822/parse-ascii-token lal))
517 (string-equal (cdr (assq 'specials <)) "<")
519 (progn (and (setq ret (rfc822/parse-route lal))
520 (setq route (cdr (car ret)))
523 (setq ret (rfc822/parse-addr-spec lal))
525 (setq addr-spec (cdr (car ret)))
527 (setq ret (rfc822/parse-ascii-token lal))
529 (string-equal (cdr (assq 'specials >)) ">")
531 (cons (cons 'route-addr (append route addr-spec))
;; Parse a phrase followed by a route-addr from the token list LAL.
;; PHRASE is set to the tokens of the phrase parsed by
;; `rfc822/parse-phrase'.  When `rfc822/parse-route-addr' then succeeds,
;; the car of the result is a list that starts with the symbol
;; `phrase-route-addr'.
;; NOTE(review): the remaining list elements, the update of LAL and the
;; cdr of the result are on lines missing from this excerpt.
536 (defun rfc822/parse-phrase-route-addr (lal)
537 (let ((ret (rfc822/parse-phrase lal)) phrase)
540 (setq phrase (cdr (car ret)))
543 (if (setq ret (rfc822/parse-route-addr lal))
544 (cons (list 'phrase-route-addr
;; Parse a mailbox from the token list LAL.
;; `rfc822/parse-phrase-route-addr' is tried first, then
;; `rfc822/parse-addr-spec'.  MBOX is the car of the successful result.
;; A following token is then fetched with `rfc822/parse-token-or-comment';
;; COMMENT is the text of its `comment' entry when there is one.
;; The car of the result is the list (mailbox MBOX COMMENT).
;; NOTE(review): the updates of LAL, the guard on RET and the cdr of the
;; result are on lines missing from this excerpt.
550 (defun rfc822/parse-mailbox (lal)
551 (let ((ret (or (rfc822/parse-phrase-route-addr lal)
552 (rfc822/parse-addr-spec lal)))
556 (setq mbox (car ret))
558 (if (and (setq ret (rfc822/parse-token-or-comment lal))
559 (setq comment (cdr (assq 'comment (car ret))))
563 (cons (list 'mailbox mbox comment)
;; Parse a group from the token list LAL.
;; Visible sequence: a phrase (`rfc822/parse-phrase'); an ASCII token
;; whose `specials' entry is `:'; a first mailbox when one parses, stored
;; as a one-element list in MBOX; then a loop that continues while the next
;; ASCII token is the special `,', parsing a further mailbox and pushing it
;; onto the front of MBOX; finally an ASCII token whose `specials' entry
;; is `;'.
;; The car of the result is the list (group PHRASE MAILBOXES) where
;; MAILBOXES is MBOX reversed, i.e. in the order the mailboxes were parsed.
;; NOTE(review): the updates of LAL, the enclosing conditionals, the
;; comparison operator of the comma test and the cdr of the result are on
;; lines missing from this excerpt.
567 (defun rfc822/parse-group (lal)
568 (let ((ret (rfc822/parse-phrase lal))
569 phrase colon comma mbox semicolon)
571 (setq phrase (cdr (car ret)))
573 (setq ret (rfc822/parse-ascii-token lal))
574 (setq colon (car ret))
575 (string-equal (cdr (assq 'specials colon)) ":")
578 (and (setq ret (rfc822/parse-mailbox lal))
579 (setq mbox (list (car ret)))
582 (while (and (setq ret (rfc822/parse-ascii-token lal))
583 (setq comma (car ret))
585 (cdr (assq 'specials comma)) ",")
587 (setq ret (rfc822/parse-mailbox lal))
588 (setq mbox (cons (car ret) mbox))
592 (and (setq ret (rfc822/parse-ascii-token lal))
593 (setq semicolon (car ret))
594 (string-equal (cdr (assq 'specials semicolon)) ";")
596 (cons (list 'group phrase (reverse mbox))
;; Parse one address from the token list LAL: a group is tried first
;; (`rfc822/parse-group'), then a mailbox (`rfc822/parse-mailbox').  The
;; value is that of the first parser returning non-nil.
601 (defun rfc822/parse-address (lal)
602 (or (rfc822/parse-group lal)
603 (rfc822/parse-mailbox lal)
;; Parse a comma-separated list of addresses from the token list LAL.
;; DEST starts as a one-element list holding the first parsed address.
;; The loop continues while the next ASCII token is the special `,' and
;; another address follows it; each new address is pushed onto the front
;; of DEST.
;; NOTE(review): the updates of LAL, the guard on the first address and
;; the return value (presumably DEST reversed -- TODO confirm) are on lines
;; missing from this excerpt.
606 (defun rfc822/parse-addresses (lal)
607 (let ((ret (rfc822/parse-address lal)))
609 (let ((dest (list (car ret))))
611 (while (and (setq ret (rfc822/parse-ascii-token lal))
612 (string-equal (cdr (assq 'specials (car ret))) ",")
613 (setq ret (rfc822/parse-address (cdr ret)))
615 (setq dest (cons (car ret) dest))
;; Build a string from SEQ.
;; The only visible logic is a test for tokens whose type is `spaces'.
;; NOTE(review): the iteration over SEQ, both branches of the test and the
;; return value are on lines missing from this excerpt.
621 (defun rfc822/addr-to-string (seq)
624 (if (eq (car token) 'spaces)
;; Produce the address string for ADDRESS.
;; Applies only when ADDRESS is a list starting with the symbol `mailbox'.
;; ADDR is the second element of ADDRESS; the string is produced by
;; `rfc822/addr-to-string', whose argument depends on whether ADDR starts
;; with the symbol `phrase-route-addr'.
;; NOTE(review): both branches of that test are on lines missing from this
;; excerpt.
631 (defun rfc822/address-string (address)
632 (if (eq (car address) 'mailbox)
633 (let ((addr (nth 1 address))
635 (rfc822/addr-to-string
636 (if (eq (car addr) 'phrase-route-addr)
;; Produce the full-name string for ADDRESS.
;; Applies only when ADDRESS is a list starting with the symbol `mailbox'.
;; ADDR is the second element of ADDRESS and COMMENT the third.  When ADDR
;; starts with the symbol `phrase-route-addr', PHRASE is built with
;; `mapconcat'.
;; NOTE(review): the `mapconcat' arguments, the use of COMMENT and the
;; return value are on lines missing from this excerpt.
642 (defun rfc822/full-name-string (address)
643 (if (eq (car address) 'mailbox)
644 (let ((addr (nth 1 address))
645 (comment (nth 2 address))
647 (if (eq (car addr) 'phrase-route-addr)
648 (setq phrase (mapconcat (function
;; Entry point: STR is tokenized by `rfc822/lexical-analyze', one address
;; is parsed from the tokens by `rfc822/parse-address', and the car of that
;; result is kept as STRUCTURE.  PHRASE and ADDRESS are derived from
;; STRUCTURE by `rfc822/full-name-string' and `rfc822/address-string', and
;; the two are returned as a list.
;; Only the first address of STR is used, since `rfc822/parse-address'
;; rather than `rfc822/parse-addresses' is called.
657 (defun rfc822/extract-address-components (str)
658 "Extract full name and canonical address from STR.
659 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
660 If no name can be extracted, FULL-NAME will be nil. [tl-822.el]"
661 (let* ((structure (car
662 (rfc822/parse-address
663 (rfc822/lexical-analyze str)
665 (phrase (rfc822/full-name-string structure))
666 (address (rfc822/address-string structure))
668 (list phrase address)
677 ;;; tl-822.el ends here