2 ;;; tl-822.el --- RFC 822 parser for GNU Emacs
4 ;;; Copyright (C) 1995 Free Software Foundation, Inc.
5 ;;; Copyright (C) 1995,1996 MORIOKA Tomohiko
7 ;;; Author: MORIOKA Tomohiko <morioka@jaist.ac.jp>
8 ;;; Keywords: mail, news, RFC 822
10 ;;; This file is part of tl (Tiny Library).
12 ;;; This program is free software; you can redistribute it and/or
13 ;;; modify it under the terms of the GNU General Public License as
14 ;;; published by the Free Software Foundation; either version 2, or
15 ;;; (at your option) any later version.
17 ;;; This program is distributed in the hope that it will be useful,
18 ;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 ;;; General Public License for more details.
22 ;;; You should have received a copy of the GNU General Public License
23 ;;; along with this program. If not, write to the Free Software
24 ;;; Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
;; Version bookkeeping: the raw RCS identifier string of this file, and
;; a version string derived from it by `get-version-string' (a helper
;; that is not defined in this part of the file).
32 (defconst rfc822/RCS-ID
33 "$Id: tl-822.el,v 7.19 1996-04-25 21:24:27 morioka Exp $")
34 (defconst rfc822/version (get-version-string rfc822/RCS-ID))
;; One or more field-name characters: the ranges `!'-`9' and `;'-`~'
;; cover the ASCII characters from `!' to `~' with only `:' left out.
40 (defconst rfc822/field-name-regexp "[!-9;-~]+")
;; A field name (captured as group 1) immediately followed by its colon.
42 (defconst rfc822/field-top-regexp
43 (concat "\\(" rfc822/field-name-regexp "\\):"))
;; A newline directly followed by the beginning of another field.
45 (defconst rfc822::next-field-top-regexp (concat "\n" rfc822/field-top-regexp))
;; Move point towards the end of the field that point is in.
;; If another field starts further on (a newline followed by
;; "NAME:"), point goes to that newline.  Otherwise, if an empty line
;; is found, point goes to the position just before it.
;; NOTE(review): what happens when neither search succeeds, and the
;; value handed back to the caller, should be confirmed.
47 (defun rfc822/field-end ()
48 (if (re-search-forward rfc822::next-field-top-regexp nil t)
;; Match starts at the newline, so this lands on the end of the field.
49 (goto-char (match-beginning 0))
50 (if (re-search-forward "^$" nil t)
51 (goto-char (1- (match-beginning 0)))
;; Look up the field called NAME in the current buffer and extract text
;; from it with `buffer-substring-no-properties'.
;; Searching is case-insensitive (`case-fold-search' is bound to t).
;; NAME is spliced into the regexp as-is, so any regexp metacharacters
;; in NAME are interpreted as regexp syntax rather than literally.
;; NOTE(review): presumably the first empty line bounds the header
;; region being searched -- confirm.
57 (defun rfc822/get-field-body (name)
58 (let ((case-fold-search t))
62 (goto-char (point-min))
;; Position just after the first empty line, when there is one.
63 (or (and (re-search-forward "^$" nil t) (match-end 0))
66 (goto-char (point-min))
;; "NAME:" at the beginning of a line, plus any blanks after the colon.
67 (if (re-search-forward (concat "^" name ":[ \t]*") nil t)
68 (buffer-substring-no-properties
;; Multi-field variant of the single-field lookup: FIELD-NAMES is a list
;; of field names and the result list DEST starts out with one
;; DEFAULT-VALUE entry per name.
;; Searching is case-insensitive.  Each name is spliced into the regexp
;; unquoted, so regexp metacharacters in a name act as regexp syntax.
;; The loop stops at the first nil element of the name list.
;; NOTE(review): how a found body is stored into DEST should be
;; confirmed.
74 (defun rfc822/get-field-bodies (field-names &optional default-value)
75 (let ((case-fold-search t))
79 (goto-char (point-min))
;; Position just after the first empty line, when there is one.
80 (or (and (re-search-forward "^$" nil t) (match-end 0))
83 (goto-char (point-min))
84 (let* ((dest (make-list (length field-names) default-value))
88 (while (setq field-name (car s-rest))
89 (if (re-search-forward (concat "^" field-name ":[ \t]*") nil t)
91 (buffer-substring-no-properties
;; Advance to the next field name.
94 (setq s-rest (cdr s-rest)
;; Narrow the buffer to its header part.  The region starts at the
;; beginning of the buffer (point is moved there as well).
;; The regexp built below matches a line that is either empty or
;; consists solely of BOUNDARY (taken literally via `regexp-quote');
;; with no BOUNDARY it matches only an empty line.
;; NOTE(review): presumably the first such line marks the end of the
;; narrowed region -- confirm.
103 (defun rfc822/narrow-to-header (&optional boundary)
104 (narrow-to-region (goto-char (point-min))
107 (concat "^\\(" (regexp-quote (or boundary "")) "\\)?$")
;; Collect the header fields whose text matches the regexp PAT.
;; The buffer is narrowed to the header (see `rfc822/narrow-to-header',
;; BOUNDARY is passed through) and scanned from the top for field
;; starts.  Each field whose text matches PAT is appended to HEADER
;; followed by a newline.  Matching is case-insensitive.
;; NOTE(review): confirm that the narrowing is undone before returning.
112 (defun rfc822/get-header-string (pat &optional boundary)
113 (let ((case-fold-search t))
116 (rfc822/narrow-to-header boundary)
117 (goto-char (point-min))
119 (while (re-search-forward rfc822/field-top-regexp nil t)
;; FIELD starts at the beginning of the field name.
120 (setq field (buffer-substring (match-beginning 0)
123 (if (string-match pat field)
124 (setq header (concat header field "\n"))
;; Collect the header fields whose text does NOT match the regexp PAT.
;; The buffer is narrowed to the header (see `rfc822/narrow-to-header',
;; BOUNDARY is passed through) and scanned from the top for field
;; starts.  Each field whose text fails to match PAT is appended to
;; HEADER followed by a newline.  Matching is case-insensitive.
;; NOTE(review): confirm that the narrowing is undone before returning.
129 (defun rfc822/get-header-string-except (pat &optional boundary)
130 (let ((case-fold-search t))
133 (rfc822/narrow-to-header boundary)
134 (goto-char (point-min))
136 (while (re-search-forward rfc822/field-top-regexp nil t)
;; FIELD starts at the beginning of the field name.
137 (setq field (buffer-substring (match-beginning 0)
140 (if (not (string-match pat field))
141 (setq header (concat header field "\n"))
;; Linear white space: one or more occurrences of an optional newline
;; followed by a space or a tab.
150 (defconst rfc822/linear-white-space-regexp "\\(\n?[ \t]\\)+")
;; Quoted pair: a backslash followed by one character matched by `.'.
151 (defconst rfc822/quoted-pair-regexp "\\\\.")
;; Characters excluded from qtext: double quote, backslash, CR and LF.
152 (defconst rfc822/non-qtext-char-list '(?\" ?\\ ?\r ?\n))
;; One character that is none of the above and is not a space or tab.
;; NOTE(review): assumes `char-list-to-string' turns the character list
;; into a string of those characters -- confirm.
153 (defconst rfc822/qtext-regexp
154 (concat "[^" (char-list-to-string rfc822/non-qtext-char-list) " \t]"))
;; Quoted string: assembled from optional linear white space around an
;; alternation of qtext and quoted-pair (built with `regexp-or').
155 (defconst rfc822/quoted-string-regexp
159 "\\(" rfc822/linear-white-space-regexp "?"
160 (regexp-or rfc822/qtext-regexp rfc822/quoted-pair-regexp)
162 rfc822/linear-white-space-regexp "?"
;; Build the quoted-string form of STR.  A character that is listed in
;; `rfc822/non-qtext-char-list' (double quote, backslash, CR, LF) is
;; emitted with a backslash in front of it.
;; NOTE(review): presumably other characters are copied unchanged and
;; the result is enclosed in double quotes -- confirm.
165 (defun rfc822/wrap-as-quoted-string (str)
166 "Wrap string STR as RFC 822 quoted-string. [tl-822.el]"
170 (if (memq chr rfc822/non-qtext-char-list)
171 (concat "\\" (char-to-string chr))
;; Remove quoted-pair escaping from STR, working character by
;; character and accumulating the output in DEST.
;; The test below holds when FLAG is set or the character is not a
;; backslash.
;; NOTE(review): presumably FLAG records that the previous character
;; was an unescaped backslash, so the escaped character is copied
;; verbatim -- confirm.
177 (defun rfc822/strip-quoted-pair (str)
183 (setq chr (elt str i))
184 (if (or flag (not (eq chr ?\\)))
186 (setq dest (concat dest (char-to-string chr)))
;; Strip the quoting from STR: when STR begins and ends with a double
;; quote, the text between the two quotes is taken; the result is then
;; passed through `rfc822/strip-quoted-pair'.
;; NOTE(review): presumably STR is used unchanged when it is not
;; enclosed in double quotes -- confirm.
195 (defun rfc822/strip-quoted-string (str)
196 (rfc822/strip-quoted-pair
;; MAX is the index of the last character of STR.
197 (let ((max (- (length str) 1))
199 (if (and (eq (elt str 0) ?\")
200 (eq (elt str max) ?\")
202 (substring str 1 max)
;; Unfold STR: every newline that is followed by one or more
;; whitespace-syntax characters is replaced by a single space.
;; The text before each match is moved to DEST and STR is shortened to
;; what follows the match.
;; NOTE(review): presumably the remaining STR is appended to DEST at
;; the end -- confirm.
210 (defun rfc822/unfolding-string (str)
212 (while (string-match "\n\\s +" str)
213 (setq dest (concat dest (substring str 0 (match-beginning 0)) " "))
214 (setq str (substring str (match-end 0)))
220 ;;; @ lexical analysis
;; Character sets used by the lexical analyzer, each held as a string
;; whose characters are the members of the set.
;; Special characters.  `<' and `>' appear twice in the string; for the
;; membership tests done with `find' the duplicates make no difference.
223 (defconst rfc822/special-chars "][()<>@,;:\\<>.\"")
;; Space, tab and newline.
224 (defconst rfc822/space-chars " \t\n")
;; Characters that cannot be part of an atom: specials plus spaces.
225 (defconst rfc822/non-atom-chars
226 (concat rfc822/special-chars rfc822/space-chars))
;; Characters that end the text of a domain literal.
227 (defconst rfc822/non-dtext-chars "[]")
;; Characters that end a run of plain comment text.
228 (defconst rfc822/non-ctext-chars "()")
;; Lexer step: read leading white space from STR.
;; I delimits the leading run of characters that belong to
;; `rfc822/space-chars'; the token produced is (spaces . TEXT) with TEXT
;; being that run.
;; NOTE(review): assumes `position-mismatched' yields the index of the
;; first element failing the predicate, and that the cdr of the result
;; is the unread remainder of STR -- confirm.
230 (defun rfc822/analyze-spaces (str)
231 (let ((i (position-mismatched
234 (find elt rfc822/space-chars)
238 (cons (cons 'spaces (substring str 0 i))
;; Lexer step: read one special character from STR.
;; Applies only when STR is non-empty and its first character is in
;; `rfc822/special-chars'; the token is (specials . TEXT) where TEXT is
;; that single character as a string.
;; NOTE(review): presumably the cdr of the result is the rest of STR
;; -- confirm.
243 (defun rfc822/analyze-special (str)
244 (if (and (> (length str) 0)
245 (find (elt str 0) rfc822/special-chars)
247 (cons (cons 'specials (substring str 0 1))
;; Lexer step: read an atom from STR.
;; I delimits the leading run of characters that are not in
;; `rfc822/non-atom-chars'; the token produced is (atom . TEXT) with
;; TEXT being that run.
;; NOTE(review): assumes `position-mismatched' yields the index of the
;; first element failing the predicate, and that the cdr of the result
;; is the unread remainder of STR -- confirm.
252 (defun rfc822/analyze-atom (str)
253 (let ((i (position-mismatched
256 (not (find elt rfc822/non-atom-chars))
260 (cons (cons 'atom (substring str 0 i))
;; Lexer step: read a quoted pair from STR.
;; Requires STR to hold at least two characters; the token produced is
;; (quoted-pair . TEXT) where TEXT is the first two characters of STR.
;; NOTE(review): presumably the first character must be a backslash
;; -- confirm.
265 (defun rfc822/analyze-quoted-pair (str)
266 (if (and (>= (length str) 2)
269 (cons (cons 'quoted-pair (substring str 0 2))
;; Lexer step: read a quoted string from STR.
;; STR must be non-empty.  Its first character is dropped and the
;; remainder is scanned up to the first character listed in
;; `rfc822/non-qtext-char-list' (double quote, backslash, CR or LF);
;; that character has to be a double quote.  The token produced is
;; (quoted-string . TEXT) where TEXT is the scanned run, i.e. without
;; the dropped first character and without the closing quote.
;; NOTE(review): presumably the first character is required to be a
;; double quote; assumes `position-mismatched' yields the index of the
;; first element failing the predicate -- confirm.
274 (defun rfc822/analyze-quoted-string (str)
275 (if (and (> (length str) 0)
278 (let* ((i (position-mismatched
281 (not (memq elt rfc822/non-qtext-char-list))
;; From here on STR no longer includes its original first character.
283 (setq str (substring str 1))
285 (rest (substring str i))
;; The scan must have stopped on the closing double quote.
289 (eq (elt rest 0) ?\")
291 (cons (cons 'quoted-string (substring str 0 i))
;; Lexer step: read a domain literal from STR.
;; STR must be non-empty.  Its first character is dropped and the
;; remainder is scanned up to the first `[' or `]'
;; (`rfc822/non-dtext-chars'); that character has to be `]'.  The token
;; produced is (domain-literal . TEXT) where TEXT is the scanned run,
;; i.e. without the dropped first character and without the closing `]'.
;; NOTE(review): presumably the first character is required to be `[';
;; assumes `position-mismatched' yields the index of the first element
;; failing the predicate -- confirm.
296 (defun rfc822/analyze-domain-literal (str)
297 (if (and (> (length str) 0)
300 (let* ((i (position-mismatched
303 (not (find elt rfc822/non-dtext-chars))
;; From here on STR no longer includes its original first character.
305 (setq str (substring str 1))
307 (rest (substring str i))
;; The scan must have stopped on the closing bracket.
311 (eq (elt rest 0) ?\])
313 (cons (cons 'domain-literal (substring str 0 i))
;; Lexer step: read a (possibly nested) comment from STR.
;; STR must be non-empty; its first character is dropped.  The loop then
;; alternates between copying a run of characters other than `(' and `)'
;; into DEST and handling what follows: a nested comment is read by a
;; recursive call and its text is added to DEST wrapped in parentheses
;; again.  The token produced is (comment . DEST).
;; NOTE(review): presumably the first character must be `(' and the
;; loop is left on the matching `)'; assumes `position-mismatched'
;; yields the index of the first element failing the predicate
;; -- confirm.
318 (defun rfc822/analyze-comment (str)
319 (if (and (> (length str) 0)
324 (setq str (substring str 1))
326 (while (not (string-equal str ""))
327 (setq p (position-mismatched
330 (not (find elt rfc822/non-ctext-chars))
;; Move the plain text run from STR to DEST.
333 (setq dest (concat dest (substring str 0 p)))
334 (setq str (substring str p))
;; Nested comment: keep its text, restoring the parentheses.
336 ((setq ret (rfc822/analyze-comment str))
337 (setq dest (concat dest "(" (cdr (car ret)) ")"))
343 (if (and (> (length str) 0)
346 (cons (cons 'comment dest)
;; Split STR into a list of tokens of the form (TYPE . TEXT).
;; While STR is not empty the analyzers are tried in this order: quoted
;; string, domain literal, comment, spaces, special, atom.  The token of
;; the first analyzer that succeeds (the car of its result) is pushed
;; onto DEST.
;; NOTE(review): presumably STR is replaced by the unread remainder
;; after each token and DEST is reversed at the end -- confirm.
351 (defun rfc822/lexical-analyze (str)
353 (i 0)(len (length str))
355 (while (not (string-equal str ""))
357 (or (rfc822/analyze-quoted-string str)
358 (rfc822/analyze-domain-literal str)
359 (rfc822/analyze-comment str)
360 (rfc822/analyze-spaces str)
361 (rfc822/analyze-special str)
362 (rfc822/analyze-atom str)
365 (setq dest (cons (car ret) dest))
;; Non-nil when TOKEN, a (TYPE . TEXT) pair, has type `spaces' or
;; `comment'.
375 (defun rfc822/ignored-token-p (token)
376 (let ((type (car token)))
377 (or (eq type 'spaces)(eq type 'comment))
;; Take tokens from the front of the token list LAL.
;; Tokens for which `rfc822/ignored-token-p' holds (spaces, comments)
;; are accumulated in ITL.  The car of the result is the list of those
;; tokens followed by the current token, restored to input order.
;; NOTE(review): presumably the cdr of the result is the rest of LAL
;; -- confirm.
380 (defun rfc822/parse-token (lal)
384 (setq token (car lal))
385 (rfc822/ignored-token-p token)
388 (setq itl (cons token itl))
390 (cons (nreverse (cons token itl))
;; Like `rfc822/parse-token', with an additional check of each token's
;; text (its cdr) through `find-charset-string'.
;; Ignorable tokens (spaces, comments) are accumulated in ITL; PARSED is
;; the list of those tokens followed by the current token, in input
;; order.  The value built on the last line pairs PARSED with the tokens
;; after the current one.
;; NOTE(review): presumably a token whose text contains non-ASCII
;; characters makes the parse fail -- confirm against
;; `find-charset-string'.
394 (defun rfc822/parse-ascii-token (lal)
395 (let (token itl parsed token-value)
397 (setq token (car lal))
398 (if (and (setq token-value (cdr token))
399 (find-charset-string token-value)
402 (rfc822/ignored-token-p token)
405 (setq itl (cons token itl))
408 (setq parsed (nreverse (cons token itl)))
410 (cons parsed (cdr lal))
;; Variant of `rfc822/parse-token' in which only `spaces' tokens are
;; tested for skipping, so a comment counts as a token of its own.
;; Skipped tokens are accumulated in ITL; the car of the result is the
;; list of those tokens followed by the current token, in input order.
;; NOTE(review): presumably the cdr of the result is the rest of LAL
;; -- confirm.
413 (defun rfc822/parse-token-or-comment (lal)
417 (setq token (car lal))
418 (eq (car token) 'spaces)
421 (setq itl (cons token itl))
423 (cons (nreverse (cons token itl))
;; Parse a word from the token list LAL.
;; One token group is read with `rfc822/parse-ascii-token'; when that
;; group contains an `atom' or a `quoted-string' token, the value is
;; ((word . GROUP) . REST).
;; NOTE(review): presumably REST is the unparsed remainder of LAL and
;; nil is produced otherwise -- confirm.
427 (defun rfc822/parse-word (lal)
428 (let ((ret (rfc822/parse-ascii-token lal)))
430 (let ((elt (car ret))
433 (if (or (assq 'atom elt)
434 (assq 'quoted-string elt))
435 (cons (cons 'word elt) rest)
;; Parse a word, or a comment standing in for a word, from LAL.
;; One token group is read with `rfc822/parse-token-or-comment'.  When
;; it contains an `atom' or a `quoted-string' token the value is
;; ((word . GROUP) . REST); another branch yields
;; ((comment-word . GROUP) . REST).
;; NOTE(review): presumably the second branch applies when the group
;; contains a `comment' token -- confirm.
438 (defun rfc822/parse-word-or-comment (lal)
439 (let ((ret (rfc822/parse-token-or-comment lal)))
441 (let ((elt (car ret))
444 (cond ((or (assq 'atom elt)
445 (assq 'quoted-string elt))
446 (cons (cons 'word elt) rest)
449 (cons (cons 'comment-word elt) rest)
;; Parse a phrase: a sequence of words (or comments standing in for
;; words) read with `rfc822/parse-word-or-comment'.
;; The tokens of every element read are appended to PHRASE; the value
;; built on the last line is ((phrase . PHRASE) . LAL).
;; NOTE(review): presumably LAL is advanced inside the loop and nil is
;; produced when no word was read -- confirm.
453 (defun rfc822/parse-phrase (lal)
455 (while (setq ret (rfc822/parse-word-or-comment lal))
456 (setq phrase (append phrase (cdr (car ret))))
460 (cons (cons 'phrase phrase) lal)
;; Parse the local part of an address: a word, followed by any number
;; of ("." word) continuations.
;; LOCAL-PART starts as the tokens of the first word; for each
;; continuation the dot tokens and the tokens of the following word are
;; appended.  The value built on the last line is
;; ((local-part . LOCAL-PART) . LAL).
;; NOTE(review): presumably LAL is advanced past each consumed element
;; -- confirm.
463 (defun rfc822/parse-local-part (lal)
464 (let ((ret (rfc822/parse-word lal)))
466 (let ((local-part (cdr (car ret))) dot)
468 (while (and (setq ret (rfc822/parse-ascii-token lal))
;; The token just read must be the special character ".".
470 (string-equal (cdr (assq 'specials dot)) ".")
471 (setq ret (rfc822/parse-word (cdr ret)))
473 (append local-part dot (cdr (car ret)))
477 (cons (cons 'local-part local-part) lal)
;; Parse one sub-domain from LAL.
;; One token group is read with `rfc822/parse-ascii-token'; it is
;; accepted when it contains an `atom' or a `domain-literal' token, and
;; the car of the value is then (sub-domain . GROUP).
;; NOTE(review): presumably the cdr of the value is the rest of LAL
;; -- confirm.
480 (defun rfc822/parse-sub-domain (lal)
481 (let ((ret (rfc822/parse-ascii-token lal)))
483 (let ((sub-domain (car ret)))
484 (if (or (assq 'atom sub-domain)
485 (assq 'domain-literal sub-domain)
487 (cons (cons 'sub-domain sub-domain)
;; Parse a domain: a sub-domain, followed by any number of
;; ("." sub-domain) continuations.
;; DOMAIN starts as the tokens of the first sub-domain; for each
;; continuation the dot tokens and the tokens of the following
;; sub-domain are appended.  The value built on the last line is
;; ((domain . DOMAIN) . LAL).
;; NOTE(review): presumably LAL is advanced past each consumed element
;; -- confirm.
492 (defun rfc822/parse-domain (lal)
493 (let ((ret (rfc822/parse-sub-domain lal)))
495 (let ((domain (cdr (car ret))) dot)
497 (while (and (setq ret (rfc822/parse-ascii-token lal))
;; The token just read must be the special character ".".
499 (string-equal (cdr (assq 'specials dot)) ".")
500 (setq ret (rfc822/parse-sub-domain (cdr ret)))
502 (append domain dot (cdr (car ret)))
506 (cons (cons 'domain domain) lal)
;; Parse "@" followed by a domain.
;; The first token group must contain the special character "@"; a
;; domain is then parsed from the tokens after it.  The car of the value
;; is (at-domain . TOKENS), TOKENS being the "@" group followed by the
;; tokens of the domain.
;; NOTE(review): presumably the cdr of the value is the rest of the
;; token list -- confirm.
509 (defun rfc822/parse-at-domain (lal)
510 (let ((ret (rfc822/parse-ascii-token lal)) at-sign)
512 (setq at-sign (car ret))
513 (string-equal (cdr (assq 'specials at-sign)) "@")
514 (setq ret (rfc822/parse-domain (cdr ret)))
516 (cons (cons 'at-domain (append at-sign (cdr (car ret))))
;; Parse an addr-spec: a local part, then an "@domain" part.
;; ADDR starts as the tokens of the local part; when an at-domain
;; follows, its tokens are appended.  The value built on the last line
;; is ((addr-spec . ADDR) . LAL).
;; NOTE(review): the at-domain step sits inside an `and', so it looks
;; optional (a bare local part would still be accepted) -- confirm.
520 (defun rfc822/parse-addr-spec (lal)
521 (let ((ret (rfc822/parse-local-part lal))
525 (setq addr (cdr (car ret)))
527 (and (setq ret (rfc822/parse-at-domain lal))
528 (setq addr (append addr (cdr (car ret))))
531 (cons (cons 'addr-spec addr) lal)
;; Parse a route: an at-domain, any number of ("," at-domain)
;; continuations, and a ":".
;; ROUTE starts as the tokens of the first at-domain; each continuation
;; appends the comma tokens and the tokens of the next at-domain; the
;; colon tokens are appended last.  The car of the value is
;; (route . ROUTE).
;; NOTE(review): presumably the parse fails when the colon is missing,
;; and the cdr of the value is the rest of the token list -- confirm.
534 (defun rfc822/parse-route (lal)
535 (let ((ret (rfc822/parse-at-domain lal))
539 (setq route (cdr (car ret)))
541 (while (and (setq ret (rfc822/parse-ascii-token lal))
542 (setq comma (car ret))
543 (string-equal (cdr (assq 'specials comma)) ",")
544 (setq ret (rfc822/parse-at-domain (cdr ret)))
546 (setq route (append route comma (cdr (car ret))))
;; The route is closed by the special character ":".
549 (and (setq ret (rfc822/parse-ascii-token lal))
550 (setq colon (car ret))
551 (string-equal (cdr (assq 'specials colon)) ":")
552 (setq route (append route colon))
555 (cons (cons 'route route)
;; Parse a route-addr: "<", an optional route, an addr-spec and ">".
;; The local variables named `<' and `>' hold the token groups of the
;; angle brackets.  The car of the value is (route-addr . TOKENS) where
;; TOKENS is the route tokens followed by the addr-spec tokens; the
;; bracket tokens themselves are not included.
;; NOTE(review): presumably the cdr of the value is the rest of the
;; token list -- confirm.
560 (defun rfc822/parse-route-addr (lal)
561 (let ((ret (rfc822/parse-ascii-token lal))
565 (string-equal (cdr (assq 'specials <)) "<")
;; The route is optional: ROUTE is only set when one is parsed.
567 (progn (and (setq ret (rfc822/parse-route lal))
568 (setq route (cdr (car ret)))
571 (setq ret (rfc822/parse-addr-spec lal))
573 (setq addr-spec (cdr (car ret)))
575 (setq ret (rfc822/parse-ascii-token lal))
577 (string-equal (cdr (assq 'specials >)) ">")
579 (cons (cons 'route-addr (append route addr-spec))
;; Parse a display phrase followed by a route-addr, as in
;; "Some Name <user@host>".
;; PHRASE is the token list of the phrase; when a route-addr follows,
;; the car of the value is a list starting with `phrase-route-addr'.
;; NOTE(review): presumably that list also carries PHRASE and the
;; route-addr tokens, and the phrase may be absent -- confirm.
584 (defun rfc822/parse-phrase-route-addr (lal)
585 (let ((ret (rfc822/parse-phrase lal)) phrase)
588 (setq phrase (cdr (car ret)))
591 (if (setq ret (rfc822/parse-route-addr lal))
592 (cons (list 'phrase-route-addr
;; Parse a mailbox: a phrase-route-addr if possible, otherwise an
;; addr-spec, optionally followed by a comment.
;; MBOX is the parsed address structure; COMMENT is the text of a
;; `comment' token found in the next token group, if any.  The car of
;; the value is (mailbox MBOX COMMENT).
;; NOTE(review): presumably the cdr of the value is the rest of the
;; token list -- confirm.
598 (defun rfc822/parse-mailbox (lal)
599 (let ((ret (or (rfc822/parse-phrase-route-addr lal)
600 (rfc822/parse-addr-spec lal)))
604 (setq mbox (car ret))
606 (if (and (setq ret (rfc822/parse-token-or-comment lal))
607 (setq comment (cdr (assq 'comment (car ret))))
611 (cons (list 'mailbox mbox comment)
;; Parse a group: a phrase, ":", a comma-separated list of mailboxes
;; and ";".
;; PHRASE is the token list of the group name.  Mailboxes are pushed
;; onto MBOX as they are parsed, so MBOX is reversed when the result is
;; built.  The car of the value is (group PHRASE MAILBOXES).
;; NOTE(review): the first mailbox sits inside an `and', so an empty
;; group looks permitted; presumably the cdr of the value is the rest of
;; the token list -- confirm.
615 (defun rfc822/parse-group (lal)
616 (let ((ret (rfc822/parse-phrase lal))
617 phrase colon comma mbox semicolon)
619 (setq phrase (cdr (car ret)))
621 (setq ret (rfc822/parse-ascii-token lal))
622 (setq colon (car ret))
623 (string-equal (cdr (assq 'specials colon)) ":")
626 (and (setq ret (rfc822/parse-mailbox lal))
627 (setq mbox (list (car ret)))
;; Further mailboxes, each introduced by a comma.
630 (while (and (setq ret (rfc822/parse-ascii-token lal))
631 (setq comma (car ret))
633 (cdr (assq 'specials comma)) ",")
635 (setq ret (rfc822/parse-mailbox lal))
636 (setq mbox (cons (car ret) mbox))
;; The group is closed by the special character ";".
640 (and (setq ret (rfc822/parse-ascii-token lal))
641 (setq semicolon (car ret))
642 (string-equal (cdr (assq 'specials semicolon)) ";")
644 (cons (list 'group phrase (nreverse mbox))
;; Parse one address from the token list LAL: a group is tried first,
;; then a mailbox.  The value is that of the first parser that succeeds.
649 (defun rfc822/parse-address (lal)
650 (or (rfc822/parse-group lal)
651 (rfc822/parse-mailbox lal)
;; Parse a comma-separated list of addresses from the token list LAL.
;; DEST starts with the first address; each further address, introduced
;; by the special character ",", is pushed onto the front of DEST.
;; NOTE(review): presumably LAL is advanced after each address and DEST
;; is reversed before it is returned -- confirm.
654 (defun rfc822/parse-addresses (lal)
655 (let ((ret (rfc822/parse-address lal)))
657 (let ((dest (list (car ret))))
659 (while (and (setq ret (rfc822/parse-ascii-token lal))
660 (string-equal (cdr (assq 'specials (car ret))) ",")
661 (setq ret (rfc822/parse-address (cdr ret)))
663 (setq dest (cons (car ret) dest))
;; Turn the token sequence SEQ into a string.
;; Tokens of type `spaces' are handled separately from all others.
;; NOTE(review): how each kind of token is rendered should be
;; confirmed.
669 (defun rfc822/addr-to-string (seq)
672 (if (eq (car token) 'spaces)
;; Produce the address text for ADDRESS, a parsed `group' or `mailbox'
;; structure.
;; For a group the function is applied recursively to its members and
;; the results are joined with `mapconcat'.  For a mailbox the address
;; structure is its second element; what is passed to
;; `rfc822/addr-to-string' depends on whether that structure is a
;; `phrase-route-addr'.
;; NOTE(review): the separator used for groups and the exact part taken
;; from each kind of mailbox should be confirmed.
679 (defun rfc822/address-string (address)
680 (cond ((eq (car address) 'group)
681 (mapconcat (function rfc822/address-string)
685 ((eq (car address) 'mailbox)
686 (let ((addr (nth 1 address))
688 (rfc822/addr-to-string
689 (if (eq (car addr) 'phrase-route-addr)
;; Produce the full-name text for ADDRESS, a parsed `group' or
;; `mailbox' structure.
;; For a mailbox, ADDR is its second element (the address structure)
;; and COMMENT its third; when ADDR is a `phrase-route-addr', PHRASE is
;; built with `mapconcat'.
;; NOTE(review): how PHRASE and COMMENT are combined, and what is
;; produced for a group, should be confirmed.
695 (defun rfc822/full-name-string (address)
696 (cond ((eq (car address) 'group)
703 ((eq (car address) 'mailbox)
704 (let ((addr (nth 1 address))
705 (comment (nth 2 address))
707 (if (eq (car addr) 'phrase-route-addr)
708 (setq phrase (mapconcat (function
;; Entry point combining the lexer and the address parser: STR is
;; tokenized, one address is parsed from the tokens, and the full name
;; and the address text are derived from the parsed structure.
717 (defun rfc822/extract-address-components (str)
718 "Extract full name and canonical address from STR.
719 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
720 If no name can be extracted, FULL-NAME will be nil. [tl-822.el]"
;; STRUCTURE is the parsed address (the car of the parser's result).
721 (let* ((structure (car
722 (rfc822/parse-address
723 (rfc822/lexical-analyze str)
725 (phrase (rfc822/full-name-string structure))
726 (address (rfc822/address-string structure))
728 (list phrase address)
737 ;;; tl-822.el ends here