;;; eword-encode.el --- RFC 2047 based encoded-word encoder for GNU Emacs
-;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc.
+;; Copyright (C) 1995,1996,1997,1998,1999,2000 Free Software Foundation, Inc.
-;; Author: MORIOKA Tomohiko <morioka@jaist.ac.jp>
+;; Author: MORIOKA Tomohiko <tomo@m17n.org>
;; Keywords: encoded-word, MIME, multilingual, header, mail, news
;; This file is part of FLIM (Faithful Library about Internet Message).
;;; Code:
-(require 'emu)
+(require 'mime-def)
(require 'mel)
(require 'std11)
-(require 'mime-def)
(require 'eword-decode)
;;; @ variables
;;;
-(defvar eword-field-encoding-method-alist
- '(("X-Nsubject" . iso-2022-jp-2)
- ("Newsgroups" . nil)
- ("Message-ID" . nil)
- (t . mime)
- )
- "*Alist to specify field encoding method.
-Its key is field-name, value is encoding method.
-
-If method is `mime', this field will be encoded into MIME format.
-
-If method is a MIME-charset, this field will be encoded as the charset
-when it must be convert into network-code.
-
-If method is `default-mime-charset', this field will be encoded as
-variable `default-mime-charset' when it must be convert into
-network-code.
+;; User options are defined in mime-def.el.
-If method is nil, this field will not be encoded.")
-
-(defvar eword-charset-encoding-alist
+(defvar mime-header-charset-encoding-alist
'((us-ascii . nil)
(iso-8859-1 . "Q")
(iso-8859-2 . "Q")
(iso-8859-7 . "Q")
(iso-8859-8 . "Q")
(iso-8859-9 . "Q")
+ (iso-8859-14 . "Q")
+ (iso-8859-15 . "Q")
(iso-2022-jp . "B")
+ (iso-2022-jp-3 . "B")
(iso-2022-kr . "B")
(gb2312 . "B")
(cn-gb . "B")
(cn-gb-2312 . "B")
(euc-kr . "B")
+ (tis-620 . "B")
(iso-2022-jp-2 . "B")
(iso-2022-int-1 . "B")
+ (utf-8 . "B")
))
+(defvar mime-header-default-charset-encoding "Q")
+
+(defvar mime-header-encode-method-alist
+ '((eword-encode-address-list
+ . (Reply-To
+ From Sender
+ Resent-Reply-To Resent-From
+ Resent-Sender To Resent-To
+ Cc Resent-Cc Bcc Resent-Bcc
+ Dcc))
+ (eword-encode-in-reply-to . (In-Reply-To))
+ (eword-encode-structured-field-body . (Mime-Version User-Agent))
+ (eword-encode-unstructured-field-body)))
;;; @ encoded-text encoder
;;;
ENCODING allows \"B\" or \"Q\".
MODE is allows `text', `comment', `phrase' or nil. Default value is
`phrase'."
- (let ((text
- (cond ((string= encoding "B")
- (base64-encode-string string))
- ((string= encoding "Q")
- (q-encoding-encode-string string mode))
- )
- ))
+ (let ((text (encoded-text-encode-string string encoding mode)))
(if text
(concat "=?" (upcase (symbol-name charset)) "?"
encoding "?" text "?=")
;;;
(defsubst eword-encode-char-type (character)
- (if (or (eq character ? )(eq character ?\t))
+ (if (memq character '(? ?\t ?\n))
nil
(char-charset character)
))
(let ((len (length string))
dest)
(while (> len 0)
- (let* ((chr (sref string 0))
+ (let* ((chr (aref string 0))
+ ;; (chr (sref string 0))
(charset (eword-encode-char-type chr))
- (i (char-length chr)))
+ (i 1)
+ ;; (i (char-length chr))
+ )
(while (and (< i len)
- (setq chr (sref string i))
- (eq charset (eword-encode-char-type chr))
- )
- (setq i (char-next-index chr i))
+ (setq chr (aref string i))
+ ;; (setq chr (sref string i))
+ (eq charset (eword-encode-char-type chr)))
+ (setq i (1+ i))
+ ;; (setq i (char-next-index chr i))
)
(setq dest (cons (cons charset (substring string 0 i)) dest)
string (substring string i)
- len (- len i)
- )))
- (nreverse dest)
- ))
+ len (- len i))))
+ (nreverse dest)))
;;; @ word
(defmacro ew-rword-type (rword)
(` (car (cdr (cdr (cdr (, rword)))))))
-(defun tm-eword::find-charset-rule (charsets)
+(defun ew-find-charset-rule (charsets)
(if charsets
- (let* ((charset (charsets-to-mime-charset charsets))
- (encoding (cdr (assq charset eword-charset-encoding-alist)))
- )
- (list charset encoding)
- )))
+ (let* ((charset (find-mime-charset-by-charsets charsets))
+ (encoding
+ (cdr (or (assq charset mime-header-charset-encoding-alist)
+ (cons charset mime-header-default-charset-encoding)))))
+ (list charset encoding))))
+
+;; [tomo:2002-11-05] The following code is a quick-fix for emacsen
+;; which is not depended on the Mule model. We should redesign
+;; `eword-encode-split-string' to avoid to depend on the Mule model.
+(if (featurep 'utf-2000)
+;; for CHISE Architecture
+(defun tm-eword::words-to-ruled-words (wl &optional mode)
+ (let (mcs)
+ (mapcar (function
+ (lambda (word)
+ (setq mcs (detect-mime-charset-string (cdr word)))
+ (make-ew-rword
+ (cdr word)
+ mcs
+ (cdr (or (assq mcs mime-header-charset-encoding-alist)
+ (cons mcs mime-header-default-charset-encoding)))
+ mode)
+ ))
+ wl)))
+;; for legacy Mule
(defun tm-eword::words-to-ruled-words (wl &optional mode)
(mapcar (function
(lambda (word)
- (let ((ret (tm-eword::find-charset-rule (car word))))
+ (let ((ret (ew-find-charset-rule (car word))))
(make-ew-rword (cdr word) (car ret)(nth 1 ret) mode)
)))
wl))
+)
-(defun tm-eword::space-process (seq)
+(defun ew-space-process (seq)
(let (prev a ac b c cc)
(while seq
(setq b (car seq))
(setq seq (cdr seq))
(setq c (car seq))
(setq cc (ew-rword-charset c))
- (if (null (ew-rword-charset b))
+ (if (and (null (ew-rword-charset b))
+ (not (eq (ew-rword-type b) 'special)))
(progn
(setq a (car prev))
(setq ac (ew-rword-charset a))
))
(defun eword-encode-split-string (str &optional mode)
- (tm-eword::space-process
+ (ew-space-process
(tm-eword::words-to-ruled-words
(eword-encode-charset-words-to-words
(eword-encode-divide-into-charset-words str))
)
((string-equal encoding "Q")
(setq string (encode-mime-charset-string string charset))
- (q-encoding-encoded-length string
- (ew-rword-type rword))
+ (Q-encoded-text-length string (ew-rword-type rword))
)))
(if ret
(cons (+ 7 (length (symbol-name charset)) ret) string)
;;; @ encode-string
;;;
-(defun tm-eword::encode-string-1 (column rwl)
- (let* ((rword (car rwl))
- (ret (tm-eword::encoded-word-length rword))
- string len)
- (if (null ret)
- (cond ((and (setq string (car rword))
- (or (<= (setq len (+ (length string) column)) 76)
- (<= column 1))
+(defun ew-encode-rword-1 (column rwl &optional must-output)
+ (catch 'can-not-output
+ (let* ((rword (car rwl))
+ (ret (tm-eword::encoded-word-length rword))
+ string len)
+ (if (null ret)
+ (cond ((and (setq string (car rword))
+ (or (<= (setq len (+ (length string) column)) 76)
+ (<= column 1))
+ )
+ (setq rwl (cdr rwl))
+ )
+ ((memq (aref string 0) '(? ?\t))
+ (setq string (concat "\n" string)
+ len (length string)
+ rwl (cdr rwl))
+ )
+ (must-output
+ (setq string "\n "
+ len 1)
+ )
+ (t
+ (throw 'can-not-output nil)
+ ))
+ (cond ((and (setq len (car ret))
+ (<= (+ column len) 76)
)
+ (setq string
+ (eword-encode-text
+ (ew-rword-charset rword)
+ (ew-rword-encoding rword)
+ (cdr ret)
+ (ew-rword-type rword)
+ ))
+ (setq len (+ (length string) column))
(setq rwl (cdr rwl))
)
(t
- (setq string "\n ")
- (setq len 1)
- ))
- (cond ((and (setq len (car ret))
- (<= (+ column len) 76)
- )
- (setq string
- (eword-encode-text
- (ew-rword-charset rword)
- (ew-rword-encoding rword)
- (cdr ret)
- (ew-rword-type rword)
- ))
- (setq len (+ (length string) column))
- (setq rwl (cdr rwl))
- )
- (t
- (setq string (car rword))
- (let* ((p 0) np
- (str "") nstr)
- (while (and (< p len)
- (progn
- (setq np (char-next-index (sref string p) p))
- (setq nstr (substring string 0 np))
- (setq ret (tm-eword::encoded-word-length
- (cons nstr (cdr rword))
- ))
- (setq nstr (cdr ret))
- (setq len (+ (car ret) column))
- (<= len 76)
- ))
- (setq str nstr
- p np))
- (if (string-equal str "")
- (setq string "\n "
- len 1)
- (setq rwl (cons (cons (substring string p) (cdr rword))
- (cdr rwl)))
- (setq string
- (eword-encode-text
- (ew-rword-charset rword)
- (ew-rword-encoding rword)
- str
- (ew-rword-type rword)))
- (setq len (+ (length string) column))
- )
- )))
- )
- (list string len rwl)
- ))
+ (setq string (car rword))
+ (let* ((p 0) np
+ (str "") nstr)
+ (while (and (< p len)
+ (progn
+ (setq np (1+ p))
+ ;;(setq np (char-next-index (sref string p) p))
+ (setq nstr (substring string 0 np))
+ (setq ret (tm-eword::encoded-word-length
+ (cons nstr (cdr rword))
+ ))
+ (setq nstr (cdr ret))
+ (setq len (+ (car ret) column))
+ (<= len 76)
+ ))
+ (setq str nstr
+ p np))
+ (if (string-equal str "")
+ (if must-output
+ (setq string "\n "
+ len 1)
+ (throw 'can-not-output nil))
+ (setq rwl (cons (cons (substring string p) (cdr rword))
+ (cdr rwl)))
+ (setq string
+ (eword-encode-text
+ (ew-rword-charset rword)
+ (ew-rword-encoding rword)
+ str
+ (ew-rword-type rword)))
+ (setq len (+ (length string) column))
+ )
+ )))
+ )
+ (list string len rwl)
+ )))
(defun eword-encode-rword-list (column rwl)
- (let (ret dest ps special str ew-f pew-f)
+ (let (ret dest str ew-f pew-f folded-points)
(while rwl
(setq ew-f (nth 2 (car rwl)))
(if (and pew-f ew-f)
pew-f nil)
(setq pew-f ew-f)
)
- (setq ret (tm-eword::encode-string-1 column rwl))
+ (if (null (setq ret (ew-encode-rword-1 column rwl)))
+ (let ((i (1- (length dest)))
+ c s r-dest r-column)
+ (catch 'success
+ (while (catch 'found
+ (while (>= i 0)
+ (cond ((memq (setq c (aref dest i)) '(? ?\t))
+ (if (memq i folded-points)
+ (throw 'found nil)
+ (setq folded-points (cons i folded-points))
+ (throw 'found i))
+ )
+ ((eq c ?\n)
+ (throw 'found nil)
+ ))
+ (setq i (1- i))))
+ (setq s (substring dest i)
+ r-column (length s)
+ r-dest (concat (substring dest 0 i) "\n" s))
+ (when (setq ret (ew-encode-rword-1 r-column rwl))
+ (setq dest r-dest
+ column r-column)
+ (throw 'success t)
+ ))
+ (setq ret (ew-encode-rword-1 column rwl 'must-output))
+ )))
(setq str (car ret))
- (if (eq (elt str 0) ?\n)
- (if (eq special ?\()
- (progn
- (setq dest (concat dest "\n ("))
- (setq ret (tm-eword::encode-string-1 2 rwl))
- (setq str (car ret))
- ))
- (cond ((eq special ? )
- (if (string= str "(")
- (setq ps t)
- (setq dest (concat dest " "))
- (setq ps nil)
- ))
- ((eq special ?\()
- (if ps
- (progn
- (setq dest (concat dest " ("))
- (setq ps nil)
- )
- (setq dest (concat dest "("))
- )
- )))
- (cond ((string= str " ")
- (setq special ? )
- )
- ((string= str "(")
- (setq special ?\()
- )
- (t
- (setq special nil)
- (setq dest (concat dest str))
- ))
+ (setq dest (concat dest str))
(setq column (nth 1 ret)
rwl (nth 2 ret))
)
(setq dest
(append dest
(list
- (let ((ret (tm-eword::find-charset-rule
- (find-non-ascii-charset-string str))))
+ (let ((ret (ew-find-charset-rule
+ (find-charset-string str))))
(make-ew-rword
str (car ret)(nth 1 ret) 'phrase)
)
((eq type 'comment)
(setq dest
(append dest
- '(("(" nil nil))
+ '(("(" nil nil special))
(tm-eword::words-to-ruled-words
(eword-encode-charset-words-to-words
(eword-encode-divide-into-charset-words
(cdr token)))
'comment)
- '((")" nil nil))
+ '((")" nil nil special))
))
)
(t
))
(setq phrase (cdr phrase))
)
- (tm-eword::space-process dest)
+ (ew-space-process dest)
))
(defun eword-encode-addr-seq-to-rword-list (seq)
(if (or (eq pname 'spaces)
(eq pname 'comment))
(nconc dest (list (list (cdr token) nil nil)))
- (nconc (butlast dest)
+ (nconc (nreverse (cdr (reverse dest)))
+ ;; (butlast dest)
(list
(list (concat (car (car (last dest)))
(cdr token))
(let ((phrase (nth 1 phrase-route-addr))
(route (nth 2 phrase-route-addr))
dest)
- (if (eq (car (car phrase)) 'spaces)
- (setq phrase (cdr phrase))
- )
+ ;; (if (eq (car (car phrase)) 'spaces)
+ ;; (setq phrase (cdr phrase))
+ ;; )
(setq dest (eword-encode-phrase-to-rword-list phrase))
(if dest
(setq dest (append dest '((" " nil nil))))
'((" " nil nil)
("(" nil nil))
(eword-encode-split-string comment 'comment)
- '((")" nil nil))
+ (list '(")" nil nil))
)))
dest))
+(defsubst eword-encode-mailboxes-to-rword-list (mboxes)
+ (let ((dest (eword-encode-mailbox-to-rword-list (car mboxes))))
+ (if dest
+ (while (setq mboxes (cdr mboxes))
+ (setq dest
+ (nconc dest
+ (list '("," nil nil))
+ (eword-encode-mailbox-to-rword-list
+ (car mboxes))))))
+ dest))
+
+(defsubst eword-encode-address-to-rword-list (address)
+ (cond
+ ((eq (car address) 'mailbox)
+ (eword-encode-mailbox-to-rword-list address))
+ ((eq (car address) 'group)
+ (nconc
+ (eword-encode-phrase-to-rword-list (nth 1 address))
+ (list (list ":" nil nil))
+ (eword-encode-mailboxes-to-rword-list (nth 2 address))
+ (list (list ";" nil nil))))))
+
(defsubst eword-encode-addresses-to-rword-list (addresses)
- (let ((dest (eword-encode-mailbox-to-rword-list (car addresses))))
+ (let ((dest (eword-encode-address-to-rword-list (car addresses))))
(if dest
(while (setq addresses (cdr addresses))
(setq dest
- (append dest
- '(("," nil nil))
- '((" " nil nil))
- (eword-encode-mailbox-to-rword-list (car addresses))
- ))
- ))
+ (nconc dest
+ (list '("," nil nil))
+ ;; (list '(" " nil nil))
+ (eword-encode-address-to-rword-list (car addresses))))))
+ dest))
+
+(defsubst eword-encode-msg-id-to-rword-list (msg-id)
+ (list
+ (list
+ (concat "<"
+ (caar (eword-encode-addr-seq-to-rword-list (cdr msg-id)))
+ ">")
+ nil nil)))
+
+(defsubst eword-encode-in-reply-to-to-rword-list (in-reply-to)
+ (let (dest)
+ (while in-reply-to
+ (setq dest
+ (append dest
+ (let ((elt (car in-reply-to)))
+ (if (eq (car elt) 'phrase)
+ (eword-encode-phrase-to-rword-list (cdr elt))
+ (eword-encode-msg-id-to-rword-list elt)
+ ))))
+ (setq in-reply-to (cdr in-reply-to)))
dest))
;;; @ application interfaces
;;;
+(defvar eword-encode-default-start-column 10
+ "Default start column if it is omitted.")
+
(defun eword-encode-string (string &optional column mode)
"Encode STRING as encoded-words, and return the result.
Optional argument COLUMN is start-position of the field.
Optional argument MODE allows `text', `comment', `phrase' or nil.
Default value is `phrase'."
- (car (eword-encode-rword-list (or column 0)
- (eword-encode-split-string string mode))))
+ (car (eword-encode-rword-list
+ (or column eword-encode-default-start-column)
+ (eword-encode-split-string string mode))))
(defun eword-encode-address-list (string &optional column)
"Encode header field STRING as list of address, and return the result.
Optional argument COLUMN is start-position of the field."
- (car (eword-encode-rword-list (or column 0)
- (eword-encode-addresses-to-rword-list
- (std11-parse-addresses-string string))
- )))
+ (car (eword-encode-rword-list
+ (or column eword-encode-default-start-column)
+ (eword-encode-addresses-to-rword-list
+ (std11-parse-addresses-string string))
+ )))
+
+(defun eword-encode-in-reply-to (string &optional column)
+ "Encode header field STRING as In-Reply-To field, and return the result.
+Optional argument COLUMN is start-position of the field."
+ (car (eword-encode-rword-list
+ (or column 13)
+ (eword-encode-in-reply-to-to-rword-list
+ (std11-parse-msg-ids-string string)))))
(defun eword-encode-structured-field-body (string &optional column)
"Encode header field STRING as structured field, and return the result.
Optional argument COLUMN is start-position of the field."
(car (eword-encode-rword-list
- (or column 0)
+ (or column eword-encode-default-start-column)
(eword-encode-addr-seq-to-rword-list (std11-lexical-analyze string))
)))
(defun eword-encode-unstructured-field-body (string &optional column)
"Encode header field STRING as unstructured field, and return the result.
Optional argument COLUMN is start-position of the field."
- (car (eword-encode-rword-list (or column 0)
- (eword-encode-split-string string 'text))))
+ (car (eword-encode-rword-list
+ (or column eword-encode-default-start-column)
+ (eword-encode-split-string string 'text))))
-(defun eword-encode-field (string)
- "Encode header field STRING, and return the result.
+;;;###autoload
+(defun mime-encode-field-body (field-body field-name)
+ "Encode FIELD-BODY as FIELD-NAME, and return the result.
A lexical token includes non-ASCII character is encoded as MIME
encoded-word. ASCII token is not encoded."
- (setq string (std11-unfold-string string))
- (let ((ret (string-match std11-field-head-regexp string)))
- (or (if ret
- (let ((field-name (substring string 0 (1- (match-end 0))))
- (field-body (eliminate-top-spaces
- (substring string (match-end 0))))
- field-name-symbol)
- (if (setq ret
- (cond ((string= field-body "") "")
- ((memq (setq field-name-symbol
- (intern (capitalize field-name)))
- '(Reply-To
- From Sender
- Resent-Reply-To Resent-From
- Resent-Sender To Resent-To
- Cc Resent-Cc Bcc Resent-Bcc
- Dcc))
- (eword-encode-address-list
- field-body (+ (length field-name) 2))
- )
- ((memq field-name-symbol
- '(In-Reply-To
- Mime-Version User-Agent))
- (eword-encode-structured-field-body
- field-body (+ (length field-name) 2))
- )
- (t
- (eword-encode-unstructured-field-body
- field-body (1+ (length field-name)))
- ))
- )
- (concat field-name ": " ret)
- )))
- (eword-encode-string string 0)
- )))
+ (setq field-body (std11-unfold-string field-body))
+ (if (string= field-body "")
+ ""
+ (let ((method-alist mime-header-encode-method-alist)
+ start ret)
+ (if (symbolp field-name)
+ (setq start (1+ (length (symbol-name field-name))))
+ (setq start (1+ (length field-name))
+ field-name (intern (capitalize field-name))))
+ (while (car method-alist)
+ (if (or (not (cdr (car method-alist)))
+ (memq field-name
+ (cdr (car method-alist))))
+ (progn
+ (setq ret
+ (apply (caar method-alist) (list field-body start)))
+ (setq method-alist nil)))
+ (setq method-alist (cdr method-alist)))
+ ret)))
+(defalias 'eword-encode-field-body 'mime-encode-field-body)
+(make-obsolete 'eword-encode-field-body 'mime-encode-field-body)
(defun eword-in-subject-p ()
(let ((str (std11-field-body "Subject")))
(if (and str (string-match eword-encoded-word-regexp str))
str)))
+(make-obsolete 'eword-in-subject-p "Don't use it.")
(defsubst eword-find-field-encoding-method (field-name)
(setq field-name (downcase field-name))
- (let ((alist eword-field-encoding-method-alist))
+ (let ((alist mime-field-encoding-method-alist))
(catch 'found
(while alist
(let* ((pair (car alist))
(throw 'found (cdr pair))
))
(setq alist (cdr alist)))
- (cdr (assq t eword-field-encoding-method-alist))
+ (cdr (assq t mime-field-encoding-method-alist))
)))
-(defun eword-encode-header (&optional code-conversion)
+;;;###autoload
+(defun mime-encode-header-in-buffer (&optional code-conversion)
"Encode header fields to network representation, such as MIME encoded-word.
-
-It refer variable `eword-field-encoding-method-alist'."
+It refers the `mime-field-encoding-method-alist' variable."
(interactive "*")
(save-excursion
(save-restriction
(std11-narrow-to-header mail-header-separator)
(goto-char (point-min))
(let ((default-cs (mime-charset-to-coding-system default-mime-charset))
- beg end field-name)
+ bbeg end field-name)
(while (re-search-forward std11-field-head-regexp nil t)
- (setq beg (match-beginning 0))
- (setq field-name (buffer-substring beg (1- (match-end 0))))
- (setq end (std11-field-end))
- (and (find-non-ascii-charset-region beg end)
+ (setq bbeg (match-end 0)
+ field-name (buffer-substring-no-properties (match-beginning 0)
+ (1- bbeg))
+ end (std11-field-end))
+ (and (delq 'ascii (find-charset-region bbeg end))
(let ((method (eword-find-field-encoding-method
(downcase field-name))))
(cond ((eq method 'mime)
- (let ((field
- (buffer-substring-no-properties beg end)
- ))
- (delete-region beg end)
- (insert (eword-encode-field field))
- ))
+ (let* ((field-body
+ (buffer-substring-no-properties bbeg end))
+ (encoded-body
+ (mime-encode-field-body
+ field-body field-name)))
+ (if (not encoded-body)
+ (error "Cannot encode %s:%s"
+ field-name field-body)
+ (delete-region bbeg end)
+ (insert encoded-body))))
(code-conversion
(let ((cs
(or (mime-charset-to-coding-system
method)
default-cs)))
- (encode-coding-region beg end cs)
- )))
- ))
- ))
- )))
+ (encode-coding-region bbeg end cs)))))))))))
+(defalias 'eword-encode-header 'mime-encode-header-in-buffer)
+(make-obsolete 'eword-encode-header 'mime-encode-header-in-buffer)
;;; @ end