Merge JR-Himi.
[elisp/semi.git] / eword-encode.el
index c08204b..ee3f95c 100644 (file)
@@ -1,12 +1,11 @@
 ;;; eword-encode.el --- RFC 2047 based encoded-word encoder for GNU Emacs
 
-;; Copyright (C) 1995,1996,1997 Free Software Foundation, Inc.
+;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc.
 
 ;; Author: MORIOKA Tomohiko <morioka@jaist.ac.jp>
-;; Version: $Revision: 0.5 $
 ;; Keywords: encoded-word, MIME, multilingual, header, mail, news
 
-;; This file is part of SEMI (SEMI is Emacs MIME Interfaces).
+;; This file is part of SEMI (Spadework for Emacs MIME Interfaces).
 
 ;; This program is free software; you can redistribute it and/or
 ;; modify it under the terms of the GNU General Public License as
 
 ;;; Code:
 
+(require 'emu)
 (require 'mel)
 (require 'std11)
 (require 'mime-def)
-(require 'tl-list)
+(require 'eword-decode)
 
 
 ;;; @ version
 ;;;
 
 (defconst eword-encode-RCS-ID
-  "$Id: eword-encode.el,v 0.5 1997-02-22 17:05:55 morioka Exp $")
+  "$Id: eword-encode.el,v 1.2 1998-03-13 12:55:54 morioka Exp $")
 (defconst eword-encode-version (get-version-string eword-encode-RCS-ID))
 
 
@@ -45,6 +45,7 @@
 (defvar eword-field-encoding-method-alist
   '(("X-Nsubject" . iso-2022-jp-2)
     ("Newsgroups" . nil)
+    ("Message-ID" . nil)
     (t            . mime)
     )
   "*Alist to specify field encoding method.
@@ -61,10 +62,6 @@ network-code.
 
 If method is nil, this field will not be encoded.")
 
-(defvar eword-generate-X-Nsubject nil
-  "*If it is not nil, X-Nsubject field is generated
-when Subject field is encoded by `eword-encode-message-header'.")
-
 (defvar eword-charset-encoding-alist
   '((us-ascii          . nil)
     (iso-8859-1                . "Q")
@@ -90,7 +87,12 @@ when Subject field is encoded by `eword-encode-message-header'.")
 ;;; @ encoded-text encoder
 ;;;
 
-(defun tm-eword::encode-encoded-text (charset encoding string &optional mode)
+(defun eword-encode-text (charset encoding string &optional mode)
+  "Encode STRING as an encoded-word, and return the result.
+CHARSET is a symbol to indicate MIME charset of the encoded-word.
+ENCODING allows \"B\" or \"Q\".
+MODE is allows `text', `comment', `phrase' or nil.  Default value is
+`phrase'."
   (let ((text
         (cond ((string= encoding "B")
                (base64-encode-string string))
@@ -104,76 +106,68 @@ when Subject field is encoded by `eword-encode-message-header'.")
       )))
 
 
-;;; @ leading char
+;;; @ charset word
 ;;;
 
-(defun tm-eword::char-type (chr)
-  (if (or (= chr 32)(= chr ?\t))
+(defsubst eword-encode-char-type (character)
+  (if (or (eq character ? )(eq character ?\t))
       nil
-    (char-charset chr)
+    (char-charset character)
     ))
 
-(defun tm-eword::parse-lc-word (str)
-  (let* ((chr (sref str 0))
-        (lc (tm-eword::char-type chr))
-        (i (char-length chr))
-        (len (length str))
-        )
-    (while (and (< i len)
-               (setq chr (sref str i))
-               (eq lc (tm-eword::char-type chr))
-               )
-      (setq i (+ i (char-length chr)))
-      )
-    (cons (cons lc (substring str 0 i)) (substring str i))
-    ))
-
-(defun tm-eword::split-to-lc-words (str)
-  (let (ret dest)
-    (while (and (not (string= str ""))
-               (setq ret (tm-eword::parse-lc-word str))
-               )
-      (setq dest (cons (car ret) dest))
-      (setq str (cdr ret))
-      )
-    (reverse dest)
+(defun eword-encode-divide-into-charset-words (string)
+  (let ((len (length string))
+       dest)
+    (while (> len 0)
+      (let* ((chr (sref string 0))
+            (charset (eword-encode-char-type chr))
+            (i (char-bytes chr))
+            )
+       (while (and (< i len)
+                   (setq chr (sref string i))
+                   (eq charset (eword-encode-char-type chr))
+                   )
+         (setq i (+ i (char-bytes chr)))
+         )
+       (setq dest (cons (cons charset (substring string 0 i)) dest)
+             string (substring string i)
+             len (- len i)
+             )))
+    (nreverse dest)
     ))
 
 
 ;;; @ word
 ;;;
 
-(defun tm-eword::parse-word (lcwl)
-  (let* ((lcw (car lcwl))
-        (lc (car lcw))
-        )
-    (if (null lc)
-       lcwl
-      (let ((lcl (list lc))
-           (str (cdr lcw))
-           )
-       (catch 'tag
-         (while (setq lcwl (cdr lcwl))
-           (setq lcw (car lcwl))
-           (setq lc (car lcw))
-           (if (null lc)
-               (throw 'tag nil)
-             )
-           (if (not (memq lc lcl))
-               (setq lcl (cons lc lcl))
+(defun eword-encode-charset-words-to-words (charset-words)
+  (let (dest)
+    (while charset-words
+      (let* ((charset-word (car charset-words))
+            (charset (car charset-word))
+            )
+       (if charset
+           (let ((charsets (list charset))
+                 (str (cdr charset-word))
+                 )
+             (catch 'tag
+               (while (setq charset-words (cdr charset-words))
+                 (setq charset-word (car charset-words)
+                       charset (car charset-word))
+                 (if (null charset)
+                     (throw 'tag nil)
+                   )
+                 (or (memq charset charsets)
+                     (setq charsets (cons charset charsets))
+                     )
+                 (setq str (concat str (cdr charset-word)))
+                 ))
+             (setq dest (cons (cons charsets str) dest))
              )
-           (setq str (concat str (cdr lcw)))
-           ))
-       (cons (cons lcl str) lcwl)
-       ))))
-
-(defun tm-eword::lc-words-to-words (lcwl)
-  (let (ret dest)
-    (while (setq ret (tm-eword::parse-word lcwl))
-      (setq dest (cons (car ret) dest))
-      (setq lcwl (cdr ret))
-      )
-    (reverse dest)
+         (setq dest (cons charset-word dest)
+               charset-words (cdr charset-words)
+               ))))
+    (nreverse dest)
     ))
 
 
@@ -244,9 +238,10 @@ when Subject field is encoded by `eword-encode-message-header'.")
 
 (defun tm-eword::split-string (str &optional mode)
   (tm-eword::space-process
-   (tm-eword::words-to-ruled-words (tm-eword::lc-words-to-words
-                                   (tm-eword::split-to-lc-words str))
-                                  mode)))
+   (tm-eword::words-to-ruled-words
+    (eword-encode-charset-words-to-words
+     (eword-encode-divide-into-charset-words str))
+    mode)))
 
 
 ;;; @ length
@@ -281,7 +276,8 @@ when Subject field is encoded by `eword-encode-message-header'.")
         string len)
     (if (null ret)
        (cond ((and (setq string (car rword))
-                   (<= (setq len (+ (length string) column)) 76)
+                   (or (<= (setq len (+ (length string) column)) 76)
+                       (<= column 1))
                    )
               (setq rwl (cdr rwl))
               )
@@ -293,7 +289,7 @@ when Subject field is encoded by `eword-encode-message-header'.")
                  (<= (+ column len) 76)
                  )
             (setq string
-                  (tm-eword::encode-encoded-text
+                  (eword-encode-text
                    (tm-eword::rword-charset rword)
                    (tm-eword::rword-encoding rword)
                    (cdr ret)
@@ -308,7 +304,7 @@ when Subject field is encoded by `eword-encode-message-header'.")
                    (str "") nstr)
               (while (and (< p len)
                           (progn
-                            (setq np (+ p (char-length (sref string p))))
+                            (setq np (+ p (char-bytes (sref string p))))
                             (setq nstr (substring string 0 np))
                             (setq ret (tm-eword::encoded-word-length
                                        (cons nstr (cdr rword))
@@ -325,7 +321,7 @@ when Subject field is encoded by `eword-encode-message-header'.")
                 (setq rwl (cons (cons (substring string p) (cdr rword))
                                 (cdr rwl)))
                 (setq string
-                      (tm-eword::encode-encoded-text
+                      (eword-encode-text
                        (tm-eword::rword-charset rword)
                        (tm-eword::rword-encoding rword)
                        str
@@ -355,7 +351,7 @@ when Subject field is encoded by `eword-encode-message-header'.")
                (setq ret (tm-eword::encode-string-1 2 rwl))
                (setq str (car ret))
                ))
-       (cond ((eq special 32)
+       (cond ((eq special ? )
               (if (string= str "(")
                   (setq ps t)
                 (setq dest (concat dest " "))
@@ -371,7 +367,7 @@ when Subject field is encoded by `eword-encode-message-header'.")
                 )
               )))
       (cond ((string= str " ")
-            (setq special 32)
+            (setq special ? )
             )
            ((string= str "(")
             (setq special ?\()
@@ -416,25 +412,70 @@ when Subject field is encoded by `eword-encode-message-header'.")
                   (append dest
                           '(("(" nil nil))
                           (tm-eword::words-to-ruled-words
-                           (tm-eword::lc-words-to-words
-                            (tm-eword::split-to-lc-words (cdr token)))
+                           (eword-encode-charset-words-to-words
+                            (eword-encode-divide-into-charset-words
+                             (cdr token)))
                            'comment)
                           '((")" nil nil))
                           ))
             )
            (t
-            (setq dest (append dest
-                               (tm-eword::words-to-ruled-words
-                                (tm-eword::lc-words-to-words
-                                 (tm-eword::split-to-lc-words (cdr token))
-                                 ) 'phrase)))
+            (setq dest
+                  (append dest
+                          (tm-eword::words-to-ruled-words
+                           (eword-encode-charset-words-to-words
+                            (eword-encode-divide-into-charset-words
+                             (cdr token))
+                            ) 'phrase)))
             ))
       (setq phrase (cdr phrase))
       )
     (tm-eword::space-process dest)
     ))
 
-(defun tm-eword::phrase-route-addr-to-rwl (phrase-route-addr)
+(defun eword-addr-seq-to-rwl (seq)
+  (let (dest pname)
+    (while seq
+      (let* ((token (car seq))
+            (name (car token))
+            )
+       (cond ((eq name 'spaces)
+              (setq dest (nconc dest (list (list (cdr token) nil nil))))
+              )
+             ((eq name 'comment)
+              (setq dest
+                    (nconc
+                     dest
+                     (list (list "(" nil nil))
+                     (tm-eword::split-string (cdr token) 'comment)
+                     (list (list ")" nil nil))
+                     ))
+              )
+             ((eq name 'quoted-string)
+              (setq dest
+                    (nconc
+                     dest
+                     (list
+                      (list (concat "\"" (cdr token) "\"") nil nil)
+                      )))
+              )
+             (t
+              (setq dest
+                    (if (or (eq pname 'spaces)
+                            (eq pname 'comment))
+                        (nconc dest (list (list (cdr token) nil nil)))
+                      (nconc (butlast dest)
+                             (list
+                              (list (concat (car (car (last dest)))
+                                            (cdr token))
+                                    nil nil)))))
+              ))
+       (setq seq (cdr seq)
+             pname name))
+      )
+    dest))
+
+(defun eword-phrase-route-addr-to-rwl (phrase-route-addr)
   (if (eq (car phrase-route-addr) 'phrase-route-addr)
       (let ((phrase (nth 1 phrase-route-addr))
            (route (nth 2 phrase-route-addr))
@@ -448,20 +489,23 @@ when Subject field is encoded by `eword-encode-message-header'.")
          )
        (append
         dest
-        (list (list (concat "<" (std11-addr-to-string route) ">") nil nil))
+        (eword-addr-seq-to-rwl
+         (append '((specials . "<"))
+                 route
+                 '((specials . ">"))))
         ))))
 
-(defun tm-eword::addr-spec-to-rwl (addr-spec)
+(defun eword-addr-spec-to-rwl (addr-spec)
   (if (eq (car addr-spec) 'addr-spec)
-      (list (list (std11-addr-to-string (cdr addr-spec)) nil nil))
+      (eword-addr-seq-to-rwl (cdr addr-spec))
     ))
 
 (defun tm-eword::mailbox-to-rwl (mbox)
   (let ((addr (nth 1 mbox))
        (comment (nth 2 mbox))
        dest)
-    (setq dest (or (tm-eword::phrase-route-addr-to-rwl addr)
-                  (tm-eword::addr-spec-to-rwl addr)
+    (setq dest (or (eword-phrase-route-addr-to-rwl addr)
+                  (eword-addr-spec-to-rwl addr)
                   ))
     (if comment
        (setq dest
@@ -495,43 +539,66 @@ when Subject field is encoded by `eword-encode-message-header'.")
 ;;; @ application interfaces
 ;;;
 
-(defun eword-encode-field (str)
-  (setq str (std11-unfold-string str))
-  (let ((ret (string-match std11-field-head-regexp str)))
+(defun eword-encode-field (string)
+  "Encode header field STRING, and return the result.
+A lexical token includes non-ASCII character is encoded as MIME
+encoded-word.  ASCII token is not encoded."
+  (setq string (std11-unfold-string string))
+  (let ((ret (string-match std11-field-head-regexp string)))
     (or (if ret
-           (let ((field-name (substring str 0 (1- (match-end 0))))
+           (let ((field-name (substring string 0 (1- (match-end 0))))
                  (field-body (eliminate-top-spaces
-                              (substring str (match-end 0))))
-                 fname)
+                              (substring string (match-end 0))))
+                 )
              (if (setq ret
                        (cond ((string-equal field-body "") "")
-                             ((member (setq fname (downcase field-name))
-                                      '("reply-to" "from" "sender"
-                                        "resent-reply-to" "resent-from"
-                                        "resent-sender" "to" "resent-to"
-                                        "cc" "resent-cc"
-                                        "bcc" "resent-bcc" "dcc")
-                                      )
+                             ((memq (intern (downcase field-name))
+                                    '(reply-to
+                                      from sender
+                                      resent-reply-to resent-from
+                                      resent-sender to resent-to
+                                      cc resent-cc
+                                      bcc resent-bcc dcc
+                                      mime-version)
+                                    )
                               (car (tm-eword::encode-address-list
                                     (+ (length field-name) 2) field-body))
                               )
                              (t
                               (car (tm-eword::encode-string
-                                    (+ (length field-name) 1)
+                                    (1+ (length field-name))
                                     field-body 'text))
                               ))
                        )
                  (concat field-name ": " ret)
                )))
-       (car (tm-eword::encode-string 0 str))
+       (car (tm-eword::encode-string 0 string))
        )))
 
-(defun eword-exist-encoded-word-in-subject ()
+(defun eword-in-subject-p ()
   (let ((str (std11-field-body "Subject")))
     (if (and str (string-match eword-encoded-word-regexp str))
        str)))
 
-(defun eword-encode-message-header (&optional code-conversion)
+(defsubst eword-find-field-encoding-method (field-name)
+  (setq field-name (downcase field-name))
+  (let ((alist eword-field-encoding-method-alist))
+    (catch 'found
+      (while alist
+       (let* ((pair (car alist))
+              (str (car pair)))
+         (if (and (stringp str)
+                  (string= field-name (downcase str)))
+             (throw 'found (cdr pair))
+           ))
+       (setq alist (cdr alist)))
+      (cdr (assq t eword-field-encoding-method-alist))
+      )))
+
+(defun eword-encode-header (&optional code-conversion)
+  "Encode header fields to network representation, such as MIME encoded-word.
+
+It refer variable `eword-field-encoding-method-alist'."
   (interactive "*")
   (save-excursion
     (save-restriction
@@ -544,61 +611,24 @@ when Subject field is encoded by `eword-encode-message-header'.")
          (setq field-name (buffer-substring beg (1- (match-end 0))))
          (setq end (std11-field-end))
          (and (find-non-ascii-charset-region beg end)
-              (let ((ret (or (ASSOC (downcase field-name)
-                                    eword-field-encoding-method-alist
-                                    :test (function
-                                           (lambda (str1 str2)
-                                             (and (stringp str2)
-                                                  (string= str1
-                                                           (downcase str2))
-                                                  ))))
-                             (assq t eword-field-encoding-method-alist)
-                             )))
-                (if ret
-                    (let ((method (cdr ret)))
-                      (cond ((eq method 'mime)
-                             (let ((field
-                                    (buffer-substring-no-properties beg end)
-                                    ))
-                               (delete-region beg end)
-                               (insert (eword-encode-field field))
-                               ))
-                            (code-conversion
-                             (let ((cs
-                                    (or (mime-charset-to-coding-system
-                                         method)
-                                        default-cs)))
-                               (encode-coding-region beg end cs)
-                               )))
-                      ))
+              (let ((method (eword-find-field-encoding-method
+                             (downcase field-name))))
+                (cond ((eq method 'mime)
+                       (let ((field
+                              (buffer-substring-no-properties beg end)
+                              ))
+                         (delete-region beg end)
+                         (insert (eword-encode-field field))
+                         ))
+                      (code-conversion
+                       (let ((cs
+                              (or (mime-charset-to-coding-system
+                                   method)
+                                  default-cs)))
+                         (encode-coding-region beg end cs)
+                         )))
                 ))
          ))
-      (and eword-generate-X-Nsubject
-          (or (std11-field-body "X-Nsubject")
-              (let ((str (eword-exist-encoded-word-in-subject)))
-                (if str
-                    (progn
-                      (setq str
-                            (eword-decode-string
-                             (std11-unfold-string str)))
-                      (if code-conversion
-                          (setq str
-                                (encode-mime-charset-string
-                                 str
-                                 (or (cdr (ASSOC
-                                           "x-nsubject"
-                                           eword-field-encoding-method-alist
-                                           :test
-                                           (function
-                                            (lambda (str1 str2)
-                                              (and (stringp str2)
-                                                   (string= str1
-                                                            (downcase str2))
-                                                   )))))
-                                     'iso-2022-jp-2)))
-                        )
-                      (insert (concat "\nX-Nsubject: " str))
-                      )))))
       )))
 
 (defun eword-encode-string (str &optional column mode)