updated.
[elisp/semi.git] / eword-encode.el
index 83d22a4..80e8e87 100644 (file)
@@ -3,7 +3,7 @@
 ;; Copyright (C) 1995,1996,1997 Free Software Foundation, Inc.
 
 ;; Author: MORIOKA Tomohiko <morioka@jaist.ac.jp>
-;; Version: $Revision: 0.1 $
+;; Version: $Revision: 0.25 $
 ;; Keywords: encoded-word, MIME, multilingual, header, mail, news
 
 ;; This file is part of SEMI (SEMI is Emacs MIME Interfaces).
 
 ;;; Code:
 
+(require 'emu)
 (require 'mel)
 (require 'std11)
 (require 'mime-def)
-(require 'tl-list)
+(require 'eword-decode)
 
 
 ;;; @ version
 ;;;
 
 (defconst eword-encode-RCS-ID
-  "$Id: eword-encode.el,v 0.1 1997-02-22 16:59:39 morioka Exp $")
+  "$Id: eword-encode.el,v 0.25 1997-06-26 09:21:38 morioka Exp $")
 (defconst eword-encode-version (get-version-string eword-encode-RCS-ID))
 
 
 ;;;
 
 (defvar eword-field-encoding-method-alist
-  (if (boundp 'mime/no-encoding-header-fields)
-      (nconc
-       (mapcar (function
-               (lambda (field-name)
-                 (cons field-name 'default-mime-charset)
-                 ))
-              mime/no-encoding-header-fields)
-       '((t . mime))
-       )
-    '(("X-Nsubject" . iso-2022-jp-2)
-      ("Newsgroups" . nil)
-      (t            . mime)
-      ))
+  '(("X-Nsubject" . iso-2022-jp-2)
+    ("Newsgroups" . nil)
+    ("Message-ID" . nil)
+    (t            . mime)
+    )
   "*Alist to specify field encoding method.
 Its key is field-name, value is encoding method.
 
@@ -70,11 +63,9 @@ network-code.
 
 If method is nil, this field will not be encoded.")
 
-(defvar mime/generate-X-Nsubject
-  (and (boundp 'mime/use-X-Nsubject)
-       mime/use-X-Nsubject)
+(defvar eword-generate-X-Nsubject nil
   "*If it is not nil, X-Nsubject field is generated
-when Subject field is encoded by `eword-encode-message-header'.")
+when Subject field is encoded by `eword-encode-header'.")
 
 (defvar eword-charset-encoding-alist
   '((us-ascii          . nil)
@@ -101,7 +92,12 @@ when Subject field is encoded by `eword-encode-message-header'.")
 ;;; @ encoded-text encoder
 ;;;
 
-(defun tm-eword::encode-encoded-text (charset encoding string &optional mode)
+(defun eword-encode-text (charset encoding string &optional mode)
+  "Encode STRING as an encoded-word, and return the result.
+CHARSET is a symbol to indicate MIME charset of the encoded-word.
+ENCODING allows \"B\" or \"Q\".
+MODE is allows `text', `comment', `phrase' or nil.  Default value is
+`phrase'."
   (let ((text
         (cond ((string= encoding "B")
                (base64-encode-string string))
@@ -115,76 +111,68 @@ when Subject field is encoded by `eword-encode-message-header'.")
       )))
 
 
-;;; @ leading char
+;;; @ charset word
 ;;;
 
-(defun tm-eword::char-type (chr)
-  (if (or (= chr 32)(= chr ?\t))
+(defsubst eword-encode-char-type (character)
+  (if (or (eq character ? )(eq character ?\t))
       nil
-    (char-charset chr)
+    (char-charset character)
     ))
 
-(defun tm-eword::parse-lc-word (str)
-  (let* ((chr (sref str 0))
-        (lc (tm-eword::char-type chr))
-        (i (char-length chr))
-        (len (length str))
-        )
-    (while (and (< i len)
-               (setq chr (sref str i))
-               (eq lc (tm-eword::char-type chr))
-               )
-      (setq i (+ i (char-length chr)))
-      )
-    (cons (cons lc (substring str 0 i)) (substring str i))
-    ))
-
-(defun tm-eword::split-to-lc-words (str)
-  (let (ret dest)
-    (while (and (not (string= str ""))
-               (setq ret (tm-eword::parse-lc-word str))
-               )
-      (setq dest (cons (car ret) dest))
-      (setq str (cdr ret))
-      )
-    (reverse dest)
+(defun eword-encode-divide-into-charset-words (string)
+  (let ((len (length string))
+       dest)
+    (while (> len 0)
+      (let* ((chr (sref string 0))
+            (charset (eword-encode-char-type chr))
+            (i (char-bytes chr))
+            )
+       (while (and (< i len)
+                   (setq chr (sref string i))
+                   (eq charset (eword-encode-char-type chr))
+                   )
+         (setq i (+ i (char-bytes chr)))
+         )
+       (setq dest (cons (cons charset (substring string 0 i)) dest)
+             string (substring string i)
+             len (- len i)
+             )))
+    (nreverse dest)
     ))
 
 
 ;;; @ word
 ;;;
 
-(defun tm-eword::parse-word (lcwl)
-  (let* ((lcw (car lcwl))
-        (lc (car lcw))
-        )
-    (if (null lc)
-       lcwl
-      (let ((lcl (list lc))
-           (str (cdr lcw))
-           )
-       (catch 'tag
-         (while (setq lcwl (cdr lcwl))
-           (setq lcw (car lcwl))
-           (setq lc (car lcw))
-           (if (null lc)
-               (throw 'tag nil)
-             )
-           (if (not (memq lc lcl))
-               (setq lcl (cons lc lcl))
+(defun eword-encode-charset-words-to-words (charset-words)
+  (let (dest)
+    (while charset-words
+      (let* ((charset-word (car charset-words))
+            (charset (car charset-word))
+            )
+       (if charset
+           (let ((charsets (list charset))
+                 (str (cdr charset-word))
+                 )
+             (catch 'tag
+               (while (setq charset-words (cdr charset-words))
+                 (setq charset-word (car charset-words)
+                       charset (car charset-word))
+                 (if (null charset)
+                     (throw 'tag nil)
+                   )
+                 (or (memq charset charsets)
+                     (setq charsets (cons charset charsets))
+                     )
+                 (setq str (concat str (cdr charset-word)))
+                 ))
+             (setq dest (cons (cons charsets str) dest))
              )
-           (setq str (concat str (cdr lcw)))
-           ))
-       (cons (cons lcl str) lcwl)
-       ))))
-
-(defun tm-eword::lc-words-to-words (lcwl)
-  (let (ret dest)
-    (while (setq ret (tm-eword::parse-word lcwl))
-      (setq dest (cons (car ret) dest))
-      (setq lcwl (cdr ret))
-      )
-    (reverse dest)
+         (setq dest (cons charset-word dest)
+               charset-words (cdr charset-words)
+               ))))
+    (nreverse dest)
     ))
 
 
@@ -255,9 +243,10 @@ when Subject field is encoded by `eword-encode-message-header'.")
 
 (defun tm-eword::split-string (str &optional mode)
   (tm-eword::space-process
-   (tm-eword::words-to-ruled-words (tm-eword::lc-words-to-words
-                                   (tm-eword::split-to-lc-words str))
-                                  mode)))
+   (tm-eword::words-to-ruled-words
+    (eword-encode-charset-words-to-words
+     (eword-encode-divide-into-charset-words str))
+    mode)))
 
 
 ;;; @ length
@@ -292,7 +281,8 @@ when Subject field is encoded by `eword-encode-message-header'.")
         string len)
     (if (null ret)
        (cond ((and (setq string (car rword))
-                   (<= (setq len (+ (length string) column)) 76)
+                   (or (<= (setq len (+ (length string) column)) 76)
+                       (<= column 1))
                    )
               (setq rwl (cdr rwl))
               )
@@ -304,7 +294,7 @@ when Subject field is encoded by `eword-encode-message-header'.")
                  (<= (+ column len) 76)
                  )
             (setq string
-                  (tm-eword::encode-encoded-text
+                  (eword-encode-text
                    (tm-eword::rword-charset rword)
                    (tm-eword::rword-encoding rword)
                    (cdr ret)
@@ -319,7 +309,7 @@ when Subject field is encoded by `eword-encode-message-header'.")
                    (str "") nstr)
               (while (and (< p len)
                           (progn
-                            (setq np (+ p (char-length (sref string p))))
+                            (setq np (+ p (char-bytes (sref string p))))
                             (setq nstr (substring string 0 np))
                             (setq ret (tm-eword::encoded-word-length
                                        (cons nstr (cdr rword))
@@ -336,7 +326,7 @@ when Subject field is encoded by `eword-encode-message-header'.")
                 (setq rwl (cons (cons (substring string p) (cdr rword))
                                 (cdr rwl)))
                 (setq string
-                      (tm-eword::encode-encoded-text
+                      (eword-encode-text
                        (tm-eword::rword-charset rword)
                        (tm-eword::rword-encoding rword)
                        str
@@ -366,7 +356,7 @@ when Subject field is encoded by `eword-encode-message-header'.")
                (setq ret (tm-eword::encode-string-1 2 rwl))
                (setq str (car ret))
                ))
-       (cond ((eq special 32)
+       (cond ((eq special ? )
               (if (string= str "(")
                   (setq ps t)
                 (setq dest (concat dest " "))
@@ -382,7 +372,7 @@ when Subject field is encoded by `eword-encode-message-header'.")
                 )
               )))
       (cond ((string= str " ")
-            (setq special 32)
+            (setq special ? )
             )
            ((string= str "(")
             (setq special ?\()
@@ -427,18 +417,21 @@ when Subject field is encoded by `eword-encode-message-header'.")
                   (append dest
                           '(("(" nil nil))
                           (tm-eword::words-to-ruled-words
-                           (tm-eword::lc-words-to-words
-                            (tm-eword::split-to-lc-words (cdr token)))
+                           (eword-encode-charset-words-to-words
+                            (eword-encode-divide-into-charset-words
+                             (cdr token)))
                            'comment)
                           '((")" nil nil))
                           ))
             )
            (t
-            (setq dest (append dest
-                               (tm-eword::words-to-ruled-words
-                                (tm-eword::lc-words-to-words
-                                 (tm-eword::split-to-lc-words (cdr token))
-                                 ) 'phrase)))
+            (setq dest
+                  (append dest
+                          (tm-eword::words-to-ruled-words
+                           (eword-encode-charset-words-to-words
+                            (eword-encode-divide-into-charset-words
+                             (cdr token))
+                            ) 'phrase)))
             ))
       (setq phrase (cdr phrase))
       )
@@ -506,43 +499,50 @@ when Subject field is encoded by `eword-encode-message-header'.")
 ;;; @ application interfaces
 ;;;
 
-(defun eword-encode-field (str)
-  (setq str (std11-unfold-string str))
-  (let ((ret (string-match std11-field-head-regexp str)))
+(defun eword-encode-field (string)
+  "Encode header field STRING, and return the result.
+A lexical token includes non-ASCII character is encoded as MIME
+encoded-word.  ASCII token is not encoded."
+  (setq string (std11-unfold-string string))
+  (let ((ret (string-match std11-field-head-regexp string)))
     (or (if ret
-           (let ((field-name (substring str 0 (1- (match-end 0))))
+           (let ((field-name (substring string 0 (1- (match-end 0))))
                  (field-body (eliminate-top-spaces
-                              (substring str (match-end 0))))
-                 fname)
+                              (substring string (match-end 0))))
+                 )
              (if (setq ret
                        (cond ((string-equal field-body "") "")
-                             ((member (setq fname (downcase field-name))
-                                      '("reply-to" "from" "sender"
-                                        "resent-reply-to" "resent-from"
-                                        "resent-sender" "to" "resent-to"
-                                        "cc" "resent-cc"
-                                        "bcc" "resent-bcc" "dcc")
-                                      )
+                             ((memq (intern (downcase field-name))
+                                    '(reply-to
+                                      from sender
+                                      resent-reply-to resent-from
+                                      resent-sender to resent-to
+                                      cc resent-cc
+                                      bcc resent-bcc dcc)
+                                    )
                               (car (tm-eword::encode-address-list
                                     (+ (length field-name) 2) field-body))
                               )
                              (t
                               (car (tm-eword::encode-string
-                                    (+ (length field-name) 1)
+                                    (1+ (length field-name))
                                     field-body 'text))
                               ))
                        )
                  (concat field-name ": " ret)
                )))
-       (car (tm-eword::encode-string 0 str))
+       (car (tm-eword::encode-string 0 string))
        )))
 
-(defun mime/exist-encoded-word-in-subject ()
+(defun eword-in-subject-p ()
   (let ((str (std11-field-body "Subject")))
-    (if (and str (string-match mime/encoded-word-regexp str))
+    (if (and str (string-match eword-encoded-word-regexp str))
        str)))
 
-(defun eword-encode-message-header (&optional code-conversion)
+(defun eword-encode-header (&optional code-conversion)
+  "Encode header fields to network representation, such as MIME encoded-word.
+
+It refer variable `eword-field-encoding-method-alist'."
   (interactive "*")
   (save-excursion
     (save-restriction
@@ -555,14 +555,14 @@ when Subject field is encoded by `eword-encode-message-header'.")
          (setq field-name (buffer-substring beg (1- (match-end 0))))
          (setq end (std11-field-end))
          (and (find-non-ascii-charset-region beg end)
-              (let ((ret (or (ASSOC (downcase field-name)
-                                    eword-field-encoding-method-alist
-                                    :test (function
-                                           (lambda (str1 str2)
-                                             (and (stringp str2)
-                                                  (string= str1
-                                                           (downcase str2))
-                                                  ))))
+              (let ((ret (or (let ((fname  (downcase field-name)))
+                               (assoc-if
+                                (function
+                                 (lambda (str)
+                                   (and (stringp str)
+                                        (string= fname (downcase str))
+                                        )))
+                                eword-field-encoding-method-alist))
                              (assq t eword-field-encoding-method-alist)
                              )))
                 (if ret
@@ -584,9 +584,9 @@ when Subject field is encoded by `eword-encode-message-header'.")
                       ))
                 ))
          ))
-      (and mime/generate-X-Nsubject
+      (and eword-generate-X-Nsubject
           (or (std11-field-body "X-Nsubject")
-              (let ((str (mime/exist-encoded-word-in-subject)))
+              (let ((str (eword-in-subject-p)))
                 (if str
                     (progn
                       (setq str
@@ -596,16 +596,14 @@ when Subject field is encoded by `eword-encode-message-header'.")
                           (setq str
                                 (encode-mime-charset-string
                                  str
-                                 (or (cdr (ASSOC
-                                           "x-nsubject"
-                                           eword-field-encoding-method-alist
-                                           :test
+                                 (or (cdr (assoc-if
                                            (function
-                                            (lambda (str1 str2)
-                                              (and (stringp str2)
-                                                   (string= str1
-                                                            (downcase str2))
-                                                   )))))
+                                            (lambda (str)
+                                              (and (stringp str)
+                                                   (string= "x-nsubject"
+                                                            (downcase str))
+                                                   )))
+                                           eword-field-encoding-method-alist))
                                      'iso-2022-jp-2)))
                         )
                       (insert (concat "\nX-Nsubject: " str))