eword-decode.el

   1 ;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs
   2
   3 ;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc.
   4
   5 ;; Author: ENAMI Tsugutomo <enami@sys.ptg.sony.co.jp>
   6 ;;         MORIOKA Tomohiko <morioka@jaist.ac.jp>
   7 ;;         Tanaka Akira <akr@jaist.ac.jp>
   8 ;; Maintainer: Tanaka Akira <akr@jaist.ac.jp>
   9 ;; Created: 1995/10/03
  10 ;; Original: 1992/07/20 ENAMI Tsugutomo's `mime.el'.
  11 ;;      Renamed: 1993/06/03 to tiny-mime.el
  12 ;;      Renamed: 1995/10/03 from tiny-mime.el (split off encoder)
  13 ;;      Renamed: 1997/02/22 from tm-ew-d.el
  14 ;; Keywords: encoded-word, MIME, multilingual, header, mail, news
  15
  16 ;; This file is part of FLAM (Faithful Library About MIME).
  17
  18 ;; This program is free software; you can redistribute it and/or
  19 ;; modify it under the terms of the GNU General Public License as
  20 ;; published by the Free Software Foundation; either version 2, or (at
  21 ;; your option) any later version.
  22
  23 ;; This program is distributed in the hope that it will be useful, but
  24 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  25 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  26 ;; General Public License for more details.
  27
  28 ;; You should have received a copy of the GNU General Public License
  29 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
  30 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  31 ;; Boston, MA 02111-1307, USA.
  32
  33 ;;; Code:
  34
  35 (require 'std11)
  36 (require 'mel)
  37 (require 'mime-def)
  38
  39 (defgroup eword-decode nil
  40   "Encoded-word decoding"
  41   :group 'mime)
  42
  43
  44 ;;; @ variables
  45 ;;;
  46
  47 (defcustom eword-decode-sticked-encoded-word nil
  48   "*If non-nil, decode encoded-words sticked on atoms,
  49 other encoded-words, etc.
  50 however this behaviour violates RFC2047."
  51   :group 'eword-decode
  52   :type 'boolean)
  53
  54 (defcustom eword-decode-quoted-encoded-word nil
  55   "*If non-nil, decode encoded-words in quoted-string
  56 however this behaviour violates RFC2047."
  57   :group 'eword-decode
  58   :type 'boolean)
  59
  60
  61 ;;; @ MIME encoded-word definition
  62 ;;;
  63
  64 (defconst eword-encoded-word-prefix-regexp
  65   (concat (regexp-quote "=?")
  66           "\\(" mime-charset-regexp "\\)"
  67           (regexp-quote "?")
  68           "\\(B\\|Q\\)"
  69           (regexp-quote "?")))
  70 (defconst eword-encoded-word-suffix-regexp
  71   (regexp-quote "?="))
  72
  73 (defconst eword-encoded-text-in-unstructured-regexp "[!->@-~]+")
  74 (defconst eword-encoded-word-in-unstructured-regexp
  75   (concat eword-encoded-word-prefix-regexp
  76           "\\(" eword-encoded-text-in-unstructured-regexp "\\)"
  77           eword-encoded-word-suffix-regexp))
  78 (defconst eword-after-encoded-word-in-unstructured-regexp "\\([ \t]\\|$\\)")
  79
  80 (defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+")
  81 (defconst eword-encoded-word-in-phrase-regexp
  82   (concat eword-encoded-word-prefix-regexp
  83           "\\(" eword-encoded-text-in-phrase-regexp "\\)"
  84           eword-encoded-word-suffix-regexp))
  85 (defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t]\\|$\\)")
  86
  87 (defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+")
  88 (defconst eword-encoded-word-in-comment-regexp
  89   (concat eword-encoded-word-prefix-regexp
  90           "\\(" eword-encoded-text-in-comment-regexp "\\)"
  91           eword-encoded-word-suffix-regexp))
  92 (defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)")
  93
  94 (defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+")
  95 (defconst eword-encoded-word-in-quoted-string-regexp
  96   (concat eword-encoded-word-prefix-regexp
  97           "\\(" eword-encoded-text-in-quoted-string-regexp "\\)"
  98           eword-encoded-word-suffix-regexp))
  99 (defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)")
 100
 101 ; obsolete
 102 (defconst eword-encoded-text-regexp eword-encoded-text-in-unstructured-regexp)
 103 (defconst eword-encoded-word-regexp eword-encoded-word-in-unstructured-regexp)
 104
 105
 106 ;;; @@ Base64
 107 ;;;
 108
 109 (defconst base64-token-regexp "[A-Za-z0-9+/]")
 110 (defconst base64-token-padding-regexp "[A-Za-z0-9+/=]")
 111
 112 (defconst eword-B-encoded-text-regexp
 113   (concat "\\(\\("
 114           base64-token-regexp
 115           base64-token-regexp
 116           base64-token-regexp
 117           base64-token-regexp
 118           "\\)*"
 119           base64-token-regexp
 120           base64-token-regexp
 121           base64-token-padding-regexp
 122           base64-token-padding-regexp
 123           "\\)"))
 124
 125 ;; (defconst eword-B-encoding-and-encoded-text-regexp
 126 ;;   (concat "\\(B\\)\\?" eword-B-encoded-text-regexp))
 127
 128
 129 ;;; @@ Quoted-Printable
 130 ;;;
 131
 132 (defconst eword-Q-encoded-text-regexp
 133   (concat "\\([^=?]\\|" quoted-printable-octet-regexp "\\)+"))
 134 ;; (defconst eword-Q-encoding-and-encoded-text-regexp
 135 ;;   (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp))
 136
 137
 138 ;;; @ internal utilities
 139 ;;;
 140
 141 (defun eword-decode-first-encoded-words (string
 142                                          eword-regexp
 143                                          after-regexp
 144                                          &optional must-unfold)
 145   "Decode MIME encoded-words in beginning of STRING.
 146
 147 EWORD-REGEXP is the regexp that matches a encoded-word.
 148 Usual value is
 149 eword-encoded-word-in-unstructured-regexp,
 150 eword-encoded-text-in-phrase-regexp,
 151 eword-encoded-word-in-comment-regexp or
 152 eword-encoded-word-in-quoted-string-regexp.
 153
 154 AFTER-REGEXP is the regexp that matches a after encoded-word.
 155 Usual value is
 156 eword-after-encoded-word-in-unstructured-regexp,
 157 eword-after-encoded-text-in-phrase-regexp,
 158 eword-after-encoded-word-in-comment-regexp or
 159 eword-after-encoded-word-in-quoted-string-regexp.
 160
 161 If beginning of STRING matches EWORD-REGEXP with AFTER-REGEXP,
 162 returns a cons cell of decoded string(sequence of characters) and
 163 the rest(sequence of octets).
 164
 165 If beginning of STRING does not matches EWORD-REGEXP and AFTER-REGEXP,
 166 returns nil.
 167
 168 If an encoded-word is broken or your emacs implementation can not
 169 decode the charset included in it, it is returned in decoded part
 170 as encoded-word form.
 171
 172 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 173 if there are in decoded encoded-words (generated by bad manner MUA
 174 such as a version of Net$cape)."
 175   (if eword-decode-sticked-encoded-word (setq after-regexp ""))
 176   (let* ((between-ewords-regexp
 177            (if eword-decode-sticked-encoded-word
 178              "\\(\n?[ \t]\\)*"
 179              "\\(\n?[ \t]\\)+"))
 180          (between-ewords-eword-after-regexp
 181            (concat "\\`\\(" between-ewords-regexp "\\)"
 182                       "\\(" eword-regexp "\\)"
 183                       after-regexp))
 184          (eword-after-regexp
 185            (concat "\\`\\(" eword-regexp "\\)" after-regexp))
 186          (src string)   ; sequence of octets.
 187          (dst ""))      ; sequence of characters.
 188     (if (string-match eword-after-regexp src)
 189       (let* (p
 190              (q (match-end 1))
 191              (ew (substring src 0 q))
 192              (dw (eword-decode-encoded-word ew must-unfold)))
 193         (setq dst (concat dst dw)
 194               src (substring src q))
 195         (if (not (string= ew dw))
 196           (progn
 197             (while
 198               (and
 199                 (string-match between-ewords-eword-after-regexp src)
 200                 (progn
 201                   (setq p (match-end 1)
 202                         q (match-end 3)
 203                         ew (substring src p q)
 204                         dw (eword-decode-encoded-word ew must-unfold))
 205                   (if (string= ew dw)
 206                     (progn
 207                       (setq dst (concat dst (substring src 0 q))
 208                             src (substring src q))
 209                       nil)
 210                     t)))
 211               (setq dst (concat dst dw)
 212                     src (substring src q)))))
 213         (cons dst src))
 214       nil)))
 215
 216 (defun eword-decode-entire-string (string
 217                                    eword-regexp
 218                                    after-regexp
 219                                    safe-regexp
 220                                    escape ; ?\\ or nil.
 221                                    delimiters ; list of chars.
 222                                    chars-must-be-quote
 223                                    must-unfold
 224                                    code-conversion)
 225   (if (and code-conversion
 226            (not (mime-charset-to-coding-system code-conversion)))
 227       (setq code-conversion default-mime-charset))
 228   (let ((equal-safe-regexp (concat "\\`=?" safe-regexp))
 229         (dst "")
 230         (buf "")
 231         (src string)
 232         (ew-enable t))
 233     (while (< 0 (length src))
 234       (let ((ch (aref src 0))
 235             (decoded (and
 236                         ew-enable
 237                         (eword-decode-first-encoded-words src
 238                           eword-regexp after-regexp must-unfold))))
 239         (if (and (not (string= buf ""))
 240                  (or decoded (memq ch delimiters)))
 241           (setq dst (concat dst
 242                       (std11-wrap-as-quoted-pairs
 243                         (decode-mime-charset-string buf code-conversion)
 244                         chars-must-be-quote))
 245                 buf ""))
 246         (cond
 247           (decoded
 248             (setq dst (concat dst
 249                         (std11-wrap-as-quoted-pairs
 250                           (car decoded)
 251                           chars-must-be-quote))
 252                   src (cdr decoded)))
 253           ((memq ch delimiters)
 254             (setq dst (concat dst (list ch))
 255                   src (substring src 1)
 256                   ew-enable t))
 257           ((eq ch escape)
 258             (setq buf (concat buf (list (aref src 1)))
 259                   src (substring src 2)
 260                   ew-enable t))
 261           ((string-match "\\`[ \t\n]+" src)
 262             (setq buf (concat buf (substring src 0 (match-end 0)))
 263                   src (substring src (match-end 0))
 264                   ew-enable t))
 265           ((and (string-match equal-safe-regexp src)
 266                 (< 0 (match-end 0)))
 267             (setq buf (concat buf (substring src 0 (match-end 0)))
 268                   src (substring src (match-end 0))
 269                   ew-enable eword-decode-sticked-encoded-word))
 270           (t (error "something wrong")))))
 271     (if (not (string= buf ""))
 272       (setq dst (concat dst
 273                   (std11-wrap-as-quoted-pairs
 274                     (decode-mime-charset-string buf code-conversion)
 275                     chars-must-be-quote))))
 276     dst))
 277
 278
 279 ;;; @ for string
 280 ;;;
 281
 282 (defun eword-decode-unstructured (string code-conversion &optional must-unfold)
 283   (eword-decode-entire-string
 284     string
 285     eword-encoded-word-in-unstructured-regexp
 286     eword-after-encoded-word-in-unstructured-regexp
 287     "[^ \t\n=]*"
 288     nil
 289     nil
 290     nil
 291     must-unfold
 292     code-conversion))
 293
 294 (defun eword-decode-comment (string code-conversion &optional must-unfold)
 295   (eword-decode-entire-string
 296     string
 297     eword-encoded-word-in-comment-regexp
 298     eword-after-encoded-word-in-comment-regexp
 299     "[^ \t\n()\\\\=]*"
 300     ?\\
 301     '(?\( ?\))
 302     '(?\( ?\) ?\\ ?\r ?\n)
 303     must-unfold
 304     code-conversion))
 305
 306 (defun eword-decode-quoted-string (string code-conversion &optional must-unfold)
 307   (eword-decode-entire-string
 308     string
 309     eword-encoded-word-in-quoted-string-regexp
 310     eword-after-encoded-word-in-quoted-string-regexp
 311     "[^ \t\n\"\\\\=]*"
 312     ?\\
 313     '(?\")
 314     '(?\" ?\\ ?\r ?\n)
 315     must-unfold
 316     code-conversion))
 317
 318 (defun eword-decode-string (string &optional must-unfold code-conversion)
 319   "Decode MIME encoded-words in STRING.
 320
 321 STRING is unfolded before decoding.
 322
 323 If an encoded-word is broken or your emacs implementation can not
 324 decode the charset included in it, it is not decoded.
 325
 326 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 327 if there are in decoded encoded-words (generated by bad manner MUA
 328 such as a version of Net$cape).
 329
 330 If CODE-CONVERSION is nil, it decodes only encoded-words.  If it is
 331 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
 332 Otherwise it decodes non-ASCII bit patterns as the
 333 default-mime-charset."
 334   (eword-decode-unstructured
 335     (std11-unfold-string string)
 336     code-conversion
 337     must-unfold))
 338
 339
 340 ;;; @ for region
 341 ;;;
 342
 343 (defun eword-decode-region (start end &optional unfolding must-unfold
 344                                                 code-conversion)
 345   "Decode MIME encoded-words in region between START and END.
 346
 347 If UNFOLDING is not nil, it unfolds before decoding.
 348
 349 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 350 if there are in decoded encoded-words (generated by bad manner MUA
 351 such as a version of Net$cape).
 352
 353 If CODE-CONVERSION is nil, it decodes only encoded-words.  If it is
 354 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
 355 Otherwise it decodes non-ASCII bit patterns as the
 356 default-mime-charset."
 357   (interactive "*r")
 358   (save-excursion
 359     (save-restriction
 360       (narrow-to-region start end)
 361       (if unfolding
 362           (eword-decode-unfold)
 363         )
 364       (let ((str (eword-decode-unstructured
 365                    (buffer-substring (point-min) (point-max))
 366                    code-conversion
 367                    must-unfold)))
 368         (delete-region (point-min) (point-max))
 369         (insert str)))))
 370
 371
 372 ;;; @ for message header
 373 ;;;
 374
 375 (defcustom eword-decode-ignored-field-list
 376   '(Newsgroups Path Lines Nntp-Posting-Host Received Message-Id Date)
 377   "*List of field-names to be ignored when decoding.
 378 Each field name must be symbol."
 379   :group 'eword-decode
 380   :type '(repeat symbol))
 381
 382 (defcustom eword-decode-structured-field-list
 383   '(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender
 384              To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc
 385              Mime-Version Content-Type Content-Transfer-Encoding
 386              Content-Disposition)
 387   "*List of field-names to decode as structured field.
 388 Each field name must be symbol."
 389   :group 'eword-decode
 390   :type '(repeat symbol))
 391
 392 (defun eword-decode-header (&optional code-conversion separator)
 393   "Decode MIME encoded-words in header fields.
 394 If CODE-CONVERSION is nil, it decodes only encoded-words.  If it is
 395 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
 396 Otherwise it decodes non-ASCII bit patterns as the
 397 default-mime-charset.
 398 If SEPARATOR is not nil, it is used as header separator."
 399   (interactive "*")
 400   (if (and code-conversion
 401            (not (mime-charset-to-coding-system code-conversion)))
 402       (setq code-conversion default-mime-charset))
 403   (save-excursion
 404     (save-restriction
 405       (std11-narrow-to-header separator)
 406       (if code-conversion
 407           (let (beg p end field-name len)
 408             (goto-char (point-min))
 409             (while (re-search-forward std11-field-head-regexp nil t)
 410               (setq beg (match-beginning 0)
 411                     p (match-end 0)
 412                     field-name (buffer-substring beg (1- p))
 413                     len (string-width field-name)
 414                     field-name (intern (capitalize field-name))
 415                     end (std11-field-end))
 416               (cond ((memq field-name eword-decode-ignored-field-list)
 417                      ;; Don't decode
 418                      )
 419                     ((memq field-name eword-decode-structured-field-list)
 420                      ;; Decode as structured field
 421                      (let ((body (buffer-substring p end)))
 422                        (delete-region p end)
 423                        (insert (eword-decode-and-fold-structured-field
 424                                 body (1+ len)))
 425                        ))
 426                     (t
 427                      ;; Decode as unstructured field
 428                      (save-restriction
 429                        (narrow-to-region beg (1+ end))
 430                        (goto-char p)
 431                        (eword-decode-region beg (point-max) 'unfold nil
 432                          code-conversion)
 433                        (goto-char (point-max))
 434                        )))))
 435         (eword-decode-region (point-min) (point-max) t nil nil)
 436         ))))
 437
 438 (defun eword-decode-unfold ()
 439   (goto-char (point-min))
 440   (let (field beg end)
 441     (while (re-search-forward std11-field-head-regexp nil t)
 442       (setq beg (match-beginning 0)
 443             end (std11-field-end))
 444       (setq field (buffer-substring beg end))
 445       (if (string-match eword-encoded-word-regexp field)
 446           (save-restriction
 447             (narrow-to-region (goto-char beg) end)
 448             (while (re-search-forward "\n\\([ \t]\\)" nil t)
 449               (replace-match (match-string 1))
 450               )
 451             (goto-char (point-max))
 452             ))
 453       )))
 454
 455
 456 ;;; @ encoded-word decoder
 457 ;;;
 458
 459 (defvar eword-decode-encoded-word-error-handler
 460   'eword-decode-encoded-word-default-error-handler)
 461
 462 (defvar eword-warning-face nil
 463   "Face used for invalid encoded-word.")
 464
 465 (defun eword-decode-encoded-word-default-error-handler (word signal)
 466   (and (add-text-properties 0 (length word)
 467                             (and eword-warning-face
 468                                  (list 'face eword-warning-face))
 469                             word)
 470        word))
 471
 472 (defun eword-decode-encoded-word (word &optional must-unfold)
 473   "Decode WORD if it is an encoded-word.
 474
 475 If your emacs implementation can not decode the charset of WORD, it
 476 returns WORD.  Similarly the encoded-word is broken, it returns WORD.
 477
 478 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 479 if there are in decoded encoded-word (generated by bad manner MUA such
 480 as a version of Net$cape)."
 481   (or (if (string-match eword-encoded-word-regexp word)
 482           (let ((charset
 483                  (substring word (match-beginning 1) (match-end 1))
 484                  )
 485                 (encoding
 486                  (upcase
 487                   (substring word (match-beginning 2) (match-end 2))
 488                   ))
 489                 (text
 490                  (substring word (match-beginning 3) (match-end 3))
 491                  ))
 492             (condition-case err
 493                 (eword-decode-encoded-text charset encoding text must-unfold)
 494               (error
 495                (funcall eword-decode-encoded-word-error-handler word err)
 496                ))
 497             ))
 498       word))
 499
 500
 501 ;;; @ encoded-text decoder
 502 ;;;
 503
 504 (defun eword-decode-encoded-text (charset encoding string
 505                                           &optional must-unfold)
 506   "Decode STRING as an encoded-text.
 507
 508 If your emacs implementation can not decode CHARSET, it returns nil.
 509
 510 If ENCODING is not \"B\" or \"Q\", it occurs error.
 511 So you should write error-handling code if you don't want break by errors.
 512
 513 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 514 if there are in decoded encoded-text (generated by bad manner MUA such
 515 as a version of Net$cape)."
 516   (let ((cs (mime-charset-to-coding-system charset)))
 517     (if cs
 518         (let ((dest
 519                (cond
 520                 ((string-equal "B" encoding)
 521                  (if (and (string-match eword-B-encoded-text-regexp string)
 522                           (string-equal string (match-string 0 string)))
 523                      (base64-decode-string string)
 524                    (error "Invalid encoded-text %s" string)))
 525                 ((string-equal "Q" encoding)
 526                  (if (and (string-match eword-Q-encoded-text-regexp string)
 527                           (string-equal string (match-string 0 string)))
 528                      (q-encoding-decode-string string)
 529                    (error "Invalid encoded-text %s" string)))
 530                 (t
 531                  (error "Invalid encoding %s" encoding)
 532                  )))
 533               )
 534           (when dest
 535             (setq dest (decode-mime-charset-string dest charset))
 536             (if must-unfold
 537                 (mapconcat (function
 538                             (lambda (chr)
 539                               (cond ((eq chr ?\n) "")
 540                                     ((eq chr ?\t) " ")
 541                                     (t (char-to-string chr)))
 542                               ))
 543                            (std11-unfold-string dest)
 544                            "")
 545               dest))))))
 546
 547
 548 ;;; @ lexical analyze
 549 ;;;
 550
 551 (defvar eword-lexical-analyze-cache nil)
 552 (defvar eword-lexical-analyze-cache-max 299
 553   "*Max position of eword-lexical-analyze-cache.
 554 It is max size of eword-lexical-analyze-cache - 1.")
 555
 556 (defcustom eword-lexical-analyzers
 557   '(eword-analyze-quoted-string
 558     eword-analyze-domain-literal
 559     eword-analyze-comment
 560     eword-analyze-spaces
 561     eword-analyze-special
 562     eword-analyze-encoded-word
 563     eword-analyze-atom)
 564   "*List of functions to return result of lexical analyze.
 565 Each function must have two arguments: STRING and MUST-UNFOLD.
 566 STRING is the target string to be analyzed.
 567 If MUST-UNFOLD is not nil, each function must unfold and eliminate
 568 bare-CR and bare-LF from the result even if they are included in
 569 content of the encoded-word.
 570 Each function must return nil if it can not analyze STRING as its
 571 format.
 572
 573 Previous function is preferred to next function.  If a function
 574 returns nil, next function is used.  Otherwise the return value will
 575 be the result."
 576   :group 'eword-decode
 577   :type '(repeat function))
 578
 579 (defun eword-analyze-quoted-string (string &optional must-unfold)
 580   (let ((p (std11-check-enclosure string ?\" ?\")))
 581     (if p
 582         (cons (cons 'quoted-string
 583                     (if eword-decode-quoted-encoded-word
 584                         (eword-decode-quoted-string
 585                           (substring string 0 p)
 586                           default-mime-charset)
 587                       (std11-wrap-as-quoted-string
 588                        (decode-mime-charset-string
 589                         (std11-strip-quoted-pair (substring string 1 (1- p)))
 590                         default-mime-charset))))
 591               (substring string p)))
 592     ))
 593
 594 (defun eword-analyze-domain-literal (string &optional must-unfold)
 595   (std11-analyze-domain-literal string))
 596
 597 (defun eword-analyze-comment (string &optional must-unfold)
 598   (let ((len (length string)))
 599     (if (and (< 0 len) (eq (aref string 0) ?\())
 600         (let ((p 0))
 601           (while (and p (< p len) (eq (aref string p) ?\())
 602             (setq p (std11-check-enclosure string ?\( ?\) t p)))
 603           (setq p (or p len))
 604           (cons (cons 'comment
 605                       (eword-decode-comment
 606                         (std11-unfold-string (substring string 0 p))
 607                         default-mime-charset))
 608                 (substring string p)))
 609       nil)))
 610
 611 (defun eword-analyze-spaces (string &optional must-unfold)
 612   (std11-analyze-spaces string))
 613
 614 (defun eword-analyze-special (string &optional must-unfold)
 615   (std11-analyze-special string))
 616
 617 (defun eword-analyze-encoded-word (string &optional must-unfold)
 618   (let ((decoded (eword-decode-first-encoded-words
 619                   string
 620                   eword-encoded-word-in-phrase-regexp
 621                   eword-after-encoded-word-in-phrase-regexp
 622                   must-unfold)))
 623     (if decoded
 624         (let ((s (car decoded)))
 625           (while (or (string-match std11-atom-regexp s)
 626                      (string-match std11-spaces-regexp s))
 627             (setq s (substring s (match-end 0))))
 628           (if (= (length s) 0)
 629               (cons (cons 'atom (car decoded)) (cdr decoded))
 630             (cons (cons 'quoted-string
 631                         (std11-wrap-as-quoted-string (car decoded)))
 632                   (cdr decoded)))))))
 633
 634 (defun eword-analyze-atom (string &optional must-unfold)
 635   (if (let ((enable-multibyte-characters nil))
 636         (string-match std11-atom-regexp string))
 637       (let ((end (match-end 0)))
 638         (if (and eword-decode-sticked-encoded-word
 639                  (string-match eword-encoded-word-in-phrase-regexp
 640                                (substring string 0 end))
 641                  (< 0 (match-beginning 0)))
 642             (setq end (match-beginning 0)))
 643         (cons (cons 'atom (decode-mime-charset-string
 644                            (substring string 0 end)
 645                            default-mime-charset))
 646               (substring string end)
 647               ))))
 648
 649 (defun eword-lexical-analyze-internal (string must-unfold)
 650   (let ((last 'eword-analyze-spaces)
 651         dest ret)
 652     (while (not (string-equal string ""))
 653       (setq ret
 654             (let ((rest eword-lexical-analyzers)
 655                   func r)
 656               (while (and (setq func (car rest))
 657                           (or
 658                            (and
 659                             (not eword-decode-sticked-encoded-word)
 660                             (not (eq last 'eword-analyze-spaces))
 661                             (eq func 'eword-analyze-encoded-word))
 662                            (null (setq r (funcall func string must-unfold))))
 663                           )
 664                 (setq rest (cdr rest)))
 665               (setq last func)
 666               (or r `((error . ,string) . ""))
 667               ))
 668       (setq dest (cons (car ret) dest))
 669       (setq string (cdr ret))
 670       )
 671     (nreverse dest)
 672     ))
 673
 674 (defun eword-lexical-analyze (string &optional must-unfold)
 675   "Return lexical analyzed list corresponding STRING.
 676 It is like std11-lexical-analyze, but it decodes non us-ascii
 677 characters encoded as encoded-words or invalid \"raw\" format.
 678 \"Raw\" non us-ascii characters are regarded as variable
 679 `default-mime-charset'."
 680   (let* ((str (copy-sequence string))
 681          (key (cons str (cons default-mime-charset must-unfold)))
 682          ret)
 683     (set-text-properties 0 (length str) nil str)
 684     (if (setq ret (assoc key eword-lexical-analyze-cache))
 685         (cdr ret)
 686       (setq ret (eword-lexical-analyze-internal str must-unfold))
 687       (setq eword-lexical-analyze-cache
 688             (cons (cons key ret)
 689                   (last eword-lexical-analyze-cache
 690                         eword-lexical-analyze-cache-max)))
 691       ret)))
 692
 693 (defun eword-decode-token (token)
 694   (cdr token))
 695
 696 (defun eword-decode-and-fold-structured-field
 697   (string start-column &optional max-column must-unfold)
 698   "Decode and fold (fill) STRING as structured field body.
 699 It decodes non us-ascii characters in FULL-NAME encoded as
 700 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 701 characters are regarded as variable `default-mime-charset'.
 702
 703 If an encoded-word is broken or your emacs implementation can not
 704 decode the charset included in it, it is not decoded.
 705
 706 If MAX-COLUMN is omitted, `fill-column' is used.
 707
 708 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 709 if there are in decoded encoded-words (generated by bad manner MUA
 710 such as a version of Net$cape)."
 711   (or max-column
 712       (setq max-column fill-column))
 713   (let ((c start-column)
 714         (tokens (eword-lexical-analyze string must-unfold))
 715         (result "")
 716         token)
 717     (while (and (setq token (car tokens))
 718                 (setq tokens (cdr tokens)))
 719       (let* ((type (car token)))
 720         (if (eq type 'spaces)
 721             (let* ((next-token (car tokens))
 722                    (next-str (eword-decode-token next-token))
 723                    (next-len (string-width next-str))
 724                    (next-c (+ c next-len 1)))
 725               (if (< next-c max-column)
 726                   (setq result (concat result " " next-str)
 727                         c next-c)
 728                 (setq result (concat result "\n " next-str)
 729                       c (1+ next-len)))
 730               (setq tokens (cdr tokens))
 731               )
 732           (let* ((str (eword-decode-token token)))
 733             (setq result (concat result str)
 734                   c (+ c (string-width str)))
 735             ))))
 736     (if token
 737         (concat result (eword-decode-token token))
 738       result)))
 739
 740 (defun eword-decode-and-unfold-structured-field (string)
 741   "Decode and unfold STRING as structured field body.
 742 It decodes non us-ascii characters in FULL-NAME encoded as
 743 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 744 characters are regarded as variable `default-mime-charset'.
 745
 746 If an encoded-word is broken or your emacs implementation can not
 747 decode the charset included in it, it is not decoded."
 748   (let ((tokens (eword-lexical-analyze string 'must-unfold))
 749         (result ""))
 750     (while tokens
 751       (let* ((token (car tokens))
 752              (type (car token)))
 753         (setq tokens (cdr tokens))
 754         (setq result
 755               (if (eq type 'spaces)
 756                   (concat result " ")
 757                 (concat result (eword-decode-token token))
 758                 ))))
 759     result))
 760
 761 (defun eword-decode-structured-field-body (string &optional must-unfold
 762                                                   start-column max-column)
 763   "Decode non us-ascii characters in STRING as structured field body.
 764 STRING is unfolded before decoding.
 765
 766 It decodes non us-ascii characters in FULL-NAME encoded as
 767 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 768 characters are regarded as variable `default-mime-charset'.
 769
 770 If an encoded-word is broken or your emacs implementation can not
 771 decode the charset included in it, it is not decoded.
 772
 773 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 774 if there are in decoded encoded-words (generated by bad manner MUA
 775 such as a version of Net$cape)."
 776   (if start-column
 777       ;; fold with max-column
 778       (eword-decode-and-fold-structured-field
 779        string start-column max-column must-unfold)
 780     ;; Don't fold
 781     (mapconcat (function eword-decode-token)
 782                (eword-lexical-analyze string must-unfold)
 783                "")
 784     ))
 785
 786 (defun eword-decode-unstructured-field-body (string &optional must-unfold)
 787   "Decode non us-ascii characters in STRING as unstructured field body.
 788 STRING is unfolded before decoding.
 789
 790 It decodes non us-ascii characters in FULL-NAME encoded as
 791 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 792 characters are regarded as variable `default-mime-charset'.
 793
 794 If an encoded-word is broken or your emacs implementation can not
 795 decode the charset included in it, it is not decoded.
 796
 797 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 798 if there are in decoded encoded-words (generated by bad manner MUA
 799 such as a version of Net$cape)."
 800   (eword-decode-string string must-unfold default-mime-charset))
 801
 802 (defun eword-extract-address-components (string)
 803   "Extract full name and canonical address from STRING.
 804 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
 805 If no name can be extracted, FULL-NAME will be nil.
 806 It decodes non us-ascii characters in FULL-NAME encoded as
 807 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 808 characters are regarded as variable `default-mime-charset'."
 809   (let* ((structure (car (std11-parse-address
 810                           (eword-lexical-analyze
 811                            (std11-unfold-string string) 'must-unfold))))
 812          (phrase  (std11-full-name-string structure))
 813          (address (std11-address-string structure))
 814          )
 815     (list phrase address)
 816     ))
 817
 818
 819 ;;; @ end
 820 ;;;
 821
 822 (provide 'eword-decode)
 823
 824 ;;; eword-decode.el ends here