eword-decode.el

   1 ;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs
   2
   3 ;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc.
   4
   5 ;; Author: ENAMI Tsugutomo <enami@sys.ptg.sony.co.jp>
   6 ;;         MORIOKA Tomohiko <morioka@jaist.ac.jp>
   7 ;;         Tanaka Akira <akr@jaist.ac.jp>
   8 ;; Maintainer: Tanaka Akira <akr@jaist.ac.jp>
   9 ;; Created: 1995/10/03
  10 ;; Original: 1992/07/20 ENAMI Tsugutomo's `mime.el'.
  11 ;;      Renamed: 1993/06/03 to tiny-mime.el
  12 ;;      Renamed: 1995/10/03 from tiny-mime.el (split off encoder)
  13 ;;      Renamed: 1997/02/22 from tm-ew-d.el
  14 ;; Keywords: encoded-word, MIME, multilingual, header, mail, news
  15
  16 ;; This file is part of FLAM (Faithful Library About MIME).
  17
  18 ;; This program is free software; you can redistribute it and/or
  19 ;; modify it under the terms of the GNU General Public License as
  20 ;; published by the Free Software Foundation; either version 2, or (at
  21 ;; your option) any later version.
  22
  23 ;; This program is distributed in the hope that it will be useful, but
  24 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  25 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  26 ;; General Public License for more details.
  27
  28 ;; You should have received a copy of the GNU General Public License
  29 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
  30 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  31 ;; Boston, MA 02111-1307, USA.
  32
  33 ;;; Code:
  34
  35 (require 'std11-parse)
  36 (require 'mel)
  37 (require 'mime-def)
  38
  39 (defgroup eword-decode nil
  40   "Encoded-word decoding"
  41   :group 'mime)
  42
  43
  44 ;;; @ variables
  45 ;;;
  46
  47 (defcustom eword-decode-sticked-encoded-word nil
  48   "*If non-nil, decode encoded-words sticked on atoms,
  49 other encoded-words, etc.
  50 however this behaviour violates RFC2047."
  51   :group 'eword-decode
  52   :type 'boolean)
  53
  54 (defcustom eword-decode-quoted-encoded-word nil
  55   "*If non-nil, decode encoded-words in quoted-string
  56 however this behaviour violates RFC2047."
  57   :group 'eword-decode
  58   :type 'boolean)
  59
  60
  61 ;;; @ MIME encoded-word definition
  62 ;;;
  63
  64 (defconst eword-encoded-word-prefix-regexp
  65   (concat (regexp-quote "=?")
  66           "\\(" mime-charset-regexp "\\)"
  67           (regexp-quote "?")
  68           "\\(B\\|Q\\)"
  69           (regexp-quote "?")))
  70 (defconst eword-encoded-word-suffix-regexp
  71   (regexp-quote "?="))
  72
  73 (defconst eword-encoded-text-in-unstructured-regexp "[!->@-~]+")
  74 (defconst eword-encoded-word-in-unstructured-regexp
  75   (concat eword-encoded-word-prefix-regexp
  76           "\\(" eword-encoded-text-in-unstructured-regexp "\\)"
  77           eword-encoded-word-suffix-regexp))
  78 (defconst eword-after-encoded-word-in-unstructured-regexp "\\([ \t]\\|$\\)")
  79
  80 (defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+")
  81 (defconst eword-encoded-word-in-phrase-regexp
  82   (concat eword-encoded-word-prefix-regexp
  83           "\\(" eword-encoded-text-in-phrase-regexp "\\)"
  84           eword-encoded-word-suffix-regexp))
  85 (defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t(]\\|$\\)")
  86
  87 (defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+")
  88 (defconst eword-encoded-word-in-comment-regexp
  89   (concat eword-encoded-word-prefix-regexp
  90           "\\(" eword-encoded-text-in-comment-regexp "\\)"
  91           eword-encoded-word-suffix-regexp))
  92 (defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)")
  93
  94 (defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+")
  95 (defconst eword-encoded-word-in-quoted-string-regexp
  96   (concat eword-encoded-word-prefix-regexp
  97           "\\(" eword-encoded-text-in-quoted-string-regexp "\\)"
  98           eword-encoded-word-suffix-regexp))
  99 (defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)")
 100
 101 ; obsolete
 102 (defconst eword-encoded-text-regexp eword-encoded-text-in-unstructured-regexp)
 103 (defconst eword-encoded-word-regexp eword-encoded-word-in-unstructured-regexp)
 104
 105
 106 ;;; @@ Base64
 107 ;;;
 108
 109 (defconst base64-token-regexp "[A-Za-z0-9+/]")
 110 (defconst base64-token-padding-regexp "[A-Za-z0-9+/=]")
 111
 112 (defconst eword-B-encoded-text-regexp
 113   (concat "\\(\\("
 114           base64-token-regexp
 115           base64-token-regexp
 116           base64-token-regexp
 117           base64-token-regexp
 118           "\\)*"
 119           base64-token-regexp
 120           base64-token-regexp
 121           base64-token-padding-regexp
 122           base64-token-padding-regexp
 123           "\\)"))
 124
 125 ;; (defconst eword-B-encoding-and-encoded-text-regexp
 126 ;;   (concat "\\(B\\)\\?" eword-B-encoded-text-regexp))
 127
 128
 129 ;;; @@ Quoted-Printable
 130 ;;;
 131
 132 (defconst quoted-printable-hex-chars "0123456789ABCDEF")
 133 (defconst quoted-printable-octet-regexp
 134   (concat "=[" quoted-printable-hex-chars
 135           "][" quoted-printable-hex-chars "]"))
 136
 137 (defconst eword-Q-encoded-text-regexp
 138   (concat "\\([^=?]\\|" quoted-printable-octet-regexp "\\)+"))
 139 ;; (defconst eword-Q-encoding-and-encoded-text-regexp
 140 ;;   (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp))
 141
 142
 143 ;;; @ internal utilities
 144 ;;;
 145
 146 (defun eword-decode-first-encoded-words (string
 147                                          eword-regexp
 148                                          after-regexp
 149                                          &optional must-unfold)
 150   "Decode MIME encoded-words in beginning of STRING.
 151
 152 EWORD-REGEXP is the regexp that matches a encoded-word.
 153 Usual value is
 154 eword-encoded-word-in-unstructured-regexp,
 155 eword-encoded-text-in-phrase-regexp,
 156 eword-encoded-word-in-comment-regexp or
 157 eword-encoded-word-in-quoted-string-regexp.
 158
 159 AFTER-REGEXP is the regexp that matches a after encoded-word.
 160 Usual value is
 161 eword-after-encoded-word-in-unstructured-regexp,
 162 eword-after-encoded-text-in-phrase-regexp,
 163 eword-after-encoded-word-in-comment-regexp or
 164 eword-after-encoded-word-in-quoted-string-regexp.
 165
 166 If beginning of STRING matches EWORD-REGEXP and AFTER-REGEXP,
 167 returns a cons cell of decoded string(sequence of characters) and
 168 the rest(sequence of octets).
 169
 170 If beginning of STRING does not matches EWORD-REGEXP and AFTER-REGEXP,
 171 returns nil.
 172
 173 If an encoded-word is broken or your emacs implementation can not
 174 decode the charset included in it, it is returned in decoded part
 175 as encoded-word form.
 176
 177 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 178 if there are in decoded encoded-words (generated by bad manner MUA
 179 such as a version of Net$cape)."
 180   (if eword-decode-sticked-encoded-word (setq after-regexp ""))
 181   (let ((between-ewords-regexp
 182           (if eword-decode-sticked-encoded-word
 183             "\\(\n?[ \t]\\)*"
 184             "\\(\n?[ \t]\\)+"))
 185         (src string)    ; sequence of octets.
 186         (dst ""))       ; sequence of characters.
 187     (if (string-match
 188           (concat "\\`\\(" eword-regexp "\\)" after-regexp) src)
 189       (let* (p
 190              (q (match-end 1))
 191              (ew (substring src 0 q))
 192              (dw (eword-decode-encoded-word ew must-unfold)))
 193         (setq dst (concat dst dw)
 194               src (substring src q))
 195         (if (not (string= ew dw))
 196           (progn
 197             (while
 198               (and
 199                 (string-match
 200                   (concat "\\`\\(" between-ewords-regexp "\\)"
 201                              "\\(" eword-regexp "\\)"
 202                              after-regexp)
 203                   src)
 204                 (progn
 205                   (setq p (match-end 1)
 206                         q (match-end 3)
 207                         ew (substring src p q)
 208                         dw (eword-decode-encoded-word ew must-unfold))
 209                   (if (string= ew dw)
 210                     (progn
 211                       (setq dst (concat dst (substring src 0 q))
 212                             src (substring src q))
 213                       nil)
 214                     t)))
 215               (setq dst (concat dst dw)
 216                     src (substring src q)))))
 217         (cons dst src))
 218       nil)))
 219
 220 (defun eword-decode-entire-string (string
 221                                    eword-regexp
 222                                    after-regexp
 223                                    safe-regexp
 224                                    escape ; ?\\ or nil.
 225                                    delimiters ; list of chars.
 226                                    default-charset
 227                                    must-unfold)
 228   (let ((dst "")
 229         (buf "")
 230         (src string)
 231         (ew-enable t))
 232     (while (< 0 (length src))
 233       (let ((ch (aref src 0))
 234             (decoded (and
 235                         ew-enable
 236                         (eword-decode-first-encoded-words src
 237                           eword-regexp after-regexp must-unfold))))
 238         (if (and (not (string= buf ""))
 239                  (or decoded (memq ch delimiters)))
 240           (setq dst (concat dst
 241                       (std11-wrap-as-quoted-pairs
 242                         (decode-mime-charset-string buf default-charset)
 243                         delimiters))
 244                 buf ""))
 245         (cond
 246           (decoded
 247             (setq dst (concat dst
 248                         (std11-wrap-as-quoted-pairs
 249                           (car decoded)
 250                           delimiters))
 251                   src (cdr decoded)))
 252           ((memq ch delimiters)
 253             (setq dst (concat dst (list ch))
 254                   src (substring src 1)
 255                   ew-enable t))
 256           ((eq ch escape)
 257             (setq buf (concat buf (list (aref src 1)))
 258                   src (substring src 2)
 259                   ew-enable t))
 260           ((string-match "\\`[ \t\n]+" src)
 261             (setq buf (concat buf (substring src 0 (match-end 0)))
 262                   src (substring src (match-end 0))
 263                   ew-enable t))
 264           ((and (string-match (concat "\\`=?" safe-regexp) src)
 265                 (< 0 (match-end 0)))
 266             (setq buf (concat buf (substring src 0 (match-end 0)))
 267                   src (substring src (match-end 0))
 268                   ew-enable eword-decode-sticked-encoded-word))
 269           (t (error "something wrong")))))
 270     (if (not (string= buf ""))
 271       (setq dst (concat dst
 272                   (std11-wrap-as-quoted-pairs
 273                     (decode-mime-charset-string buf default-charset)
 274                     delimiters))))
 275     dst))
 276
 277
 278 ;;; @ for string
 279 ;;;
 280
 281 (defun eword-decode-unstructured (string &optional must-unfold)
 282   (eword-decode-entire-string
 283     string
 284     eword-encoded-word-in-unstructured-regexp
 285     eword-after-encoded-word-in-unstructured-regexp
 286     "[^ \t\n=]*"
 287     nil
 288     nil
 289     default-mime-charset
 290     must-unfold))
 291
 292 (defun eword-decode-comment (string &optional must-unfold)
 293   (eword-decode-entire-string
 294     string
 295     eword-encoded-word-in-comment-regexp
 296     eword-after-encoded-word-in-comment-regexp
 297     "[^ \t\n()\\\\=]*"
 298     ?\\
 299     '(?\( ?\))
 300     default-mime-charset
 301     must-unfold))
 302
 303 (defun eword-decode-quoted-string (string &optional must-unfold)
 304   (eword-decode-entire-string
 305     string
 306     eword-encoded-word-in-quoted-string-regexp
 307     eword-after-encoded-word-in-quoted-string-regexp
 308     "[^ \t\n\"\\\\=]*"
 309     ?\\
 310     '(?\")
 311     default-mime-charset
 312     must-unfold))
 313
 314 (defun eword-decode-string (string &optional must-unfold default-mime-charset)
 315   "Decode MIME encoded-words in STRING.
 316
 317 STRING is unfolded before decoding.
 318
 319 If an encoded-word is broken or your emacs implementation can not
 320 decode the charset included in it, it is not decoded.
 321
 322 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 323 if there are in decoded encoded-words (generated by bad manner MUA
 324 such as a version of Net$cape)."
 325   (eword-decode-unstructured
 326     (std11-unfold-string string)
 327     must-unfold))
 328
 329
 330 ;;; @ for region
 331 ;;;
 332
 333 (defun eword-decode-region (start end &optional unfolding must-unfold
 334                                                 default-mime-charset)
 335   "Decode MIME encoded-words in region between START and END.
 336
 337 If UNFOLDING is not nil, it unfolds before decoding.
 338
 339 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 340 if there are in decoded encoded-words (generated by bad manner MUA
 341 such as a version of Net$cape)."
 342   (interactive "*r")
 343   (save-excursion
 344     (save-restriction
 345       (narrow-to-region start end)
 346       (if unfolding
 347           (eword-decode-unfold)
 348         )
 349       (let ((str (eword-decode-unstructured
 350                    (buffer-substring (point-min) (point-max))
 351                    must-unfold)))
 352         (delete-region (point-min) (point-max))
 353         (insert str)))))
 354
 355
 356 ;;; @ for message header
 357 ;;;
 358
 359 (defcustom eword-decode-ignored-field-list
 360   '(newsgroups path lines nntp-posting-host message-id date)
 361   "*List of field-names to be ignored when decoding.
 362 Each field name must be symbol."
 363   :group 'eword-decode
 364   :type '(repeat symbol))
 365
 366 (defcustom eword-decode-structured-field-list
 367   '(reply-to resent-reply-to from resent-from sender resent-sender
 368              to resent-to cc resent-cc bcc resent-bcc dcc
 369              mime-version content-type content-transfer-encoding
 370              content-disposition)
 371   "*List of field-names to decode as structured field.
 372 Each field name must be symbol."
 373   :group 'eword-decode
 374   :type '(repeat symbol))
 375
 376 (defun eword-decode-header (&optional code-conversion separator)
 377   "Decode MIME encoded-words in header fields.
 378 If CODE-CONVERSION is nil, it decodes only encoded-words.  If it is
 379 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
 380 Otherwise it decodes non-ASCII bit patterns as the
 381 default-mime-charset.
 382 If SEPARATOR is not nil, it is used as header separator."
 383   (interactive "*")
 384   (if (and code-conversion
 385            (not (mime-charset-to-coding-system code-conversion)))
 386       (setq code-conversion default-mime-charset))
 387   (save-excursion
 388     (save-restriction
 389       (std11-narrow-to-header separator)
 390       (if code-conversion
 391           (let (beg p end field-name len)
 392             (goto-char (point-min))
 393             (while (re-search-forward std11-field-head-regexp nil t)
 394               (setq beg (match-beginning 0)
 395                     p (match-end 0)
 396                     field-name (buffer-substring beg (1- p))
 397                     len (string-width field-name)
 398                     field-name (intern (downcase field-name))
 399                     end (std11-field-end))
 400               (cond ((memq field-name eword-decode-ignored-field-list)
 401                      ;; Don't decode
 402                      )
 403                     ((memq field-name eword-decode-structured-field-list)
 404                      ;; Decode as structured field
 405                      (let ((body (buffer-substring p end)))
 406                        (delete-region p end)
 407                        (insert (eword-decode-and-fold-structured-field
 408                                 body (1+ len)))
 409                        ))
 410                     (t
 411                      ;; Decode as unstructured field
 412                      (save-restriction
 413                        (narrow-to-region beg (1+ end))
 414                        (goto-char p)
 415                        (eword-decode-region beg (point-max) 'unfold nil
 416                          code-conversion)
 417                        (goto-char (point-max))
 418                        )))))
 419         (eword-decode-region (point-min) (point-max) t nil code-conversion)
 420         ))))
 421
 422 (defun eword-decode-unfold ()
 423   (goto-char (point-min))
 424   (let (field beg end)
 425     (while (re-search-forward std11-field-head-regexp nil t)
 426       (setq beg (match-beginning 0)
 427             end (std11-field-end))
 428       (setq field (buffer-substring beg end))
 429       (if (string-match eword-encoded-word-regexp field)
 430           (save-restriction
 431             (narrow-to-region (goto-char beg) end)
 432             (while (re-search-forward "\n\\([ \t]\\)" nil t)
 433               (replace-match (match-string 1))
 434               )
 435             (goto-char (point-max))
 436             ))
 437       )))
 438
 439
 440 ;;; @ encoded-word decoder
 441 ;;;
 442
 443 (defvar eword-warning-face nil "Face used for invalid encoded-word.")
 444
 445 (defun eword-decode-encoded-word (word &optional must-unfold)
 446   "Decode WORD if it is an encoded-word.
 447
 448 If your emacs implementation can not decode the charset of WORD, it
 449 returns WORD.  Similarly the encoded-word is broken, it returns WORD.
 450
 451 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 452 if there are in decoded encoded-word (generated by bad manner MUA such
 453 as a version of Net$cape)."
 454   (or (if (string-match eword-encoded-word-regexp word)
 455           (let ((charset
 456                  (substring word (match-beginning 1) (match-end 1))
 457                  )
 458                 (encoding
 459                  (upcase
 460                   (substring word (match-beginning 2) (match-end 2))
 461                   ))
 462                 (text
 463                  (substring word (match-beginning 3) (match-end 3))
 464                  ))
 465             (condition-case err
 466                 (eword-decode-encoded-text charset encoding text must-unfold)
 467               (error
 468                (and
 469                 (add-text-properties 0 (length word)
 470                                      (and eword-warning-face
 471                                           (list 'face eword-warning-face))
 472                                      word)
 473                 word)))
 474             ))
 475       word))
 476
 477
 478 ;;; @ encoded-text decoder
 479 ;;;
 480
 481 (defun eword-decode-encoded-text (charset encoding string
 482                                           &optional must-unfold)
 483   "Decode STRING as an encoded-text.
 484
 485 If your emacs implementation can not decode CHARSET, it returns nil.
 486
 487 If ENCODING is not \"B\" or \"Q\", it occurs error.
 488 So you should write error-handling code if you don't want break by errors.
 489
 490 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 491 if there are in decoded encoded-text (generated by bad manner MUA such
 492 as a version of Net$cape)."
 493   (let ((cs (mime-charset-to-coding-system charset)))
 494     (if cs
 495         (let ((dest
 496                (cond
 497                 ((string-equal "B" encoding)
 498                  (if (and (string-match eword-B-encoded-text-regexp string)
 499                           (string-equal string (match-string 0 string)))
 500                      (base64-decode-string string)
 501                    (error "Invalid encoded-text %s" string)))
 502                 ((string-equal "Q" encoding)
 503                  (if (and (string-match eword-Q-encoded-text-regexp string)
 504                           (string-equal string (match-string 0 string)))
 505                      (q-encoding-decode-string string)
 506                    (error "Invalid encoded-text %s" string)))
 507                 (t
 508                  (error "Invalid encoding %s" encoding)
 509                  )))
 510               )
 511           (if dest
 512               (progn
 513                 (setq dest (decode-coding-string dest cs))
 514                 (if must-unfold
 515                     (mapconcat (function
 516                                 (lambda (chr)
 517                                   (cond
 518                                    ((eq chr ?\n) "")
 519                                    ((eq chr ?\t) " ")
 520                                    (t (char-to-string chr)))
 521                                   ))
 522                                (std11-unfold-string dest)
 523                                "")
 524                   dest)
 525                 ))))))
 526
 527
 528 ;;; @ lexical analyze
 529 ;;;
 530
 531 (defvar eword-lexical-analyze-cache nil)
 532 (defvar eword-lexical-analyze-cache-max 299
 533   "*Max position of eword-lexical-analyze-cache.
 534 It is max size of eword-lexical-analyze-cache - 1.")
 535
 536 (defcustom eword-lexical-analyzers
 537   '(eword-analyze-quoted-string
 538     eword-analyze-domain-literal
 539     eword-analyze-comment
 540     eword-analyze-spaces
 541     eword-analyze-special
 542     eword-analyze-encoded-word
 543     eword-analyze-atom)
 544   "*List of functions to return result of lexical analyze.
 545 Each function must have two arguments: STRING and MUST-UNFOLD.
 546 STRING is the target string to be analyzed.
 547 If MUST-UNFOLD is not nil, each function must unfold and eliminate
 548 bare-CR and bare-LF from the result even if they are included in
 549 content of the encoded-word.
 550 Each function must return nil if it can not analyze STRING as its
 551 format.
 552
 553 Previous function is preferred to next function.  If a function
 554 returns nil, next function is used.  Otherwise the return value will
 555 be the result."
 556   :group 'eword-decode
 557   :type '(repeat function))
 558
 559 (defun eword-analyze-quoted-string (string &optional must-unfold)
 560   (let ((p (std11-check-enclosure string ?\" ?\")))
 561     (if p
 562         (cons (cons 'quoted-string
 563                     (eword-decode-quoted-string (substring string 0 p)))
 564               (substring string p))
 565       )))
 566
 567 (defun eword-analyze-domain-literal (string &optional must-unfold)
 568   (std11-analyze-domain-literal string))
 569
 570 (defun eword-analyze-comment (string &optional must-unfold)
 571   (let ((len (length string)))
 572     (if (and (< 0 len) (eq (aref string 0) ?\())
 573         (let ((p 0))
 574           (while (and p (< p len) (eq (aref string p) ?\())
 575             (setq p (std11-check-enclosure string ?\( ?\) t p)))
 576           (setq p (or p len))
 577           (cons (cons 'comment
 578                       (eword-decode-comment (substring string 0 p)))
 579                 (substring string p)))
 580       nil)))
 581
 582 (defun eword-analyze-spaces (string &optional must-unfold)
 583   (std11-analyze-spaces string))
 584
 585 (defun eword-analyze-special (string &optional must-unfold)
 586   (std11-analyze-special string))
 587
 588 (defun eword-analyze-encoded-word (string &optional must-unfold)
 589   (let ((decoded (eword-decode-first-encoded-words
 590                    string
 591                    eword-encoded-word-in-phrase-regexp
 592                    eword-after-encoded-word-in-phrase-regexp
 593                    must-unfold)))
 594     (if decoded
 595       (cons (cons 'atom (car decoded)) (cdr decoded)))))
 596
 597 (defun eword-analyze-atom (string &optional must-unfold)
 598   (if (let ((enable-multibyte-characters nil))
 599         (string-match std11-atom-regexp string))
 600       (let ((end (match-end 0)))
 601         (if (and eword-decode-sticked-encoded-word
 602                  (string-match eword-encoded-word-in-phrase-regexp
 603                                (substring string 0 end))
 604                  (< 0 (match-beginning 0)))
 605             (setq end (match-beginning 0)))
 606         (cons (cons 'atom (decode-mime-charset-string
 607                            (substring string 0 end)
 608                            default-mime-charset))
 609               (substring string end)
 610               ))))
 611
 612 (defun eword-lexical-analyze-internal (string must-unfold)
 613   (let (dest ret)
 614     (while (not (string-equal string ""))
 615       (setq ret
 616             (let ((rest eword-lexical-analyzers)
 617                   func r)
 618               (while (and (setq func (car rest))
 619                           (null (setq r (funcall func string must-unfold)))
 620                           )
 621                 (setq rest (cdr rest)))
 622               (or r `((error . ,string) . ""))
 623               ))
 624       (setq dest (cons (car ret) dest))
 625       (setq string (cdr ret))
 626       )
 627     (nreverse dest)
 628     ))
 629
 630 (defun eword-lexical-analyze (string &optional must-unfold)
 631   "Return lexical analyzed list corresponding STRING.
 632 It is like std11-lexical-analyze, but it decodes non us-ascii
 633 characters encoded as encoded-words or invalid \"raw\" format.
 634 \"Raw\" non us-ascii characters are regarded as variable
 635 `default-mime-charset'."
 636   (let* ((str (copy-sequence string))
 637          (key (cons str (cons default-mime-charset must-unfold)))
 638          ret)
 639     (set-text-properties 0 (length str) nil str)
 640     (if (setq ret (assoc key eword-lexical-analyze-cache))
 641         (cdr ret)
 642       (setq ret (eword-lexical-analyze-internal str must-unfold))
 643       (setq eword-lexical-analyze-cache
 644             (cons (cons key ret)
 645                   (last eword-lexical-analyze-cache
 646                         eword-lexical-analyze-cache-max)))
 647       ret)))
 648
 649 (defun eword-decode-token (token)
 650   (cdr token))
 651
 652 (defun eword-decode-and-fold-structured-field
 653   (string start-column &optional max-column must-unfold)
 654   "Decode and fold (fill) STRING as structured field body.
 655 It decodes non us-ascii characters in FULL-NAME encoded as
 656 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 657 characters are regarded as variable `default-mime-charset'.
 658
 659 If an encoded-word is broken or your emacs implementation can not
 660 decode the charset included in it, it is not decoded.
 661
 662 If MAX-COLUMN is omitted, `fill-column' is used.
 663
 664 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 665 if there are in decoded encoded-words (generated by bad manner MUA
 666 such as a version of Net$cape)."
 667   (or max-column
 668       (setq max-column fill-column))
 669   (let ((c start-column)
 670         (tokens (eword-lexical-analyze string must-unfold))
 671         (result "")
 672         token)
 673     (while (and (setq token (car tokens))
 674                 (setq tokens (cdr tokens)))
 675       (let* ((type (car token)))
 676         (if (eq type 'spaces)
 677             (let* ((next-token (car tokens))
 678                    (next-str (eword-decode-token next-token))
 679                    (next-len (string-width next-str))
 680                    (next-c (+ c next-len 1)))
 681               (if (< next-c max-column)
 682                   (setq result (concat result " " next-str)
 683                         c next-c)
 684                 (setq result (concat result "\n " next-str)
 685                       c (1+ next-len)))
 686               (setq tokens (cdr tokens))
 687               )
 688           (let* ((str (eword-decode-token token)))
 689             (setq result (concat result str)
 690                   c (+ c (string-width str)))
 691             ))))
 692     (if token
 693         (concat result (eword-decode-token token))
 694       result)))
 695
 696 (defun eword-decode-and-unfold-structured-field (string)
 697   "Decode and unfold STRING as structured field body.
 698 It decodes non us-ascii characters in FULL-NAME encoded as
 699 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 700 characters are regarded as variable `default-mime-charset'.
 701
 702 If an encoded-word is broken or your emacs implementation can not
 703 decode the charset included in it, it is not decoded."
 704   (let ((tokens (eword-lexical-analyze string 'must-unfold))
 705         (result ""))
 706     (while tokens
 707       (let* ((token (car tokens))
 708              (type (car token)))
 709         (setq tokens (cdr tokens))
 710         (setq result
 711               (if (eq type 'spaces)
 712                   (concat result " ")
 713                 (concat result (eword-decode-token token))
 714                 ))))
 715     result))
 716
 717 (defun eword-decode-structured-field-body (string &optional must-unfold
 718                                                   start-column max-column)
 719   "Decode non us-ascii characters in STRING as structured field body.
 720 STRING is unfolded before decoding.
 721
 722 It decodes non us-ascii characters in FULL-NAME encoded as
 723 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 724 characters are regarded as variable `default-mime-charset'.
 725
 726 If an encoded-word is broken or your emacs implementation can not
 727 decode the charset included in it, it is not decoded.
 728
 729 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 730 if there are in decoded encoded-words (generated by bad manner MUA
 731 such as a version of Net$cape)."
 732   (if start-column
 733       ;; fold with max-column
 734       (eword-decode-and-fold-structured-field
 735        string start-column max-column must-unfold)
 736     ;; Don't fold
 737     (mapconcat (function eword-decode-token)
 738                (eword-lexical-analyze string must-unfold)
 739                "")
 740     ))
 741
 742 (defun eword-decode-unstructured-field-body (string &optional must-unfold)
 743   "Decode non us-ascii characters in STRING as unstructured field body.
 744 STRING is unfolded before decoding.
 745
 746 It decodes non us-ascii characters in FULL-NAME encoded as
 747 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 748 characters are regarded as variable `default-mime-charset'.
 749
 750 If an encoded-word is broken or your emacs implementation can not
 751 decode the charset included in it, it is not decoded.
 752
 753 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 754 if there are in decoded encoded-words (generated by bad manner MUA
 755 such as a version of Net$cape)."
 756   (eword-decode-string string must-unfold default-mime-charset))
 757
 758 (defun eword-extract-address-components (string)
 759   "Extract full name and canonical address from STRING.
 760 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
 761 If no name can be extracted, FULL-NAME will be nil.
 762 It decodes non us-ascii characters in FULL-NAME encoded as
 763 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 764 characters are regarded as variable `default-mime-charset'."
 765   (let* ((structure (car (std11-parse-address
 766                           (eword-lexical-analyze
 767                            (std11-unfold-string string) 'must-unfold))))
 768          (phrase  (std11-full-name-string structure))
 769          (address (std11-address-string structure))
 770          )
 771     (list phrase address)
 772     ))
 773
 774
 775 ;;; @ end
 776 ;;;
 777
 778 (provide 'eword-decode)
 779
 780 ;;; eword-decode.el ends here