eword-decode.el

   1 ;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs
   2
   3 ;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc.
   4
   5 ;; Author: ENAMI Tsugutomo <enami@sys.ptg.sony.co.jp>
   6 ;;         MORIOKA Tomohiko <morioka@jaist.ac.jp>
   7 ;;         Tanaka Akira <akr@jaist.ac.jp>
   8 ;; Maintainer: Tanaka Akira <akr@jaist.ac.jp>
   9 ;; Created: 1995/10/03
  10 ;; Original: 1992/07/20 ENAMI Tsugutomo's `mime.el'.
  11 ;;      Renamed: 1993/06/03 to tiny-mime.el
  12 ;;      Renamed: 1995/10/03 from tiny-mime.el (split off encoder)
  13 ;;      Renamed: 1997/02/22 from tm-ew-d.el
  14 ;; Keywords: encoded-word, MIME, multilingual, header, mail, news
  15
  16 ;; This file is part of FLAM (Faithful Library About MIME).
  17
  18 ;; This program is free software; you can redistribute it and/or
  19 ;; modify it under the terms of the GNU General Public License as
  20 ;; published by the Free Software Foundation; either version 2, or (at
  21 ;; your option) any later version.
  22
  23 ;; This program is distributed in the hope that it will be useful, but
  24 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  25 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  26 ;; General Public License for more details.
  27
  28 ;; You should have received a copy of the GNU General Public License
  29 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
  30 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  31 ;; Boston, MA 02111-1307, USA.
  32
  33 ;;; Code:
  34
  35 (require 'std11-parse)
  36 (require 'mel)
  37 (require 'mime-def)
  38
  39 (defgroup eword-decode nil
  40   "Encoded-word decoding"
  41   :group 'mime)
  42
  43
  44 ;;; @ MIME encoded-word definition
  45 ;;;
  46
  47 (defconst eword-encoded-text-regexp "[!->@-~]+")
  48 (defconst eword-encoded-word-regexp
  49   (concat (regexp-quote "=?")
  50           "\\("
  51           mime-charset-regexp
  52           "\\)"
  53           (regexp-quote "?")
  54           "\\(B\\|Q\\)"
  55           (regexp-quote "?")
  56           "\\("
  57           eword-encoded-text-regexp
  58           "\\)"
  59           (regexp-quote "?=")))
  60 (defconst eword-after-encoded-word-regexp "\\([ \t]\\|$\\)")
  61
  62 (defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+")
  63 (defconst eword-encoded-word-in-phrase-regexp
  64   (concat (regexp-quote "=?")
  65           "\\("
  66           mime-charset-regexp
  67           "\\)"
  68           (regexp-quote "?")
  69           "\\(B\\|Q\\)"
  70           (regexp-quote "?")
  71           "\\("
  72           eword-encoded-text-in-phrase-regexp
  73           "\\)"
  74           (regexp-quote "?=")))
  75 (defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t(]\\|$\\)")
  76
  77 (defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+")
  78 (defconst eword-encoded-word-in-comment-regexp
  79   (concat (regexp-quote "=?")
  80           "\\("
  81           mime-charset-regexp
  82           "\\)"
  83           (regexp-quote "?")
  84           "\\(B\\|Q\\)"
  85           (regexp-quote "?")
  86           "\\("
  87           eword-encoded-text-in-comment-regexp
  88           "\\)"
  89           (regexp-quote "?=")))
  90 (defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)")
  91
  92 (defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+")
  93 (defconst eword-encoded-word-in-quoted-string-regexp
  94   (concat (regexp-quote "=?")
  95           "\\("
  96           mime-charset-regexp
  97           "\\)"
  98           (regexp-quote "?")
  99           "\\(B\\|Q\\)"
 100           (regexp-quote "?")
 101           "\\("
 102           eword-encoded-text-in-quoted-string-regexp
 103           "\\)"
 104           (regexp-quote "?=")))
 105 (defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)")
 106
 107
 108 ;;; @@ Base64
 109 ;;;
 110
 111 (defconst base64-token-regexp "[A-Za-z0-9+/]")
 112 (defconst base64-token-padding-regexp "[A-Za-z0-9+/=]")
 113
 114 (defconst eword-B-encoded-text-regexp
 115   (concat "\\(\\("
 116           base64-token-regexp
 117           base64-token-regexp
 118           base64-token-regexp
 119           base64-token-regexp
 120           "\\)*"
 121           base64-token-regexp
 122           base64-token-regexp
 123           base64-token-padding-regexp
 124           base64-token-padding-regexp
 125           "\\)"))
 126
 127 ;; (defconst eword-B-encoding-and-encoded-text-regexp
 128 ;;   (concat "\\(B\\)\\?" eword-B-encoded-text-regexp))
 129
 130
 131 ;;; @@ Quoted-Printable
 132 ;;;
 133
 134 (defconst quoted-printable-hex-chars "0123456789ABCDEF")
 135 (defconst quoted-printable-octet-regexp
 136   (concat "=[" quoted-printable-hex-chars
 137           "][" quoted-printable-hex-chars "]"))
 138
 139 (defconst eword-Q-encoded-text-regexp
 140   (concat "\\([^=?]\\|" quoted-printable-octet-regexp "\\)+"))
 141 ;; (defconst eword-Q-encoding-and-encoded-text-regexp
 142 ;;   (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp))
 143
 144
 145 ;;; @ for string
 146 ;;;
 147
 148 (defcustom eword-decode-sticked-encoded-word nil
 149   "*If non-nil, decode encoded-words sticked on atoms,
 150 other encoded-words, etc.
 151 however this behaviour violates RFC2047."
 152   :group 'eword-decode
 153   :type 'boolean)
 154
 155 (defcustom eword-decode-quoted-encoded-word nil
 156   "*If non-nil, decode encoded-words in quoted-string
 157 however this behaviour violates RFC2047."
 158   :group 'eword-decode
 159   :type 'boolean)
 160
 161 (defun eword-decode-first-encoded-words (string
 162                                          eword-regexp
 163                                          after-regexp
 164                                          &optional must-unfold)
 165   "Decode MIME encoded-words in beginning of STRING.
 166
 167 EWORD-REGEXP is the regexp that matches a encoded-word.
 168 Usual value is eword-encoded-word-regexp,
 169 eword-encoded-text-in-phrase-regexp,
 170 eword-encoded-word-in-comment-regexp or
 171 eword-encoded-word-in-quoted-string-regexp.
 172
 173 AFTER-REGEXP is the regexp that matches a after encoded-word.
 174 Usual value is eword-after-encoded-word-regexp,
 175 eword-after-encoded-text-in-phrase-regexp,
 176 eword-after-encoded-word-in-comment-regexp or
 177 eword-after-encoded-word-in-quoted-string-regexp.
 178
 179 If beginning of STRING matches EWORD-REGEXP and AFTER-REGEXP,
 180 returns a cons cell of decoded string(sequence of characters) and
 181 the rest(sequence of octets).
 182
 183 If beginning of STRING does not matches EWORD-REGEXP and AFTER-REGEXP,
 184 returns nil.
 185
 186 If an encoded-word is broken or your emacs implementation can not
 187 decode the charset included in it, it is returned in decoded part
 188 as encoded-word form.
 189
 190 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 191 if there are in decoded encoded-words (generated by bad manner MUA
 192 such as a version of Net$cape)."
 193   (if eword-decode-sticked-encoded-word (setq after-regexp ""))
 194   (let ((between-ewords-regexp
 195           (if eword-decode-sticked-encoded-word
 196             "\\(\n?[ \t]\\)*"
 197             "\\(\n?[ \t]\\)+"))
 198         (src string)    ; sequence of octets.
 199         (dst ""))       ; sequence of characters.
 200     (if (string-match
 201           (concat "\\`\\(" eword-regexp "\\)" after-regexp) src)
 202       (let* (p
 203              (q (match-end 1))
 204              (ew (substring src 0 q))
 205              (dw (eword-decode-encoded-word ew must-unfold)))
 206         (setq dst (concat dst dw)
 207               src (substring src q))
 208         (if (not (string= ew dw))
 209           (progn
 210             (while
 211               (and
 212                 (string-match
 213                   (concat "\\`\\(" between-ewords-regexp "\\)"
 214                              "\\(" eword-regexp "\\)"
 215                              after-regexp)
 216                   src)
 217                 (progn
 218                   (setq p (match-end 1)
 219                         q (match-end 3)
 220                         ew (substring src p q)
 221                         dw (eword-decode-encoded-word ew must-unfold))
 222                   (if (string= ew dw)
 223                     (progn
 224                       (setq dst (concat dst (substring src 0 q))
 225                             src (substring src q))
 226                       nil)
 227                     t)))
 228               (setq dst (concat dst dw)
 229                     src (substring src q)))))
 230         (cons dst src))
 231       nil)))
 232
 233 (defun eword-decode-comment-string (string &optional must-unfold)
 234   (let ((src string)
 235         (buf "")
 236         (dst "")
 237         (flag-ew t))
 238     (while (< 0 (length src))
 239       (let ((ch (aref src 0))
 240             (decoded (and
 241                         flag-ew
 242                         (eword-decode-first-encoded-words src
 243                           eword-encoded-word-in-comment-regexp
 244                           eword-after-encoded-word-in-comment-regexp
 245                           must-unfold))))
 246         (if (and (not (string= buf ""))
 247                  (or decoded (eq ch ?\() (eq ch ?\))))
 248           (setq dst (concat dst
 249                       (std11-wrap-as-quoted-pairs
 250                         (decode-mime-charset-string buf
 251                           default-mime-charset)
 252                         '(?\( ?\))))
 253                 buf ""))
 254         (cond
 255           (decoded
 256             (setq dst (concat dst
 257                         (std11-wrap-as-quoted-pairs
 258                           (car decoded)
 259                           '(?( ?))))
 260                   src (cdr decoded)))
 261           ((or (eq ch ?\() (eq ch ?\)))
 262             (setq dst (concat dst (list ch))
 263                   src (substring src 1)
 264                   flag-ew t))
 265           ((eq ch ?\\)
 266             (setq buf (concat buf (list (aref src 1)))
 267                   src (substring src 2)
 268                   flag-ew t))
 269           ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n))
 270             (setq buf (concat buf (list ch))
 271                   src (substring src 1)
 272                   flag-ew t))
 273           ((string-match "\\`=?[^ \t\n()\\\\=]*" src)
 274             (setq buf (concat buf (substring src 0 (match-end 0)))
 275                   src (substring src (match-end 0))
 276                   flag-ew eword-decode-sticked-encoded-word))
 277           (t (error "something wrong")))))
 278     (if (not (string= buf ""))
 279       (setq dst (concat dst
 280                   (std11-wrap-as-quoted-pairs
 281                     (decode-mime-charset-string buf
 282                       default-mime-charset)
 283                     '(?\( ?\))))))
 284     dst))
 285
 286 (defun eword-decode-quoted-string (string &optional must-unfold)
 287   (let ((src string)
 288         (buf "")
 289         (dst "")
 290         (flag-ew t))
 291     (while (< 0 (length src))
 292       (let ((ch (aref src 0))
 293             (decoded (and
 294                         eword-decode-quoted-encoded-word
 295                         flag-ew
 296                         (eword-decode-first-encoded-words src
 297                           eword-encoded-word-in-quoted-string-regexp
 298                           eword-after-encoded-word-in-quoted-string-regexp
 299                           must-unfold))))
 300         (if (and (not (string= buf ""))
 301                  (or decoded (eq ch ?\")))
 302           (setq dst (concat dst
 303                       (std11-wrap-as-quoted-pairs
 304                         (decode-mime-charset-string buf
 305                         default-mime-charset)
 306                         '(?\")))
 307                 buf ""))
 308         (cond
 309           (decoded
 310             (setq dst (concat dst
 311                         (std11-wrap-as-quoted-pairs
 312                           (car decoded)
 313                           '(?\")))
 314                   src (cdr decoded)))
 315           ((or (eq ch ?\"))
 316             (setq dst (concat dst (list ch))
 317                   src (substring src 1)
 318                   flag-ew t))
 319           ((eq ch ?\\)
 320             (setq buf (concat buf (list (aref src 1)))
 321                   src (substring src 2)
 322                   flag-ew t))
 323           ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n))
 324             (setq buf (concat buf (list ch))
 325                   src (substring src 1)
 326                   flag-ew t))
 327           ((string-match "\\`=?[^ \t\n\"\\\\=]*" src)
 328             (setq buf (concat buf (substring src 0 (match-end 0)))
 329                   src (substring src (match-end 0))
 330                   flag-ew eword-decode-sticked-encoded-word))
 331           (t (error "something wrong")))))
 332     (if (not (string= buf ""))
 333       (setq dst (concat dst
 334                   (std11-wrap-as-quoted-pairs
 335                     (decode-mime-charset-string buf
 336                       default-mime-charset)
 337                     '(?\")))))
 338     dst))
 339
 340 (defun eword-decode-unstructured-string (string &optional must-unfold)
 341   (let ((src string)
 342         (buf "")
 343         (dst "")
 344         (flag-ew t))
 345     (while (< 0 (length src))
 346       (let ((ch (aref src 0))
 347             (decoded (and flag-ew (eword-decode-first-encoded-words src
 348                                     eword-encoded-word-regexp
 349                                     eword-after-encoded-word-regexp
 350                                     must-unfold))))
 351         (if (and (not (string= buf ""))
 352                  decoded)
 353           (setq dst (concat dst
 354                       (decode-mime-charset-string buf
 355                         default-mime-charset))
 356                 buf ""))
 357         (cond
 358           (decoded
 359             (setq dst (concat dst (car decoded))
 360                   src (cdr decoded)))
 361           ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n))
 362             (setq buf (concat buf (list ch))
 363                   src (substring src 1)
 364                   flag-ew t))
 365           ((string-match "\\`=?[^ \t\n=]*" src)
 366             (setq buf (concat buf (substring src 0 (match-end 0)))
 367                   src (substring src (match-end 0))
 368                   flag-ew eword-decode-sticked-encoded-word))
 369           (t (error "something wrong")))))
 370     (if (not (string= buf ""))
 371       (setq dst (concat dst
 372                   (decode-mime-charset-string buf
 373                     default-mime-charset))))
 374     dst))
 375
 376 (defun eword-decode-string (string &optional must-unfold)
 377   "Decode MIME encoded-words in STRING.
 378
 379 STRING is unfolded before decoding.
 380
 381 If an encoded-word is broken or your emacs implementation can not
 382 decode the charset included in it, it is not decoded.
 383
 384 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 385 if there are in decoded encoded-words (generated by bad manner MUA
 386 such as a version of Net$cape)."
 387   (eword-decode-unstructured-string
 388     (std11-unfold-string string)
 389     must-unfold))
 390
 391
 392 ;;; @ for region
 393 ;;;
 394
 395 (defun eword-decode-region (start end &optional unfolding must-unfold)
 396   "Decode MIME encoded-words in region between START and END.
 397
 398 If UNFOLDING is not nil, it unfolds before decoding.
 399
 400 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 401 if there are in decoded encoded-words (generated by bad manner MUA
 402 such as a version of Net$cape)."
 403   (interactive "*r")
 404   (save-excursion
 405     (save-restriction
 406       (narrow-to-region start end)
 407       (if unfolding
 408           (eword-decode-unfold)
 409         )
 410       (let ((str (eword-decode-unstructured-string
 411                    (buffer-substring (point-min) (point-max))
 412                    must-unfold)))
 413         (delete-region (point-min) (point-max))
 414         (insert str)))))
 415
 416
 417 ;;; @ for message header
 418 ;;;
 419
 420 (defcustom eword-decode-ignored-field-list
 421   '(newsgroups path lines nntp-posting-host message-id date)
 422   "*List of field-names to be ignored when decoding.
 423 Each field name must be symbol."
 424   :group 'eword-decode
 425   :type '(repeat symbol))
 426
 427 (defcustom eword-decode-structured-field-list
 428   '(reply-to resent-reply-to from resent-from sender resent-sender
 429              to resent-to cc resent-cc bcc resent-bcc dcc
 430              mime-version content-type content-transfer-encoding
 431              content-disposition)
 432   "*List of field-names to decode as structured field.
 433 Each field name must be symbol."
 434   :group 'eword-decode
 435   :type '(repeat symbol))
 436
 437 (defun eword-decode-header (&optional code-conversion separator)
 438   "Decode MIME encoded-words in header fields.
 439 If CODE-CONVERSION is nil, it decodes only encoded-words.  If it is
 440 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
 441 Otherwise it decodes non-ASCII bit patterns as the
 442 default-mime-charset.
 443 If SEPARATOR is not nil, it is used as header separator."
 444   (interactive "*")
 445   (save-excursion
 446     (save-restriction
 447       (std11-narrow-to-header separator)
 448       (let ((default-mime-charset
 449               (if code-conversion
 450                   (if (mime-charset-to-coding-system code-conversion)
 451                       code-conversion
 452                     default-mime-charset))))
 453         (if default-mime-charset
 454             (let (beg p end field-name len)
 455               (goto-char (point-min))
 456               (while (re-search-forward std11-field-head-regexp nil t)
 457                 (setq beg (match-beginning 0)
 458                       p (match-end 0)
 459                       field-name (buffer-substring beg (1- p))
 460                       len (string-width field-name)
 461                       field-name (intern (downcase field-name))
 462                       end (std11-field-end))
 463                 (cond ((memq field-name eword-decode-ignored-field-list)
 464                        ;; Don't decode
 465                        )
 466                       ((memq field-name eword-decode-structured-field-list)
 467                        ;; Decode as structured field
 468                        (let ((body (buffer-substring p end)))
 469                          (delete-region p end)
 470                          (insert (eword-decode-and-fold-structured-field
 471                                   body (1+ len)))
 472                          ))
 473                       (t
 474                        ;; Decode as unstructured field
 475                        (save-restriction
 476                          (narrow-to-region beg (1+ end))
 477                          (goto-char p)
 478                          (eword-decode-region beg (point-max) 'unfold)
 479                          (goto-char (point-max))
 480                          )))))
 481           (eword-decode-region (point-min) (point-max) t)
 482           )))))
 483
 484 (defun eword-decode-unfold ()
 485   (goto-char (point-min))
 486   (let (field beg end)
 487     (while (re-search-forward std11-field-head-regexp nil t)
 488       (setq beg (match-beginning 0)
 489             end (std11-field-end))
 490       (setq field (buffer-substring beg end))
 491       (if (string-match eword-encoded-word-regexp field)
 492           (save-restriction
 493             (narrow-to-region (goto-char beg) end)
 494             (while (re-search-forward "\n\\([ \t]\\)" nil t)
 495               (replace-match (match-string 1))
 496               )
 497             (goto-char (point-max))
 498             ))
 499       )))
 500
 501
 502 ;;; @ encoded-word decoder
 503 ;;;
 504
 505 (defvar eword-warning-face nil "Face used for invalid encoded-word.")
 506
 507 (defun eword-decode-encoded-word (word &optional must-unfold)
 508   "Decode WORD if it is an encoded-word.
 509
 510 If your emacs implementation can not decode the charset of WORD, it
 511 returns WORD.  Similarly the encoded-word is broken, it returns WORD.
 512
 513 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 514 if there are in decoded encoded-word (generated by bad manner MUA such
 515 as a version of Net$cape)."
 516   (or (if (string-match eword-encoded-word-regexp word)
 517           (let ((charset
 518                  (substring word (match-beginning 1) (match-end 1))
 519                  )
 520                 (encoding
 521                  (upcase
 522                   (substring word (match-beginning 2) (match-end 2))
 523                   ))
 524                 (text
 525                  (substring word (match-beginning 3) (match-end 3))
 526                  ))
 527             (condition-case err
 528                 (eword-decode-encoded-text charset encoding text must-unfold)
 529               (error
 530                (and
 531                 (add-text-properties 0 (length word)
 532                                      (and eword-warning-face
 533                                           (list 'face eword-warning-face))
 534                                      word)
 535                 word)))
 536             ))
 537       word))
 538
 539
 540 ;;; @ encoded-text decoder
 541 ;;;
 542
 543 (defun eword-decode-encoded-text (charset encoding string
 544                                           &optional must-unfold)
 545   "Decode STRING as an encoded-text.
 546
 547 If your emacs implementation can not decode CHARSET, it returns nil.
 548
 549 If ENCODING is not \"B\" or \"Q\", it occurs error.
 550 So you should write error-handling code if you don't want break by errors.
 551
 552 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 553 if there are in decoded encoded-text (generated by bad manner MUA such
 554 as a version of Net$cape)."
 555   (let ((cs (mime-charset-to-coding-system charset)))
 556     (if cs
 557         (let ((dest
 558                (cond
 559                 ((string-equal "B" encoding)
 560                  (if (and (string-match eword-B-encoded-text-regexp string)
 561                           (string-equal string (match-string 0 string)))
 562                      (base64-decode-string string)
 563                    (error "Invalid encoded-text %s" string)))
 564                 ((string-equal "Q" encoding)
 565                  (if (and (string-match eword-Q-encoded-text-regexp string)
 566                           (string-equal string (match-string 0 string)))
 567                      (q-encoding-decode-string string)
 568                    (error "Invalid encoded-text %s" string)))
 569                 (t
 570                  (error "Invalid encoding %s" encoding)
 571                  )))
 572               )
 573           (if dest
 574               (progn
 575                 (setq dest (decode-coding-string dest cs))
 576                 (if must-unfold
 577                     (mapconcat (function
 578                                 (lambda (chr)
 579                                   (cond
 580                                    ((eq chr ?\n) "")
 581                                    ((eq chr ?\t) " ")
 582                                    (t (char-to-string chr)))
 583                                   ))
 584                                (std11-unfold-string dest)
 585                                "")
 586                   dest)
 587                 ))))))
 588
 589
 590 ;;; @ lexical analyze
 591 ;;;
 592
 593 (defvar eword-lexical-analyze-cache nil)
 594 (defvar eword-lexical-analyze-cache-max 299
 595   "*Max position of eword-lexical-analyze-cache.
 596 It is max size of eword-lexical-analyze-cache - 1.")
 597
 598 (defcustom eword-lexical-analyzers
 599   '(eword-analyze-quoted-string
 600     eword-analyze-domain-literal
 601     eword-analyze-comment
 602     eword-analyze-spaces
 603     eword-analyze-special
 604     eword-analyze-encoded-word
 605     eword-analyze-atom)
 606   "*List of functions to return result of lexical analyze.
 607 Each function must have two arguments: STRING and MUST-UNFOLD.
 608 STRING is the target string to be analyzed.
 609 If MUST-UNFOLD is not nil, each function must unfold and eliminate
 610 bare-CR and bare-LF from the result even if they are included in
 611 content of the encoded-word.
 612 Each function must return nil if it can not analyze STRING as its
 613 format.
 614
 615 Previous function is preferred to next function.  If a function
 616 returns nil, next function is used.  Otherwise the return value will
 617 be the result."
 618   :group 'eword-decode
 619   :type '(repeat function))
 620
 621 (defun eword-analyze-quoted-string (string &optional must-unfold)
 622   (let ((p (std11-check-enclosure string ?\" ?\")))
 623     (if p
 624         (cons (cons 'quoted-string
 625                     (eword-decode-quoted-string (substring string 0 p)))
 626               (substring string p))
 627       )))
 628
 629 (defun eword-analyze-domain-literal (string &optional must-unfold)
 630   (std11-analyze-domain-literal string))
 631
 632 (defun eword-analyze-comment (string &optional must-unfold)
 633   (let ((len (length string)))
 634     (if (and (< 0 len) (eq (aref string 0) ?\())
 635         (let ((p 0))
 636           (while (and p (< p len) (eq (aref string p) ?\())
 637             (setq p (std11-check-enclosure string ?\( ?\) t p)))
 638           (setq p (or p len))
 639           (cons (cons 'comment
 640                       (eword-decode-comment-string (substring string 0 p)))
 641                 (substring string p)))
 642       nil)))
 643
 644 (defun eword-analyze-spaces (string &optional must-unfold)
 645   (std11-analyze-spaces string))
 646
 647 (defun eword-analyze-special (string &optional must-unfold)
 648   (std11-analyze-special string))
 649
 650 (defun eword-analyze-encoded-word (string &optional must-unfold)
 651   (let ((decoded (eword-decode-first-encoded-words
 652                    string
 653                    eword-encoded-word-in-phrase-regexp
 654                    eword-after-encoded-word-in-phrase-regexp
 655                    must-unfold)))
 656     (if decoded
 657       (cons (cons 'atom (car decoded)) (cdr decoded)))))
 658
 659 (defun eword-analyze-atom (string &optional must-unfold)
 660   (if (let ((enable-multibyte-characters nil))
 661         (string-match std11-atom-regexp string))
 662       (let ((end (match-end 0)))
 663         (if (and eword-decode-sticked-encoded-word
 664                  (string-match eword-encoded-word-in-phrase-regexp
 665                                (substring string 0 end))
 666                  (< 0 (match-beginning 0)))
 667             (setq end (match-beginning 0)))
 668         (cons (cons 'atom (decode-mime-charset-string
 669                            (substring string 0 end)
 670                            default-mime-charset))
 671               (substring string end)
 672               ))))
 673
 674 (defun eword-lexical-analyze-internal (string must-unfold)
 675   (let (dest ret)
 676     (while (not (string-equal string ""))
 677       (setq ret
 678             (let ((rest eword-lexical-analyzers)
 679                   func r)
 680               (while (and (setq func (car rest))
 681                           (null (setq r (funcall func string must-unfold)))
 682                           )
 683                 (setq rest (cdr rest)))
 684               (or r `((error . ,string) . ""))
 685               ))
 686       (setq dest (cons (car ret) dest))
 687       (setq string (cdr ret))
 688       )
 689     (nreverse dest)
 690     ))
 691
 692 (defun eword-lexical-analyze (string &optional must-unfold)
 693   "Return lexical analyzed list corresponding STRING.
 694 It is like std11-lexical-analyze, but it decodes non us-ascii
 695 characters encoded as encoded-words or invalid \"raw\" format.
 696 \"Raw\" non us-ascii characters are regarded as variable
 697 `default-mime-charset'."
 698   (let* ((str (copy-sequence string))
 699          (key (cons str (cons default-mime-charset must-unfold)))
 700          ret)
 701     (set-text-properties 0 (length str) nil str)
 702     (if (setq ret (assoc key eword-lexical-analyze-cache))
 703         (cdr ret)
 704       (setq ret (eword-lexical-analyze-internal str must-unfold))
 705       (setq eword-lexical-analyze-cache
 706             (cons (cons key ret)
 707                   (last eword-lexical-analyze-cache
 708                         eword-lexical-analyze-cache-max)))
 709       ret)))
 710
 711 (defun eword-decode-token (token)
 712   (cdr token))
 713
 714 (defun eword-decode-and-fold-structured-field
 715   (string start-column &optional max-column must-unfold)
 716   "Decode and fold (fill) STRING as structured field body.
 717 It decodes non us-ascii characters in FULL-NAME encoded as
 718 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 719 characters are regarded as variable `default-mime-charset'.
 720
 721 If an encoded-word is broken or your emacs implementation can not
 722 decode the charset included in it, it is not decoded.
 723
 724 If MAX-COLUMN is omitted, `fill-column' is used.
 725
 726 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 727 if there are in decoded encoded-words (generated by bad manner MUA
 728 such as a version of Net$cape)."
 729   (or max-column
 730       (setq max-column fill-column))
 731   (let ((c start-column)
 732         (tokens (eword-lexical-analyze string must-unfold))
 733         (result "")
 734         token)
 735     (while (and (setq token (car tokens))
 736                 (setq tokens (cdr tokens)))
 737       (let* ((type (car token)))
 738         (if (eq type 'spaces)
 739             (let* ((next-token (car tokens))
 740                    (next-str (eword-decode-token next-token))
 741                    (next-len (string-width next-str))
 742                    (next-c (+ c next-len 1)))
 743               (if (< next-c max-column)
 744                   (setq result (concat result " " next-str)
 745                         c next-c)
 746                 (setq result (concat result "\n " next-str)
 747                       c (1+ next-len)))
 748               (setq tokens (cdr tokens))
 749               )
 750           (let* ((str (eword-decode-token token)))
 751             (setq result (concat result str)
 752                   c (+ c (string-width str)))
 753             ))))
 754     (if token
 755         (concat result (eword-decode-token token))
 756       result)))
 757
 758 (defun eword-decode-and-unfold-structured-field (string)
 759   "Decode and unfold STRING as structured field body.
 760 It decodes non us-ascii characters in FULL-NAME encoded as
 761 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 762 characters are regarded as variable `default-mime-charset'.
 763
 764 If an encoded-word is broken or your emacs implementation can not
 765 decode the charset included in it, it is not decoded."
 766   (let ((tokens (eword-lexical-analyze string 'must-unfold))
 767         (result ""))
 768     (while tokens
 769       (let* ((token (car tokens))
 770              (type (car token)))
 771         (setq tokens (cdr tokens))
 772         (setq result
 773               (if (eq type 'spaces)
 774                   (concat result " ")
 775                 (concat result (eword-decode-token token))
 776                 ))))
 777     result))
 778
 779 (defun eword-decode-structured-field-body (string &optional must-unfold
 780                                                   start-column max-column)
 781   "Decode non us-ascii characters in STRING as structured field body.
 782 STRING is unfolded before decoding.
 783
 784 It decodes non us-ascii characters in FULL-NAME encoded as
 785 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 786 characters are regarded as variable `default-mime-charset'.
 787
 788 If an encoded-word is broken or your emacs implementation can not
 789 decode the charset included in it, it is not decoded.
 790
 791 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 792 if there are in decoded encoded-words (generated by bad manner MUA
 793 such as a version of Net$cape)."
 794   (if start-column
 795       ;; fold with max-column
 796       (eword-decode-and-fold-structured-field
 797        string start-column max-column must-unfold)
 798     ;; Don't fold
 799     (mapconcat (function eword-decode-token)
 800                (eword-lexical-analyze string must-unfold)
 801                "")
 802     ))
 803
 804 (defun eword-decode-unstructured-field-body (string &optional must-unfold)
 805   "Decode non us-ascii characters in STRING as unstructured field body.
 806 STRING is unfolded before decoding.
 807
 808 It decodes non us-ascii characters in FULL-NAME encoded as
 809 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 810 characters are regarded as variable `default-mime-charset'.
 811
 812 If an encoded-word is broken or your emacs implementation can not
 813 decode the charset included in it, it is not decoded.
 814
 815 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 816 if there are in decoded encoded-words (generated by bad manner MUA
 817 such as a version of Net$cape)."
 818   (eword-decode-string string must-unfold))
 819
 820 (defun eword-extract-address-components (string)
 821   "Extract full name and canonical address from STRING.
 822 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
 823 If no name can be extracted, FULL-NAME will be nil.
 824 It decodes non us-ascii characters in FULL-NAME encoded as
 825 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 826 characters are regarded as variable `default-mime-charset'."
 827   (let* ((structure (car (std11-parse-address
 828                           (eword-lexical-analyze
 829                            (std11-unfold-string string) 'must-unfold))))
 830          (phrase  (std11-full-name-string structure))
 831          (address (std11-address-string structure))
 832          )
 833     (list phrase address)
 834     ))
 835
 836
 837 ;;; @ end
 838 ;;;
 839
 840 (provide 'eword-decode)
 841
 842 ;;; eword-decode.el ends here