eword-decode.el

   1 ;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs
   2
   3 ;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc.
   4
   5 ;; Author: ENAMI Tsugutomo <enami@sys.ptg.sony.co.jp>
   6 ;;         MORIOKA Tomohiko <morioka@jaist.ac.jp>
   7 ;; Maintainer: MORIOKA Tomohiko <morioka@jaist.ac.jp>
   8 ;; Created: 1995/10/03
   9 ;; Original: 1992/07/20 ENAMI Tsugutomo's `mime.el'.
  10 ;;      Renamed: 1993/06/03 to tiny-mime.el
  11 ;;      Renamed: 1995/10/03 from tiny-mime.el (split off encoder)
  12 ;;      Renamed: 1997/02/22 from tm-ew-d.el
  13 ;; Keywords: encoded-word, MIME, multilingual, header, mail, news
  14
  15 ;; This file is part of SEMI (Spadework for Emacs MIME Interfaces).
  16
  17 ;; This program is free software; you can redistribute it and/or
  18 ;; modify it under the terms of the GNU General Public License as
  19 ;; published by the Free Software Foundation; either version 2, or (at
  20 ;; your option) any later version.
  21
  22 ;; This program is distributed in the hope that it will be useful, but
  23 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  24 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  25 ;; General Public License for more details.
  26
  27 ;; You should have received a copy of the GNU General Public License
  28 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
  29 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  30 ;; Boston, MA 02111-1307, USA.
  31
  32 ;;; Code:
  33
  34 (require 'std11-parse)
  35 (require 'mel)
  36 (require 'mime-def)
  37
  38 (defgroup eword-decode nil
  39   "Encoded-word decoding"
  40   :group 'mime)
  41
  42
  43 ;;; @ MIME encoded-word definition
  44 ;;;
  45
  46 (defconst eword-encoded-text-regexp "[!->@-~]+")
  47 (defconst eword-encoded-word-regexp
  48   (concat (regexp-quote "=?")
  49           "\\("
  50           mime-charset-regexp
  51           "\\)"
  52           (regexp-quote "?")
  53           "\\(B\\|Q\\)"
  54           (regexp-quote "?")
  55           "\\("
  56           eword-encoded-text-regexp
  57           "\\)"
  58           (regexp-quote "?=")))
  59 (defconst eword-after-encoded-word-regexp "\\([ \t]\\|$\\)")
  60
  61 (defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+")
  62 (defconst eword-encoded-word-in-phrase-regexp
  63   (concat (regexp-quote "=?")
  64           "\\("
  65           mime-charset-regexp
  66           "\\)"
  67           (regexp-quote "?")
  68           "\\(B\\|Q\\)"
  69           (regexp-quote "?")
  70           "\\("
  71           eword-encoded-text-in-phrase-regexp
  72           "\\)"
  73           (regexp-quote "?=")))
  74 (defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t(]\\|$\\)")
  75
  76 (defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+")
  77 (defconst eword-encoded-word-in-comment-regexp
  78   (concat (regexp-quote "=?")
  79           "\\("
  80           mime-charset-regexp
  81           "\\)"
  82           (regexp-quote "?")
  83           "\\(B\\|Q\\)"
  84           (regexp-quote "?")
  85           "\\("
  86           eword-encoded-text-in-comment-regexp
  87           "\\)"
  88           (regexp-quote "?=")))
  89 (defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)")
  90
  91 (defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+")
  92 (defconst eword-encoded-word-in-quoted-string-regexp
  93   (concat (regexp-quote "=?")
  94           "\\("
  95           mime-charset-regexp
  96           "\\)"
  97           (regexp-quote "?")
  98           "\\(B\\|Q\\)"
  99           (regexp-quote "?")
 100           "\\("
 101           eword-encoded-text-in-quoted-string-regexp
 102           "\\)"
 103           (regexp-quote "?=")))
 104 (defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)")
 105
 106
 107 ;;; @@ Base64
 108 ;;;
 109
 110 (defconst base64-token-regexp "[A-Za-z0-9+/]")
 111 (defconst base64-token-padding-regexp "[A-Za-z0-9+/=]")
 112
 113 (defconst eword-B-encoded-text-regexp
 114   (concat "\\(\\("
 115           base64-token-regexp
 116           base64-token-regexp
 117           base64-token-regexp
 118           base64-token-regexp
 119           "\\)*"
 120           base64-token-regexp
 121           base64-token-regexp
 122           base64-token-padding-regexp
 123           base64-token-padding-regexp
 124           "\\)"))
 125
 126 ;; (defconst eword-B-encoding-and-encoded-text-regexp
 127 ;;   (concat "\\(B\\)\\?" eword-B-encoded-text-regexp))
 128
 129
 130 ;;; @@ Quoted-Printable
 131 ;;;
 132
 133 (defconst quoted-printable-hex-chars "0123456789ABCDEF")
 134 (defconst quoted-printable-octet-regexp
 135   (concat "=[" quoted-printable-hex-chars
 136           "][" quoted-printable-hex-chars "]"))
 137
 138 (defconst eword-Q-encoded-text-regexp
 139   (concat "\\([^=?]\\|" quoted-printable-octet-regexp "\\)+"))
 140 ;; (defconst eword-Q-encoding-and-encoded-text-regexp
 141 ;;   (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp))
 142
 143
 144 ;;; @ for string
 145 ;;;
 146
 147 (defcustom eword-decode-sticked-encoded-word nil
 148   "*If non-nil, decode encoded-words sticked on atoms,
 149 other encoded-words, etc.
 150 however this behaviour violates RFC2047."
 151   :group 'eword-decode
 152   :type 'boolean)
 153
 154 (defvar eword-decode-quoted-encoded-word nil
 155   "*If non-nil, decode encoded-words in quoted-string
 156 however this behaviour violates RFC2047.")
 157
 158 (defun eword-decode-first-encoded-words (string
 159                                          eword-regexp
 160                                          after-regexp
 161                                          &optional must-unfold)
 162   "Decode MIME encoded-words in beginning of STRING.
 163
 164 EWORD-REGEXP is the regexp that matches a encoded-word.
 165 Usual value is eword-encoded-word-regexp,
 166 eword-encoded-text-in-phrase-regexp,
 167 eword-encoded-word-in-comment-regexp or
 168 eword-encoded-word-in-quoted-string-regexp.
 169
 170 AFTER-REGEXP is the regexp that matches a after encoded-word.
 171 Usual value is eword-after-encoded-word-regexp,
 172 eword-after-encoded-text-in-phrase-regexp,
 173 eword-after-encoded-word-in-comment-regexp or
 174 eword-after-encoded-word-in-quoted-string-regexp.
 175
 176 If beginning of STRING matches EWORD-REGEXP and AFTER-REGEXP,
 177 returns a cons cell of decoded string(sequence of characters) and
 178 the rest(sequence of octets).
 179
 180 If beginning of STRING does not matches EWORD-REGEXP and AFTER-REGEXP,
 181 returns nil.
 182
 183 If an encoded-word is broken or your emacs implementation can not
 184 decode the charset included in it, it is returned in decoded part
 185 as encoded-word form.
 186
 187 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 188 if there are in decoded encoded-words (generated by bad manner MUA
 189 such as a version of Net$cape)."
 190   (if eword-decode-sticked-encoded-word (setq after-regexp ""))
 191   (let ((between-ewords-regexp
 192           (if eword-decode-sticked-encoded-word
 193             "\\(\n?[ \t]\\)*"
 194             "\\(\n?[ \t]\\)+"))
 195         (src string)    ; sequence of octets.
 196         (dst ""))       ; sequence of characters.
 197     (if (string-match
 198           (concat "\\`\\(" eword-regexp "\\)" after-regexp) src)
 199       (let* (p
 200              (q (match-end 1))
 201              (ew (substring src 0 q))
 202              (dw (eword-decode-encoded-word ew must-unfold)))
 203         (setq dst (concat dst dw)
 204               src (substring src q))
 205         (if (not (string= ew dw))
 206           (progn
 207             (while
 208               (and
 209                 (string-match
 210                   (concat "\\`\\(" between-ewords-regexp "\\)"
 211                              "\\(" eword-regexp "\\)"
 212                              after-regexp)
 213                   src)
 214                 (progn
 215                   (setq p (match-end 1)
 216                         q (match-end 3)
 217                         ew (substring src p q)
 218                         dw (eword-decode-encoded-word ew must-unfold))
 219                   (if (string= ew dw)
 220                     (progn
 221                       (setq dst (concat dst (substring src 0 q))
 222                             src (substring src q))
 223                       nil)
 224                     t)))
 225               (setq dst (concat dst dw)
 226                     src (substring src q)))))
 227         (cons dst src))
 228       nil)))
 229
 230 (defun eword-decode-comment-string (string &optional must-unfold)
 231   (let ((src string)
 232         (buf "")
 233         (dst "")
 234         (flag-ew t))
 235     (while (< 0 (length src))
 236       (let ((ch (aref src 0))
 237             (decoded (and
 238                         flag-ew
 239                         (eword-decode-first-encoded-words src
 240                           eword-encoded-word-in-comment-regexp
 241                           eword-after-encoded-word-in-comment-regexp
 242                           must-unfold))))
 243         (if (and (not (string= buf ""))
 244                  (or decoded (eq ch ?\() (eq ch ?\))))
 245           (setq dst (concat dst
 246                       (std11-wrap-as-quoted-pairs
 247                         (decode-mime-charset-string buf
 248                           default-mime-charset)
 249                         '(?\( ?\))))
 250                 buf ""))
 251         (cond
 252           (decoded
 253             (setq dst (concat dst
 254                         (std11-wrap-as-quoted-pairs
 255                           (car decoded)
 256                           '(?( ?))))
 257                   src (cdr decoded)))
 258           ((or (eq ch ?\() (eq ch ?\)))
 259             (setq dst (concat dst (list ch))
 260                   src (substring src 1)
 261                   flag-ew t))
 262           ((eq ch ?\\)
 263             (setq buf (concat buf (list (aref src 1)))
 264                   src (substring src 2)
 265                   flag-ew t))
 266           ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n))
 267             (setq buf (concat buf (list ch))
 268                   src (substring src 1)
 269                   flag-ew t))
 270           ((string-match "\\`=?[^ \t\n()\\\\=]*" src)
 271             (setq buf (concat buf (substring src 0 (match-end 0)))
 272                   src (substring src (match-end 0))
 273                   flag-ew eword-decode-sticked-encoded-word))
 274           (t (error "something wrong")))))
 275     (if (not (string= buf ""))
 276       (setq dst (concat dst
 277                   (std11-wrap-as-quoted-pairs
 278                     (decode-mime-charset-string buf
 279                       default-mime-charset)
 280                     '(?\( ?\))))))
 281     dst))
 282
 283 (defun eword-decode-quoted-string (string &optional must-unfold)
 284   (let ((src string)
 285         (buf "")
 286         (dst "")
 287         (flag-ew t))
 288     (while (< 0 (length src))
 289       (let ((ch (aref src 0))
 290             (decoded (and
 291                         eword-decode-quoted-encoded-word
 292                         flag-ew
 293                         (eword-decode-first-encoded-words src
 294                           eword-encoded-word-in-quoted-string-regexp
 295                           eword-after-encoded-word-in-quoted-string-regexp
 296                           must-unfold))))
 297         (if (and (not (string= buf ""))
 298                  (or decoded (eq ch ?\")))
 299           (setq dst (concat dst
 300                       (std11-wrap-as-quoted-pairs
 301                         (decode-mime-charset-string buf
 302                         default-mime-charset)
 303                         '(?\")))
 304                 buf ""))
 305         (cond
 306           (decoded
 307             (setq dst (concat dst
 308                         (std11-wrap-as-quoted-pairs
 309                           (car decoded)
 310                           '(?\")))
 311                   src (cdr decoded)))
 312           ((or (eq ch ?\"))
 313             (setq dst (concat dst (list ch))
 314                   src (substring src 1)
 315                   flag-ew t))
 316           ((eq ch ?\\)
 317             (setq buf (concat buf (list (aref src 1)))
 318                   src (substring src 2)
 319                   flag-ew t))
 320           ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n))
 321             (setq buf (concat buf (list ch))
 322                   src (substring src 1)
 323                   flag-ew t))
 324           ((string-match "\\`=?[^ \t\n\"\\\\=]*" src)
 325             (setq buf (concat buf (substring src 0 (match-end 0)))
 326                   src (substring src (match-end 0))
 327                   flag-ew eword-decode-sticked-encoded-word))
 328           (t (error "something wrong")))))
 329     (if (not (string= buf ""))
 330       (setq dst (concat dst
 331                   (std11-wrap-as-quoted-pairs
 332                     (decode-mime-charset-string buf
 333                       default-mime-charset)
 334                     '(?\")))))
 335     dst))
 336
 337 (defun eword-decode-unstructured-string (string &optional must-unfold)
 338   (let ((src string)
 339         (buf "")
 340         (dst "")
 341         (flag-ew t))
 342     (while (< 0 (length src))
 343       (let ((ch (aref src 0))
 344             (decoded (and flag-ew (eword-decode-first-encoded-words src
 345                                     eword-encoded-word-regexp
 346                                     eword-after-encoded-word-regexp
 347                                     must-unfold))))
 348         (if (and (not (string= buf ""))
 349                  decoded)
 350           (setq dst (concat dst
 351                       (decode-mime-charset-string buf
 352                         default-mime-charset))
 353                 buf ""))
 354         (cond
 355           (decoded
 356             (setq dst (concat dst (car decoded))
 357                   src (cdr decoded)))
 358           ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n))
 359             (setq buf (concat buf (list ch))
 360                   src (substring src 1)
 361                   flag-ew t))
 362           ((string-match "\\`=?[^ \t\n=]*" src)
 363             (setq buf (concat buf (substring src 0 (match-end 0)))
 364                   src (substring src (match-end 0))
 365                   flag-ew eword-decode-sticked-encoded-word))
 366           (t (error "something wrong")))))
 367     (if (not (string= buf ""))
 368       (setq dst (concat dst
 369                   (decode-mime-charset-string buf
 370                     default-mime-charset))))
 371     dst))
 372
 373 (defun eword-decode-string (string &optional must-unfold)
 374   "Decode MIME encoded-words in STRING.
 375
 376 STRING is unfolded before decoding.
 377
 378 If an encoded-word is broken or your emacs implementation can not
 379 decode the charset included in it, it is not decoded.
 380
 381 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 382 if there are in decoded encoded-words (generated by bad manner MUA
 383 such as a version of Net$cape)."
 384   (eword-decode-unstructured-string
 385     (std11-unfold-string string)
 386     must-unfold))
 387
 388
 389 ;;; @ for region
 390 ;;;
 391
 392 (defun eword-decode-region (start end &optional unfolding must-unfold)
 393   "Decode MIME encoded-words in region between START and END.
 394
 395 If UNFOLDING is not nil, it unfolds before decoding.
 396
 397 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 398 if there are in decoded encoded-words (generated by bad manner MUA
 399 such as a version of Net$cape)."
 400   (interactive "*r")
 401   (save-excursion
 402     (save-restriction
 403       (narrow-to-region start end)
 404       (if unfolding
 405           (eword-decode-unfold)
 406         )
 407       (let ((str (eword-decode-unstructured-string
 408                    (buffer-substring (point-min) (point-max))
 409                    must-unfold)))
 410         (delete-region (point-min) (point-max))
 411         (insert str)))))
 412
 413
 414 ;;; @ for message header
 415 ;;;
 416
 417 (defcustom eword-decode-ignored-field-list
 418   '(newsgroups path lines nntp-posting-host message-id date)
 419   "*List of field-names to be ignored when decoding.
 420 Each field name must be symbol."
 421   :group 'eword-decode
 422   :type '(repeat symbol))
 423
 424 (defcustom eword-decode-structured-field-list
 425   '(reply-to resent-reply-to from resent-from sender resent-sender
 426              to resent-to cc resent-cc bcc resent-bcc dcc
 427              mime-version content-type content-transfer-encoding
 428              content-disposition)
 429   "*List of field-names to decode as structured field.
 430 Each field name must be symbol."
 431   :group 'eword-decode
 432   :type '(repeat symbol))
 433
 434 (defun eword-decode-header (&optional code-conversion separator)
 435   "Decode MIME encoded-words in header fields.
 436 If CODE-CONVERSION is nil, it decodes only encoded-words.  If it is
 437 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
 438 Otherwise it decodes non-ASCII bit patterns as the
 439 default-mime-charset.
 440 If SEPARATOR is not nil, it is used as header separator."
 441   (interactive "*")
 442   (save-excursion
 443     (save-restriction
 444       (std11-narrow-to-header separator)
 445       (let ((default-charset
 446               (if code-conversion
 447                   (if (mime-charset-to-coding-system code-conversion)
 448                       code-conversion
 449                     default-mime-charset))))
 450         (if default-charset
 451             (let (beg p end field-name len)
 452               (goto-char (point-min))
 453               (while (re-search-forward std11-field-head-regexp nil t)
 454                 (setq beg (match-beginning 0)
 455                       p (match-end 0)
 456                       field-name (buffer-substring beg (1- p))
 457                       len (string-width field-name)
 458                       field-name (intern (downcase field-name))
 459                       end (std11-field-end))
 460                 (cond ((memq field-name eword-decode-ignored-field-list)
 461                        ;; Don't decode
 462                        )
 463                       ((memq field-name eword-decode-structured-field-list)
 464                        ;; Decode as structured field
 465                        (let ((body (buffer-substring p end))
 466                              (default-mime-charset default-charset))
 467                          (delete-region p end)
 468                          (insert (eword-decode-and-fold-structured-field
 469                                   body (1+ len)))
 470                          ))
 471                       (t
 472                        ;; Decode as unstructured field
 473                        (save-restriction
 474                          (narrow-to-region beg (1+ end))
 475                          (goto-char p)
 476                          (let ((default-mime-charset default-charset))
 477                            (eword-decode-region beg (point-max) 'unfold))
 478                          (goto-char (point-max))
 479                          )))))
 480           (eword-decode-region (point-min) (point-max) t)
 481           )))))
 482
 483 (defun eword-decode-unfold ()
 484   (goto-char (point-min))
 485   (let (field beg end)
 486     (while (re-search-forward std11-field-head-regexp nil t)
 487       (setq beg (match-beginning 0)
 488             end (std11-field-end))
 489       (setq field (buffer-substring beg end))
 490       (if (string-match eword-encoded-word-regexp field)
 491           (save-restriction
 492             (narrow-to-region (goto-char beg) end)
 493             (while (re-search-forward "\n\\([ \t]\\)" nil t)
 494               (replace-match (match-string 1))
 495               )
 496             (goto-char (point-max))
 497             ))
 498       )))
 499
 500
 501 ;;; @ encoded-word decoder
 502 ;;;
 503
 504 (defvar eword-warning-face nil "Face used for invalid encoded-word.")
 505
 506 (defun eword-decode-encoded-word (word &optional must-unfold)
 507   "Decode WORD if it is an encoded-word.
 508
 509 If your emacs implementation can not decode the charset of WORD, it
 510 returns WORD.  Similarly the encoded-word is broken, it returns WORD.
 511
 512 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 513 if there are in decoded encoded-word (generated by bad manner MUA such
 514 as a version of Net$cape)."
 515   (or (if (string-match eword-encoded-word-regexp word)
 516           (let ((charset
 517                  (substring word (match-beginning 1) (match-end 1))
 518                  )
 519                 (encoding
 520                  (upcase
 521                   (substring word (match-beginning 2) (match-end 2))
 522                   ))
 523                 (text
 524                  (substring word (match-beginning 3) (match-end 3))
 525                  ))
 526             (condition-case err
 527                 (eword-decode-encoded-text charset encoding text must-unfold)
 528               (error
 529                (and
 530                 (add-text-properties 0 (length word)
 531                                      (and eword-warning-face
 532                                           (list 'face eword-warning-face))
 533                                      word)
 534                 word)))
 535             ))
 536       word))
 537
 538
 539 ;;; @ encoded-text decoder
 540 ;;;
 541
 542 (defun eword-decode-encoded-text (charset encoding string
 543                                           &optional must-unfold)
 544   "Decode STRING as an encoded-text.
 545
 546 If your emacs implementation can not decode CHARSET, it returns nil.
 547
 548 If ENCODING is not \"B\" or \"Q\", it occurs error.
 549 So you should write error-handling code if you don't want break by errors.
 550
 551 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 552 if there are in decoded encoded-text (generated by bad manner MUA such
 553 as a version of Net$cape)."
 554   (let ((cs (mime-charset-to-coding-system charset)))
 555     (if cs
 556         (let ((dest
 557                (cond
 558                 ((string-equal "B" encoding)
 559                  (if (and (string-match eword-B-encoded-text-regexp string)
 560                           (string-equal string (match-string 0 string)))
 561                      (base64-decode-string string)
 562                    (error "Invalid encoded-text %s" string)))
 563                 ((string-equal "Q" encoding)
 564                  (if (and (string-match eword-Q-encoded-text-regexp string)
 565                           (string-equal string (match-string 0 string)))
 566                      (q-encoding-decode-string string)
 567                    (error "Invalid encoded-text %s" string)))
 568                 (t
 569                  (error "Invalid encoding %s" encoding)
 570                  )))
 571               )
 572           (if dest
 573               (progn
 574                 (setq dest (decode-coding-string dest cs))
 575                 (if must-unfold
 576                     (mapconcat (function
 577                                 (lambda (chr)
 578                                   (cond
 579                                    ((eq chr ?\n) "")
 580                                    ((eq chr ?\t) " ")
 581                                    (t (char-to-string chr)))
 582                                   ))
 583                                (std11-unfold-string dest)
 584                                "")
 585                   dest)
 586                 ))))))
 587
 588
 589 ;;; @ lexical analyze
 590 ;;;
 591
 592 (defvar eword-lexical-analyze-cache nil)
 593 (defvar eword-lexical-analyze-cache-max 299
 594   "*Max position of eword-lexical-analyze-cache.
 595 It is max size of eword-lexical-analyze-cache - 1.")
 596
 597 (defcustom eword-lexical-analyzers
 598   '(eword-analyze-quoted-string
 599     eword-analyze-domain-literal
 600     eword-analyze-comment
 601     eword-analyze-spaces
 602     eword-analyze-special
 603     eword-analyze-encoded-word
 604     eword-analyze-atom)
 605   "*List of functions to return result of lexical analyze.
 606 Each function must have two arguments: STRING and MUST-UNFOLD.
 607 STRING is the target string to be analyzed.
 608 If MUST-UNFOLD is not nil, each function must unfold and eliminate
 609 bare-CR and bare-LF from the result even if they are included in
 610 content of the encoded-word.
 611 Each function must return nil if it can not analyze STRING as its
 612 format.
 613
 614 Previous function is preferred to next function.  If a function
 615 returns nil, next function is used.  Otherwise the return value will
 616 be the result."
 617   :group 'eword-decode
 618   :type '(repeat function))
 619
 620 (defun eword-analyze-quoted-string (string &optional must-unfold)
 621   (let ((p (std11-check-enclosure string ?\" ?\")))
 622     (if p
 623         (cons (cons 'quoted-string
 624                     (eword-decode-quoted-string (substring string 0 p)))
 625               (substring string p))
 626       )))
 627
 628 (defun eword-analyze-domain-literal (string &optional must-unfold)
 629   (std11-analyze-domain-literal string))
 630
 631 (defun eword-analyze-comment (string &optional must-unfold)
 632   (let ((len (length string)))
 633     (if (and (< 0 len) (eq (aref string 0) ?\())
 634         (let ((p 0))
 635           (while (and p (< p len) (eq (aref string p) ?\())
 636             (setq p (std11-check-enclosure string ?\( ?\) t p)))
 637           (setq p (or p len))
 638           (cons (cons 'comment
 639                       (eword-decode-comment-string (substring string 0 p)))
 640                 (substring string p)))
 641       nil)))
 642
 643 (defun eword-analyze-spaces (string &optional must-unfold)
 644   (std11-analyze-spaces string))
 645
 646 (defun eword-analyze-special (string &optional must-unfold)
 647   (std11-analyze-special string))
 648
 649 (defun eword-analyze-encoded-word (string &optional must-unfold)
 650   (let ((decoded (eword-decode-first-encoded-words
 651                    string
 652                    eword-encoded-word-in-phrase-regexp
 653                    eword-after-encoded-word-in-phrase-regexp
 654                    must-unfold)))
 655     (if decoded
 656       (cons (cons 'atom (car decoded)) (cdr decoded)))))
 657
 658 (defun eword-analyze-atom (string &optional must-unfold)
 659   (if (let ((enable-multibyte-characters nil))
 660         (string-match std11-atom-regexp string))
 661       (let ((end (match-end 0)))
 662         (if (and eword-decode-sticked-encoded-word
 663                  (string-match eword-encoded-word-in-phrase-regexp
 664                                (substring string 0 end))
 665                  (< 0 (match-beginning 0)))
 666             (setq end (match-beginning 0)))
 667         (cons (cons 'atom (decode-mime-charset-string
 668                            (substring string 0 end)
 669                            default-mime-charset))
 670               (substring string end)
 671               ))))
 672
 673 (defun eword-lexical-analyze-internal (string must-unfold)
 674   (let (dest ret)
 675     (while (not (string-equal string ""))
 676       (setq ret
 677             (let ((rest eword-lexical-analyzers)
 678                   func r)
 679               (while (and (setq func (car rest))
 680                           (null (setq r (funcall func string must-unfold)))
 681                           )
 682                 (setq rest (cdr rest)))
 683               (or r `((error . ,string) . ""))
 684               ))
 685       (setq dest (cons (car ret) dest))
 686       (setq string (cdr ret))
 687       )
 688     (nreverse dest)
 689     ))
 690
 691 (defun eword-lexical-analyze (string &optional must-unfold)
 692   "Return lexical analyzed list corresponding STRING.
 693 It is like std11-lexical-analyze, but it decodes non us-ascii
 694 characters encoded as encoded-words or invalid \"raw\" format.
 695 \"Raw\" non us-ascii characters are regarded as variable
 696 `default-mime-charset'."
 697   (let* ((str (copy-sequence string))
 698          (key (cons str (cons default-mime-charset must-unfold)))
 699          ret)
 700     (set-text-properties 0 (length str) nil str)
 701     (if (setq ret (assoc key eword-lexical-analyze-cache))
 702         (cdr ret)
 703       (setq ret (eword-lexical-analyze-internal str must-unfold))
 704       (setq eword-lexical-analyze-cache
 705             (cons (cons key ret)
 706                   (last eword-lexical-analyze-cache
 707                         eword-lexical-analyze-cache-max)))
 708       ret)))
 709
 710 (defun eword-decode-token (token)
 711   (cdr token))
 712
 713 (defun eword-decode-and-fold-structured-field
 714   (string start-column &optional max-column must-unfold)
 715   "Decode and fold (fill) STRING as structured field body.
 716 It decodes non us-ascii characters in FULL-NAME encoded as
 717 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 718 characters are regarded as variable `default-mime-charset'.
 719
 720 If an encoded-word is broken or your emacs implementation can not
 721 decode the charset included in it, it is not decoded.
 722
 723 If MAX-COLUMN is omitted, `fill-column' is used.
 724
 725 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 726 if there are in decoded encoded-words (generated by bad manner MUA
 727 such as a version of Net$cape)."
 728   (or max-column
 729       (setq max-column fill-column))
 730   (let ((c start-column)
 731         (tokens (eword-lexical-analyze string must-unfold))
 732         (result "")
 733         token)
 734     (while (and (setq token (car tokens))
 735                 (setq tokens (cdr tokens)))
 736       (let* ((type (car token)))
 737         (if (eq type 'spaces)
 738             (let* ((next-token (car tokens))
 739                    (next-str (eword-decode-token next-token))
 740                    (next-len (string-width next-str))
 741                    (next-c (+ c next-len 1)))
 742               (if (< next-c max-column)
 743                   (setq result (concat result " " next-str)
 744                         c next-c)
 745                 (setq result (concat result "\n " next-str)
 746                       c (1+ next-len)))
 747               (setq tokens (cdr tokens))
 748               )
 749           (let* ((str (eword-decode-token token)))
 750             (setq result (concat result str)
 751                   c (+ c (string-width str)))
 752             ))))
 753     (if token
 754         (concat result (eword-decode-token token))
 755       result)))
 756
 757 (defun eword-decode-and-unfold-structured-field (string)
 758   "Decode and unfold STRING as structured field body.
 759 It decodes non us-ascii characters in FULL-NAME encoded as
 760 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 761 characters are regarded as variable `default-mime-charset'.
 762
 763 If an encoded-word is broken or your emacs implementation can not
 764 decode the charset included in it, it is not decoded."
 765   (let ((tokens (eword-lexical-analyze string 'must-unfold))
 766         (result ""))
 767     (while tokens
 768       (let* ((token (car tokens))
 769              (type (car token)))
 770         (setq tokens (cdr tokens))
 771         (setq result
 772               (if (eq type 'spaces)
 773                   (concat result " ")
 774                 (concat result (eword-decode-token token))
 775                 ))))
 776     result))
 777
 778 (defun eword-decode-structured-field-body (string &optional must-unfold
 779                                                   start-column max-column)
 780   "Decode non us-ascii characters in STRING as structured field body.
 781 STRING is unfolded before decoding.
 782
 783 It decodes non us-ascii characters in FULL-NAME encoded as
 784 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 785 characters are regarded as variable `default-mime-charset'.
 786
 787 If an encoded-word is broken or your emacs implementation can not
 788 decode the charset included in it, it is not decoded.
 789
 790 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 791 if there are in decoded encoded-words (generated by bad manner MUA
 792 such as a version of Net$cape)."
 793   (if start-column
 794       ;; fold with max-column
 795       (eword-decode-and-fold-structured-field
 796        string start-column max-column must-unfold)
 797     ;; Don't fold
 798     (mapconcat (function eword-decode-token)
 799                (eword-lexical-analyze string must-unfold)
 800                "")
 801     ))
 802
 803 (defun eword-decode-unstructured-field-body (string &optional must-unfold)
 804   "Decode non us-ascii characters in STRING as unstructured field body.
 805 STRING is unfolded before decoding.
 806
 807 It decodes non us-ascii characters in FULL-NAME encoded as
 808 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 809 characters are regarded as variable `default-mime-charset'.
 810
 811 If an encoded-word is broken or your emacs implementation can not
 812 decode the charset included in it, it is not decoded.
 813
 814 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 815 if there are in decoded encoded-words (generated by bad manner MUA
 816 such as a version of Net$cape)."
 817   (eword-decode-string string must-unfold))
 818
 819 (defun eword-extract-address-components (string)
 820   "Extract full name and canonical address from STRING.
 821 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
 822 If no name can be extracted, FULL-NAME will be nil.
 823 It decodes non us-ascii characters in FULL-NAME encoded as
 824 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 825 characters are regarded as variable `default-mime-charset'."
 826   (let* ((structure (car (std11-parse-address
 827                           (eword-lexical-analyze
 828                            (std11-unfold-string string) 'must-unfold))))
 829          (phrase  (std11-full-name-string structure))
 830          (address (std11-address-string structure))
 831          )
 832     (list phrase address)
 833     ))
 834
 835
 836 ;;; @ end
 837 ;;;
 838
 839 (provide 'eword-decode)
 840
 841 ;;; eword-decode.el ends here