eword-decode.el

   1 ;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs
   2
   3 ;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc.
   4
   5 ;; Author: ENAMI Tsugutomo <enami@sys.ptg.sony.co.jp>
   6 ;;         MORIOKA Tomohiko <morioka@jaist.ac.jp>
   7 ;; Maintainer: MORIOKA Tomohiko <morioka@jaist.ac.jp>
   8 ;; Created: 1995/10/03
   9 ;; Original: 1992/07/20 ENAMI Tsugutomo's `mime.el'.
  10 ;;      Renamed: 1993/06/03 to tiny-mime.el
  11 ;;      Renamed: 1995/10/03 from tiny-mime.el (split off encoder)
  12 ;;      Renamed: 1997/02/22 from tm-ew-d.el
  13 ;; Keywords: encoded-word, MIME, multilingual, header, mail, news
  14
  15 ;; This file is part of SEMI (Spadework for Emacs MIME Interfaces).
  16
  17 ;; This program is free software; you can redistribute it and/or
  18 ;; modify it under the terms of the GNU General Public License as
  19 ;; published by the Free Software Foundation; either version 2, or (at
  20 ;; your option) any later version.
  21
  22 ;; This program is distributed in the hope that it will be useful, but
  23 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  24 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  25 ;; General Public License for more details.
  26
  27 ;; You should have received a copy of the GNU General Public License
  28 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
  29 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  30 ;; Boston, MA 02111-1307, USA.
  31
  32 ;;; Code:
  33
  34 (require 'std11-parse)
  35 (require 'mel)
  36 (require 'mime-def)
  37
  38 (defgroup eword-decode nil
  39   "Encoded-word decoding"
  40   :group 'mime)
  41
  42
  43 ;;; @ MIME encoded-word definition
  44 ;;;
  45
  46 (defconst eword-encoded-text-regexp "[!->@-~]+")
  47 (defconst eword-encoded-word-regexp
  48   (concat (regexp-quote "=?")
  49           "\\("
  50           mime-charset-regexp
  51           "\\)"
  52           (regexp-quote "?")
  53           "\\(B\\|Q\\)"
  54           (regexp-quote "?")
  55           "\\("
  56           eword-encoded-text-regexp
  57           "\\)"
  58           (regexp-quote "?=")))
  59 (defconst eword-after-encoded-word-regexp "\\([ \t]\\|$\\)")
  60
  61 (defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+")
  62 (defconst eword-encoded-word-in-phrase-regexp
  63   (concat (regexp-quote "=?")
  64           "\\("
  65           mime-charset-regexp
  66           "\\)"
  67           (regexp-quote "?")
  68           "\\(B\\|Q\\)"
  69           (regexp-quote "?")
  70           "\\("
  71           eword-encoded-text-in-phrase-regexp
  72           "\\)"
  73           (regexp-quote "?=")))
  74 (defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t(]\\|$\\)")
  75
  76 (defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+")
  77 (defconst eword-encoded-word-in-comment-regexp
  78   (concat (regexp-quote "=?")
  79           "\\("
  80           mime-charset-regexp
  81           "\\)"
  82           (regexp-quote "?")
  83           "\\(B\\|Q\\)"
  84           (regexp-quote "?")
  85           "\\("
  86           eword-encoded-text-in-comment-regexp
  87           "\\)"
  88           (regexp-quote "?=")))
  89 (defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)")
  90
  91 (defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+")
  92 (defconst eword-encoded-word-in-quoted-string-regexp
  93   (concat (regexp-quote "=?")
  94           "\\("
  95           mime-charset-regexp
  96           "\\)"
  97           (regexp-quote "?")
  98           "\\(B\\|Q\\)"
  99           (regexp-quote "?")
 100           "\\("
 101           eword-encoded-text-in-quoted-string-regexp
 102           "\\)"
 103           (regexp-quote "?=")))
 104 (defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)")
 105
 106
 107 ;;; @@ Base64
 108 ;;;
 109
 110 (defconst base64-token-regexp "[A-Za-z0-9+/]")
 111 (defconst base64-token-padding-regexp "[A-Za-z0-9+/=]")
 112
 113 (defconst eword-B-encoded-text-regexp
 114   (concat "\\(\\("
 115           base64-token-regexp
 116           base64-token-regexp
 117           base64-token-regexp
 118           base64-token-regexp
 119           "\\)*"
 120           base64-token-regexp
 121           base64-token-regexp
 122           base64-token-padding-regexp
 123           base64-token-padding-regexp
 124           "\\)"))
 125
 126 ;; (defconst eword-B-encoding-and-encoded-text-regexp
 127 ;;   (concat "\\(B\\)\\?" eword-B-encoded-text-regexp))
 128
 129
 130 ;;; @@ Quoted-Printable
 131 ;;;
 132
 133 (defconst quoted-printable-hex-chars "0123456789ABCDEF")
 134 (defconst quoted-printable-octet-regexp
 135   (concat "=[" quoted-printable-hex-chars
 136           "][" quoted-printable-hex-chars "]"))
 137
 138 (defconst eword-Q-encoded-text-regexp
 139   (concat "\\([^=?]\\|" quoted-printable-octet-regexp "\\)+"))
 140 ;; (defconst eword-Q-encoding-and-encoded-text-regexp
 141 ;;   (concat "\\(Q\\)\\?" eword-Q-encoded-text-regexp))
 142
 143
 144 ;;; @ for string
 145 ;;;
 146
 147 (defvar eword-decode-sticked-encoded-word nil
 148   "*If non-nil, decode encoded-words sticked on atoms,
 149 other encoded-words, etc.
 150 however this behaviour violates RFC2047.")
 151
 152 (defvar eword-decode-quoted-encoded-word nil
 153   "*If non-nil, decode encoded-words in quoted-string
 154 however this behaviour violates RFC2047.")
 155
 156 (defun eword-decode-first-encoded-words (string
 157                                          eword-regexp
 158                                          after-regexp
 159                                          &optional must-unfold)
 160   "Decode MIME encoded-words in beginning of STRING.
 161
 162 EWORD-REGEXP is the regexp that matches a encoded-word.
 163 Usual value is eword-encoded-word-regexp,
 164 eword-encoded-text-in-phrase-regexp,
 165 eword-encoded-word-in-comment-regexp or
 166 eword-encoded-word-in-quoted-string-regexp.
 167
 168 AFTER-REGEXP is the regexp that matches a after encoded-word.
 169 Usual value is eword-after-encoded-word-regexp,
 170 eword-after-encoded-text-in-phrase-regexp,
 171 eword-after-encoded-word-in-comment-regexp or
 172 eword-after-encoded-word-in-quoted-string-regexp.
 173
 174 If beginning of STRING matches EWORD-REGEXP and AFTER-REGEXP,
 175 returns a cons cell of decoded string(sequence of characters) and
 176 the rest(sequence of octets).
 177
 178 If beginning of STRING does not matches EWORD-REGEXP and AFTER-REGEXP,
 179 returns nil.
 180
 181 If an encoded-word is broken or your emacs implementation can not
 182 decode the charset included in it, it is returned in decoded part
 183 as encoded-word form.
 184
 185 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 186 if there are in decoded encoded-words (generated by bad manner MUA
 187 such as a version of Net$cape)."
 188   (if eword-decode-sticked-encoded-word (setq after-regexp ""))
 189   (let ((between-ewords-regexp
 190           (if eword-decode-sticked-encoded-word
 191             "\\(\n?[ \t]\\)*"
 192             "\\(\n?[ \t]\\)+"))
 193         (src string)    ; sequence of octets.
 194         (dst ""))       ; sequence of characters.
 195     (if (string-match
 196           (concat "\\`\\(" eword-regexp "\\)" after-regexp) src)
 197       (let* (p
 198              (q (match-end 1))
 199              (ew (substring src 0 q))
 200              (dw (eword-decode-encoded-word ew must-unfold)))
 201         (setq dst (concat dst dw)
 202               src (substring src q))
 203         (if (not (string= ew dw))
 204           (progn
 205             (while
 206               (and
 207                 (string-match
 208                   (concat "\\`\\(" between-ewords-regexp "\\)"
 209                              "\\(" eword-regexp "\\)"
 210                              after-regexp)
 211                   src)
 212                 (progn
 213                   (setq p (match-end 1)
 214                         q (match-end 3)
 215                         ew (substring src p q)
 216                         dw (eword-decode-encoded-word ew must-unfold))
 217                   (if (string= ew dw)
 218                     (progn
 219                       (setq dst (concat dst (substring src 0 q))
 220                             src (substring src q))
 221                       nil)
 222                     t)))
 223               (setq dst (concat dst dw)
 224                     src (substring src q)))))
 225         (cons dst src))
 226       nil)))
 227
 228 (defun eword-decode-comment-string (string &optional must-unfold)
 229   (let ((src string)
 230         (buf "")
 231         (dst "")
 232         (flag-ew t))
 233     (while (< 0 (length src))
 234       (let ((ch (aref src 0))
 235             (decoded (and
 236                         flag-ew
 237                         (eword-decode-first-encoded-words src
 238                           eword-encoded-word-in-comment-regexp
 239                           eword-after-encoded-word-in-comment-regexp
 240                           must-unfold))))
 241         (if (and (not (string= buf ""))
 242                  (or decoded (eq ch ?\() (eq ch ?\))))
 243           (setq dst (concat dst
 244                       (std11-wrap-as-quoted-pairs
 245                         (decode-mime-charset-string buf
 246                           default-mime-charset)
 247                         '(?\( ?\))))
 248                 buf ""))
 249         (cond
 250           (decoded
 251             (setq dst (concat dst
 252                         (std11-wrap-as-quoted-pairs
 253                           (car decoded)
 254                           '(?( ?))))
 255                   src (cdr decoded)))
 256           ((or (eq ch ?\() (eq ch ?\)))
 257             (setq dst (concat dst (list ch))
 258                   src (substring src 1)
 259                   flag-ew t))
 260           ((eq ch ?\\)
 261             (setq buf (concat buf (list (aref src 1)))
 262                   src (substring src 2)
 263                   flag-ew t))
 264           ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n))
 265             (setq buf (concat buf (list ch))
 266                   src (substring src 1)
 267                   flag-ew t))
 268           ((string-match "\\`=?[^ \t\n()\\\\=]*" src)
 269             (setq buf (concat buf (substring src 0 (match-end 0)))
 270                   src (substring src (match-end 0))
 271                   flag-ew eword-decode-sticked-encoded-word))
 272           (t (error "something wrong")))))
 273     (if (not (string= buf ""))
 274       (setq dst (concat dst
 275                   (std11-wrap-as-quoted-pairs
 276                     (decode-mime-charset-string buf
 277                       default-mime-charset)
 278                     '(?\( ?\))))))
 279     dst))
 280
 281 (defun eword-decode-quoted-string (string &optional must-unfold)
 282   (let ((src string)
 283         (buf "")
 284         (dst "")
 285         (flag-ew t))
 286     (while (< 0 (length src))
 287       (let ((ch (aref src 0))
 288             (decoded (and
 289                         eword-decode-quoted-encoded-word
 290                         flag-ew
 291                         (eword-decode-first-encoded-words src
 292                           eword-encoded-word-in-quoted-string-regexp
 293                           eword-after-encoded-word-in-quoted-string-regexp
 294                           must-unfold))))
 295         (if (and (not (string= buf ""))
 296                  (or decoded (eq ch ?\")))
 297           (setq dst (concat dst
 298                       (std11-wrap-as-quoted-pairs
 299                         (decode-mime-charset-string buf
 300                         default-mime-charset)
 301                         '(?\")))
 302                 buf ""))
 303         (cond
 304           (decoded
 305             (setq dst (concat dst
 306                         (std11-wrap-as-quoted-pairs
 307                           (car decoded)
 308                           '(?\")))
 309                   src (cdr decoded)))
 310           ((or (eq ch ?\"))
 311             (setq dst (concat dst (list ch))
 312                   src (substring src 1)
 313                   flag-ew t))
 314           ((eq ch ?\\)
 315             (setq buf (concat buf (list (aref src 1)))
 316                   src (substring src 2)
 317                   flag-ew t))
 318           ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n))
 319             (setq buf (concat buf (list ch))
 320                   src (substring src 1)
 321                   flag-ew t))
 322           ((string-match "\\`=?[^ \t\n\"\\\\=]*" src)
 323             (setq buf (concat buf (substring src 0 (match-end 0)))
 324                   src (substring src (match-end 0))
 325                   flag-ew eword-decode-sticked-encoded-word))
 326           (t (error "something wrong")))))
 327     (if (not (string= buf ""))
 328       (setq dst (concat dst
 329                   (std11-wrap-as-quoted-pairs
 330                     (decode-mime-charset-string buf
 331                       default-mime-charset)
 332                     '(?\")))))
 333     dst))
 334
 335 (defun eword-decode-unstructured-string (string &optional must-unfold)
 336   (let ((src string)
 337         (buf "")
 338         (dst "")
 339         (flag-ew t))
 340     (while (< 0 (length src))
 341       (let ((ch (aref src 0))
 342             (decoded (and flag-ew (eword-decode-first-encoded-words src
 343                                     eword-encoded-word-regexp
 344                                     eword-after-encoded-word-regexp
 345                                     must-unfold))))
 346         (if (and (not (string= buf ""))
 347                  decoded)
 348           (setq dst (concat dst
 349                       (decode-mime-charset-string buf
 350                         default-mime-charset))
 351                 buf ""))
 352         (cond
 353           (decoded
 354             (setq dst (concat dst (car decoded))
 355                   src (cdr decoded)))
 356           ((or (eq ch ?\ ) (eq ch ?\t) (eq ch ?\n))
 357             (setq buf (concat buf (list ch))
 358                   src (substring src 1)
 359                   flag-ew t))
 360           ((string-match "\\`=?[^ \t\n=]*" src)
 361             (setq buf (concat buf (substring src 0 (match-end 0)))
 362                   src (substring src (match-end 0))
 363                   flag-ew eword-decode-sticked-encoded-word))
 364           (t (error "something wrong")))))
 365     (if (not (string= buf ""))
 366       (setq dst (concat dst
 367                   (decode-mime-charset-string buf
 368                     default-mime-charset))))
 369     dst))
 370
 371 (defun eword-decode-string (string &optional must-unfold)
 372   "Decode MIME encoded-words in STRING.
 373
 374 STRING is unfolded before decoding.
 375
 376 If an encoded-word is broken or your emacs implementation can not
 377 decode the charset included in it, it is not decoded.
 378
 379 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 380 if there are in decoded encoded-words (generated by bad manner MUA
 381 such as a version of Net$cape)."
 382   (eword-decode-unstructured-string
 383     (std11-unfold-string string)
 384     must-unfold))
 385
 386
 387 ;;; @ for region
 388 ;;;
 389
 390 (defun eword-decode-region (start end &optional unfolding must-unfold)
 391   "Decode MIME encoded-words in region between START and END.
 392
 393 If UNFOLDING is not nil, it unfolds before decoding.
 394
 395 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 396 if there are in decoded encoded-words (generated by bad manner MUA
 397 such as a version of Net$cape)."
 398   (interactive "*r")
 399   (save-excursion
 400     (save-restriction
 401       (narrow-to-region start end)
 402       (if unfolding
 403           (eword-decode-unfold)
 404         )
 405       (let ((str (eword-decode-unstructured-string
 406                    (buffer-substring (point-min) (point-max))
 407                    must-unfold)))
 408         (delete-region (point-min) (point-max))
 409         (insert str)))))
 410
 411
 412 ;;; @ for message header
 413 ;;;
 414
 415 (defcustom eword-decode-ignored-field-list
 416   '(newsgroups path lines nntp-posting-host message-id date)
 417   "*List of field-names to be ignored when decoding.
 418 Each field name must be symbol."
 419   :group 'eword-decode
 420   :type '(repeat symbol))
 421
 422 (defcustom eword-decode-structured-field-list
 423   '(reply-to resent-reply-to from resent-from sender resent-sender
 424              to resent-to cc resent-cc bcc resent-bcc dcc
 425              mime-version content-type content-transfer-encoding
 426              content-disposition)
 427   "*List of field-names to decode as structured field.
 428 Each field name must be symbol."
 429   :group 'eword-decode
 430   :type '(repeat symbol))
 431
 432 (defun eword-decode-header (&optional code-conversion separator)
 433   "Decode MIME encoded-words in header fields.
 434 If CODE-CONVERSION is nil, it decodes only encoded-words.  If it is
 435 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
 436 Otherwise it decodes non-ASCII bit patterns as the
 437 default-mime-charset.
 438 If SEPARATOR is not nil, it is used as header separator."
 439   (interactive "*")
 440   (save-excursion
 441     (save-restriction
 442       (std11-narrow-to-header separator)
 443       (let ((default-charset
 444               (if code-conversion
 445                   (if (mime-charset-to-coding-system code-conversion)
 446                       code-conversion
 447                     default-mime-charset))))
 448         (if default-charset
 449             (let (beg p end field-name len)
 450               (goto-char (point-min))
 451               (while (re-search-forward std11-field-head-regexp nil t)
 452                 (setq beg (match-beginning 0)
 453                       p (match-end 0)
 454                       field-name (buffer-substring beg (1- p))
 455                       len (string-width field-name)
 456                       field-name (intern (downcase field-name))
 457                       end (std11-field-end))
 458                 (cond ((memq field-name eword-decode-ignored-field-list)
 459                        ;; Don't decode
 460                        )
 461                       ((memq field-name eword-decode-structured-field-list)
 462                        ;; Decode as structured field
 463                        (let ((body (buffer-substring p end))
 464                              (default-mime-charset default-charset))
 465                          (delete-region p end)
 466                          (insert (eword-decode-and-fold-structured-field
 467                                   body (1+ len)))
 468                          ))
 469                       (t
 470                        ;; Decode as unstructured field
 471                        (save-restriction
 472                          (narrow-to-region beg (1+ end))
 473                          (goto-char p)
 474                          (let ((default-mime-charset default-charset))
 475                            (eword-decode-region beg (point-max) 'unfold))
 476                          (goto-char (point-max))
 477                          )))))
 478           (eword-decode-region (point-min) (point-max) t)
 479           )))))
 480
 481 (defun eword-decode-unfold ()
 482   (goto-char (point-min))
 483   (let (field beg end)
 484     (while (re-search-forward std11-field-head-regexp nil t)
 485       (setq beg (match-beginning 0)
 486             end (std11-field-end))
 487       (setq field (buffer-substring beg end))
 488       (if (string-match eword-encoded-word-regexp field)
 489           (save-restriction
 490             (narrow-to-region (goto-char beg) end)
 491             (while (re-search-forward "\n\\([ \t]\\)" nil t)
 492               (replace-match (match-string 1))
 493               )
 494             (goto-char (point-max))
 495             ))
 496       )))
 497
 498
 499 ;;; @ encoded-word decoder
 500 ;;;
 501
 502 (defvar eword-warning-face nil "Face used for invalid encoded-word.")
 503
 504 (defun eword-decode-encoded-word (word &optional must-unfold)
 505   "Decode WORD if it is an encoded-word.
 506
 507 If your emacs implementation can not decode the charset of WORD, it
 508 returns WORD.  Similarly the encoded-word is broken, it returns WORD.
 509
 510 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 511 if there are in decoded encoded-word (generated by bad manner MUA such
 512 as a version of Net$cape)."
 513   (or (if (string-match eword-encoded-word-regexp word)
 514           (let ((charset
 515                  (substring word (match-beginning 1) (match-end 1))
 516                  )
 517                 (encoding
 518                  (upcase
 519                   (substring word (match-beginning 2) (match-end 2))
 520                   ))
 521                 (text
 522                  (substring word (match-beginning 3) (match-end 3))
 523                  ))
 524             (condition-case err
 525                 (eword-decode-encoded-text charset encoding text must-unfold)
 526               (error
 527                (and
 528                 (add-text-properties 0 (length word)
 529                                      (and eword-warning-face
 530                                           (list 'face eword-warning-face))
 531                                      word)
 532                 word)))
 533             ))
 534       word))
 535
 536
 537 ;;; @ encoded-text decoder
 538 ;;;
 539
 540 (defun eword-decode-encoded-text (charset encoding string
 541                                           &optional must-unfold)
 542   "Decode STRING as an encoded-text.
 543
 544 If your emacs implementation can not decode CHARSET, it returns nil.
 545
 546 If ENCODING is not \"B\" or \"Q\", it occurs error.
 547 So you should write error-handling code if you don't want break by errors.
 548
 549 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 550 if there are in decoded encoded-text (generated by bad manner MUA such
 551 as a version of Net$cape)."
 552   (let ((cs (mime-charset-to-coding-system charset)))
 553     (if cs
 554         (let ((dest
 555                (cond
 556                 ((string-equal "B" encoding)
 557                  (if (and (string-match eword-B-encoded-text-regexp string)
 558                           (string-equal string (match-string 0 string)))
 559                      (base64-decode-string string)
 560                    (error "Invalid encoded-text %s" string)))
 561                 ((string-equal "Q" encoding)
 562                  (if (and (string-match eword-Q-encoded-text-regexp string)
 563                           (string-equal string (match-string 0 string)))
 564                      (q-encoding-decode-string string)
 565                    (error "Invalid encoded-text %s" string)))
 566                 (t
 567                  (error "Invalid encoding %s" encoding)
 568                  )))
 569               )
 570           (if dest
 571               (progn
 572                 (setq dest (decode-coding-string dest cs))
 573                 (if must-unfold
 574                     (mapconcat (function
 575                                 (lambda (chr)
 576                                   (cond
 577                                    ((eq chr ?\n) "")
 578                                    ((eq chr ?\t) " ")
 579                                    (t (char-to-string chr)))
 580                                   ))
 581                                (std11-unfold-string dest)
 582                                "")
 583                   dest)
 584                 ))))))
 585
 586
 587 ;;; @ lexical analyze
 588 ;;;
 589
 590 (defvar eword-lexical-analyze-cache nil)
 591 (defvar eword-lexical-analyze-cache-max 299
 592   "*Max position of eword-lexical-analyze-cache.
 593 It is max size of eword-lexical-analyze-cache - 1.")
 594
 595 (defcustom eword-lexical-analyzers
 596   '(eword-analyze-quoted-string
 597     eword-analyze-domain-literal
 598     eword-analyze-comment
 599     eword-analyze-spaces
 600     eword-analyze-special
 601     eword-analyze-encoded-word
 602     eword-analyze-atom)
 603   "*List of functions to return result of lexical analyze.
 604 Each function must have two arguments: STRING and MUST-UNFOLD.
 605 STRING is the target string to be analyzed.
 606 If MUST-UNFOLD is not nil, each function must unfold and eliminate
 607 bare-CR and bare-LF from the result even if they are included in
 608 content of the encoded-word.
 609 Each function must return nil if it can not analyze STRING as its
 610 format.
 611
 612 Previous function is preferred to next function.  If a function
 613 returns nil, next function is used.  Otherwise the return value will
 614 be the result."
 615   :group 'eword-decode
 616   :type '(repeat function))
 617
 618 (defun eword-analyze-quoted-string (string &optional must-unfold)
 619   (let ((p (std11-check-enclosure string ?\" ?\")))
 620     (if p
 621         (cons (cons 'quoted-string
 622                     (eword-decode-quoted-string (substring string 0 p)))
 623               (substring string p))
 624       )))
 625
 626 (defun eword-analyze-domain-literal (string &optional must-unfold)
 627   (std11-analyze-domain-literal string))
 628
 629 (defun eword-analyze-comment (string &optional must-unfold)
 630   (let ((len (length string)))
 631     (if (and (< 0 len) (eq (aref string 0) ?\())
 632         (let ((p 0))
 633           (while (and p (< p len) (eq (aref string p) ?\())
 634             (setq p (std11-check-enclosure string ?\( ?\) t p)))
 635           (setq p (or p len))
 636           (cons (cons 'comment
 637                       (eword-decode-comment-string (substring string 0 p)))
 638                 (substring string p)))
 639       nil)))
 640
 641 (defun eword-analyze-spaces (string &optional must-unfold)
 642   (std11-analyze-spaces string))
 643
 644 (defun eword-analyze-special (string &optional must-unfold)
 645   (std11-analyze-special string))
 646
 647 (defun eword-analyze-encoded-word (string &optional must-unfold)
 648   (let ((decoded (eword-decode-first-encoded-words
 649                    string
 650                    eword-encoded-word-in-phrase-regexp
 651                    eword-after-encoded-word-in-phrase-regexp
 652                    must-unfold)))
 653     (if decoded
 654       (cons (cons 'atom (car decoded)) (cdr decoded)))))
 655
 656 (defun eword-analyze-atom (string &optional must-unfold)
 657   (if (let ((enable-multibyte-characters nil))
 658         (string-match std11-atom-regexp string))
 659       (let ((end (match-end 0)))
 660         (if (and eword-decode-sticked-encoded-word
 661                  (string-match eword-encoded-word-in-phrase-regexp
 662                                (substring string 0 end))
 663                  (< 0 (match-beginning 0)))
 664             (setq end (match-beginning 0)))
 665         (cons (cons 'atom (decode-mime-charset-string
 666                            (substring string 0 end)
 667                            default-mime-charset))
 668               (substring string end)
 669               ))))
 670
 671 (defun eword-lexical-analyze-internal (string must-unfold)
 672   (let (dest ret)
 673     (while (not (string-equal string ""))
 674       (setq ret
 675             (let ((rest eword-lexical-analyzers)
 676                   func r)
 677               (while (and (setq func (car rest))
 678                           (null (setq r (funcall func string must-unfold)))
 679                           )
 680                 (setq rest (cdr rest)))
 681               (or r `((error . ,string) . ""))
 682               ))
 683       (setq dest (cons (car ret) dest))
 684       (setq string (cdr ret))
 685       )
 686     (nreverse dest)
 687     ))
 688
 689 (defun eword-lexical-analyze (string &optional must-unfold)
 690   "Return lexical analyzed list corresponding STRING.
 691 It is like std11-lexical-analyze, but it decodes non us-ascii
 692 characters encoded as encoded-words or invalid \"raw\" format.
 693 \"Raw\" non us-ascii characters are regarded as variable
 694 `default-mime-charset'."
 695   (let* ((str (copy-sequence string))
 696          (key (cons str (cons default-mime-charset must-unfold)))
 697          ret)
 698     (set-text-properties 0 (length str) nil str)
 699     (if (setq ret (assoc key eword-lexical-analyze-cache))
 700         (cdr ret)
 701       (setq ret (eword-lexical-analyze-internal str must-unfold))
 702       (setq eword-lexical-analyze-cache
 703             (cons (cons key ret)
 704                   (last eword-lexical-analyze-cache
 705                         eword-lexical-analyze-cache-max)))
 706       ret)))
 707
 708 (defun eword-decode-token (token)
 709   (cdr token))
 710
 711 (defun eword-decode-and-fold-structured-field
 712   (string start-column &optional max-column must-unfold)
 713   "Decode and fold (fill) STRING as structured field body.
 714 It decodes non us-ascii characters in FULL-NAME encoded as
 715 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 716 characters are regarded as variable `default-mime-charset'.
 717
 718 If an encoded-word is broken or your emacs implementation can not
 719 decode the charset included in it, it is not decoded.
 720
 721 If MAX-COLUMN is omitted, `fill-column' is used.
 722
 723 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 724 if there are in decoded encoded-words (generated by bad manner MUA
 725 such as a version of Net$cape)."
 726   (or max-column
 727       (setq max-column fill-column))
 728   (let ((c start-column)
 729         (tokens (eword-lexical-analyze string must-unfold))
 730         (result "")
 731         token)
 732     (while (and (setq token (car tokens))
 733                 (setq tokens (cdr tokens)))
 734       (let* ((type (car token)))
 735         (if (eq type 'spaces)
 736             (let* ((next-token (car tokens))
 737                    (next-str (eword-decode-token next-token))
 738                    (next-len (string-width next-str))
 739                    (next-c (+ c next-len 1)))
 740               (if (< next-c max-column)
 741                   (setq result (concat result " " next-str)
 742                         c next-c)
 743                 (setq result (concat result "\n " next-str)
 744                       c (1+ next-len)))
 745               (setq tokens (cdr tokens))
 746               )
 747           (let* ((str (eword-decode-token token)))
 748             (setq result (concat result str)
 749                   c (+ c (string-width str)))
 750             ))))
 751     (if token
 752         (concat result (eword-decode-token token))
 753       result)))
 754
 755 (defun eword-decode-and-unfold-structured-field (string)
 756   "Decode and unfold STRING as structured field body.
 757 It decodes non us-ascii characters in FULL-NAME encoded as
 758 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 759 characters are regarded as variable `default-mime-charset'.
 760
 761 If an encoded-word is broken or your emacs implementation can not
 762 decode the charset included in it, it is not decoded."
 763   (let ((tokens (eword-lexical-analyze string 'must-unfold))
 764         (result ""))
 765     (while tokens
 766       (let* ((token (car tokens))
 767              (type (car token)))
 768         (setq tokens (cdr tokens))
 769         (setq result
 770               (if (eq type 'spaces)
 771                   (concat result " ")
 772                 (concat result (eword-decode-token token))
 773                 ))))
 774     result))
 775
 776 (defun eword-decode-structured-field-body (string &optional must-unfold
 777                                                   start-column max-column)
 778   "Decode non us-ascii characters in STRING as structured field body.
 779 STRING is unfolded before decoding.
 780
 781 It decodes non us-ascii characters in FULL-NAME encoded as
 782 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 783 characters are regarded as variable `default-mime-charset'.
 784
 785 If an encoded-word is broken or your emacs implementation can not
 786 decode the charset included in it, it is not decoded.
 787
 788 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 789 if there are in decoded encoded-words (generated by bad manner MUA
 790 such as a version of Net$cape)."
 791   (if start-column
 792       ;; fold with max-column
 793       (eword-decode-and-fold-structured-field
 794        string start-column max-column must-unfold)
 795     ;; Don't fold
 796     (mapconcat (function eword-decode-token)
 797                (eword-lexical-analyze string must-unfold)
 798                "")
 799     ))
 800
 801 (defun eword-decode-unstructured-field-body (string &optional must-unfold)
 802   "Decode non us-ascii characters in STRING as unstructured field body.
 803 STRING is unfolded before decoding.
 804
 805 It decodes non us-ascii characters in FULL-NAME encoded as
 806 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 807 characters are regarded as variable `default-mime-charset'.
 808
 809 If an encoded-word is broken or your emacs implementation can not
 810 decode the charset included in it, it is not decoded.
 811
 812 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 813 if there are in decoded encoded-words (generated by bad manner MUA
 814 such as a version of Net$cape)."
 815   (eword-decode-string string must-unfold))
 816
 817 (defun eword-extract-address-components (string)
 818   "Extract full name and canonical address from STRING.
 819 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
 820 If no name can be extracted, FULL-NAME will be nil.
 821 It decodes non us-ascii characters in FULL-NAME encoded as
 822 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 823 characters are regarded as variable `default-mime-charset'."
 824   (let* ((structure (car (std11-parse-address
 825                           (eword-lexical-analyze
 826                            (std11-unfold-string string) 'must-unfold))))
 827          (phrase  (std11-full-name-string structure))
 828          (address (std11-address-string structure))
 829          )
 830     (list phrase address)
 831     ))
 832
 833
 834 ;;; @ end
 835 ;;;
 836
 837 (provide 'eword-decode)
 838
 839 ;;; eword-decode.el ends here