eword-decode.el

   1 ;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs
   2
   3 ;; Copyright (C) 1995,1996,1997,1998 Free Software Foundation, Inc.
   4
   5 ;; Author: ENAMI Tsugutomo <enami@sys.ptg.sony.co.jp>
   6 ;;         MORIOKA Tomohiko <morioka@jaist.ac.jp>
   7 ;;         Tanaka Akira <akr@jaist.ac.jp>
   8 ;; Maintainer: Tanaka Akira <akr@jaist.ac.jp>
   9 ;; Created: 1995/10/03
  10 ;; Original: 1992/07/20 ENAMI Tsugutomo's `mime.el'.
  11 ;;      Renamed: 1993/06/03 to tiny-mime.el
  12 ;;      Renamed: 1995/10/03 from tiny-mime.el (split off encoder)
  13 ;;      Renamed: 1997/02/22 from tm-ew-d.el
  14 ;; Keywords: encoded-word, MIME, multilingual, header, mail, news
  15
  16 ;; This file is part of FLAM (Faithful Library About MIME).
  17
  18 ;; This program is free software; you can redistribute it and/or
  19 ;; modify it under the terms of the GNU General Public License as
  20 ;; published by the Free Software Foundation; either version 2, or (at
  21 ;; your option) any later version.
  22
  23 ;; This program is distributed in the hope that it will be useful, but
  24 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  25 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  26 ;; General Public License for more details.
  27
  28 ;; You should have received a copy of the GNU General Public License
  29 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
  30 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  31 ;; Boston, MA 02111-1307, USA.
  32
  33 ;;; Code:
  34
  35 (require 'std11)
  36 (require 'mel)
  37 (require 'mime-def)
  38
  39 (require 'ew-dec)
  40
  41 (defgroup eword-decode nil
  42   "Encoded-word decoding"
  43   :group 'mime)
  44
  45 ;;; TEST
  46
  47 (defvar rotate-memo nil)
  48 (defmacro rotate-memo (var val)
  49   `(when rotate-memo
  50      (unless (boundp ',var) (setq ,var ()))
  51      (setq ,var (cons ,val ,var))
  52      (let ((tmp (last ,var (- (length ,var) 100))))
  53        (when tmp (setcdr tmp nil)))
  54      ,var))
  55
  56 ;;; @ variables
  57 ;;;
  58
  59 (defcustom eword-decode-sticked-encoded-word nil
  60   "*If non-nil, decode encoded-words sticked on atoms,
  61 other encoded-words, etc.
  62 however this behaviour violates RFC2047."
  63   :group 'eword-decode
  64   :type 'boolean)
  65
  66 (defcustom eword-decode-quoted-encoded-word nil
  67   "*If non-nil, decode encoded-words in quoted-string
  68 however this behaviour violates RFC2047."
  69   :group 'eword-decode
  70   :type 'boolean)
  71
  72 (defcustom eword-max-size-to-decode 1000
  73   "*Max size to decode header field."
  74   :group 'eword-decode
  75   :type '(choice (integer :tag "Limit (bytes)")
  76                  (const :tag "Don't limit" nil)))
  77
  78
  79 ;;; @ MIME encoded-word definition
  80 ;;;
  81
  82 (defconst eword-encoded-word-prefix-regexp
  83   (concat (regexp-quote "=?")
  84           "\\(" mime-charset-regexp "\\)"
  85           (regexp-quote "?")
  86           "\\(B\\|Q\\)"
  87           (regexp-quote "?")))
  88 (defconst eword-encoded-word-suffix-regexp
  89   (regexp-quote "?="))
  90
  91 (defconst eword-encoded-text-in-unstructured-regexp "[!->@-~]+")
  92 (defconst eword-encoded-word-in-unstructured-regexp
  93   (concat eword-encoded-word-prefix-regexp
  94           "\\(" eword-encoded-text-in-unstructured-regexp "\\)"
  95           eword-encoded-word-suffix-regexp))
  96 (defconst eword-after-encoded-word-in-unstructured-regexp "\\([ \t]\\|$\\)")
  97
  98 (defconst eword-encoded-text-in-phrase-regexp "[-A-Za-z0-9!*+/=_]+")
  99 (defconst eword-encoded-word-in-phrase-regexp
 100   (concat eword-encoded-word-prefix-regexp
 101           "\\(" eword-encoded-text-in-phrase-regexp "\\)"
 102           eword-encoded-word-suffix-regexp))
 103 (defconst eword-after-encoded-word-in-phrase-regexp "\\([ \t]\\|$\\)")
 104
 105 (defconst eword-encoded-text-in-comment-regexp "[]!-'*->@-[^-~]+")
 106 (defconst eword-encoded-word-in-comment-regexp
 107   (concat eword-encoded-word-prefix-regexp
 108           "\\(" eword-encoded-text-in-comment-regexp "\\)"
 109           eword-encoded-word-suffix-regexp))
 110 (defconst eword-after-encoded-word-in-comment-regexp "\\([ \t()\\\\]\\|$\\)")
 111
 112 (defconst eword-encoded-text-in-quoted-string-regexp "[]!#->@-[^-~]+")
 113 (defconst eword-encoded-word-in-quoted-string-regexp
 114   (concat eword-encoded-word-prefix-regexp
 115           "\\(" eword-encoded-text-in-quoted-string-regexp "\\)"
 116           eword-encoded-word-suffix-regexp))
 117 (defconst eword-after-encoded-word-in-quoted-string-regexp "\\([ \t\"\\\\]\\|$\\)")
 118
 119 ; obsolete
 120 (defconst eword-encoded-text-regexp eword-encoded-text-in-unstructured-regexp)
 121 (defconst eword-encoded-word-regexp eword-encoded-word-in-unstructured-regexp)
 122
 123
 124 ;;; @ internal utilities
 125 ;;;
 126
 127 (defun eword-decode-first-encoded-words (string
 128                                          eword-regexp
 129                                          after-regexp
 130                                          &optional must-unfold)
 131   "Decode MIME encoded-words in beginning of STRING.
 132
 133 EWORD-REGEXP is the regexp that matches a encoded-word.
 134 Usual value is
 135 eword-encoded-word-in-unstructured-regexp,
 136 eword-encoded-text-in-phrase-regexp,
 137 eword-encoded-word-in-comment-regexp or
 138 eword-encoded-word-in-quoted-string-regexp.
 139
 140 AFTER-REGEXP is the regexp that matches a after encoded-word.
 141 Usual value is
 142 eword-after-encoded-word-in-unstructured-regexp,
 143 eword-after-encoded-text-in-phrase-regexp,
 144 eword-after-encoded-word-in-comment-regexp or
 145 eword-after-encoded-word-in-quoted-string-regexp.
 146
 147 If beginning of STRING matches EWORD-REGEXP with AFTER-REGEXP,
 148 returns a cons cell of decoded string(sequence of characters) and
 149 the rest(sequence of octets).
 150
 151 If beginning of STRING does not matches EWORD-REGEXP and AFTER-REGEXP,
 152 returns nil.
 153
 154 If an encoded-word is broken or your emacs implementation can not
 155 decode the charset included in it, it is returned in decoded part
 156 as encoded-word form.
 157
 158 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 159 if there are in decoded encoded-words (generated by bad manner MUA
 160 such as a version of Net$cape)."
 161   (if eword-decode-sticked-encoded-word (setq after-regexp ""))
 162   (let* ((between-ewords-regexp
 163            (if eword-decode-sticked-encoded-word
 164              "\\(\n?[ \t]\\)*"
 165              "\\(\n?[ \t]\\)+"))
 166          (between-ewords-eword-after-regexp
 167            (concat "\\`\\(" between-ewords-regexp "\\)"
 168                       "\\(" eword-regexp "\\)"
 169                       after-regexp))
 170          (eword-after-regexp
 171            (concat "\\`\\(" eword-regexp "\\)" after-regexp))
 172          (src string)   ; sequence of octets.
 173          (dst ""))      ; sequence of characters.
 174     (if (string-match eword-after-regexp src)
 175       (let* (p
 176              (q (match-end 1))
 177              (ew (substring src 0 q))
 178              (dw (eword-decode-encoded-word ew must-unfold)))
 179         (setq dst (concat dst dw)
 180               src (substring src q))
 181         (if (not (string= ew dw))
 182           (progn
 183             (while
 184               (and
 185                 (string-match between-ewords-eword-after-regexp src)
 186                 (progn
 187                   (setq p (match-end 1)
 188                         q (match-end 3)
 189                         ew (substring src p q)
 190                         dw (eword-decode-encoded-word ew must-unfold))
 191                   (if (string= ew dw)
 192                     (progn
 193                       (setq dst (concat dst (substring src 0 q))
 194                             src (substring src q))
 195                       nil)
 196                     t)))
 197               (setq dst (concat dst dw)
 198                     src (substring src q)))))
 199         (cons dst src))
 200       nil)))
 201
 202 (defun eword-decode-entire-string (string
 203                                    eword-regexp
 204                                    after-regexp
 205                                    safe-regexp
 206                                    escape ; ?\\ or nil.
 207                                    delimiters ; list of chars.
 208                                    chars-must-be-quote
 209                                    must-unfold
 210                                    code-conversion)
 211   (if (and code-conversion
 212            (not (mime-charset-to-coding-system code-conversion)))
 213       (setq code-conversion default-mime-charset))
 214   (let ((equal-safe-regexp (concat "\\`=?" safe-regexp))
 215         (dst "")
 216         (buf "")
 217         (src string)
 218         (ew-enable t))
 219     (while (< 0 (length src))
 220       (let ((ch (aref src 0))
 221             (decoded (and
 222                         ew-enable
 223                         (eword-decode-first-encoded-words src
 224                           eword-regexp after-regexp must-unfold))))
 225         (if (and (not (string= buf ""))
 226                  (or decoded (memq ch delimiters)))
 227           (setq dst (concat dst
 228                       (std11-wrap-as-quoted-pairs
 229                         (decode-mime-charset-string buf code-conversion)
 230                         chars-must-be-quote))
 231                 buf ""))
 232         (cond
 233           (decoded
 234             (setq dst (concat dst
 235                         (std11-wrap-as-quoted-pairs
 236                           (car decoded)
 237                           chars-must-be-quote))
 238                   src (cdr decoded)))
 239           ((memq ch delimiters)
 240             (setq dst (concat dst (list ch))
 241                   src (substring src 1)
 242                   ew-enable t))
 243           ((eq ch escape)
 244             (setq buf (concat buf (list (aref src 1)))
 245                   src (substring src 2)
 246                   ew-enable t))
 247           ((string-match "\\`[ \t\n]+" src)
 248             (setq buf (concat buf (substring src 0 (match-end 0)))
 249                   src (substring src (match-end 0))
 250                   ew-enable t))
 251           ((and (string-match equal-safe-regexp src)
 252                 (< 0 (match-end 0)))
 253             (setq buf (concat buf (substring src 0 (match-end 0)))
 254                   src (substring src (match-end 0))
 255                   ew-enable eword-decode-sticked-encoded-word))
 256           (t (error "something wrong")))))
 257     (if (not (string= buf ""))
 258       (setq dst (concat dst
 259                   (std11-wrap-as-quoted-pairs
 260                     (decode-mime-charset-string buf code-conversion)
 261                     chars-must-be-quote))))
 262     dst))
 263
 264
 265 ;;; @ for string
 266 ;;;
 267
 268 (defun eword-decode-unstructured (string code-conversion &optional must-unfold)
 269   (eword-decode-entire-string
 270     string
 271     eword-encoded-word-in-unstructured-regexp
 272     eword-after-encoded-word-in-unstructured-regexp
 273     "[^ \t\n=]*"
 274     nil
 275     nil
 276     nil
 277     must-unfold
 278     code-conversion))
 279
 280 (defun eword-decode-comment (string code-conversion &optional must-unfold)
 281   (eword-decode-entire-string
 282     string
 283     eword-encoded-word-in-comment-regexp
 284     eword-after-encoded-word-in-comment-regexp
 285     "[^ \t\n()\\\\=]*"
 286     ?\\
 287     '(?\( ?\))
 288     '(?\( ?\) ?\\ ?\r ?\n)
 289     must-unfold
 290     code-conversion))
 291
 292 (defun eword-decode-quoted-string (string code-conversion &optional must-unfold)
 293   (eword-decode-entire-string
 294     string
 295     eword-encoded-word-in-quoted-string-regexp
 296     eword-after-encoded-word-in-quoted-string-regexp
 297     "[^ \t\n\"\\\\=]*"
 298     ?\\
 299     '(?\")
 300     '(?\" ?\\ ?\r ?\n)
 301     must-unfold
 302     code-conversion))
 303
 304 (defun eword-decode-string (string &optional must-unfold code-conversion)
 305   "Decode MIME encoded-words in STRING.
 306
 307 STRING is unfolded before decoding.
 308
 309 If an encoded-word is broken or your emacs implementation can not
 310 decode the charset included in it, it is not decoded.
 311
 312 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 313 if there are in decoded encoded-words (generated by bad manner MUA
 314 such as a version of Net$cape).
 315
 316 If CODE-CONVERSION is nil, it decodes only encoded-words.  If it is
 317 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
 318 Otherwise it decodes non-ASCII bit patterns as the
 319 default-mime-charset."
 320   (eword-decode-unstructured
 321     (std11-unfold-string string)
 322     code-conversion
 323     must-unfold))
 324
 325 (defun eword-decode-and-fold-structured-field
 326   (string start-column &optional max-column must-unfold)
 327   "Decode and fold (fill) STRING as structured field body.
 328 It decodes non us-ascii characters in FULL-NAME encoded as
 329 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 330 characters are regarded as variable `default-mime-charset'.
 331
 332 If an encoded-word is broken or your emacs implementation can not
 333 decode the charset included in it, it is not decoded.
 334
 335 If MAX-COLUMN is omitted, `fill-column' is used.
 336
 337 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 338 if there are in decoded encoded-words (generated by bad manner MUA
 339 such as a version of Net$cape)."
 340   (rotate-memo args-eword-decode-and-fold-structured-field
 341                (list string start-column max-column must-unfold))
 342   (or max-column
 343       (setq max-column fill-column))
 344   (let* ((field-name (make-string (1- start-column) ?X))
 345          (field-body (ew-lf-crlf-to-crlf string))
 346          (ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
 347          (decoded (ew-decode-field field-name field-body)))
 348     (unless (equal field-body decoded)
 349       (setq decoded (ew-crlf-refold decoded start-column max-column)))
 350     (ew-crlf-to-lf decoded)))
 351
 352 (defun eword-decode-and-unfold-structured-field (string)
 353   "Decode and unfold STRING as structured field body.
 354 It decodes non us-ascii characters in FULL-NAME encoded as
 355 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 356 characters are regarded as variable `default-mime-charset'.
 357
 358 If an encoded-word is broken or your emacs implementation can not
 359 decode the charset included in it, it is not decoded."
 360   (rotate-memo args-eword-decode-and-unfold-structured-field (list string))
 361   (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
 362          (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
 363     (ew-crlf-to-lf (ew-crlf-unfold decoded))))
 364
 365 (defun eword-decode-structured-field-body (string &optional must-unfold
 366                                                   start-column max-column)
 367   "Decode non us-ascii characters in STRING as structured field body.
 368 STRING is unfolded before decoding.
 369
 370 It decodes non us-ascii characters in FULL-NAME encoded as
 371 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 372 characters are regarded as variable `default-mime-charset'.
 373
 374 If an encoded-word is broken or your emacs implementation can not
 375 decode the charset included in it, it is not decoded.
 376
 377 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 378 if there are in decoded encoded-words (generated by bad manner MUA
 379 such as a version of Net$cape)."
 380   (rotate-memo args-eword-decode-structured-field-body
 381                (list string must-unfold start-column max-column))
 382   (if start-column
 383       ;; fold with max-column
 384       (eword-decode-and-fold-structured-field
 385        string start-column max-column must-unfold)
 386     ;; Don't fold
 387     (let* ((ew-decode-field-default-syntax '(ew-scan-unibyte-std11))
 388            (decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
 389       (ew-crlf-to-lf decoded))))
 390
 391 (defun eword-decode-unstructured-field-body (string &optional must-unfold)
 392   "Decode non us-ascii characters in STRING as unstructured field body.
 393 STRING is unfolded before decoding.
 394
 395 It decodes non us-ascii characters in FULL-NAME encoded as
 396 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 397 characters are regarded as variable `default-mime-charset'.
 398
 399 If an encoded-word is broken or your emacs implementation can not
 400 decode the charset included in it, it is not decoded.
 401
 402 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 403 if there are in decoded encoded-words (generated by bad manner MUA
 404 such as a version of Net$cape)."
 405   (rotate-memo args-eword-decode-unstructured-field-body
 406                (list string must-unfold))
 407   (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
 408     (ew-crlf-to-lf (ew-crlf-unfold decoded))))
 409
 410 (defun eword-decode-and-unfold-unstructured-field (string)
 411   "Decode and unfold STRING as unstructured field body.
 412 It decodes non us-ascii characters in FULL-NAME encoded as
 413 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 414 characters are regarded as variable `default-mime-charset'.
 415
 416 If an encoded-word is broken or your emacs implementation can not
 417 decode the charset included in it, it is not decoded."
 418   (rotate-memo args-eword-decode-and-unfold-unstructured-field
 419                (list string))
 420   (let ((decoded (ew-decode-field "" (ew-lf-crlf-to-crlf string))))
 421     (ew-crlf-to-lf (ew-crlf-unfold decoded))))
 422
 423
 424 ;;; @ for region
 425 ;;;
 426
 427 (defun eword-decode-region (start end &optional unfolding must-unfold
 428                                                 code-conversion)
 429   "Decode MIME encoded-words in region between START and END.
 430
 431 If UNFOLDING is not nil, it unfolds before decoding.
 432
 433 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 434 if there are in decoded encoded-words (generated by bad manner MUA
 435 such as a version of Net$cape).
 436
 437 If CODE-CONVERSION is nil, it decodes only encoded-words.  If it is
 438 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
 439 Otherwise it decodes non-ASCII bit patterns as the
 440 default-mime-charset."
 441   (interactive "*r")
 442   (rotate-memo args-eword-decode-region
 443                (list start end (buffer-substring start end) unfolding must-unfold code-conversion))
 444   (save-excursion
 445     (save-restriction
 446       (narrow-to-region start end)
 447       (if unfolding
 448           (eword-decode-unfold)
 449         )
 450       (let ((str (eword-decode-unstructured
 451                    (buffer-substring (point-min) (point-max))
 452                    code-conversion
 453                    must-unfold)))
 454         (delete-region (point-min) (point-max))
 455         (insert str)))))
 456
 457
 458 ;;; @ for message header
 459 ;;;
 460
 461 (defcustom eword-decode-ignored-field-list
 462   '(Newsgroups Path Lines Nntp-Posting-Host Received Message-Id Date)
 463   "*List of field-names to be ignored when decoding.
 464 Each field name must be symbol."
 465   :group 'eword-decode
 466   :type '(repeat symbol))
 467
 468 (defcustom eword-decode-structured-field-list
 469   '(Reply-To Resent-Reply-To From Resent-From Sender Resent-Sender
 470              To Resent-To Cc Resent-Cc Bcc Resent-Bcc Dcc
 471              Mail-Followup-To
 472              Mime-Version Content-Type Content-Transfer-Encoding
 473              Content-Disposition User-Agent)
 474   "*List of field-names to decode as structured field.
 475 Each field name must be symbol."
 476   :group 'eword-decode
 477   :type '(repeat symbol))
 478
 479 (defun eword-decode-field-body
 480   (field-body field-name &optional unfolded max-column)
 481   "Decode FIELD-BODY as FIELD-NAME, and return the result.
 482
 483 If UNFOLDED is non-nil, it is assumed that FIELD-BODY is
 484 already unfolded.
 485
 486 If MAX-COLUMN is non-nil, the result is folded with MAX-COLUMN
 487 or `fill-column' if MAX-COLUMN is t.
 488 Otherwise, the result is unfolded.
 489
 490 MIME encoded-word in FIELD-BODY is recognized according to
 491 `eword-decode-ignored-field-list',
 492 `eword-decode-structured-field-list' and FIELD-NAME.
 493
 494 Non MIME encoded-word part in FILED-BODY is decoded with
 495 `default-mime-charset'."
 496   (if (symbolp field-name) (setq field-name (symbol-name field-name)))
 497   (let ((decoded
 498           (if unfolded
 499             (let ((ew-ignore-76bytes-limit t))
 500               (ew-decode-field
 501                field-name (ew-lf-crlf-to-crlf field-body)))
 502             (ew-decode-field
 503              field-name (ew-lf-crlf-to-crlf field-body)))))
 504     (if max-column
 505         (setq decoded (ew-crlf-refold
 506                        decoded
 507                        (1+ (string-width field-name))
 508                        (if (eq max-column t) fill-column max-column)))
 509       (setq decoded (ew-crlf-unfold decoded)))
 510     (ew-crlf-to-lf decoded)))
 511
 512 (defun eword-decode-header (&optional code-conversion separator)
 513   "Decode MIME encoded-words in header fields.
 514 If CODE-CONVERSION is nil, it decodes only encoded-words.  If it is
 515 mime-charset, it decodes non-ASCII bit patterns as the mime-charset.
 516 Otherwise it decodes non-ASCII bit patterns as the
 517 default-mime-charset.
 518 If SEPARATOR is not nil, it is used as header separator."
 519   (interactive "*")
 520   (rotate-memo args-eword-decode-header (list code-conversion))
 521   (unless code-conversion
 522     (message "eword-decode-header is called without code-conversion")
 523     (sit-for 2))
 524   (if (and code-conversion
 525            (not (mime-charset-to-coding-system code-conversion)))
 526       (setq code-conversion default-mime-charset))
 527   (save-excursion
 528     (save-restriction
 529       (std11-narrow-to-header separator)
 530       (rotate-memo args-h-eword-decode-header (buffer-substring (point-min) (point-max)))
 531       (if code-conversion
 532           (let (beg p end field-name field-body decoded)
 533             (goto-char (point-min))
 534             (while (re-search-forward std11-field-head-regexp nil t)
 535               (setq beg (match-beginning 0)
 536                     p (match-end 0)
 537                     field-name (buffer-substring beg (1- p))
 538                     end (std11-field-end)
 539                     field-body (ew-lf-crlf-to-crlf
 540                                 (buffer-substring p end))
 541                     decoded (ew-decode-field
 542                              field-name field-body))
 543               (unless (equal field-body decoded)
 544                 (setq decoded (ew-crlf-refold
 545                                decoded
 546                                (1+ (string-width field-name))
 547                                fill-column)))
 548               (delete-region p end)
 549               (insert (ew-crlf-to-lf decoded))
 550               (add-text-properties beg (min (1+ (point)) (point-max))
 551                                    (list 'original-field-name field-name
 552                                          'original-field-body field-body))
 553               ))
 554         (eword-decode-region (point-min) (point-max) t nil nil)
 555         ))))
 556
 557 (defun eword-decode-unfold ()
 558   (goto-char (point-min))
 559   (let (field beg end)
 560     (while (re-search-forward std11-field-head-regexp nil t)
 561       (setq beg (match-beginning 0)
 562             end (std11-field-end))
 563       (setq field (buffer-substring beg end))
 564       (if (string-match eword-encoded-word-regexp field)
 565           (save-restriction
 566             (narrow-to-region (goto-char beg) end)
 567             (while (re-search-forward "\n\\([ \t]\\)" nil t)
 568               (replace-match (match-string 1))
 569               )
 570             (goto-char (point-max))
 571             ))
 572       )))
 573
 574
 575 ;;; @ encoded-word decoder
 576 ;;;
 577
 578 (defvar eword-decode-encoded-word-error-handler
 579   'eword-decode-encoded-word-default-error-handler)
 580
 581 (defvar eword-warning-face nil
 582   "Face used for invalid encoded-word.")
 583
 584 (defun eword-decode-encoded-word-default-error-handler (word signal)
 585   (and (add-text-properties 0 (length word)
 586                             (and eword-warning-face
 587                                  (list 'face eword-warning-face))
 588                             word)
 589        word))
 590
 591 (defun eword-decode-encoded-word (word &optional must-unfold)
 592   "Decode WORD if it is an encoded-word.
 593
 594 If your emacs implementation can not decode the charset of WORD, it
 595 returns WORD.  Similarly the encoded-word is broken, it returns WORD.
 596
 597 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 598 if there are in decoded encoded-word (generated by bad manner MUA such
 599 as a version of Net$cape)."
 600   (or (if (string-match eword-encoded-word-regexp word)
 601           (let ((charset
 602                  (substring word (match-beginning 1) (match-end 1))
 603                  )
 604                 (encoding
 605                  (upcase
 606                   (substring word (match-beginning 2) (match-end 2))
 607                   ))
 608                 (text
 609                  (substring word (match-beginning 3) (match-end 3))
 610                  ))
 611             (condition-case err
 612                 (eword-decode-encoded-text charset encoding text must-unfold)
 613               (error
 614                (funcall eword-decode-encoded-word-error-handler word err)
 615                ))
 616             ))
 617       word))
 618
 619
 620 ;;; @ encoded-text decoder
 621 ;;;
 622
 623 (defun eword-decode-encoded-text (charset encoding string
 624                                           &optional must-unfold)
 625   "Decode STRING as an encoded-text.
 626
 627 If your emacs implementation can not decode CHARSET, it returns nil.
 628
 629 If ENCODING is not \"B\" or \"Q\", it occurs error.
 630 So you should write error-handling code if you don't want break by errors.
 631
 632 If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even
 633 if there are in decoded encoded-text (generated by bad manner MUA such
 634 as a version of Net$cape)."
 635   (let ((cs (mime-charset-to-coding-system charset)))
 636     (if cs
 637         (let ((dest (encoded-text-decode-string string encoding)))
 638           (when dest
 639             (setq dest (decode-mime-charset-string dest charset))
 640             (if must-unfold
 641                 (mapconcat (function
 642                             (lambda (chr)
 643                               (cond ((eq chr ?\n) "")
 644                                     ((eq chr ?\t) " ")
 645                                     (t (char-to-string chr)))
 646                               ))
 647                            (std11-unfold-string dest)
 648                            "")
 649               dest))))))
 650
 651
 652 ;;; @ lexical analyze
 653 ;;;
 654
 655 (defvar eword-lexical-analyze-cache nil)
 656 (defvar eword-lexical-analyze-cache-max 299
 657   "*Max position of eword-lexical-analyze-cache.
 658 It is max size of eword-lexical-analyze-cache - 1.")
 659
 660 (defcustom eword-lexical-analyzers
 661   '(eword-analyze-quoted-string
 662     eword-analyze-domain-literal
 663     eword-analyze-comment
 664     eword-analyze-spaces
 665     eword-analyze-special
 666     eword-analyze-encoded-word
 667     eword-analyze-atom)
 668   "*List of functions to return result of lexical analyze.
 669 Each function must have two arguments: STRING and MUST-UNFOLD.
 670 STRING is the target string to be analyzed.
 671 If MUST-UNFOLD is not nil, each function must unfold and eliminate
 672 bare-CR and bare-LF from the result even if they are included in
 673 content of the encoded-word.
 674 Each function must return nil if it can not analyze STRING as its
 675 format.
 676
 677 Previous function is preferred to next function.  If a function
 678 returns nil, next function is used.  Otherwise the return value will
 679 be the result."
 680   :group 'eword-decode
 681   :type '(repeat function))
 682
 683 (defun eword-analyze-quoted-string (string &optional must-unfold)
 684   (let ((p (std11-check-enclosure string ?\" ?\")))
 685     (if p
 686         (cons (cons 'quoted-string
 687                     (if eword-decode-quoted-encoded-word
 688                         (eword-decode-quoted-string
 689                           (substring string 0 p)
 690                           default-mime-charset)
 691                       (std11-wrap-as-quoted-string
 692                        (decode-mime-charset-string
 693                         (std11-strip-quoted-pair (substring string 1 (1- p)))
 694                         default-mime-charset))))
 695               (substring string p)))
 696     ))
 697
 698 (defun eword-analyze-domain-literal (string &optional must-unfold)
 699   (std11-analyze-domain-literal string))
 700
 701 (defun eword-analyze-comment (string &optional must-unfold)
 702   (let ((len (length string)))
 703     (if (and (< 0 len) (eq (aref string 0) ?\())
 704         (let ((p 0))
 705           (while (and p (< p len) (eq (aref string p) ?\())
 706             (setq p (std11-check-enclosure string ?\( ?\) t p)))
 707           (setq p (or p len))
 708           (cons (cons 'comment
 709                       (eword-decode-comment
 710                         (std11-unfold-string (substring string 0 p))
 711                         default-mime-charset))
 712                 (substring string p)))
 713       nil)))
 714
 715 (defun eword-analyze-spaces (string &optional must-unfold)
 716   (std11-analyze-spaces string))
 717
 718 (defun eword-analyze-special (string &optional must-unfold)
 719   (std11-analyze-special string))
 720
 721 (defun eword-analyze-encoded-word (string &optional must-unfold)
 722   (let ((decoded (eword-decode-first-encoded-words
 723                   string
 724                   eword-encoded-word-in-phrase-regexp
 725                   eword-after-encoded-word-in-phrase-regexp
 726                   must-unfold)))
 727     (if decoded
 728         (let ((s (car decoded)))
 729           (while (or (string-match std11-atom-regexp s)
 730                      (string-match std11-spaces-regexp s))
 731             (setq s (substring s (match-end 0))))
 732           (if (= (length s) 0)
 733               (cons (cons 'atom (car decoded)) (cdr decoded))
 734             (cons (cons 'quoted-string
 735                         (std11-wrap-as-quoted-string (car decoded)))
 736                   (cdr decoded)))))))
 737
 738 (defun eword-analyze-atom (string &optional must-unfold)
 739   (if (string-match std11-atom-regexp (string-as-unibyte string))
 740       (let ((end (match-end 0)))
 741         (if (and eword-decode-sticked-encoded-word
 742                  (string-match eword-encoded-word-in-phrase-regexp
 743                                (substring string 0 end))
 744                  (< 0 (match-beginning 0)))
 745             (setq end (match-beginning 0)))
 746         (cons (cons 'atom (decode-mime-charset-string
 747                            (substring string 0 end)
 748                            default-mime-charset))
 749               (substring string end)
 750               ))))
 751
 752 (defun eword-lexical-analyze-internal (string must-unfold)
 753   (let ((last 'eword-analyze-spaces)
 754         dest ret)
 755     (while (not (string-equal string ""))
 756       (setq ret
 757             (let ((rest eword-lexical-analyzers)
 758                   func r)
 759               (while (and (setq func (car rest))
 760                           (or
 761                            (and
 762                             (not eword-decode-sticked-encoded-word)
 763                             (not (eq last 'eword-analyze-spaces))
 764                             (eq func 'eword-analyze-encoded-word))
 765                            (null (setq r (funcall func string must-unfold))))
 766                           )
 767                 (setq rest (cdr rest)))
 768               (setq last func)
 769               (or r `((error . ,string) . ""))
 770               ))
 771       (setq dest (cons (car ret) dest))
 772       (setq string (cdr ret))
 773       )
 774     (nreverse dest)
 775     ))
 776
 777 (defun eword-lexical-analyze (string &optional must-unfold)
 778   "Return lexical analyzed list corresponding STRING.
 779 It is like std11-lexical-analyze, but it decodes non us-ascii
 780 characters encoded as encoded-words or invalid \"raw\" format.
 781 \"Raw\" non us-ascii characters are regarded as variable
 782 `default-mime-charset'."
 783   (let* ((str (copy-sequence string))
 784          (key (cons str (cons default-mime-charset must-unfold)))
 785          ret)
 786     (set-text-properties 0 (length str) nil str)
 787     (if (setq ret (assoc key eword-lexical-analyze-cache))
 788         (cdr ret)
 789       (setq ret (eword-lexical-analyze-internal str must-unfold))
 790       (setq eword-lexical-analyze-cache
 791             (cons (cons key ret)
 792                   (last eword-lexical-analyze-cache
 793                         eword-lexical-analyze-cache-max)))
 794       ret)))
 795
 796 (defun eword-decode-token (token)
 797   (cdr token))
 798
 799 (defun eword-extract-address-components (string)
 800   "Extract full name and canonical address from STRING.
 801 Returns a list of the form (FULL-NAME CANONICAL-ADDRESS).
 802 If no name can be extracted, FULL-NAME will be nil.
 803 It decodes non us-ascii characters in FULL-NAME encoded as
 804 encoded-words or invalid \"raw\" string.  \"Raw\" non us-ascii
 805 characters are regarded as variable `default-mime-charset'."
 806   (rotate-memo args-eword-extract-address-components (list string))
 807   (let* ((structure (car (std11-parse-address
 808                           (eword-lexical-analyze
 809                            (std11-unfold-string string) 'must-unfold))))
 810          (phrase  (std11-full-name-string structure))
 811          (address (std11-address-string structure))
 812          )
 813     (list phrase address)
 814     ))
 815
 816
 817 ;;; @ end
 818 ;;;
 819
 820 (provide 'eword-decode)
 821
 822 ;;; eword-decode.el ends here