From 44c6ba5bde5efca67e9e10efec973058674c2c88 Mon Sep 17 00:00:00 2001 From: yamaoka Date: Sun, 25 Dec 2005 10:45:52 +0000 Subject: [PATCH] * eword-decode.el: Change the way to decode successive encoded-words: decode B- or Q-encoding in each encoded-word, concatenate them, and decode it as charset. See the following threads for more information: http://news.gmane.org/group/gmane.emacs.pretest.bugs/thread=9541 http://news.gmane.org/group/gmane.emacs.gnus.general/thread=61176 (eword-decode-allow-incomplete-encoded-text): New variable. (eword-decode-encoded-words): New function. (eword-decode-string): Use it. (eword-decode-region): Use it. (eword-analyze-encoded-word): Use it. (eword-decode-encoded-word): Abolish. (eword-decode-encoded-text): Abolish. (eword-decode-encoded-word-error-handler): Abolish. (eword-warning-face): Abolish. (eword-decode-encoded-word-default-error-handler): Abolish. --- ChangeLog | 20 ++++ eword-decode.el | 273 ++++++++++++++++++++++++++----------------------------- 2 files changed, 151 insertions(+), 142 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7bcbe60..34cbe2e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +2005-12-25 Katsumi Yamaoka + + * eword-decode.el: Change the way to decode successive + encoded-words: decode B- or Q-encoding in each encoded-word, + concatenate them, and decode it as charset. See the following + threads for more information: + http://news.gmane.org/group/gmane.emacs.pretest.bugs/thread=9541 + http://news.gmane.org/group/gmane.emacs.gnus.general/thread=61176 + (eword-decode-allow-incomplete-encoded-text): New variable. + (eword-decode-encoded-words): New function. + (eword-decode-string): Use it. + (eword-decode-region): Use it. + (eword-analyze-encoded-word): Use it. + (eword-decode-encoded-word): Abolish. + (eword-decode-encoded-text): Abolish. + (eword-decode-encoded-word-error-handler): Abolish. + (eword-warning-face): Abolish. + (eword-decode-encoded-word-default-error-handler): Abolish. + + 2005-12-25 MORIOKA Tomohiko * FLIM: Version 1.14.8 (Shij-Dò) released.-A diff --git a/eword-decode.el b/eword-decode.el index fe46018..ff38088 100644 --- a/eword-decode.el +++ b/eword-decode.el @@ -1,6 +1,7 @@ ;;; eword-decode.el --- RFC 2047 based encoded-word decoder for GNU Emacs -;; Copyright (C) 1995,96,97,98,99,2000,01,03,04 Free Software Foundation, Inc. +;; Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2003, 2004, +;; 2005 Free Software Foundation, Inc. ;; Author: ENAMI Tsugutomo ;; MORIOKA Tomohiko @@ -88,30 +89,28 @@ If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even if there are in decoded encoded-words (generated by bad manner MUA such as a version of Net$cape)." (setq string (std11-unfold-string string)) - (let ((dest "")(ew nil) - beg end) - (while (and (string-match eword-encoded-word-regexp string) - (setq beg (match-beginning 0) - end (match-end 0)) - ) - (if (> beg 0) - (if (not - (and (eq ew t) - (string-match "^[ \t]+$" (substring string 0 beg)) - )) - (setq dest (concat dest (substring string 0 beg))) - ) - ) - (setq dest - (concat dest - (eword-decode-encoded-word - (substring string beg end) must-unfold) - )) - (setq string (substring string end)) - (setq ew t) - ) - (concat dest string) - )) + (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)")) + (next 0) + match start words) + (while (setq match (string-match regexp string next)) + (setq start (match-beginning 1) + words nil) + (while match + (setq next (match-end 0)) + (push (list (match-string 2 string) ;; charset + (match-string 3 string) ;; language + (match-string 4 string) ;; encoding + (match-string 5 string) ;; encoded-text + (match-string 1 string)) ;; encoded-word + words) + (setq match (and (string-match regexp string next) + (= next (match-beginning 0))))) + (setq words (eword-decode-encoded-words (nreverse words) must-unfold) + string (concat (substring string 0 start) + words + (substring string next)) + next (+ start (length words))))) + string) (defun eword-decode-structured-field-body (string &optional start-column max-column @@ -223,24 +222,25 @@ such as a version of Net$cape)." (save-restriction (narrow-to-region start end) (if unfolding - (eword-decode-unfold) - ) + (eword-decode-unfold)) (goto-char (point-min)) - (while (re-search-forward (concat "\\(" eword-encoded-word-regexp "\\)" - "\\(\n?[ \t]\\)+" - "\\(" eword-encoded-word-regexp "\\)") - nil t) - (replace-match "\\1\\7") - (goto-char (point-min)) - ) - (while (re-search-forward eword-encoded-word-regexp nil t) - (insert (eword-decode-encoded-word - (prog1 - (buffer-substring (match-beginning 0) (match-end 0)) - (delete-region (match-beginning 0) (match-end 0)) - ) must-unfold)) - ) - ))) + (let ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)")) + match words) + (while (setq match (re-search-forward regexp nil t)) + (setq start (match-beginning 1) + words nil) + (while match + (goto-char (setq end (match-end 0))) + (push (list (match-string 2) ;; charset + (match-string 3) ;; language + (match-string 4) ;; encoding + (match-string 5) ;; encoded-text + (match-string 1)) ;; encoded-word + words) + (setq match (looking-at regexp))) + (delete-region start end) + (insert + (eword-decode-encoded-words (nreverse words) must-unfold))))))) (defun eword-decode-unfold () (goto-char (point-min)) @@ -511,86 +511,82 @@ If SEPARATOR is not nil, it is used as header separator." (make-obsolete 'eword-decode-header 'mime-decode-header-in-buffer) -;;; @ encoded-word decoder -;;; - -(defvar eword-decode-encoded-word-error-handler - 'eword-decode-encoded-word-default-error-handler) - -(defvar eword-warning-face nil - "Face used for invalid encoded-word.") - -(defun eword-decode-encoded-word-default-error-handler (word signal) - (and (add-text-properties 0 (length word) - (and eword-warning-face - (list 'face eword-warning-face)) - word) - word)) - -(defun eword-decode-encoded-word (word &optional must-unfold) - "Decode WORD as an encoded-word. - -If charset is unknown or unsupported, return WORD. -If encoding is unknown, or some error occurs while decoding, -`eword-decode-encoded-word-error-handler' is called with WORD and an -error condition. - -If MUST-UNFOLD is non-nil, unfold decoded WORD." - (or (and (string-match eword-encoded-word-regexp word) - (condition-case err - (eword-decode-encoded-text - ;; charset - (substring word (match-beginning 1)(match-end 1)) - ;; language - (when (match-beginning 2) - (intern - (downcase - (substring word (1+ (match-beginning 2))(match-end 2))))) - ;; encoding - (upcase - (substring word (match-beginning 3)(match-end 3))) - ;; encoded-text - (substring word (match-beginning 4)(match-end 4)) - must-unfold) - (error - (funcall eword-decode-encoded-word-error-handler word err)))) - word)) - - -;;; @ encoded-text decoder +;;; @ encoded-words decoder ;;; -(defun eword-decode-encoded-text (charset language encoding string - &optional must-unfold) - "Decode STRING as an encoded-text. - -If your emacs implementation can not decode CHARSET, it returns nil. +(defvar eword-decode-allow-incomplete-encoded-text t + "*Non-nil means allow incomplete encoded-text in successive encoded-words. +Dividing of encoded-text in the place other than character boundaries +violates RFC2047 section 5, while we have a capability to decode it. +If it is non-nil, the decoder will decode B- or Q-encoding in each +encoded-word, concatenate them, and decode it by charset. Otherwise, +the decoder will fully decode each encoded-word before concatenating +them.") -If LANGUAGE is non-nil, it is put to `mime-language' text-property. -If ENCODING is not \"B\" or \"Q\", it occurs error. -So you should write error-handling code if you don't want break by errors. +(defun eword-decode-encoded-words (words must-unfold) + "Decode successive encoded-words in WORDS and return a decoded string. +Each element of WORDS looks like (CHARSET LANGUAGE ENCODING ENCODED-TEXT +ENCODED-WORD). If MUST-UNFOLD is non-nil, it unfolds and eliminates line-breaks even -if there are in decoded encoded-text (generated by bad manner MUA such -as a version of Net$cape)." - (when (mime-charset-to-coding-system charset) - (let ((dest (encoded-text-decode-string string encoding))) - (when dest - (setq dest (decode-mime-charset-string dest charset)) - (when must-unfold - (setq dest - (mapconcat - (function - (lambda (chr) - (cond ((eq chr ?\n) "") - ((eq chr ?\r) "") - ((eq chr ?\t) " ") - (t (char-to-string chr))))) - (std11-unfold-string dest) ""))) - (when language - (put-text-property 0 (length dest) 'mime-language language dest)) - dest)))) - +if there are in decoded encoded-words (generated by bad manner MUA +such as a version of Net$cape)." + (let (word language charset encoding text rest) + (while words + (setq word (pop words) + language (nth 1 word)) + (if (and (or (mime-charset-to-coding-system (setq charset (car word))) + (progn + (message "Unknown charset: %s" charset) + nil)) + (cond ((member (setq encoding (nth 2 word)) '("B" "Q")) + t) + ((member encoding '("b" "q")) + (setq encoding (upcase encoding))) + (t + (message "Invalid encoding: %s" encoding) + nil)) + (condition-case err + (setq text + (encoded-text-decode-string (nth 3 word) encoding)) + (error + (message "%s" (error-message-string err)) + nil))) + (if (and eword-decode-allow-incomplete-encoded-text + rest + (caaar rest) + (string-equal (downcase charset) (downcase (caaar rest))) + (equal language (cdaar rest))) + ;; Concatenate text of which the charset is the same. + (setcdr (car rest) (concat (cdar rest) text)) + (push (cons (cons charset language) text) rest)) + ;; Don't decode encoded-word. + (push (cons (cons nil language) (nth 4 word)) rest))) + (while rest + (setq word (or (and (setq charset (caaar rest)) + (condition-case err + (decode-mime-charset-string (cdar rest) charset) + (error + (message "%s" (error-message-string err)) + nil))) + (concat (when (cdr rest) " ") + (cdar rest) + (when (and words + (not (eq (string-to-char words) ? ))) + " ")))) + (when must-unfold + (setq word (mapconcat (lambda (chr) + (cond ((eq chr ?\n) "") + ((eq chr ?\r) "") + ((eq chr ?\t) " ") + (t (char-to-string chr)))) + (std11-unfold-string word) + ""))) + (when (setq language (cdaar rest)) + (put-text-property 0 (length word) 'mime-language language word)) + (setq words (concat word words) + rest (cdr rest))) + words)) ;;; @ lexical analyze ;;; @@ -710,31 +706,24 @@ be the result.") (std11-analyze-special string start)) (defun eword-analyze-encoded-word (string start &optional must-unfold) - (if (and (string-match eword-encoded-word-regexp string start) - (= (match-beginning 0) start)) - (let ((end (match-end 0)) - (dest (eword-decode-encoded-word (match-string 0 string) - must-unfold)) - ) - ;;(setq string (substring string end)) - (setq start end) - (while (and (string-match (eval-when-compile - (concat "[ \t\n]*\\(" - eword-encoded-word-regexp - "\\)")) - string start) - (= (match-beginning 0) start)) - (setq end (match-end 0)) - (setq dest - (concat dest - (eword-decode-encoded-word (match-string 1 string) - must-unfold)) - ;;string (substring string end)) - start end) - ) - (cons (cons 'atom dest) ;;string) - end) - ))) + (let* ((regexp (concat "[\n\t ]*\\(" eword-encoded-word-regexp "\\)")) + (match (and (string-match regexp string start) + (= start (match-beginning 0)))) + next words) + (while match + (setq next (match-end 0)) + (push (list (match-string 2 string) ;; charset + (match-string 3 string) ;; language + (match-string 4 string) ;; encoding + (match-string 5 string) ;; encoded-text + (match-string 1 string)) ;; encoded-word + words) + (setq match (and (string-match regexp string next) + (= next (match-beginning 0))))) + (when words + (cons (cons 'atom (eword-decode-encoded-words (nreverse words) + must-unfold)) + next)))) (defun eword-analyze-atom (string start &optional must-unfold) (if (and (string-match std11-atom-regexp string start) -- 1.7.10.4