X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Frfc1843.el;h=05832cc03ea60f44db6c4edae1057b91986bdb8f;hb=97eab2c2c08b5eb94dc620856f9376ea266897d0;hp=e184e0b209dc94002b430c25e87bb981871fc4e9;hpb=626f073c51a02196a9e89dbc5e69de355debfaca;p=elisp%2Fgnus.git- diff --git a/lisp/rfc1843.el b/lisp/rfc1843.el index e184e0b..05832cc 100644 --- a/lisp/rfc1843.el +++ b/lisp/rfc1843.el @@ -1,13 +1,10 @@ ;;; rfc1843.el --- HZ (rfc1843) decoding -;; Copyright (c) 1998 by Shenghuo Zhu +;; Copyright (c) 1998, 1999, 2000 Free Software Foundation, Inc. ;; Author: Shenghuo Zhu -;; $Revision: 1.1.2.2 $ -;; Keywords: news HZ -;; Time-stamp: - -;; This file is not part of GNU Emacs, but the same permissions -;; apply. +;; Keywords: news HZ HZ+ mail i18n + +;; This file is a part of GNU Emacs. ;; GNU Emacs is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published @@ -35,25 +32,26 @@ ;;; Code: +(eval-when-compile (require 'cl)) (require 'mm-util) -(defvar rfc1843-word-regexp - "~\\({\\([\041-\167][\041-\176]\\| \\)+\\(~}\\|$\\)") +(defvar rfc1843-word-regexp + "~\\({\\([\041-\167][\041-\176]\\| \\)+\\)\\(~}\\|$\\)") (defvar rfc1843-word-regexp-strictly - "~\\({\\([\041-\167][\041-\176]\\)+\\(~}\\|$\\)") + "~\\({\\([\041-\167][\041-\176]\\)+\\)\\(~}\\|$\\)") -(defvar rfc1843-hzp-word-regexp +(defvar rfc1843-hzp-word-regexp "~\\({\\([\041-\167][\041-\176]\\| \\)+\\|\ [<>]\\([\041-\175][\041-\176]\\| \\)+\\)\\(~}\\|$\\)") (defvar rfc1843-hzp-word-regexp-strictly - "~\\({\\([\041-\167][\041-\176]\\)+\\|\ + "~\\({\\([\041-\167][\041-\176]\\)+\\|\ [<>]\\([\041-\175][\041-\176]\\)+\\)\\(~}\\|$\\)") (defcustom rfc1843-decode-loosely nil "Loosely check HZ encoding if non-nil. -When it is set non-nil, only buffers or strings with strictly +When it is set non-nil, only buffers or strings with strictly HZ-encoded are decoded." :type 'boolean :group 'gnus) @@ -62,7 +60,7 @@ HZ-encoded are decoded." "HZ+ decoding support if non-nil. HZ+ specification (also known as HZP) is to provide a standardized 7-bit representation of mixed Big5, GB, and ASCII text for convenient -e-mail transmission, news posting, etc. +e-mail transmission, news posting, etc. The document of HZ+ 0.78 specification can be found at ftp://ftp.math.psu.edu/pub/simpson/chinese/hzp/hzp.doc" :type 'boolean @@ -79,32 +77,35 @@ ftp://ftp.math.psu.edu/pub/simpson/chinese/hzp/hzp.doc" (let (str firstc) (save-excursion (goto-char from) - (if (or rfc1843-decode-loosely - (re-search-forward (if rfc1843-decode-hzp - rfc1843-hzp-word-regexp-strictly + (if (or rfc1843-decode-loosely + (re-search-forward (if rfc1843-decode-hzp + rfc1843-hzp-word-regexp-strictly rfc1843-word-regexp-strictly) to t)) - (save-restriction + (save-restriction (narrow-to-region from to) (goto-char (point-min)) (while (re-search-forward (if rfc1843-decode-hzp rfc1843-hzp-word-regexp - rfc1843-word-regexp) (point-max) t) - (setq str (match-string 1)) - (setq firstc (aref str 0)) - (insert (mm-decode-coding-string + rfc1843-word-regexp) (point-max) t) + ;;; Text with extents may cause XEmacs crash + (setq str (buffer-substring-no-properties + (match-beginning 1) + (match-end 1))) + (setq firstc (aref str 0)) + (insert (mm-decode-coding-string (rfc1843-decode - (prog1 + (prog1 (substring str 1) (delete-region (match-beginning 0) (match-end 0))) firstc) (if (eq firstc ?{) 'cn-gb-2312 'cn-big5)))) (goto-char (point-min)) - (while (search-forward "~" (point-max) t) - (cond ((eq (following-char) ?\n) + (while (search-forward "~" (point-max) t) + (cond ((eq (char-after) ?\n) (delete-char -1) (delete-char 1)) - ((eq (following-char) ?~) - (delete-char 1))))))))) + ((eq (char-after) ?~) + (delete-char 1))))))))) (defun rfc1843-decode-string (string) "Decode HZ STRING and return the results." @@ -118,7 +119,7 @@ ftp://ftp.math.psu.edu/pub/simpson/chinese/hzp/hzp.doc" (buffer-string)))) (defun rfc1843-decode (word &optional firstc) - "Decode HZ WORD and return it" + "Decode HZ WORD and return it." (let ((i -1) (s (substring word 0)) v) (if (or (not firstc) (eq firstc ?{)) (while (< (incf i) (length s)) @@ -133,38 +134,50 @@ ftp://ftp.math.psu.edu/pub/simpson/chinese/hzp/hzp.doc" s)) (defun rfc1843-decode-article-body () - "Decode HZ encoded text in the article body." - (if (string-match (concat "\\<\\(" rfc1843-newsgroups-regexp "\\)\\>") - gnus-newsgroup-name) - (save-excursion - (save-restriction - (message-narrow-to-head) - (goto-char (point-max)) - (widen) - (rfc1843-decode-region (point) (point-max)))))) + "Decode HZ encoded text in the article body." + (if (string-match (concat "\\<\\(" rfc1843-newsgroups-regexp "\\)\\>") + (or gnus-newsgroup-name "")) + (save-excursion + (save-restriction + (message-narrow-to-head) + (let* ((inhibit-point-motion-hooks t) + (case-fold-search t) + (ct (message-fetch-field "Content-Type" t)) + (ctl (and ct (ignore-errors + (mail-header-parse-content-type ct))))) + (if (and ctl (not (string-match "/" (car ctl)))) + (setq ctl nil)) + (goto-char (point-max)) + (widen) + (forward-line 1) + (narrow-to-region (point) (point-max)) + (when (or (not ctl) + (equal (car ctl) "text/plain")) + (rfc1843-decode-region (point) (point-max)))))))) (defvar rfc1843-old-gnus-decode-header-function nil) (defvar gnus-decode-header-methods) (defvar gnus-decode-encoded-word-methods) +(defvar gnus-decode-encoded-word-function) (defun rfc1843-gnus-setup () "Setup HZ decoding for Gnus." (require 'gnus-art) (require 'gnus-sum) (add-hook 'gnus-article-decode-hook 'rfc1843-decode-article-body t) - (setq gnus-decode-encoded-word-function + (setq gnus-decode-encoded-word-function 'gnus-multi-decode-encoded-word-string - gnus-decode-header-function + gnus-decode-header-function 'gnus-multi-decode-header - gnus-decode-encoded-word-methods - (nconc gnus-decode-encoded-word-methods - (list - (cons (concat "\\<\\(" rfc1843-newsgroups-regexp "\\)\\>") + gnus-decode-encoded-word-methods + (nconc gnus-decode-encoded-word-methods + (list + (cons (concat "\\<\\(" rfc1843-newsgroups-regexp "\\)\\>") 'rfc1843-decode-string))) - gnus-decode-header-methods - (nconc gnus-decode-header-methods - (list - (cons (concat "\\<\\(" rfc1843-newsgroups-regexp "\\)\\>") + gnus-decode-header-methods + (nconc gnus-decode-header-methods + (list + (cons (concat "\\<\\(" rfc1843-newsgroups-regexp "\\)\\>") 'rfc1843-decode-region))))) (provide 'rfc1843)