X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Fnnrss.el;h=211ca3ec07a308f3ff57c171736e7d25feec4848;hb=1c657e802036c760bb3e59477f1410f6ae043309;hp=de4a5a96c718b6fa5b930aee5f16cf70456af0c6;hpb=818235ac7f33073995ec59721791f461dce5ea34;p=elisp%2Fgnus.git- diff --git a/lisp/nnrss.el b/lisp/nnrss.el index de4a5a9..211ca3e 100644 --- a/lisp/nnrss.el +++ b/lisp/nnrss.el @@ -1,5 +1,5 @@ ;;; nnrss.el --- interfacing with RSS -;; Copyright (C) 2001, 2002, 2003, 2004 Free Software Foundation, Inc. +;; Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. ;; Author: Shenghuo Zhu ;; Keywords: RSS @@ -31,16 +31,24 @@ (require 'nnoo) (require 'nnmail) (require 'message) -(require 'mm-util) +(require 'nnheader) (require 'gnus-util) (require 'time-date) -(require 'rfc2231) +(require 'mime-parse) (require 'mm-url) +(require 'eword-encode) +(require 'mime-edit) +(require 'mime-view) (eval-when-compile (ignore-errors (require 'xml))) (eval '(require 'xml)) +;; Reload mm-util emulating macros for compiling. +(eval-when-compile + (let ((features (delq 'mm-util (copy-sequence features)))) + (load "nnheader" nil t))) + (nnoo-declare nnrss) (defvoo nnrss-directory (nnheader-concat gnus-directory "rss/") @@ -78,11 +86,23 @@ The arguments are (ENTRY GROUP ARTICLE). ENTRY is the record of the current headline. GROUP is the group name. ARTICLE is the article number of the current headline.") +(defvar nnrss-file-coding-system nnheader-auto-save-coding-system + "Coding system used when reading and writing files.") + (nnoo-define-basics nnrss) ;;; Interface functions +(defsubst nnrss-format-string (string) + (gnus-replace-in-string string " *\n *" " ")) + +(defun nnrss-decode-group-name (group) + (if (and group (mm-coding-system-p 'utf-8)) + (setq group (mm-decode-coding-string group 'utf-8)) + group)) + (deffoo nnrss-retrieve-headers (articles &optional group server fetch-old) + (setq group (nnrss-decode-group-name group)) (nnrss-possibly-change-group group server) (let (e) (save-excursion @@ -91,21 +111,26 @@ ARTICLE is the article number of the current headline.") (dolist (article articles) (if (setq e (assq article nnrss-group-data)) (insert (number-to-string (car e)) "\t" ;; number - (if (nth 3 e) - (nnrss-format-string (nth 3 e)) "") - "\t" ;; subject - (if (nth 4 e) - (nnrss-format-string (nth 4 e)) - "(nobody)") - "\t" ;;from + ;; subject + (or (nth 3 e) "") + "\t" + ;; from + (or (nth 4 e) "(nobody)") + "\t" + ;; date (or (nth 5 e) "") - "\t" ;; date + "\t" + ;; id (format "<%d@%s.nnrss>" (car e) group) - "\t" ;; id - "\t" ;; refs - "-1" "\t" ;; chars - "-1" "\t" ;; lines - "" "\t" ;; Xref + "\t" + ;; refs + "\t" + ;; chars + "-1" "\t" + ;; lines + "-1" "\t" + ;; Xref + "" "\t" (if (and (nth 6 e) (memq nnrss-description-field nnmail-extra-headers)) @@ -126,6 +151,7 @@ ARTICLE is the article number of the current headline.") 'nov) (deffoo nnrss-request-group (group &optional server dont-check) + (setq group (nnrss-decode-group-name group)) (nnrss-possibly-change-group group server) (if dont-check t @@ -139,54 +165,81 @@ ARTICLE is the article number of the current headline.") (deffoo nnrss-close-group (group &optional server) t) +(defun nnrss-body-presentation-method () + "Return a body presentation method used with MIME-View. +The return value will be `html' or `text'." + (in-calist-package 'mime-view) + (let ((default (cdr (assq 'body-presentation-method + (car (ctree-find-calist + mime-preview-condition + '((type . text) (subtype . t))))))) + (html (cdr (assq 'body-presentation-method + (car (ctree-find-calist + mime-preview-condition + '((type . text) (subtype . html)))))))) + (if (or (not default) + (not html) + (eq default html)) + 'text + 'html))) + (deffoo nnrss-request-article (article &optional group server buffer) + (setq group (nnrss-decode-group-name group)) + (when (stringp article) + (setq article (if (string-match "\\`<\\([0-9]+\\)@" article) + (string-to-number (match-string 1 article)) + 0))) (nnrss-possibly-change-group group server) (let ((e (assq article nnrss-group-data)) - (boundary "=-=-=-=-=-=-=-=-=-") (nntp-server-buffer (or buffer nntp-server-buffer)) post err) (when e (catch 'error (with-current-buffer nntp-server-buffer (erase-buffer) - (goto-char (point-min)) - (insert "Mime-Version: 1.0\nContent-Type: multipart/alternative; boundary=\"" boundary "\"\n") (if group - (insert "Newsgroups: " group "\n")) + (mm-with-unibyte-current-buffer + (insert "Newsgroups: " + (if (mm-coding-system-p 'utf-8) + (mm-encode-coding-string group 'utf-8) + group) + "\n"))) (if (nth 3 e) - (insert "Subject: " (nnrss-format-string (nth 3 e)) "\n")) + (insert "Subject: " (nth 3 e) "\n")) (if (nth 4 e) - (insert "From: " (nnrss-format-string (nth 4 e)) "\n")) + (insert "From: " (nth 4 e) "\n")) (if (nth 5 e) (insert "Date: " (nnrss-format-string (nth 5 e)) "\n")) - (insert "Message-ID: " (format "<%d@%s.nnrss>" (car e) group) "\n") + (insert (format "Message-ID: <%d@%s.nnrss>\n" + (car e) + (gnus-replace-in-string group "[\t\n ]+" "_"))) (insert "\n") (let ((text (if (nth 6 e) - (nnrss-string-as-multibyte (nth 6 e)))) - (link (if (nth 2 e) - (nth 2 e)))) - (insert "\n\n--" boundary "\nContent-Type: text/plain\n\n") - (let ((point (point))) - (when text - (insert text) - (goto-char point) - (while (re-search-forward "\n" nil t) - (replace-match " ")) - (goto-char (point-max)) - (insert "\n\n")) - (when link - (insert link))) - (insert "\n\n--" boundary "\nContent-Type: text/html\n\n") - (let ((point (point))) - (when text - (insert "\n" text "\n") - (goto-char point) - (while (re-search-forward "\n" nil t) - (replace-match " ")) - (goto-char (point-max)) - (insert "\n\n")) - (when link - (insert "

link

\n")))) + (mapconcat 'identity + (delete "" (split-string (nth 6 e) "\n+")) + " "))) + (link (nth 2 e)) + (mail-header-separator "") + mime-edit-insert-user-agent-field) + (when (or text link) + (if (eq 'html (nnrss-body-presentation-method)) + (progn + (mime-edit-insert-text "html") + (insert "\n") + (when text + (insert text "\n")) + (when link + (insert "

link

\n")) + (insert "\n")) + (mime-edit-insert-text "plain") + (if text + (progn + (insert text "\n") + (when link + (insert "\n" link "\n"))) + (when link + (insert link "\n")))) + (mime-edit-translate-body))) (when nnrss-content-function (funcall nnrss-content-function e group article))))) (cond @@ -211,6 +264,7 @@ ARTICLE is the article number of the current headline.") (deffoo nnrss-request-expire-articles (articles group &optional server force) + (setq group (nnrss-decode-group-name group)) (nnrss-possibly-change-group group server) (let (e days not-expirable changed) (dolist (art articles) @@ -228,7 +282,13 @@ ARTICLE is the article number of the current headline.") not-expirable)) (deffoo nnrss-request-delete-group (group &optional force server) + (setq group (nnrss-decode-group-name group)) (nnrss-possibly-change-group group server) + (let (elem) + ;; There may be two or more entries in `nnrss-group-alist' since + ;; this function didn't delete them formerly. + (while (setq elem (assoc group nnrss-group-alist)) + (setq nnrss-group-alist (delq elem nnrss-group-alist)))) (setq nnrss-server-data (delq (assoc group nnrss-server-data) nnrss-server-data)) (nnrss-save-server-data server) @@ -250,34 +310,62 @@ ARTICLE is the article number of the current headline.") ;;; Internal functions (eval-when-compile (defun xml-rpc-method-call (&rest args))) + +(defun nnrss-get-encoding () + "Return an encoding attribute specified in the current xml contents." + (goto-char (point-min)) + (mm-coding-system-p + (if (re-search-forward + "<\\?[^>]*encoding=\\(?:\"\\([^\">]+\\)\"\\|'\\([^'>]+\\)'\\)" + nil t) + (intern-soft (downcase (or (match-string-no-properties 1) + (match-string-no-properties 2)))) + ;; The default encoding for xml. + 'utf-8))) + (defun nnrss-fetch (url &optional local) "Fetch URL and put it in a the expected Lisp structure." - (with-temp-buffer - ;some CVS versions of url.el need this to close the connection quickly - (let* (xmlform htmlform) + (mm-with-unibyte-buffer + ;;some CVS versions of url.el need this to close the connection quickly + (let (cs xmlform htmlform) ;; bit o' work necessary for w3 pre-cvs and post-cvs (if local (let ((coding-system-for-read 'binary)) (insert-file-contents url)) - (mm-url-insert url)) - -;; Because xml-parse-region can't deal with anything that isn't -;; xml and w3-parse-buffer can't deal with some xml, we have to -;; parse with xml-parse-region first and, if that fails, parse -;; with w3-parse-buffer. Yuck. Eventually, someone should find out -;; why w3-parse-buffer fails to parse some well-formed xml and -;; fix it. - - (condition-case err - (setq xmlform (xml-parse-region (point-min) (point-max))) - (error (if (fboundp 'w3-parse-buffer) - (setq htmlform (caddar (w3-parse-buffer - (current-buffer)))) - (message "nnrss: Not valid XML and w3 parse not available (%s)" - url)))) - (if htmlform - htmlform - xmlform)))) + (let (;; FIXME: shouldn't binding `coding-system-for-read' be + ;; moved to `mm-url-insert'? + (coding-system-for-read 'binary) + ;; mm-url will load mm-util. d-e-m-c should be bound to + ;; t then, because of `mm-emacs-mule'. + (default-enable-multibyte-characters t)) + (mm-url-insert url))) + (nnheader-remove-cr-followed-by-lf) + ;; Decode text according to the encoding attribute. + (when (setq cs (nnrss-get-encoding)) + (mm-decode-coding-region (point-min) (point-max) cs) + (mm-enable-multibyte)) + (goto-char (point-min)) + + ;; Because xml-parse-region can't deal with anything that isn't + ;; xml and w3-parse-buffer can't deal with some xml, we have to + ;; parse with xml-parse-region first and, if that fails, parse + ;; with w3-parse-buffer. Yuck. Eventually, someone should find out + ;; why w3-parse-buffer fails to parse some well-formed xml and + ;; fix it. + + (condition-case err1 + (setq xmlform (xml-parse-region (point-min) (point-max))) + (error + (condition-case err2 + (setq htmlform (caddar (w3-parse-buffer + (current-buffer)))) + (error + (message "\ +nnrss: %s: Not valid XML %s and w3-parse doesn't work %s" + url err1 err2))))) + (if htmlform + htmlform + xmlform)))) (defun nnrss-possibly-change-group (&optional group server) (when (and server @@ -290,9 +378,8 @@ ARTICLE is the article number of the current headline.") (defvar nnrss-extra-categories '(nnrss-snarf-moreover-categories)) (defun nnrss-generate-active () - (if (y-or-n-p "Fetch extra categories? ") - (dolist (func nnrss-extra-categories) - (funcall func))) + (when (y-or-n-p "Fetch extra categories? ") + (mapc 'funcall nnrss-extra-categories)) (save-excursion (set-buffer nntp-server-buffer) (erase-buffer) @@ -308,14 +395,23 @@ ARTICLE is the article number of the current headline.") (setq nnrss-server-data nil) (let ((file (nnrss-make-filename "nnrss" server))) (when (file-exists-p file) - (let ((coding-system-for-read 'binary)) - (load file nil nil t))))) + ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII + ;; file names. So, we use `insert-file-contents' instead. + (mm-with-multibyte-buffer + (let ((coding-system-for-read nnrss-file-coding-system) + (file-name-coding-system nnmail-pathname-coding-system)) + (insert-file-contents file) + (eval-region (point-min) (point-max))))))) (defun nnrss-save-server-data (server) (gnus-make-directory nnrss-directory) - (let ((coding-system-for-write 'binary)) + (let ((coding-system-for-write nnrss-file-coding-system) + (file-name-coding-system nnmail-pathname-coding-system)) (with-temp-file (nnrss-make-filename "nnrss" server) + (insert (format ";; -*- coding: %s; -*-\n" + nnrss-file-coding-system)) (gnus-prin1 `(setq nnrss-group-alist ',nnrss-group-alist)) + (insert "\n") (gnus-prin1 `(setq nnrss-server-data ',nnrss-server-data))))) (defun nnrss-read-group-data (group server) @@ -328,10 +424,15 @@ ARTICLE is the article number of the current headline.") (setq nnrss-group-min (+ nnrss-group-max 1))) (let ((file (nnrss-make-filename group server))) (when (file-exists-p file) - (let ((coding-system-for-read 'binary)) - (load file nil t t)) + ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII + ;; file names. So, we use `insert-file-contents' instead. + (mm-with-multibyte-buffer + (let ((coding-system-for-read nnrss-file-coding-system) + (file-name-coding-system nnmail-pathname-coding-system)) + (insert-file-contents file) + (eval-region (point-min) (point-max)))) (dolist (e nnrss-group-data) - (puthash (nth 2 e) e nnrss-group-hashtb) + (puthash (or (nth 2 e) (nth 6 e)) t nnrss-group-hashtb) (when (and (car e) (> nnrss-group-min (car e))) (setq nnrss-group-min (car e))) (when (and (car e) (< nnrss-group-max (car e))) @@ -339,9 +440,12 @@ ARTICLE is the article number of the current headline.") (defun nnrss-save-group-data (group server) (gnus-make-directory nnrss-directory) - (let ((coding-system-for-write 'binary)) + (let ((coding-system-for-write nnrss-file-coding-system) + (file-name-coding-system nnmail-pathname-coding-system)) (with-temp-file (nnrss-make-filename group server) - (gnus-prin1 `(setq nnrss-group-data ',nnrss-group-data ))))) + (insert (format ";; -*- coding: %s; -*-\n" + nnrss-file-coding-system)) + (gnus-prin1 `(setq nnrss-group-data ',nnrss-group-data))))) (defun nnrss-make-filename (name server) (expand-file-name @@ -372,18 +476,29 @@ ARTICLE is the article number of the current headline.") (mm-with-unibyte-current-buffer (mm-url-insert url))) -(defun nnrss-decode-entities-unibyte-string (string) +(defun nnrss-decode-entities-string (string) (if string - (mm-with-unibyte-buffer + (mm-with-multibyte-buffer (insert string) (mm-url-decode-entities-nbsp) (buffer-string)))) (defalias 'nnrss-insert 'nnrss-insert-w3) -(if (featurep 'xemacs) - (defalias 'nnrss-string-as-multibyte 'identity) - (defalias 'nnrss-string-as-multibyte 'string-as-multibyte)) +(defun nnrss-mime-encode-string (string) + (mm-with-multibyte-buffer + (insert string) + (mm-url-decode-entities-nbsp) + (goto-char (point-min)) + (while (re-search-forward "[\t\n ]+" nil t) + (replace-match " ")) + (goto-char (point-min)) + (skip-chars-forward " ") + (delete-region (point-min) (point)) + (goto-char (point-max)) + (skip-chars-forward " ") + (delete-region (point) (point-max)) + (eword-encode-string (buffer-string) (eval '(- -1 (lsh -1 -1)))))) ;;; Snarf functions @@ -420,12 +535,16 @@ ARTICLE is the article number of the current headline.") content-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/modules/content/")) (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml))) (when (and (listp item) - (eq (intern (concat rss-ns "item")) (car item)) - (setq url (nnrss-decode-entities-unibyte-string - (nnrss-node-text rss-ns 'link (cddr item)))) - (not (gethash url nnrss-group-hashtb))) + (string= (concat rss-ns "item") (car item)) + (if (setq url (nnrss-decode-entities-string + (nnrss-node-text rss-ns 'link (cddr item)))) + (not (gethash url nnrss-group-hashtb)) + (setq extra (or (nnrss-node-text content-ns 'encoded item) + (nnrss-node-text rss-ns 'description item))) + (not (gethash extra nnrss-group-hashtb)))) (setq subject (nnrss-node-text rss-ns 'title item)) - (setq extra (or (nnrss-node-text content-ns 'encoded item) + (setq extra (or extra + (nnrss-node-text content-ns 'encoded item) (nnrss-node-text rss-ns 'description item))) (setq author (or (nnrss-node-text rss-ns 'author item) (nnrss-node-text dc-ns 'creator item) @@ -438,13 +557,14 @@ ARTICLE is the article number of the current headline.") (incf nnrss-group-max) (current-time) url - (and subject (nnrss-decode-entities-unibyte-string subject)) - (and author (nnrss-decode-entities-unibyte-string author)) + (and subject (nnrss-mime-encode-string subject)) + (and author (nnrss-mime-encode-string author)) date - (and extra (nnrss-decode-entities-unibyte-string extra))) + (and extra (nnrss-decode-entities-string extra))) nnrss-group-data) - (puthash url (car nnrss-group-data) nnrss-group-hashtb) - (setq changed t))) + (puthash (or url extra) t nnrss-group-hashtb) + (setq changed t)) + (setq extra nil)) (when changed (nnrss-save-group-data group server) (let ((pair (assoc group nnrss-server-data))) @@ -484,11 +604,11 @@ Export subscriptions to a buffer in OPML Format." " " (user-full-name) "\n" " \n" " \n")) - (mapcar (lambda (sub) - (insert (concat - " \n"))) - nnrss-group-alist) + (mapc (lambda (sub) + (insert (concat + " \n"))) + nnrss-group-alist) (insert (concat " \n" "\n"))) @@ -532,8 +652,8 @@ It is useful when `(setq nnrss-use-local t)'." (setq category (match-string 1)) (setq url (match-string 2) name (mm-url-decode-entities-string - (rfc2231-decode-encoded-string - (match-string 3)))) + (cadr (mime-decode-parameters + (list "c*" (match-string 3)))))) (if category (setq name (concat category "." name))) (unless (assoc name nnrss-server-data) @@ -542,9 +662,6 @@ It is useful when `(setq nnrss-use-local t)'." (if changed (nnrss-save-server-data "")))) -(defun nnrss-format-string (string) - (gnus-replace-in-string (nnrss-string-as-multibyte string) " *\n *" " ")) - (defun nnrss-node-text (namespace local-name element) (let* ((node (assq (intern (concat namespace (symbol-name local-name))) element)) @@ -565,24 +682,27 @@ It is useful when `(setq nnrss-use-local t)'." (defun nnrss-find-el (tag data &optional found-list) "Find the all matching elements in the data. Careful with this on large documents!" - (if (listp data) - (mapcar (lambda (bit) - (when (car-safe bit) - (when (equal tag (car bit)) - (setq found-list - (append found-list - (list bit)))) - (if (and (listp (car-safe (caddr bit))) - (not (stringp (caddr bit)))) - (setq found-list - (append found-list - (nnrss-find-el - tag (caddr bit)))) - (setq found-list - (append found-list - (nnrss-find-el - tag (cddr bit))))))) - data)) + (when (listp data) + (mapc (lambda (bit) + (when (car-safe bit) + (when (equal tag (car bit)) + ;; Old xml.el may return a list of string. + (when (consp (caddr bit)) + (setcar (cddr bit) (caaddr bit))) + (setq found-list + (append found-list + (list bit)))) + (if (and (listp (car-safe (caddr bit))) + (not (stringp (caddr bit)))) + (setq found-list + (append found-list + (nnrss-find-el + tag (caddr bit)))) + (setq found-list + (append found-list + (nnrss-find-el + tag (cddr bit))))))) + data)) found-list) (defun nnrss-rsslink-p (el) @@ -629,27 +749,27 @@ whether they are `offsite' or `onsite'." rss-onsite-in rdf-onsite-in xml-onsite-in rss-offsite-end rdf-offsite-end xml-offsite-end rss-offsite-in rdf-offsite-in xml-offsite-in) - (mapcar (lambda (href) - (if (not (null href)) - (cond ((string-match "\\.rss$" href) - (nnrss-match-macro - base-uri href rss-onsite-end rss-offsite-end)) - ((string-match "\\.rdf$" href) - (nnrss-match-macro - base-uri href rdf-onsite-end rdf-offsite-end)) - ((string-match "\\.xml$" href) - (nnrss-match-macro - base-uri href xml-onsite-end xml-offsite-end)) - ((string-match "rss" href) - (nnrss-match-macro - base-uri href rss-onsite-in rss-offsite-in)) - ((string-match "rdf" href) - (nnrss-match-macro - base-uri href rdf-onsite-in rdf-offsite-in)) - ((string-match "xml" href) - (nnrss-match-macro - base-uri href xml-onsite-in xml-offsite-in))))) - hrefs) + (mapc (lambda (href) + (if (not (null href)) + (cond ((string-match "\\.rss$" href) + (nnrss-match-macro + base-uri href rss-onsite-end rss-offsite-end)) + ((string-match "\\.rdf$" href) + (nnrss-match-macro + base-uri href rdf-onsite-end rdf-offsite-end)) + ((string-match "\\.xml$" href) + (nnrss-match-macro + base-uri href xml-onsite-end xml-offsite-end)) + ((string-match "rss" href) + (nnrss-match-macro + base-uri href rss-onsite-in rss-offsite-in)) + ((string-match "rdf" href) + (nnrss-match-macro + base-uri href rdf-onsite-in rdf-offsite-in)) + ((string-match "xml" href) + (nnrss-match-macro + base-uri href xml-onsite-in xml-offsite-in))))) + hrefs) (append rss-onsite-end rdf-onsite-end xml-onsite-end rss-onsite-in rdf-onsite-in xml-onsite-in @@ -774,7 +894,7 @@ prefix), return the prefix." "") ((eq (length nslist) 2) ; extract prefix (cadr nslist))))) - (if (and ns (not (eq ns ""))) + (if (and ns (not (string= ns ""))) (concat ns ":") ns))) @@ -782,4 +902,3 @@ prefix), return the prefix." ;;; nnrss.el ends here -