X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Fnnweb.el;h=7aaa5f9cb93375438e8306ccef9fec55c264a9c2;hb=0563df167689ba46e219f7915c6f5b321da614ce;hp=9d855f963743372a544512381e94e3497c379fc6;hpb=73edf76920c3d86afa1628ca8f1509394cb7b26c;p=elisp%2Fgnus.git- diff --git a/lisp/nnweb.el b/lisp/nnweb.el index 9d855f9..7aaa5f9 100644 --- a/lisp/nnweb.el +++ b/lisp/nnweb.el @@ -1,5 +1,6 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996,97,98,99 Free Software Foundation, Inc. +;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 +;; Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; Keywords: news @@ -29,23 +30,25 @@ ;;; Code: (eval-when-compile (require 'cl)) +(eval-when-compile (require 'gnus-clfns)) (require 'nnoo) (require 'message) (require 'gnus-util) (require 'gnus) (require 'nnmail) -(require 'mm-util) (eval-when-compile (ignore-errors (require 'w3) (require 'url) (require 'w3-forms))) + ;; Report failure to find w3 at load time if appropriate. -(eval '(progn - (require 'w3) - (require 'url) - (require 'w3-forms))) +(unless noninteractive + (eval '(progn + (require 'w3) + (require 'url) + (require 'w3-forms)))) (nnoo-declare nnweb) @@ -58,19 +61,28 @@ Valid types include `dejanews', `dejanewsold', `reference', and `altavista'.") (defvar nnweb-type-definition - '((dejanews - (article . ignore) - (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanews-search) - (address . "http://www.deja.com/=dnc/qs.xp") - (identifier . nnweb-dejanews-identity)) - (dejanewsold - (article . ignore) - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanewsold-search) - (address . "http://www.deja.com/dnquery.xp") - (identifier . nnweb-dejanews-identity)) + '( + (dejanews ;; bought by google.com + (article . nnweb-google-wash-article) + (id . "http://groups.google.com/groups?as_umsgid=%s") + (reference . nnweb-google-reference) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (identifier . nnweb-google-identity)) +;;; (dejanews +;;; (article . ignore) +;;; (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") +;;; (map . nnweb-dejanews-create-mapping) +;;; (search . nnweb-dejanews-search) +;;; (address . "http://www.deja.com/=dnc/qs.xp") +;;; (identifier . nnweb-dejanews-identity)) +;;; (dejanewsold +;;; (article . ignore) +;;; (map . nnweb-dejanews-create-mapping) +;;; (search . nnweb-dejanewsold-search) +;;; (address . "http://www.deja.com/dnquery.xp") +;;; (identifier . nnweb-dejanews-identity)) (reference (article . nnweb-reference-wash-article) (map . nnweb-reference-create-mapping) @@ -113,14 +125,14 @@ and `altavista'.") (set-buffer nntp-server-buffer) (erase-buffer) (let (article header) - (while (setq article (pop articles)) - (when (setq header (cadr (assq article nnweb-articles))) - (nnheader-insert-nov header))) + (mm-with-unibyte-current-buffer + (while (setq article (pop articles)) + (when (setq header (cadr (assq article nnweb-articles))) + (nnheader-insert-nov header)))) 'nov))) (deffoo nnweb-request-scan (&optional group server) (nnweb-possibly-change-server group server) - (setq nnweb-hashtb (gnus-make-hashtable 4095)) (funcall (nnweb-definition 'map)) (unless nnweb-ephemeral-p (nnweb-write-active) @@ -131,15 +143,14 @@ and `altavista'.") (when (and group (not (equal group nnweb-group)) (not nnweb-ephemeral-p)) + (setq nnweb-group group + nnweb-articles nil) (let ((info (assoc group nnweb-group-alist))) (when info - (setq nnweb-group group) (setq nnweb-type (nth 2 info)) (setq nnweb-search (nth 3 info)) (unless dont-check (nnweb-read-overview group))))) - (unless dont-check - (nnweb-request-scan group)) (cond ((not nnweb-articles) (nnheader-report 'nnweb "No matching articles")) @@ -169,22 +180,27 @@ and `altavista'.") (let* ((header (cadr (assq article nnweb-articles))) (url (and header (mail-header-xref header)))) (when (or (and url - (nnweb-fetch-url url)) + (mm-with-unibyte-current-buffer + (nnweb-fetch-url url))) (and (stringp article) (nnweb-definition 'id t) (let ((fetch (nnweb-definition 'id)) - art) + art active) (when (string-match "^<\\(.*\\)>$" article) (setq art (match-string 1 article))) - (and fetch - art - (nnweb-fetch-url - (format fetch article)))))) + (when (and fetch art) + (setq url (format fetch article)) + (mm-with-unibyte-current-buffer + (nnweb-fetch-url url)) + (if (nnweb-definition 'reference t) + (setq article + (funcall (nnweb-definition + 'reference) article))))))) (unless nnheader-callback-function (funcall (nnweb-definition 'article)) (nnweb-decode-entities)) (nnheader-report 'nnweb "Fetched article %s" article) - t)))) + (cons group (and (numberp article) article)))))) (deffoo nnweb-close-server (&optional server) (when (and (nnweb-server-opened server) @@ -203,9 +219,7 @@ and `altavista'.") t)) (deffoo nnweb-request-update-info (group info &optional server) - (nnweb-possibly-change-server group server) - ;;(setcar (cddr info) nil) - ) + (nnweb-possibly-change-server group server)) (deffoo nnweb-asynchronous-p () t) @@ -231,7 +245,7 @@ and `altavista'.") (defun nnweb-read-overview (group) "Read the overview of GROUP and build the map." (when (file-exists-p (nnweb-overview-file group)) - (with-temp-buffer + (mm-with-unibyte-buffer (nnheader-insert-file-contents (nnweb-overview-file group)) (goto-char (point-min)) (let (header) @@ -292,6 +306,7 @@ and `altavista'.") (when group (when (and (not nnweb-ephemeral-p) (not (equal group nnweb-group))) + (setq nnweb-hashtb (gnus-make-hashtable 4095)) (nnweb-request-group group nil t)))) (defun nnweb-init (server) @@ -299,22 +314,32 @@ and `altavista'.") (unless (gnus-buffer-live-p nnweb-buffer) (setq nnweb-buffer (save-excursion - (nnheader-set-temp-buffer - (format " *nnweb %s %s %s*" nnweb-type nnweb-search server)))))) + (mm-with-unibyte + (nnheader-set-temp-buffer + (format " *nnweb %s %s %s*" + nnweb-type nnweb-search server)) + (current-buffer)))))) (defun nnweb-fetch-url (url) - (save-excursion - (if (not nnheader-callback-function) - (let ((buf (current-buffer))) - (save-excursion - (set-buffer nnweb-buffer) + (let (buf) + (save-excursion + (if (not nnheader-callback-function) + (progn + (with-temp-buffer + (mm-enable-multibyte) + (let ((coding-system-for-read 'binary) + (coding-system-for-write 'binary) + (input-coding-system 'binary) + (output-coding-system 'binary) + (default-process-coding-system 'binary)) + (nnweb-insert url)) + (setq buf (buffer-string))) (erase-buffer) - (url-insert-file-contents url) - (copy-to-buffer buf (point-min) (point-max)) - t)) - (nnweb-url-retrieve-asynch - url 'nnweb-callback (current-buffer) nnheader-callback-function) - t))) + (insert buf) + t) + (nnweb-url-retrieve-asynch + url 'nnweb-callback (current-buffer) nnheader-callback-function) + t)))) (defun nnweb-callback (buffer callback) (when (gnus-buffer-live-p url-working-buffer) @@ -340,9 +365,13 @@ and `altavista'.") (setq url-current-callback-data data url-be-asynchronous t url-current-callback-func callback) - (url-retrieve url)) + (url-retrieve url nil)) (setq-default url-be-asynchronous old-asynch))) +(if (fboundp 'url-retrieve-synchronously) + (defun nnweb-url-retrieve-asynch (url callback &rest data) + (url-retrieve url callback data))) + ;;; ;;; DejaNews functions. ;;; @@ -368,20 +397,24 @@ and `altavista'.") (dolist (row (nth 2 (car (nth 2 table)))) (setq a (nnweb-parse-find 'a row) url (cdr (assq 'href (nth 1 a))) - text (nnweb-text row)) + text (nreverse (nnweb-text row))) (when a - (setq subject (nth 2 text) - group (nth 4 text) - date (nth 5 text) - from (nth 6 text)) - (string-match "\\([0-9]+\\)/\\([0-9]+\\)/\\([0-9]+\\)" date) - (setq date (format "%s %s %s" - (car (rassq (string-to-number - (match-string 2 date)) - parse-time-months)) - (match-string 3 date) (match-string 1 date))) + (setq subject (nth 4 text) + group (nth 2 text) + date (nth 1 text) + from (nth 0 text)) + (if (string-match "\\([0-9]+\\)/\\([0-9]+\\)/\\([0-9]+\\)" date) + (setq date (format "%s %s 00:00:00 %s" + (car (rassq (string-to-number + (match-string 2 date)) + parse-time-months)) + (match-string 3 date) + (match-string 1 date))) + (setq date "Jan 1 00:00:00 0000")) (incf i) (setq url (concat url "&fmt=text")) + (when (string-match "&context=[^&]+" url) + (setq url (replace-match "" t t url))) (unless (nnweb-get-hashtb url) (push (list @@ -469,7 +502,6 @@ and `altavista'.") (goto-char (point-min)) (search-forward "
" nil t) (delete-region (point-min) (point)) - ;(nnweb-decode-entities) (goto-char (point-min)) (while (re-search-forward "^ +[0-9]+\\." nil t) (narrow-to-region @@ -666,13 +698,145 @@ and `altavista'.") t) ;;; +;;; Deja bought by google.com +;;; + +(defun nnweb-google-wash-article () + (let ((case-fold-search t) url) + (goto-char (point-min)) + (re-search-forward "^
" nil t)
+    (narrow-to-region (point-min) (point))
+    (search-backward "" nil t 2)
+    (delete-region (point-min) (point))
+    (if (search-forward "[view thread]" nil t)
+	(replace-match ""))
+    (goto-char (point-min))
+    (while (search-forward "
" nil t) + (replace-match "\n")) + (nnweb-remove-markup) + (goto-char (point-min)) + (while (re-search-forward "^[ \t]*\n" nil t) + (replace-match "")) + (goto-char (point-max)) + (insert "\n") + (widen) + (narrow-to-region (point) (point-max)) + (search-forward "
" nil t) + (delete-region (point) (point-max)) + (nnweb-remove-markup) + (widen))) + +(defun nnweb-google-parse-1 (&optional Message-ID) + (let ((i 0) + (case-fold-search t) + (active (cadr (assoc nnweb-group nnweb-group-alist))) + Subject Score Date Newsgroups From + map url) + (unless active + (push (list nnweb-group (setq active (cons 1 0)) + nnweb-type nnweb-search) + nnweb-group-alist)) + ;; Go through all the article hits on this page. + (goto-char (point-min)) + (while (re-search-forward + "a href=/groups\\(\\?[^ \">]*seld=[0-9]+[^ \">]*\\)" nil t) + (setq url + (concat (nnweb-definition 'address) + (match-string 1))) + (narrow-to-region (search-forward ">" nil t) + (search-forward "" nil t)) + (nnweb-remove-markup) + (nnweb-decode-entities) + (setq Subject (buffer-string)) + (goto-char (point-max)) + (widen) + (forward-line 2) + (when (looking-at "
]+>") + (goto-char (match-end 0))) + (if (not (looking-at "]+>")) + (skip-chars-forward " \t") + (narrow-to-region (point) + (search-forward "" nil t)) + (nnweb-remove-markup) + (nnweb-decode-entities) + (setq Newsgroups (buffer-string)) + (goto-char (point-max)) + (widen) + (skip-chars-forward "- \t")) + (when (looking-at + "\\([0-9]+/[A-Za-z]+/[0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - ]+>" nil t) (replace-match "" t t))) -(defun nnweb-insert (url) - "Insert the contents from an URL in the current buffer." +(defun nnweb-insert (url &optional follow-refresh) + "Insert the contents from an URL in the current buffer. +If FOLLOW-REFRESH is non-nil, redirect refresh url in META." (let ((name buffer-file-name)) - (url-insert-file-contents url) + (if follow-refresh + (save-restriction + (narrow-to-region (point) (point)) + (url-insert-file-contents url) + (goto-char (point-min)) + (when (re-search-forward + "]*URL=\\([^\"]+\\)\"" nil t) + (let ((url (match-string 1))) + (delete-region (point-min) (point-max)) + (nnweb-insert url t)))) + (url-insert-file-contents url)) (setq buffer-file-name name))) (defun nnweb-parse-find (type parse &optional maxdepth) @@ -781,6 +969,11 @@ and `altavista'.") (listp (cdr element))) (nnweb-text-1 element))))) +(defun nnweb-replace-in-string (string match newtext) + (while (string-match match string) + (setq string (replace-match newtext t t string))) + string) + (provide 'nnweb) ;;; nnweb.el ends here