X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Fnnweb.el;h=d71f595cc4314a971173079398dd9ea5bae1d03b;hb=04ba5250e9e47ebe40860a0902d4ef6405ca143f;hp=9d855f963743372a544512381e94e3497c379fc6;hpb=73edf76920c3d86afa1628ca8f1509394cb7b26c;p=elisp%2Fgnus.git- diff --git a/lisp/nnweb.el b/lisp/nnweb.el index 9d855f9..d71f595 100644 --- a/lisp/nnweb.el +++ b/lisp/nnweb.el @@ -1,5 +1,6 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996,97,98,99 Free Software Foundation, Inc. +;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003 +;; Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; Keywords: news @@ -23,12 +24,12 @@ ;;; Commentary: -;; Note: You need to have `url' and `w3' installed for this -;; backend to work. +;; Note: You need to have `w3' installed for some functions to work. ;;; Code: (eval-when-compile (require 'cl)) +(eval-when-compile (require 'gnus-clfns)) (require 'nnoo) (require 'message) @@ -36,58 +37,50 @@ (require 'gnus) (require 'nnmail) (require 'mm-util) -(eval-when-compile +(require 'mm-url) +(eval-and-compile (ignore-errors - (require 'w3) - (require 'url) - (require 'w3-forms))) -;; Report failure to find w3 at load time if appropriate. -(eval '(progn - (require 'w3) - (require 'url) - (require 'w3-forms))) + (require 'url))) +(autoload 'w3-parse-buffer "w3-parse") (nnoo-declare nnweb) (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") "Where nnweb will save its files.") -(defvoo nnweb-type 'dejanews +(defvoo nnweb-type 'google "What search engine type is being used. -Valid types include `dejanews', `dejanewsold', `reference', -and `altavista'.") +Valid types include `google', `dejanews', and `gmane'.") (defvar nnweb-type-definition - '((dejanews + '((google (article . ignore) - (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanews-search) - (address . "http://www.deja.com/=dnc/qs.xp") - (identifier . nnweb-dejanews-identity)) - (dejanewsold + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (identifier . nnweb-google-identity)) + (dejanews ;; alias of google (article . ignore) - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanewsold-search) - (address . "http://www.deja.com/dnquery.xp") - (identifier . nnweb-dejanews-identity)) - (reference - (article . nnweb-reference-wash-article) - (map . nnweb-reference-create-mapping) - (search . nnweb-reference-search) - (address . "http://www.reference.com/cgi-bin/pn/go") - (identifier . identity)) - (altavista - (article . nnweb-altavista-wash-article) - (map . nnweb-altavista-create-mapping) - (search . nnweb-altavista-search) - (address . "http://www.altavista.digital.com/cgi-bin/query") - (id . "/cgi-bin/news?id@%s") - (identifier . identity))) + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (identifier . nnweb-google-identity)) + (gmane + (article . nnweb-gmane-wash-article) + (id . "http://gmane.org/view.php?group=%s") + (reference . identity) + (map . nnweb-gmane-create-mapping) + (search . nnweb-gmane-search) + (address . "http://gmane.org/") + (identifier . nnweb-gmane-identity))) "Type-definition alist.") (defvoo nnweb-search nil - "Search string to feed to DejaNews.") + "Search string to feed to Google.") (defvoo nnweb-max-hits 999 "Maximum number of hits to display.") @@ -113,14 +106,16 @@ and `altavista'.") (set-buffer nntp-server-buffer) (erase-buffer) (let (article header) - (while (setq article (pop articles)) - (when (setq header (cadr (assq article nnweb-articles))) - (nnheader-insert-nov header))) + (mm-with-unibyte-current-buffer + (while (setq article (pop articles)) + (when (setq header (cadr (assq article nnweb-articles))) + (nnheader-insert-nov header)))) 'nov))) (deffoo nnweb-request-scan (&optional group server) (nnweb-possibly-change-server group server) - (setq nnweb-hashtb (gnus-make-hashtable 4095)) + (if nnweb-ephemeral-p + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (funcall (nnweb-definition 'map)) (unless nnweb-ephemeral-p (nnweb-write-active) @@ -131,15 +126,14 @@ and `altavista'.") (when (and group (not (equal group nnweb-group)) (not nnweb-ephemeral-p)) + (setq nnweb-group group + nnweb-articles nil) (let ((info (assoc group nnweb-group-alist))) (when info - (setq nnweb-group group) (setq nnweb-type (nth 2 info)) (setq nnweb-search (nth 3 info)) (unless dont-check (nnweb-read-overview group))))) - (unless dont-check - (nnweb-request-scan group)) (cond ((not nnweb-articles) (nnheader-report 'nnweb "No matching articles")) @@ -169,22 +163,26 @@ and `altavista'.") (let* ((header (cadr (assq article nnweb-articles))) (url (and header (mail-header-xref header)))) (when (or (and url - (nnweb-fetch-url url)) + (mm-with-unibyte-current-buffer + (mm-url-insert url))) (and (stringp article) (nnweb-definition 'id t) (let ((fetch (nnweb-definition 'id)) - art) + art active) (when (string-match "^<\\(.*\\)>$" article) (setq art (match-string 1 article))) - (and fetch - art - (nnweb-fetch-url - (format fetch article)))))) + (when (and fetch art) + (setq url (format fetch art)) + (mm-with-unibyte-current-buffer + (mm-url-insert url)) + (if (nnweb-definition 'reference t) + (setq article + (funcall (nnweb-definition + 'reference) article))))))) (unless nnheader-callback-function - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities)) + (funcall (nnweb-definition 'article))) (nnheader-report 'nnweb "Fetched article %s" article) - t)))) + (cons group (and (numberp article) article)))))) (deffoo nnweb-close-server (&optional server) (when (and (nnweb-server-opened server) @@ -203,12 +201,10 @@ and `altavista'.") t)) (deffoo nnweb-request-update-info (group info &optional server) - (nnweb-possibly-change-server group server) - ;;(setcar (cddr info) nil) - ) + (nnweb-possibly-change-server group server)) (deffoo nnweb-asynchronous-p () - t) + nil) (deffoo nnweb-request-create-group (group &optional server args) (nnweb-possibly-change-server nil server) @@ -231,7 +227,7 @@ and `altavista'.") (defun nnweb-read-overview (group) "Read the overview of GROUP and build the map." (when (file-exists-p (nnweb-overview-file group)) - (with-temp-buffer + (mm-with-unibyte-buffer (nnheader-insert-file-contents (nnweb-overview-file group)) (goto-char (point-min)) (let (header) @@ -289,9 +285,11 @@ and `altavista'.") (nnweb-open-server server))) (unless nnweb-group-alist (nnweb-read-active)) + (unless nnweb-hashtb + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (when group (when (and (not nnweb-ephemeral-p) - (not (equal group nnweb-group))) + (equal group nnweb-group)) (nnweb-request-group group nil t)))) (defun nnweb-init (server) @@ -299,372 +297,224 @@ and `altavista'.") (unless (gnus-buffer-live-p nnweb-buffer) (setq nnweb-buffer (save-excursion - (nnheader-set-temp-buffer - (format " *nnweb %s %s %s*" nnweb-type nnweb-search server)))))) - -(defun nnweb-fetch-url (url) - (save-excursion - (if (not nnheader-callback-function) - (let ((buf (current-buffer))) - (save-excursion - (set-buffer nnweb-buffer) - (erase-buffer) - (url-insert-file-contents url) - (copy-to-buffer buf (point-min) (point-max)) - t)) - (nnweb-url-retrieve-asynch - url 'nnweb-callback (current-buffer) nnheader-callback-function) - t))) - -(defun nnweb-callback (buffer callback) - (when (gnus-buffer-live-p url-working-buffer) - (save-excursion - (set-buffer url-working-buffer) - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities) - (set-buffer buffer) - (goto-char (point-max)) - (insert-buffer-substring url-working-buffer)) - (funcall callback t) - (gnus-kill-buffer url-working-buffer))) - -(defun nnweb-url-retrieve-asynch (url callback &rest data) - (let ((url-request-method "GET") - (old-asynch url-be-asynchronous) - (url-request-data nil) - (url-request-extra-headers nil) - (url-working-buffer (generate-new-buffer-name " *nnweb*"))) - (setq-default url-be-asynchronous t) - (save-excursion - (set-buffer (get-buffer-create url-working-buffer)) - (setq url-current-callback-data data - url-be-asynchronous t - url-current-callback-func callback) - (url-retrieve url)) - (setq-default url-be-asynchronous old-asynch))) + (mm-with-unibyte + (nnheader-set-temp-buffer + (format " *nnweb %s %s %s*" + nnweb-type nnweb-search server)) + (current-buffer)))))) ;;; -;;; DejaNews functions. +;;; Deja bought by google.com ;;; -(defun nnweb-dejanews-create-mapping () - "Perform the search and create an number-to-url alist." +(defun nnweb-google-wash-article () + (let ((case-fold-search t) url) + (goto-char (point-min)) + (re-search-forward "^
" nil t)
+    (narrow-to-region (point-min) (point))
+    (search-backward "" nil t)
+      (replace-match "\n"))
+    (mm-url-remove-markup)
+    (goto-char (point-min))
+    (while (re-search-forward "^[ \t]*\n" nil t)
+      (replace-match ""))
+    (goto-char (point-max))
+    (insert "\n")
+    (widen)
+    (narrow-to-region (point) (point-max))
+    (search-forward "" nil t)
+    (delete-region (point) (point-max))
+    (mm-url-remove-markup)
+    (widen)))
+
+(defun nnweb-google-parse-1 (&optional Message-ID)
+  (let ((i 0)
+	(case-fold-search t)
+	(active (cadr (assoc nnweb-group nnweb-group-alist)))
+	Subject Score Date Newsgroups From
+	map url mid)
+    (unless active
+      (push (list nnweb-group (setq active (cons 1 0))
+		  nnweb-type nnweb-search)
+	    nnweb-group-alist))
+    ;; Go through all the article hits on this page.
+    (goto-char (point-min))
+    (while (re-search-forward
+	    "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
+      (setq mid (match-string 2)
+	    url (format
+		 "http://groups.google.com/groups?selm=%s&output=gplain" mid))
+      (narrow-to-region (search-forward ">" nil t)
+			(search-forward "" nil t))
+      (mm-url-remove-markup)
+      (mm-url-decode-entities)
+      (setq Subject (buffer-string))
+      (goto-char (point-max))
+      (widen)
+      (forward-line 2)
+      (when (looking-at "
]+>") + (goto-char (match-end 0))) + (if (not (looking-at "]+>")) + (skip-chars-forward " \t") + (narrow-to-region (point) + (search-forward "" nil t)) + (mm-url-remove-markup) + (mm-url-decode-entities) + (setq Newsgroups (buffer-string)) + (goto-char (point-max)) + (widen) + (skip-chars-forward "- \t")) + (when (looking-at + "\\([0-9]+\\)[/ ]\\([A-Za-z]+\\)[/ ]\\([0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - ") - nil 0 0 url)) - map) - (nnweb-set-hashtb (cadar map) (car map))))) - ;; See whether there is a "Get next 20 hits" button here. + (setq nnweb-articles + (nconc nnweb-articles (nnweb-google-parse-1))) + ;; Check if there are more articles to fetch (goto-char (point-min)) + (incf i 100) (if (or (not (re-search-forward - "HREF=\"\\([^\"]+\\)\"[<>b]+Next result" nil t)) + "
]+\\).*Next" nil t)) (>= i nnweb-max-hits)) (setq more nil) - ;; Yup -- fetch it. - (setq more (match-string 1)) - (erase-buffer) - (url-insert-file-contents more))) + ;; Yup, there are more articles + (setq more (concat "http://groups.google.com" (match-string 1))) + (when more + (erase-buffer) + (mm-url-insert more)))) ;; Return the articles in the right order. (setq nnweb-articles - (sort (nconc nnweb-articles map) 'car-less-than-car)))))) + (sort nnweb-articles 'car-less-than-car)))))) -(defun nnweb-dejanews-search (search) - (nnweb-insert +(defun nnweb-google-search (search) + (mm-url-insert (concat (nnweb-definition 'address) "?" - (nnweb-encode-www-form-urlencoded - `(("ST" . "PS") - ("svcclass" . "dnyr") - ("QRY" . ,search) - ("defaultOp" . "AND") - ("DBS" . "1") - ("OP" . "dnquery.xp") - ("LNG" . "ALL") - ("maxhits" . "100") - ("threaded" . "0") - ("format" . "verbose2") - ("showsort" . "date") - ("agesign" . "1") - ("ageweight" . "1"))))) - t) - -(defun nnweb-dejanewsold-search (search) - (nnweb-fetch-form - (nnweb-definition 'address) - `(("query" . ,search) - ("defaultOp" . "AND") - ("svcclass" . "dnold") - ("maxhits" . "100") - ("format" . "verbose2") - ("threaded" . "0") - ("showsort" . "date") - ("agesign" . "1") - ("ageweight" . "1"))) + (mm-url-encode-www-form-urlencoded + `(("q" . ,search) + ("num". "100") + ("hq" . "") + ("hl" . "") + ("lr" . "") + ("safe" . "off") + ("sites" . "groups"))))) t) -(defun nnweb-dejanews-identity (url) +(defun nnweb-google-identity (url) "Return an unique identifier based on URL." - (if (string-match "AN=\\([0-9]+\\)" url) + (if (string-match "selm=\\([^ &>]+\\)" url) (match-string 1 url) url)) ;;; -;;; InReference +;;; gmane.org ;;; - -(defun nnweb-reference-create-mapping () - "Perform the search and create an number-to-url alist." +(defun nnweb-gmane-create-mapping () + "Perform the search and create a number-to-url alist." (save-excursion (set-buffer nnweb-buffer) (erase-buffer) (when (funcall (nnweb-definition 'search) nnweb-search) - (let ((i 0) - (more t) + (let ((more t) (case-fold-search t) (active (or (cadr (assoc nnweb-group nnweb-group-alist)) (cons 1 0))) - Subject Score Date Newsgroups From Message-ID - map url) - (while more - ;; Go through all the article hits on this page. - (goto-char (point-min)) - (search-forward "
" nil t) - (delete-region (point-min) (point)) - ;(nnweb-decode-entities) - (goto-char (point-min)) - (while (re-search-forward "^ +[0-9]+\\." nil t) - (narrow-to-region - (point) - (if (re-search-forward "^$" nil t) - (match-beginning 0) - (point-max))) - (goto-char (point-min)) - (when (looking-at ".*href=\"\\([^\"]+\\)\"") - (setq url (match-string 1))) - (nnweb-remove-markup) - (goto-char (point-min)) - (while (search-forward "\t" nil t) - (replace-match " ")) - (goto-char (point-min)) - (while (re-search-forward "^\\([^:]+\\): \\(.*\\)$" nil t) - (set (intern (match-string 1)) (match-string 2))) - (widen) - (search-forward "" nil t) - (incf i) - (unless (nnweb-get-hashtb url) - (push - (list - (incf (cdr active)) - (make-full-mail-header - (cdr active) (concat "(" Newsgroups ") " Subject) From Date - Message-ID - nil 0 (string-to-int Score) url)) - map) - (nnweb-set-hashtb (cadar map) (car map)))) - (setq more nil)) + subject group url + map) + ;; Remove stuff from the beginning of results + (goto-char (point-min)) + (search-forward "Search Results