X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Fnnweb.el;h=3ce22fdbd92bccbee1afb3eec1a30ef2ab138e24;hb=36bd162f4f7cd40453b8683e796730836c352b2a;hp=5a569414937130432e50b679577a4236d6436f56;hpb=027a90912122f2cb3e36d82310f32962e3ce2f71;p=elisp%2Fgnus.git- diff --git a/lisp/nnweb.el b/lisp/nnweb.el index 5a56941..3ce22fd 100644 --- a/lisp/nnweb.el +++ b/lisp/nnweb.el @@ -1,5 +1,6 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996,97,98,99 Free Software Foundation, Inc. +;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 +;; Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; Keywords: news @@ -23,12 +24,12 @@ ;;; Commentary: -;; Note: You need to have `url' and `w3' installed for this -;; backend to work. +;; Note: You need to have `w3' installed for some functions to work. ;;; Code: (eval-when-compile (require 'cl)) +(eval-when-compile (require 'gnus-clfns)) (require 'nnoo) (require 'message) @@ -36,41 +37,56 @@ (require 'gnus) (require 'nnmail) (require 'mm-util) -(eval-when-compile - (ignore-errors - (require 'w3) - (require 'url) - (require 'w3-forms))) -;; Report failure to find w3 at load time if appropriate. -(eval '(progn - (require 'w3) - (require 'url) - (require 'w3-forms))) +(require 'mm-url) +(autoload 'w3-parse-buffer "w3-parse") (nnoo-declare nnweb) (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") "Where nnweb will save its files.") -(defvoo nnweb-type 'dejanews +(defvoo nnweb-type 'google "What search engine type is being used. -Valid types include `dejanews', `dejanewsold', `reference', +Valid types include `google', `dejanews', `dejanewsold', `reference', and `altavista'.") (defvar nnweb-type-definition - '((dejanews + '( + (google + ;;(article . nnweb-google-wash-article) + ;;(id . "http://groups.google.com/groups?as_umsgid=%s") (article . ignore) - (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanews-search) - (address . "http://www.deja.com/=dnc/qs.xp") - (identifier . nnweb-dejanews-identity)) - (dejanewsold + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + ;;(reference . nnweb-google-reference) + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (identifier . nnweb-google-identity)) + (dejanews ;; alias of google + ;;(article . nnweb-google-wash-article) + ;;(id . "http://groups.google.com/groups?as_umsgid=%s") (article . ignore) - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanewsold-search) - (address . "http://www.deja.com/dnquery.xp") - (identifier . nnweb-dejanews-identity)) + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + ;;(reference . nnweb-google-reference) + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (identifier . nnweb-google-identity)) +;;; (dejanews +;;; (article . ignore) +;;; (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") +;;; (map . nnweb-dejanews-create-mapping) +;;; (search . nnweb-dejanews-search) +;;; (address . "http://www.deja.com/=dnc/qs.xp") +;;; (identifier . nnweb-dejanews-identity)) +;;; (dejanewsold +;;; (article . ignore) +;;; (map . nnweb-dejanews-create-mapping) +;;; (search . nnweb-dejanewsold-search) +;;; (address . "http://www.deja.com/dnquery.xp") +;;; (identifier . nnweb-dejanews-identity)) (reference (article . nnweb-reference-wash-article) (map . nnweb-reference-create-mapping) @@ -121,7 +137,8 @@ and `altavista'.") (deffoo nnweb-request-scan (&optional group server) (nnweb-possibly-change-server group server) - (setq nnweb-hashtb (gnus-make-hashtable 4095)) + (if nnweb-ephemeral-p + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (funcall (nnweb-definition 'map)) (unless nnweb-ephemeral-p (nnweb-write-active) @@ -132,15 +149,14 @@ and `altavista'.") (when (and group (not (equal group nnweb-group)) (not nnweb-ephemeral-p)) + (setq nnweb-group group + nnweb-articles nil) (let ((info (assoc group nnweb-group-alist))) (when info - (setq nnweb-group group) (setq nnweb-type (nth 2 info)) (setq nnweb-search (nth 3 info)) (unless dont-check (nnweb-read-overview group))))) - (unless dont-check - (nnweb-request-scan group)) (cond ((not nnweb-articles) (nnheader-report 'nnweb "No matching articles")) @@ -171,21 +187,23 @@ and `altavista'.") (url (and header (mail-header-xref header)))) (when (or (and url (mm-with-unibyte-current-buffer - (nnweb-fetch-url url))) + (mm-url-insert url))) (and (stringp article) (nnweb-definition 'id t) (let ((fetch (nnweb-definition 'id)) - art) + art active) (when (string-match "^<\\(.*\\)>$" article) (setq art (match-string 1 article))) - (and fetch - art - (mm-with-unibyte-current-buffer - (nnweb-fetch-url - (format fetch article))))))) + (when (and fetch art) + (setq url (format fetch art)) + (mm-with-unibyte-current-buffer + (mm-url-insert url)) + (if (nnweb-definition 'reference t) + (setq article + (funcall (nnweb-definition + 'reference) article))))))) (unless nnheader-callback-function - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities)) + (funcall (nnweb-definition 'article))) (nnheader-report 'nnweb "Fetched article %s" article) (cons group (and (numberp article) article)))))) @@ -290,9 +308,11 @@ and `altavista'.") (nnweb-open-server server))) (unless nnweb-group-alist (nnweb-read-active)) + (unless nnweb-hashtb + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (when group (when (and (not nnweb-ephemeral-p) - (not (equal group nnweb-group))) + (equal group nnweb-group)) (nnweb-request-group group nil t)))) (defun nnweb-init (server) @@ -306,51 +326,57 @@ and `altavista'.") nnweb-type nnweb-search server)) (current-buffer)))))) -(defun nnweb-fetch-url (url) - (let (buf) - (save-excursion - (if (not nnheader-callback-function) - (progn - (with-temp-buffer - (mm-enable-multibyte) - (let ((coding-system-for-read 'binary) - (coding-system-for-write 'binary) - (default-process-coding-system 'binary)) - (nnweb-insert url)) - (setq buf (buffer-string))) - (erase-buffer) - (insert buf) - t) - (nnweb-url-retrieve-asynch - url 'nnweb-callback (current-buffer) nnheader-callback-function) - t)))) - -(defun nnweb-callback (buffer callback) - (when (gnus-buffer-live-p url-working-buffer) - (save-excursion - (set-buffer url-working-buffer) - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities) - (set-buffer buffer) - (goto-char (point-max)) - (insert-buffer-substring url-working-buffer)) - (funcall callback t) - (gnus-kill-buffer url-working-buffer))) - -(defun nnweb-url-retrieve-asynch (url callback &rest data) - (let ((url-request-method "GET") - (old-asynch url-be-asynchronous) - (url-request-data nil) - (url-request-extra-headers nil) - (url-working-buffer (generate-new-buffer-name " *nnweb*"))) - (setq-default url-be-asynchronous t) - (save-excursion - (set-buffer (get-buffer-create url-working-buffer)) - (setq url-current-callback-data data - url-be-asynchronous t - url-current-callback-func callback) - (url-retrieve url)) - (setq-default url-be-asynchronous old-asynch))) +;; (defun nnweb-fetch-url (url) +;; (let (buf) +;; (save-excursion +;; (if (not nnheader-callback-function) +;; (progn +;; (with-temp-buffer +;; (mm-enable-multibyte) +;; (let ((coding-system-for-read 'binary) +;; (coding-system-for-write 'binary) +;; (input-coding-system 'binary) +;; (output-coding-system 'binary) +;; (default-process-coding-system 'binary)) +;; (nnweb-insert url)) +;; (setq buf (buffer-string))) +;; (erase-buffer) +;; (insert buf) +;; t) +;; (nnweb-url-retrieve-asynch +;; url 'nnweb-callback (current-buffer) nnheader-callback-function) +;; t)))) + +;; (defun nnweb-callback (buffer callback) +;; (when (gnus-buffer-live-p url-working-buffer) +;; (save-excursion +;; (set-buffer url-working-buffer) +;; (funcall (nnweb-definition 'article)) +;; (nnweb-decode-entities) +;; (set-buffer buffer) +;; (goto-char (point-max)) +;; (insert-buffer-substring url-working-buffer)) +;; (funcall callback t) +;; (gnus-kill-buffer url-working-buffer))) + +;; (defun nnweb-url-retrieve-asynch (url callback &rest data) +;; (let ((url-request-method "GET") +;; (old-asynch url-be-asynchronous) +;; (url-request-data nil) +;; (url-request-extra-headers nil) +;; (url-working-buffer (generate-new-buffer-name " *nnweb*"))) +;; (setq-default url-be-asynchronous t) +;; (save-excursion +;; (set-buffer (get-buffer-create url-working-buffer)) +;; (setq url-current-callback-data data +;; url-be-asynchronous t +;; url-current-callback-func callback) +;; (url-retrieve url nil)) +;; (setq-default url-be-asynchronous old-asynch))) + +;; (if (fboundp 'url-retrieve-synchronously) +;; (defun nnweb-url-retrieve-asynch (url callback &rest data) +;; (url-retrieve url callback data))) ;;; ;;; DejaNews functions. @@ -388,11 +414,13 @@ and `altavista'.") (car (rassq (string-to-number (match-string 2 date)) parse-time-months)) - (match-string 3 date) + (match-string 3 date) (match-string 1 date))) (setq date "Jan 1 00:00:00 0000")) (incf i) (setq url (concat url "&fmt=text")) + (when (string-match "&context=[^&]+" url) + (setq url (replace-match "" t t url))) (unless (nnweb-get-hashtb url) (push (list @@ -412,17 +440,17 @@ and `altavista'.") ;; Yup -- fetch it. (setq more (match-string 1)) (erase-buffer) - (url-insert-file-contents more))) + (mm-url-insert more))) ;; Return the articles in the right order. (setq nnweb-articles (sort (nconc nnweb-articles map) 'car-less-than-car)))))) (defun nnweb-dejanews-search (search) - (nnweb-insert + (mm-url-insert (concat (nnweb-definition 'address) "?" - (nnweb-encode-www-form-urlencoded + (mm-url-encode-www-form-urlencoded `(("ST" . "PS") ("svcclass" . "dnyr") ("QRY" . ,search) @@ -438,19 +466,19 @@ and `altavista'.") ("ageweight" . "1"))))) t) -(defun nnweb-dejanewsold-search (search) - (nnweb-fetch-form - (nnweb-definition 'address) - `(("query" . ,search) - ("defaultOp" . "AND") - ("svcclass" . "dnold") - ("maxhits" . "100") - ("format" . "verbose2") - ("threaded" . "0") - ("showsort" . "date") - ("agesign" . "1") - ("ageweight" . "1"))) - t) +;; (defun nnweb-dejanewsold-search (search) +;; (nnweb-fetch-form +;; (nnweb-definition 'address) +;; `(("query" . ,search) +;; ("defaultOp" . "AND") +;; ("svcclass" . "dnold") +;; ("maxhits" . "100") +;; ("format" . "verbose2") +;; ("threaded" . "0") +;; ("showsort" . "date") +;; ("agesign" . "1") +;; ("ageweight" . "1"))) +;; t) (defun nnweb-dejanews-identity (url) "Return an unique identifier based on URL." @@ -490,7 +518,7 @@ and `altavista'.") (goto-char (point-min)) (when (looking-at ".*href=\"\\([^\"]+\\)\"") (setq url (match-string 1))) - (nnweb-remove-markup) + (mm-url-remove-markup) (goto-char (point-min)) (while (search-forward "\t" nil t) (replace-match " ")) @@ -525,7 +553,7 @@ and `altavista'.") (let ((body (point-marker))) (search-forward "" nil t) (delete-region (point) (point-max)) - (nnweb-remove-markup) + (mm-url-remove-markup) (goto-char (point-min)) (while (looking-at " *$") (gnus-delete-line)) @@ -552,14 +580,15 @@ and `altavista'.") (while (search-forward "," nil t) (replace-match " " t t))) (widen) + (mm-url-decode-entities) (set-marker body nil)))) (defun nnweb-reference-search (search) - (url-insert-file-contents + (mm-url-insert (concat (nnweb-definition 'address) "?" - (nnweb-encode-www-form-urlencoded + (mm-url-encode-www-form-urlencoded `(("search" . "advanced") ("querytext" . ,search) ("subj" . "") @@ -611,7 +640,7 @@ and `altavista'.") (goto-char (point-min)) (while (search-forward "
" nil t) (replace-match "\n")) - (nnweb-decode-entities) + (mm-url-decode-entities) (goto-char (point-min)) (while (re-search-forward ".*href=\"\\([^\"]+\\)\">\\([^>]*\\)
\\([^-]+\\)- \\([^<]+\\)<.*href=\"news:\\([^\"]+\\)\">.*\">\\(.+\\)

" nil t) @@ -656,14 +685,15 @@ and `altavista'.") (while (re-search-forward "[0-9]+" nil t) (replace-match "<\\1> " t))) (widen) - (nnweb-remove-markup))) + (mm-url-remove-markup) + (mm-url-decode-entities))) (defun nnweb-altavista-search (search &optional part) - (url-insert-file-contents + (mm-url-insert (concat (nnweb-definition 'address) "?" - (nnweb-encode-www-form-urlencoded + (mm-url-encode-www-form-urlencoded `(("pg" . "aq") ("what" . "news") ,@(when part `(("stq" . ,(int-to-string (* part 30))))) @@ -676,13 +706,147 @@ and `altavista'.") t) ;;; +;;; Deja bought by google.com +;;; + +(defun nnweb-google-wash-article () + (let ((case-fold-search t) url) + (goto-char (point-min)) + (re-search-forward "^

" nil t)
+    (narrow-to-region (point-min) (point))
+    (search-backward "" nil t)
+      (replace-match "\n"))
+    (mm-url-remove-markup)
+    (goto-char (point-min))
+    (while (re-search-forward "^[ \t]*\n" nil t)
+      (replace-match ""))
+    (goto-char (point-max))
+    (insert "\n")
+    (widen)
+    (narrow-to-region (point) (point-max))
+    (search-forward "" nil t)
+    (delete-region (point) (point-max))
+    (mm-url-remove-markup)
+    (widen)))
+
+(defun nnweb-google-parse-1 (&optional Message-ID)
+  (let ((i 0)
+	(case-fold-search t)
+	(active (cadr (assoc nnweb-group nnweb-group-alist)))
+	Subject Score Date Newsgroups From
+	map url mid)
+    (unless active
+      (push (list nnweb-group (setq active (cons 1 0))
+		  nnweb-type nnweb-search)
+	    nnweb-group-alist))
+    ;; Go through all the article hits on this page.
+    (goto-char (point-min))
+    (while (re-search-forward
+	    "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
+      (setq mid (match-string 2)
+	    url (format
+		 "http://groups.google.com/groups?selm=%s&output=gplain" mid))
+      (narrow-to-region (search-forward ">" nil t)
+			(search-forward "" nil t))
+      (mm-url-remove-markup)
+      (mm-url-decode-entities)
+      (setq Subject (buffer-string))
+      (goto-char (point-max))
+      (widen)
+      (forward-line 1)
+      (when (looking-at "
]+>") + (goto-char (match-end 0))) + (if (not (looking-at "]+>")) + (skip-chars-forward " \t") + (narrow-to-region (point) + (search-forward "" nil t)) + (mm-url-remove-markup) + (mm-url-decode-entities) + (setq Newsgroups (buffer-string)) + (goto-char (point-max)) + (widen) + (skip-chars-forward "- \t")) + (when (looking-at + "\\([0-9]+[/ ][A-Za-z]+[/ ][0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - ]+\\)" url) + (match-string 1 url) + url)) + +;;; ;;; General web/w3 interface utility functions ;;; (defun nnweb-insert-html (parse) "Insert HTML based on a w3 parse tree." (if (stringp parse) - (insert parse) + (insert (nnheader-string-as-multibyte parse)) (insert "<" (symbol-name (car parse)) " ") (insert (mapconcat (lambda (param) @@ -697,74 +861,6 @@ and `altavista'.") (mapcar 'nnweb-insert-html (nth 2 parse)) (insert "\n"))) -(defun nnweb-encode-www-form-urlencoded (pairs) - "Return PAIRS encoded for forms." - (mapconcat - (function - (lambda (data) - (concat (w3-form-encode-xwfu (car data)) "=" - (w3-form-encode-xwfu (cdr data))))) - pairs "&")) - -(defun nnweb-fetch-form (url pairs) - "Fetch a form from URL with PAIRS as the data using the POST method." - (let ((url-request-data (nnweb-encode-www-form-urlencoded pairs)) - (url-request-method "POST") - (url-request-extra-headers - '(("Content-type" . "application/x-www-form-urlencoded")))) - (url-insert-file-contents url) - (setq buffer-file-name nil)) - t) - -(defun nnweb-decode-entities () - "Decode all HTML entities." - (goto-char (point-min)) - (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t) - (replace-match (char-to-string - (if (eq (aref (match-string 1) 0) ?\#) - (string-to-number (substring (match-string 1) 1)) - (or (cdr (assq (intern (match-string 1)) - w3-html-entities)) - ?#))) - t t))) - -(defun nnweb-decode-entities-string (str) - (with-temp-buffer - (insert str) - (nnweb-decode-entities) - (buffer-substring (point-min) (point-max)))) - -(defun nnweb-remove-markup () - "Remove all HTML markup, leaving just plain text." - (goto-char (point-min)) - (while (search-forward "" nil t) - (point-max)))) - (goto-char (point-min)) - (while (re-search-forward "<[^>]+>" nil t) - (replace-match "" t t))) - -(defun nnweb-insert (url &optional follow-refresh) - "Insert the contents from an URL in the current buffer. -If FOLLOW-REFRESH is non-nil, redirect refresh url in META." - (let ((name buffer-file-name)) - (if follow-refresh - (save-restriction - (narrow-to-region (point) (point)) - (url-insert-file-contents url) - (goto-char (point-min)) - (while (re-search-forward - "HTTP-EQUIV=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" - nil t) - (let ((url (match-string 1))) - (delete-region (point-min) (point-max)) - (nnweb-insert url)) - (goto-char (point-min))) - (goto-char (point-max))) - (url-insert-file-contents url)) - (setq buffer-file-name name))) - (defun nnweb-parse-find (type parse &optional maxdepth) "Find the element of TYPE in PARSE." (catch 'found