X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Fnnweb.el;h=1e4a8eac37f09073b5408f2991e4ba0142c5be26;hb=1270affddb81ba2dab7d9e1c6c6370b05c762e37;hp=8976209f954370fa3543e5a5ffc74b920d73795e;hpb=8ca543af8a7e56f1febcd53f0f70b312944ebe83;p=elisp%2Fgnus.git- diff --git a/lisp/nnweb.el b/lisp/nnweb.el index 8976209..1e4a8ea 100644 --- a/lisp/nnweb.el +++ b/lisp/nnweb.el @@ -1,6 +1,7 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004 -;; Free Software Foundation, Inc. + +;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, +;; 2004, 2005, 2006 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen ;; Keywords: news @@ -19,8 +20,8 @@ ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. ;;; Commentary: @@ -53,20 +54,24 @@ Valid types include `google', `dejanews', and `gmane'.") (defvar nnweb-type-definition '((google - (article . ignore) - (id . "http://groups.google.com/groups?selm=%s&output=gplain") + (id . "http://www.google.com/groups?as_umsgid=%s&hl=en&dmode=source") + (result . "http://groups.google.com/group/%s/msg/%s?dmode=source") + (article . nnweb-google-wash-article) (reference . identity) (map . nnweb-google-create-mapping) (search . nnweb-google-search) (address . "http://groups.google.com/groups") + (base . "http://groups.google.com") (identifier . nnweb-google-identity)) (dejanews ;; alias of google - (article . ignore) - (id . "http://groups.google.com/groups?selm=%s&output=gplain") + (id . "http://www.google.com/groups?as_umsgid=%s&hl=en&dmode=source") + (result . "http://groups.google.com/group/%s/msg/%s?dmode=source") + (article . nnweb-google-wash-article) (reference . identity) (map . nnweb-google-create-mapping) (search . nnweb-google-search) (address . "http://groups.google.com/groups") + (base . "http://groups.google.com") (identifier . nnweb-google-identity)) (gmane (article . nnweb-gmane-wash-article) @@ -74,7 +79,7 @@ Valid types include `google', `dejanews', and `gmane'.") (reference . identity) (map . nnweb-gmane-create-mapping) (search . nnweb-gmane-search) - (address . "http://gmane.org/") + (address . "http://search.gmane.org/nov.php") (identifier . nnweb-gmane-identity))) "Type-definition alist.") @@ -114,25 +119,20 @@ Valid types include `google', `dejanews', and `gmane'.") (deffoo nnweb-request-scan (&optional group server) (nnweb-possibly-change-server group server) (if nnweb-ephemeral-p - (setq nnweb-hashtb (gnus-make-hashtable 4095))) + (setq nnweb-hashtb (gnus-make-hashtable 4095)) + (unless nnweb-articles + (nnweb-read-overview group))) (funcall (nnweb-definition 'map)) (unless nnweb-ephemeral-p (nnweb-write-active) (nnweb-write-overview group))) (deffoo nnweb-request-group (group &optional server dont-check) - (nnweb-possibly-change-server nil server) - (when (and group - (not (equal group nnweb-group)) - (not nnweb-ephemeral-p)) - (setq nnweb-group group - nnweb-articles nil) - (let ((info (assoc group nnweb-group-alist))) - (when info - (setq nnweb-type (nth 2 info)) - (setq nnweb-search (nth 3 info)) - (unless dont-check - (nnweb-read-overview group))))) + (nnweb-possibly-change-server group server) + (unless (or nnweb-ephemeral-p + dont-check + nnweb-articles) + (nnweb-read-overview group)) (cond ((not nnweb-articles) (nnheader-report 'nnweb "No matching articles")) @@ -196,7 +196,7 @@ Valid types include `google', `dejanews', and `gmane'.") (nnweb-possibly-change-server nil server) (save-excursion (set-buffer nntp-server-buffer) - (nnmail-generate-active nnweb-group-alist) + (nnmail-generate-active (list (assoc server nnweb-group-alist))) t)) (deffoo nnweb-request-update-info (group info &optional server) @@ -208,7 +208,7 @@ Valid types include `google', `dejanews', and `gmane'.") (deffoo nnweb-request-create-group (group &optional server args) (nnweb-possibly-change-server nil server) (nnweb-request-delete-group group) - (push `(,group ,(cons 1 0) ,@args) nnweb-group-alist) + (push `(,group ,(cons 1 0)) nnweb-group-alist) (nnweb-write-active) t) @@ -278,18 +278,16 @@ Valid types include `google', `dejanews', and `gmane'.") def)) (defun nnweb-possibly-change-server (&optional group server) - (nnweb-init server) (when server (unless (nnweb-server-opened server) - (nnweb-open-server server))) + (nnweb-open-server server)) + (nnweb-init server)) (unless nnweb-group-alist (nnweb-read-active)) (unless nnweb-hashtb (setq nnweb-hashtb (gnus-make-hashtable 4095))) (when group - (when (and (not nnweb-ephemeral-p) - (equal group nnweb-group)) - (nnweb-request-group group nil t)))) + (setq nnweb-group group))) (defun nnweb-init (server) "Initialize buffers and such." @@ -303,53 +301,56 @@ Valid types include `google', `dejanews', and `gmane'.") (current-buffer)))))) ;;; -;;; Deja bought by google.com +;;; groups.google.com ;;; (defun nnweb-google-wash-article () - (let ((case-fold-search t) url) + ;; We have Google's masked e-mail addresses here. :-/ + (let ((case-fold-search t) + (start-re "
\n *")
+	(end-re "\n *
")) (goto-char (point-min)) - (re-search-forward "^
" nil t)
-    (narrow-to-region (point-min) (point))
-    (search-backward "" nil t)
-      (replace-match "\n"))
-    (mm-url-remove-markup)
-    (goto-char (point-min))
-    (while (re-search-forward "^[ \t]*\n" nil t)
-      (replace-match ""))
-    (goto-char (point-max))
-    (insert "\n")
-    (widen)
-    (narrow-to-region (point) (point-max))
-    (search-forward "" nil t)
-    (delete-region (point) (point-max))
-    (mm-url-remove-markup)
-    (widen)))
+    (if (save-excursion
+	  (or (re-search-forward "The requested message.*could not be found."
+				 nil t)
+	      (not (and (re-search-forward start-re nil t)
+			(re-search-forward end-re nil t)))))
+	;; FIXME: Don't know how to indicate "not found".
+	;; Should this function throw an error?  --rsteib
+	(progn
+	  (gnus-message 3 "Requested article not found")
+	  (erase-buffer))
+      (delete-region (point-min)
+		     (re-search-forward start-re))
+      (goto-char (point-min))
+      (delete-region (progn
+		       (re-search-forward end-re)
+		       (match-beginning 0))
+		     (point-max))
+      (mm-url-decode-entities))))
 
 (defun nnweb-google-parse-1 (&optional Message-ID)
+  "Parse search result in current buffer."
   (let ((i 0)
 	(case-fold-search t)
 	(active (cadr (assoc nnweb-group nnweb-group-alist)))
 	Subject Score Date Newsgroups From
 	map url mid)
     (unless active
-      (push (list nnweb-group (setq active (cons 1 0))
-		  nnweb-type nnweb-search)
+      (push (list nnweb-group (setq active (cons 1 0)))
 	    nnweb-group-alist))
     ;; Go through all the article hits on this page.
     (goto-char (point-min))
-    (while (re-search-forward
-	    "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
-      (setq mid (match-string 2)
+    (while
+	(re-search-forward
+	 "a +href=\"/group/\\([^>\"]+\\)/browse_thread/[^>]+#\\([0-9a-f]+\\)"
+	 nil t)
+      (setq Newsgroups (match-string-no-properties 1)
+	    ;; Note: Starting with Google Groups 2, `mid' is a Google-internal
+	    ;; ID, not a proper Message-ID.
+	    mid (match-string-no-properties 2)
 	    url (format
-		 "http://groups.google.com/groups?selm=%s&output=gplain" mid))
+		 (nnweb-definition 'result) Newsgroups mid))
       (narrow-to-region (search-forward ">" nil t)
 			(search-forward "" nil t))
       (mm-url-remove-markup)
@@ -357,25 +358,22 @@ Valid types include `google', `dejanews', and `gmane'.")
       (setq Subject (buffer-string))
       (goto-char (point-max))
       (widen)
-      (forward-line 2)
-      (when (looking-at "
]+>") - (goto-char (match-end 0))) - (if (not (looking-at "]+>")) - (skip-chars-forward " \t") - (narrow-to-region (point) - (search-forward "" nil t)) - (mm-url-remove-markup) - (mm-url-decode-entities) - (setq Newsgroups (buffer-string)) - (goto-char (point-max)) - (widen) - (skip-chars-forward "- \t")) + (narrow-to-region (point) + (search-forward "]+\\).*Next" nil t)) + "\"]+\\)\">= i nnweb-max-hits)) (setq more nil) ;; Yup, there are more articles - (setq more (concat "http://groups.google.com" (match-string 1))) + (setq more (concat (nnweb-definition 'base) (match-string 1))) (when more (erase-buffer) + (nnheader-message 7 "Searching google...(%d)" i) (mm-url-insert more)))) ;; Return the articles in the right order. + (nnheader-message 7 "Searching google...done") (setq nnweb-articles (sort nnweb-articles 'car-less-than-car)))))) @@ -435,12 +437,13 @@ Valid types include `google', `dejanews', and `gmane'.") "?" (mm-url-encode-www-form-urlencoded `(("q" . ,search) - ("num". "100") + ("num" . "100") ("hq" . "") - ("hl" . "") + ("hl" . "en") ("lr" . "") ("safe" . "off") - ("sites" . "groups"))))) + ("sites" . "groups") + ("filter" . "0"))))) t) (defun nnweb-google-identity (url) @@ -456,46 +459,61 @@ Valid types include `google', `dejanews', and `gmane'.") "Perform the search and create a number-to-url alist." (save-excursion (set-buffer nnweb-buffer) - (erase-buffer) - (when (funcall (nnweb-definition 'search) nnweb-search) - (let ((more t) - (case-fold-search t) - (active (or (cadr (assoc nnweb-group nnweb-group-alist)) - (cons 1 0))) - subject group url - map) - ;; Remove stuff from the beginning of results + (let ((case-fold-search t) + (active (or (cadr (assoc nnweb-group nnweb-group-alist)) + (cons 1 0))) + map) + (erase-buffer) + (nnheader-message 7 "Searching Gmane..." ) + (when (funcall (nnweb-definition 'search) nnweb-search) (goto-char (point-min)) - (search-forward "Search Results
    " nil t) - (delete-region (point-min) (point)) - (goto-char (point-min)) - ;; Iterate over the actual hits - (while (re-search-forward ".*href=\"\\([^\"]+\\)\">\\(.*\\)" nil t) - (setq url (concat "http://gmane.org/" (match-string 1))) - (setq subject (match-string 2)) - (unless (nnweb-get-hashtb url) - (push - (list - (incf (cdr active)) - (make-full-mail-header - (cdr active) (concat "(" group ") " subject) nil nil - nil nil 0 0 url)) - map) - (nnweb-set-hashtb (cadar map) (car map)))) - ;; Return the articles in the right order. - (setq nnweb-articles - (sort (nconc nnweb-articles map) 'car-less-than-car)))))) + ;; Skip the status line + (forward-line 1) + ;; Thanks to Olly Betts we now have NOV lines in our buffer! + (while (not (eobp)) + (unless (or (eolp) (looking-at "\x0d")) + (let ((header (nnheader-parse-nov))) + (let ((xref (mail-header-xref header)) + (from (mail-header-from header)) + (subject (mail-header-subject header)) + (rfc2047-encoding-type 'mime)) + (when (string-match " \\([^:]+\\):\\([0-9]+\\)" xref) + (mail-header-set-xref + header + (format "http://article.gmane.org/%s/%s/raw" + (match-string 1 xref) + (match-string 2 xref)))) + + ;; Add host part to gmane-encrypted addresses + (when (string-match "@$" from) + (mail-header-set-from header + (concat from "public.gmane.org"))) + + (mail-header-set-subject header + (rfc2047-encode-string subject)) + + (unless (nnweb-get-hashtb (mail-header-xref header)) + (push + (list + (incf (cdr active)) + header) + map) + (nnweb-set-hashtb (cadar map) (car map)))))) + (forward-line 1))) + (nnheader-message 7 "Searching Gmane...done") + (setq nnweb-articles + (sort (nconc nnweb-articles map) 'car-less-than-car))))) (defun nnweb-gmane-wash-article () (let ((case-fold-search t)) (goto-char (point-min)) - (search-forward "" nil t) - (delete-region (point-min) (point)) - (goto-char (point-min)) - (while (looking-at "^
  • \\([^ ]+\\).*
  • ") - (replace-match "\\1\\2" t) - (forward-line 1)) - (mm-url-remove-markup))) + (when (search-forward "" nil t) + (delete-region (point-min) (point)) + (goto-char (point-min)) + (while (looking-at "^
  • \\([^ ]+\\).*
  • ") + (replace-match "\\1\\2" t) + (forward-line 1)) + (mm-url-remove-markup)))) (defun nnweb-gmane-search (search) (mm-url-insert @@ -503,11 +521,13 @@ Valid types include `google', `dejanews', and `gmane'.") (nnweb-definition 'address) "?" (mm-url-encode-www-form-urlencoded - `(("query" . ,search))))) + `(("query" . ,search) + ("HITSPERPAGE" . ,(number-to-string nnweb-max-hits)))))) (setq buffer-file-name nil) + (set-buffer-multibyte t) + (mm-decode-coding-region (point-min) (point-max) 'utf-8) t) - (defun nnweb-gmane-identity (url) "Return a unique identifier based on URL." (if (string-match "group=\\(.+\\)" url)