X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Fnnweb.el;h=760ef7581ebd71eeb4563abd04b6c2bcfd322bbf;hb=8d5b94488b8fe507a83eb5475ecaa54afb8a98b8;hp=dcdf4d48d2f3975ad0a6c3b160944bc62858d58e;hpb=2cc5659442ce551b395b9aeebe213947e415ac6d;p=elisp%2Fgnus.git- diff --git a/lisp/nnweb.el b/lisp/nnweb.el index dcdf4d4..760ef75 100644 --- a/lisp/nnweb.el +++ b/lisp/nnweb.el @@ -1,5 +1,5 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996, 1997, 1998, 1999, 2000 +;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 ;; Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen @@ -61,19 +61,31 @@ Valid types include `dejanews', `dejanewsold', `reference', and `altavista'.") (defvar nnweb-type-definition - '((dejanews + '( + (dejanews ;; bought by google.com + ;;(article . nnweb-google-wash-article) + ;;(id . "http://groups.google.com/groups?as_umsgid=%s") (article . ignore) - (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanews-search) - (address . "http://www.deja.com/=dnc/qs.xp") - (identifier . nnweb-dejanews-identity)) - (dejanewsold - (article . ignore) - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanewsold-search) - (address . "http://www.deja.com/dnquery.xp") - (identifier . nnweb-dejanews-identity)) + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + ;;(reference . nnweb-google-reference) + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (identifier . nnweb-google-identity)) +;;; (dejanews +;;; (article . ignore) +;;; (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") +;;; (map . nnweb-dejanews-create-mapping) +;;; (search . nnweb-dejanews-search) +;;; (address . "http://www.deja.com/=dnc/qs.xp") +;;; (identifier . nnweb-dejanews-identity)) +;;; (dejanewsold +;;; (article . ignore) +;;; (map . nnweb-dejanews-create-mapping) +;;; (search . nnweb-dejanewsold-search) +;;; (address . "http://www.deja.com/dnquery.xp") +;;; (identifier . nnweb-dejanews-identity)) (reference (article . nnweb-reference-wash-article) (map . nnweb-reference-create-mapping) @@ -134,9 +146,10 @@ and `altavista'.") (when (and group (not (equal group nnweb-group)) (not nnweb-ephemeral-p)) + (setq nnweb-group group + nnweb-articles nil) (let ((info (assoc group nnweb-group-alist))) (when info - (setq nnweb-group group) (setq nnweb-type (nth 2 info)) (setq nnweb-search (nth 3 info)) (unless dont-check @@ -175,17 +188,19 @@ and `altavista'.") (and (stringp article) (nnweb-definition 'id t) (let ((fetch (nnweb-definition 'id)) - art) + art active) (when (string-match "^<\\(.*\\)>$" article) (setq art (match-string 1 article))) - (and fetch - art - (mm-with-unibyte-current-buffer - (nnweb-fetch-url - (format fetch article))))))) + (when (and fetch art) + (setq url (format fetch art)) + (mm-with-unibyte-current-buffer + (nnweb-fetch-url url)) + (if (nnweb-definition 'reference t) + (setq article + (funcall (nnweb-definition + 'reference) article))))))) (unless nnheader-callback-function - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities)) + (funcall (nnweb-definition 'article))) (nnheader-report 'nnweb "Fetched article %s" article) (cons group (and (numberp article) article)))))) @@ -395,7 +410,7 @@ and `altavista'.") (car (rassq (string-to-number (match-string 2 date)) parse-time-months)) - (match-string 3 date) + (match-string 3 date) (match-string 1 date))) (setq date "Jan 1 00:00:00 0000")) (incf i) @@ -561,6 +576,7 @@ and `altavista'.") (while (search-forward "," nil t) (replace-match " " t t))) (widen) + (nnweb-decode-entities) (set-marker body nil)))) (defun nnweb-reference-search (search) @@ -665,7 +681,8 @@ and `altavista'.") (while (re-search-forward "[0-9]+" nil t) (replace-match "<\\1> " t))) (widen) - (nnweb-remove-markup))) + (nnweb-remove-markup) + (nnweb-decode-entities))) (defun nnweb-altavista-search (search &optional part) (url-insert-file-contents @@ -685,13 +702,147 @@ and `altavista'.") t) ;;; +;;; Deja bought by google.com +;;; + +(defun nnweb-google-wash-article () + (let ((case-fold-search t) url) + (goto-char (point-min)) + (re-search-forward "^
" nil t)
+    (narrow-to-region (point-min) (point))
+    (search-backward "" nil t)
+      (replace-match "\n"))
+    (nnweb-remove-markup)
+    (goto-char (point-min))
+    (while (re-search-forward "^[ \t]*\n" nil t)
+      (replace-match ""))
+    (goto-char (point-max))
+    (insert "\n")
+    (widen)
+    (narrow-to-region (point) (point-max))
+    (search-forward "" nil t)
+    (delete-region (point) (point-max))
+    (nnweb-remove-markup)
+    (widen)))
+
+(defun nnweb-google-parse-1 (&optional Message-ID)
+  (let ((i 0)
+	(case-fold-search t)
+	(active (cadr (assoc nnweb-group nnweb-group-alist)))
+	Subject Score Date Newsgroups From
+	map url mid)
+    (unless active
+      (push (list nnweb-group (setq active (cons 1 0))
+		  nnweb-type nnweb-search)
+	    nnweb-group-alist))
+    ;; Go through all the article hits on this page.
+    (goto-char (point-min))
+    (while (re-search-forward
+	    "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
+      (setq mid (match-string 2)
+	    url (format 
+		 "http://groups.google.com/groups?selm=%s&output=gplain" mid))
+      (narrow-to-region (search-forward ">" nil t)
+			(search-forward "" nil t))
+      (nnweb-remove-markup)
+      (nnweb-decode-entities)
+      (setq Subject (buffer-string))
+      (goto-char (point-max))
+      (widen)
+      (forward-line 2)
+      (when (looking-at "
]+>") + (goto-char (match-end 0))) + (if (not (looking-at "]+>")) + (skip-chars-forward " \t") + (narrow-to-region (point) + (search-forward "" nil t)) + (nnweb-remove-markup) + (nnweb-decode-entities) + (setq Newsgroups (buffer-string)) + (goto-char (point-max)) + (widen) + (skip-chars-forward "- \t")) + (when (looking-at + "\\([0-9]+[/ ][A-Za-z]+[/ ][0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - ]+\\)" url) + (match-string 1 url) + url)) + +;;; ;;; General web/w3 interface utility functions ;;; (defun nnweb-insert-html (parse) "Insert HTML based on a w3 parse tree." (if (stringp parse) - (insert parse) + (insert (nnheader-string-as-multibyte parse)) (insert "<" (symbol-name (car parse)) " ") (insert (mapconcat (lambda (param) @@ -730,13 +881,13 @@ and `altavista'.") (goto-char (point-min)) (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t) (let ((elem (if (eq (aref (match-string 1) 0) ?\#) - (let ((c - (string-to-number (substring - (match-string 1) 1)))) - (if (mm-char-or-char-int-p c) c 32)) - (or (cdr (assq (intern (match-string 1)) - w3-html-entities)) - ?#)))) + (let ((c + (string-to-number (substring + (match-string 1) 1)))) + (if (mm-char-or-char-int-p c) c 32)) + (or (cdr (assq (intern (match-string 1)) + w3-html-entities)) + ?#)))) (unless (stringp elem) (setq elem (char-to-string elem))) (replace-match elem t t)))) @@ -767,7 +918,7 @@ If FOLLOW-REFRESH is non-nil, redirect refresh url in META." (narrow-to-region (point) (point)) (url-insert-file-contents url) (goto-char (point-min)) - (when (re-search-forward + (when (re-search-forward "]*URL=\\([^\"]+\\)\"" nil t) (let ((url (match-string 1))) (delete-region (point-min) (point-max))