X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Fnnweb.el;h=875853a6c1ee5ca98a8cf495f819dd6ab7367c4c;hb=3304290c446e3fd89151ebe599b3c0cea8522329;hp=4cc1fc9dc53ce19a0c2ebd23969ffb80d1d8c2ff;hpb=4c2e20a67169654caf07221554d9e637d3f7bbfa;p=elisp%2Fgnus.git- diff --git a/lisp/nnweb.el b/lisp/nnweb.el index 4cc1fc9..875853a 100644 --- a/lisp/nnweb.el +++ b/lisp/nnweb.el @@ -1,7 +1,7 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996,97 Free Software Foundation, Inc. +;; Copyright (C) 1996,97,98,99 Free Software Foundation, Inc. -;; Author: Lars Magne Ingebrigtsen +;; Author: Lars Magne Ingebrigtsen ;; Keywords: news ;; This file is part of GNU Emacs. @@ -34,11 +34,18 @@ (require 'message) (require 'gnus-util) (require 'gnus) -(require 'w3) -(require 'url) (require 'nnmail) -(ignore-errors - (require 'w3-forms)) +(require 'mm-util) +(eval-when-compile + (ignore-errors + (require 'w3) + (require 'url) + (require 'w3-forms))) +;; Report failure to find w3 at load time if appropriate. +(eval '(progn + (require 'w3) + (require 'url) + (require 'w3-forms))) (nnoo-declare nnweb) @@ -46,14 +53,23 @@ "Where nnweb will save its files.") (defvoo nnweb-type 'dejanews - "What search engine type is being used.") + "What search engine type is being used. +Valid types include `dejanews', `dejanewsold', `reference', +and `altavista'.") (defvar nnweb-type-definition '((dejanews - (article . nnweb-dejanews-wash-article) + (article . ignore) + (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") (map . nnweb-dejanews-create-mapping) (search . nnweb-dejanews-search) - (address . "http://xp9.dejanews.com/dnquery.xp") + (address . "http://www.deja.com/=dnc/qs.xp") + (identifier . nnweb-dejanews-identity)) + (dejanewsold + (article . ignore) + (map . nnweb-dejanews-create-mapping) + (search . nnweb-dejanewsold-search) + (address . "http://www.deja.com/dnquery.xp") (identifier . nnweb-dejanews-identity)) (reference (article . nnweb-reference-wash-article) @@ -73,7 +89,7 @@ (defvoo nnweb-search nil "Search string to feed to DejaNews.") -(defvoo nnweb-max-hits 100 +(defvoo nnweb-max-hits 999 "Maximum number of hits to display.") (defvoo nnweb-ephemeral-p nil @@ -97,9 +113,10 @@ (set-buffer nntp-server-buffer) (erase-buffer) (let (article header) - (while (setq article (pop articles)) - (when (setq header (cadr (assq article nnweb-articles))) - (nnheader-insert-nov header))) + (mm-with-unibyte-current-buffer + (while (setq article (pop articles)) + (when (setq header (cadr (assq article nnweb-articles))) + (nnheader-insert-nov header)))) 'nov))) (deffoo nnweb-request-scan (&optional group server) @@ -116,11 +133,14 @@ (not (equal group nnweb-group)) (not nnweb-ephemeral-p)) (let ((info (assoc group nnweb-group-alist))) - (setq nnweb-group group) - (setq nnweb-type (nth 2 info)) - (setq nnweb-search (nth 3 info)) - (unless dont-check - (nnweb-read-overview group)))) + (when info + (setq nnweb-group group) + (setq nnweb-type (nth 2 info)) + (setq nnweb-search (nth 3 info)) + (unless dont-check + (nnweb-read-overview group))))) + (unless dont-check + (nnweb-request-scan group)) (cond ((not nnweb-articles) (nnheader-report 'nnweb "No matching articles")) @@ -150,7 +170,8 @@ (let* ((header (cadr (assq article nnweb-articles))) (url (and header (mail-header-xref header)))) (when (or (and url - (nnweb-fetch-url url)) + (mm-with-unibyte-current-buffer + (nnweb-fetch-url url))) (and (stringp article) (nnweb-definition 'id t) (let ((fetch (nnweb-definition 'id)) @@ -159,8 +180,9 @@ (setq art (match-string 1 article))) (and fetch art - (nnweb-fetch-url - (format fetch article)))))) + (mm-with-unibyte-current-buffer + (nnweb-fetch-url + (format fetch article))))))) (unless nnheader-callback-function (funcall (nnweb-definition 'article)) (nnweb-decode-entities)) @@ -200,7 +222,8 @@ (deffoo nnweb-request-delete-group (group &optional force server) (nnweb-possibly-change-server group server) - (gnus-delete-assoc group nnweb-group-alist) + (gnus-pull group nnweb-group-alist t) + (nnweb-write-active) (gnus-delete-file (nnweb-overview-file group)) t) @@ -211,7 +234,7 @@ (defun nnweb-read-overview (group) "Read the overview of GROUP and build the map." (when (file-exists-p (nnweb-overview-file group)) - (nnheader-temp-write nil + (mm-with-unibyte-buffer (nnheader-insert-file-contents (nnweb-overview-file group)) (goto-char (point-min)) (let (header) @@ -225,7 +248,7 @@ (defun nnweb-write-overview (group) "Write the overview file for GROUP." - (nnheader-temp-write (nnweb-overview-file group) + (with-temp-file (nnweb-overview-file group) (let ((articles nnweb-articles)) (while articles (nnheader-insert-nov (cadr (pop articles))))))) @@ -246,7 +269,8 @@ (defun nnweb-write-active () "Save the active file." - (nnheader-temp-write (nnheader-concat nnweb-directory "active") + (gnus-make-directory nnweb-directory) + (with-temp-file (nnheader-concat nnweb-directory "active") (prin1 `(setq nnweb-group-alist ',nnweb-group-alist) (current-buffer)))) (defun nnweb-read-active () @@ -278,22 +302,34 @@ (unless (gnus-buffer-live-p nnweb-buffer) (setq nnweb-buffer (save-excursion - (nnheader-set-temp-buffer - (format " *nnweb %s %s %s*" nnweb-type nnweb-search server)))))) + (let ((multibyte (default-value 'enable-multibyte-characters))) + (unwind-protect + (progn + (setq-default enable-multibyte-characters nil) + (nnheader-set-temp-buffer + (format " *nnweb %s %s %s*" + nnweb-type nnweb-search server))) + (setq-default enable-multibyte-characters multibyte)) + (current-buffer)))))) (defun nnweb-fetch-url (url) - (save-excursion - (if (not nnheader-callback-function) - (let ((buf (current-buffer))) - (save-excursion - (set-buffer nnweb-buffer) + (let (buf) + (save-excursion + (if (not nnheader-callback-function) + (progn + (with-temp-buffer + (mm-enable-multibyte) + (let ((coding-system-for-read 'binary) + (coding-system-for-write 'binary) + (default-process-coding-system 'binary)) + (nnweb-insert url)) + (setq buf (buffer-string))) (erase-buffer) - (url-insert-file-contents url) - (copy-to-buffer buf (point-min) (point-max)) - t)) - (nnweb-url-retrieve-asynch - url 'nnweb-callback (current-buffer) nnheader-callback-function) - t))) + (insert buf) + t) + (nnweb-url-retrieve-asynch + url 'nnweb-callback (current-buffer) nnheader-callback-function) + t)))) (defun nnweb-callback (buffer callback) (when (gnus-buffer-live-p url-working-buffer) @@ -322,42 +358,6 @@ (url-retrieve url)) (setq-default url-be-asynchronous old-asynch))) -(defun nnweb-encode-www-form-urlencoded (pairs) - "Return PAIRS encoded for forms." - (mapconcat - (function - (lambda (data) - (concat (w3-form-encode-xwfu (car data)) "=" - (w3-form-encode-xwfu (cdr data))))) - pairs "&")) - -(defun nnweb-fetch-form (url pairs) - (let ((url-request-data (nnweb-encode-www-form-urlencoded pairs)) - (url-request-method "POST") - (url-request-extra-headers - '(("Content-type" . "application/x-www-form-urlencoded")))) - (url-insert-file-contents url) - (setq buffer-file-name nil)) - t) - -(defun nnweb-decode-entities () - (goto-char (point-min)) - (while (re-search-forward "&\\([a-z]+\\);" nil t) - (replace-match (char-to-string (or (cdr (assq (intern (match-string 1)) - w3-html-entities)) - ?#)) - t t))) - -(defun nnweb-remove-markup () - (goto-char (point-min)) - (while (search-forward "" nil t) - (point-max)))) - (goto-char (point-min)) - (while (re-search-forward "<[^>]+>" nil t) - (replace-match "" t t))) - ;;; ;;; DejaNews functions. ;;; @@ -373,49 +373,46 @@ (case-fold-search t) (active (or (cadr (assoc nnweb-group nnweb-group-alist)) (cons 1 0))) - Subject Score Date Newsgroup Author - map url) + subject date from + map url parse a table group text) (while more ;; Go through all the article hits on this page. (goto-char (point-min)) - (nnweb-decode-entities) - (goto-char (point-min)) - (while (re-search-forward "^ +[0-9]+\\." nil t) - (narrow-to-region - (point) - (cond ((re-search-forward "^ +[0-9]+\\." nil t) - (match-beginning 0)) - ((search-forward "\n\n" nil t) - (point)) - (t - (point-max)))) - (goto-char (point-min)) - (when (looking-at ".*HREF=\"\\([^\"]+\\)\"") - (setq url (match-string 1))) - (nnweb-remove-markup) - (goto-char (point-min)) - (while (search-forward "\t" nil t) - (replace-match " ")) - (goto-char (point-min)) - (while (re-search-forward "^ +\\([^:]+\\): +\\(.*\\)$" nil t) - (set (intern (match-string 1)) (match-string 2))) - (widen) - (when (string-match "#[0-9]+/[0-9]+ *$" Subject) - (setq Subject (substring Subject 0 (match-beginning 0)))) - (incf i) - (unless (nnweb-get-hashtb url) - (push - (list - (incf (cdr active)) - (make-full-mail-header - (cdr active) (concat "(" Newsgroup ") " Subject) Author Date - (concat "<" (nnweb-identifier url) "@dejanews>") - nil 0 (string-to-int Score) url)) - map) - (nnweb-set-hashtb (cadar map) (car map)))) + (setq parse (w3-parse-buffer (current-buffer)) + table (nth 1 (nnweb-parse-find-all 'table parse))) + (dolist (row (nth 2 (car (nth 2 table)))) + (setq a (nnweb-parse-find 'a row) + url (cdr (assq 'href (nth 1 a))) + text (nreverse (nnweb-text row))) + (when a + (setq subject (nth 4 text) + group (nth 2 text) + date (nth 1 text) + from (nth 0 text)) + (if (string-match "\\([0-9]+\\)/\\([0-9]+\\)/\\([0-9]+\\)" date) + (setq date (format "%s %s 00:00:00 %s" + (car (rassq (string-to-number + (match-string 2 date)) + parse-time-months)) + (match-string 3 date) + (match-string 1 date))) + (setq date "Jan 1 00:00:00 0000")) + (incf i) + (setq url (concat url "&fmt=text")) + (unless (nnweb-get-hashtb url) + (push + (list + (incf (cdr active)) + (make-full-mail-header + (cdr active) (concat subject " (" group ")") from date + (concat "<" (nnweb-identifier url) "@dejanews>") + nil 0 0 url)) + map) + (nnweb-set-hashtb (cadar map) (car map))))) ;; See whether there is a "Get next 20 hits" button here. + (goto-char (point-min)) (if (or (not (re-search-forward - "HREF=\"\\([^\"]+\\)\">Get next" nil t)) + "HREF=\"\\([^\"]+\\)\"[<>b]+Next result" nil t)) (>= i nnweb-max-hits)) (setq more nil) ;; Yup -- fetch it. @@ -426,44 +423,44 @@ (setq nnweb-articles (sort (nconc nnweb-articles map) 'car-less-than-car)))))) -(defun nnweb-dejanews-wash-article () - (let ((case-fold-search t)) - (goto-char (point-min)) - (re-search-forward "
" nil t)
-    (delete-region (point-min) (point))
-    (re-search-forward "
" nil t) - (delete-region (point) (point-max)) - (nnweb-remove-markup) - (goto-char (point-min)) - (while (and (looking-at " *$") - (not (eobp))) - (gnus-delete-line)) - (while (looking-at "\\(^[^ ]+:\\) *") - (replace-match "\\1 " t) - (forward-line 1)) - (when (re-search-forward "\n\n+" nil t) - (replace-match "\n" t t)) - (goto-char (point-min)) - (when (search-forward "[More Headers]" nil t) - (replace-match "" t t)))) - (defun nnweb-dejanews-search (search) + (nnweb-insert + (concat + (nnweb-definition 'address) + "?" + (nnweb-encode-www-form-urlencoded + `(("ST" . "PS") + ("svcclass" . "dnyr") + ("QRY" . ,search) + ("defaultOp" . "AND") + ("DBS" . "1") + ("OP" . "dnquery.xp") + ("LNG" . "ALL") + ("maxhits" . "100") + ("threaded" . "0") + ("format" . "verbose2") + ("showsort" . "date") + ("agesign" . "1") + ("ageweight" . "1"))))) + t) + +(defun nnweb-dejanewsold-search (search) (nnweb-fetch-form (nnweb-definition 'address) `(("query" . ,search) ("defaultOp" . "AND") - ("svcclass" . "dncurrent") + ("svcclass" . "dnold") ("maxhits" . "100") - ("format" . "verbose") + ("format" . "verbose2") ("threaded" . "0") - ("showsort" . "score") + ("showsort" . "date") ("agesign" . "1") ("ageweight" . "1"))) t) (defun nnweb-dejanews-identity (url) "Return an unique identifier based on URL." - (if (string-match "recnum=\\([0-9]+\\)" url) + (if (string-match "AN=\\([0-9]+\\)" url) (match-string 1 url) url)) @@ -685,6 +682,122 @@ (setq buffer-file-name nil) t) +;;; +;;; General web/w3 interface utility functions +;;; + +(defun nnweb-insert-html (parse) + "Insert HTML based on a w3 parse tree." + (if (stringp parse) + (insert parse) + (insert "<" (symbol-name (car parse)) " ") + (insert (mapconcat + (lambda (param) + (concat (symbol-name (car param)) "=" + (prin1-to-string + (if (consp (cdr param)) + (cadr param) + (cdr param))))) + (nth 1 parse) + " ")) + (insert ">\n") + (mapcar 'nnweb-insert-html (nth 2 parse)) + (insert "\n"))) + +(defun nnweb-encode-www-form-urlencoded (pairs) + "Return PAIRS encoded for forms." + (mapconcat + (function + (lambda (data) + (concat (w3-form-encode-xwfu (car data)) "=" + (w3-form-encode-xwfu (cdr data))))) + pairs "&")) + +(defun nnweb-fetch-form (url pairs) + "Fetch a form from URL with PAIRS as the data using the POST method." + (let ((url-request-data (nnweb-encode-www-form-urlencoded pairs)) + (url-request-method "POST") + (url-request-extra-headers + '(("Content-type" . "application/x-www-form-urlencoded")))) + (url-insert-file-contents url) + (setq buffer-file-name nil)) + t) + +(defun nnweb-decode-entities () + "Decode all HTML entities." + (goto-char (point-min)) + (while (re-search-forward "&\\([a-z]+\\);" nil t) + (replace-match (char-to-string (or (cdr (assq (intern (match-string 1)) + w3-html-entities)) + ?#)) + t t))) + +(defun nnweb-remove-markup () + "Remove all HTML markup, leaving just plain text." + (goto-char (point-min)) + (while (search-forward "" nil t) + (point-max)))) + (goto-char (point-min)) + (while (re-search-forward "<[^>]+>" nil t) + (replace-match "" t t))) + +(defun nnweb-insert (url) + "Insert the contents from an URL in the current buffer." + (let ((name buffer-file-name)) + (url-insert-file-contents url) + (setq buffer-file-name name))) + +(defun nnweb-parse-find (type parse &optional maxdepth) + "Find the element of TYPE in PARSE." + (catch 'found + (nnweb-parse-find-1 type parse maxdepth))) + +(defun nnweb-parse-find-1 (type contents maxdepth) + (when (or (null maxdepth) + (not (zerop maxdepth))) + (when (consp contents) + (when (eq (car contents) type) + (throw 'found contents)) + (when (listp (cdr contents)) + (dolist (element contents) + (when (consp element) + (nnweb-parse-find-1 type element + (and maxdepth (1- maxdepth))))))))) + +(defun nnweb-parse-find-all (type parse) + "Find all elements of TYPE in PARSE." + (catch 'found + (nnweb-parse-find-all-1 type parse))) + +(defun nnweb-parse-find-all-1 (type contents) + (let (result) + (when (consp contents) + (if (eq (car contents) type) + (push contents result) + (when (listp (cdr contents)) + (dolist (element contents) + (when (consp element) + (setq result + (nconc result (nnweb-parse-find-all-1 type element)))))))) + result)) + +(defvar nnweb-text) +(defun nnweb-text (parse) + "Return a list of text contents in PARSE." + (let ((nnweb-text nil)) + (nnweb-text-1 parse) + (nreverse nnweb-text))) + +(defun nnweb-text-1 (contents) + (dolist (element contents) + (if (stringp element) + (push element nnweb-text) + (when (and (consp element) + (listp (cdr element))) + (nnweb-text-1 element))))) + (provide 'nnweb) ;;; nnweb.el ends here