X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Fnnweb.el;h=3ce22fdbd92bccbee1afb3eec1a30ef2ab138e24;hb=36bd162f4f7cd40453b8683e796730836c352b2a;hp=af1a7563098150395ea5985f039abefaa9cedba0;hpb=4305c2ea86b2e1d044bfb8b98e5558504bc09781;p=elisp%2Fgnus.git- diff --git a/lisp/nnweb.el b/lisp/nnweb.el index af1a756..3ce22fd 100644 --- a/lisp/nnweb.el +++ b/lisp/nnweb.el @@ -24,8 +24,7 @@ ;;; Commentary: -;; Note: You need to have `url' and `w3' installed for this -;; backend to work. +;; Note: You need to have `w3' installed for some functions to work. ;;; Code: @@ -37,35 +36,40 @@ (require 'gnus-util) (require 'gnus) (require 'nnmail) -(eval-when-compile - (ignore-errors - (require 'w3) - (require 'url) - (require 'w3-forms))) - -;; Report failure to find w3 at load time if appropriate. -(unless noninteractive - (eval '(progn - (require 'w3) - (require 'url) - (require 'w3-forms)))) +(require 'mm-util) +(require 'mm-url) +(autoload 'w3-parse-buffer "w3-parse") (nnoo-declare nnweb) (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") "Where nnweb will save its files.") -(defvoo nnweb-type 'dejanews +(defvoo nnweb-type 'google "What search engine type is being used. -Valid types include `dejanews', `dejanewsold', `reference', +Valid types include `google', `dejanews', `dejanewsold', `reference', and `altavista'.") (defvar nnweb-type-definition '( - (dejanews ;; bought by google.com - (article . nnweb-google-wash-article) - (id . "http://groups.google.com/groups?as_umsgid=%s") - (reference . nnweb-google-reference) + (google + ;;(article . nnweb-google-wash-article) + ;;(id . "http://groups.google.com/groups?as_umsgid=%s") + (article . ignore) + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + ;;(reference . nnweb-google-reference) + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (identifier . nnweb-google-identity)) + (dejanews ;; alias of google + ;;(article . nnweb-google-wash-article) + ;;(id . "http://groups.google.com/groups?as_umsgid=%s") + (article . ignore) + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + ;;(reference . nnweb-google-reference) + (reference . identity) (map . nnweb-google-create-mapping) (search . nnweb-google-search) (address . "http://groups.google.com/groups") @@ -133,6 +137,8 @@ and `altavista'.") (deffoo nnweb-request-scan (&optional group server) (nnweb-possibly-change-server group server) + (if nnweb-ephemeral-p + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (funcall (nnweb-definition 'map)) (unless nnweb-ephemeral-p (nnweb-write-active) @@ -181,7 +187,7 @@ and `altavista'.") (url (and header (mail-header-xref header)))) (when (or (and url (mm-with-unibyte-current-buffer - (nnweb-fetch-url url))) + (mm-url-insert url))) (and (stringp article) (nnweb-definition 'id t) (let ((fetch (nnweb-definition 'id)) @@ -189,16 +195,15 @@ and `altavista'.") (when (string-match "^<\\(.*\\)>$" article) (setq art (match-string 1 article))) (when (and fetch art) - (setq url (format fetch article)) + (setq url (format fetch art)) (mm-with-unibyte-current-buffer - (nnweb-fetch-url url)) + (mm-url-insert url)) (if (nnweb-definition 'reference t) (setq article (funcall (nnweb-definition 'reference) article))))))) (unless nnheader-callback-function - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities)) + (funcall (nnweb-definition 'article))) (nnheader-report 'nnweb "Fetched article %s" article) (cons group (and (numberp article) article)))))) @@ -303,10 +308,11 @@ and `altavista'.") (nnweb-open-server server))) (unless nnweb-group-alist (nnweb-read-active)) + (unless nnweb-hashtb + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (when group (when (and (not nnweb-ephemeral-p) - (not (equal group nnweb-group))) - (setq nnweb-hashtb (gnus-make-hashtable 4095)) + (equal group nnweb-group)) (nnweb-request-group group nil t)))) (defun nnweb-init (server) @@ -320,57 +326,57 @@ and `altavista'.") nnweb-type nnweb-search server)) (current-buffer)))))) -(defun nnweb-fetch-url (url) - (let (buf) - (save-excursion - (if (not nnheader-callback-function) - (progn - (with-temp-buffer - (mm-enable-multibyte) - (let ((coding-system-for-read 'binary) - (coding-system-for-write 'binary) - (input-coding-system 'binary) - (output-coding-system 'binary) - (default-process-coding-system 'binary)) - (nnweb-insert url)) - (setq buf (buffer-string))) - (erase-buffer) - (insert buf) - t) - (nnweb-url-retrieve-asynch - url 'nnweb-callback (current-buffer) nnheader-callback-function) - t)))) - -(defun nnweb-callback (buffer callback) - (when (gnus-buffer-live-p url-working-buffer) - (save-excursion - (set-buffer url-working-buffer) - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities) - (set-buffer buffer) - (goto-char (point-max)) - (insert-buffer-substring url-working-buffer)) - (funcall callback t) - (gnus-kill-buffer url-working-buffer))) - -(defun nnweb-url-retrieve-asynch (url callback &rest data) - (let ((url-request-method "GET") - (old-asynch url-be-asynchronous) - (url-request-data nil) - (url-request-extra-headers nil) - (url-working-buffer (generate-new-buffer-name " *nnweb*"))) - (setq-default url-be-asynchronous t) - (save-excursion - (set-buffer (get-buffer-create url-working-buffer)) - (setq url-current-callback-data data - url-be-asynchronous t - url-current-callback-func callback) - (url-retrieve url nil)) - (setq-default url-be-asynchronous old-asynch))) - -(if (fboundp 'url-retrieve-synchronously) - (defun nnweb-url-retrieve-asynch (url callback &rest data) - (url-retrieve url callback data))) +;; (defun nnweb-fetch-url (url) +;; (let (buf) +;; (save-excursion +;; (if (not nnheader-callback-function) +;; (progn +;; (with-temp-buffer +;; (mm-enable-multibyte) +;; (let ((coding-system-for-read 'binary) +;; (coding-system-for-write 'binary) +;; (input-coding-system 'binary) +;; (output-coding-system 'binary) +;; (default-process-coding-system 'binary)) +;; (nnweb-insert url)) +;; (setq buf (buffer-string))) +;; (erase-buffer) +;; (insert buf) +;; t) +;; (nnweb-url-retrieve-asynch +;; url 'nnweb-callback (current-buffer) nnheader-callback-function) +;; t)))) + +;; (defun nnweb-callback (buffer callback) +;; (when (gnus-buffer-live-p url-working-buffer) +;; (save-excursion +;; (set-buffer url-working-buffer) +;; (funcall (nnweb-definition 'article)) +;; (nnweb-decode-entities) +;; (set-buffer buffer) +;; (goto-char (point-max)) +;; (insert-buffer-substring url-working-buffer)) +;; (funcall callback t) +;; (gnus-kill-buffer url-working-buffer))) + +;; (defun nnweb-url-retrieve-asynch (url callback &rest data) +;; (let ((url-request-method "GET") +;; (old-asynch url-be-asynchronous) +;; (url-request-data nil) +;; (url-request-extra-headers nil) +;; (url-working-buffer (generate-new-buffer-name " *nnweb*"))) +;; (setq-default url-be-asynchronous t) +;; (save-excursion +;; (set-buffer (get-buffer-create url-working-buffer)) +;; (setq url-current-callback-data data +;; url-be-asynchronous t +;; url-current-callback-func callback) +;; (url-retrieve url nil)) +;; (setq-default url-be-asynchronous old-asynch))) + +;; (if (fboundp 'url-retrieve-synchronously) +;; (defun nnweb-url-retrieve-asynch (url callback &rest data) +;; (url-retrieve url callback data))) ;;; ;;; DejaNews functions. @@ -434,17 +440,17 @@ and `altavista'.") ;; Yup -- fetch it. (setq more (match-string 1)) (erase-buffer) - (url-insert-file-contents more))) + (mm-url-insert more))) ;; Return the articles in the right order. (setq nnweb-articles (sort (nconc nnweb-articles map) 'car-less-than-car)))))) (defun nnweb-dejanews-search (search) - (nnweb-insert + (mm-url-insert (concat (nnweb-definition 'address) "?" - (nnweb-encode-www-form-urlencoded + (mm-url-encode-www-form-urlencoded `(("ST" . "PS") ("svcclass" . "dnyr") ("QRY" . ,search) @@ -460,19 +466,19 @@ and `altavista'.") ("ageweight" . "1"))))) t) -(defun nnweb-dejanewsold-search (search) - (nnweb-fetch-form - (nnweb-definition 'address) - `(("query" . ,search) - ("defaultOp" . "AND") - ("svcclass" . "dnold") - ("maxhits" . "100") - ("format" . "verbose2") - ("threaded" . "0") - ("showsort" . "date") - ("agesign" . "1") - ("ageweight" . "1"))) - t) +;; (defun nnweb-dejanewsold-search (search) +;; (nnweb-fetch-form +;; (nnweb-definition 'address) +;; `(("query" . ,search) +;; ("defaultOp" . "AND") +;; ("svcclass" . "dnold") +;; ("maxhits" . "100") +;; ("format" . "verbose2") +;; ("threaded" . "0") +;; ("showsort" . "date") +;; ("agesign" . "1") +;; ("ageweight" . "1"))) +;; t) (defun nnweb-dejanews-identity (url) "Return an unique identifier based on URL." @@ -512,7 +518,7 @@ and `altavista'.") (goto-char (point-min)) (when (looking-at ".*href=\"\\([^\"]+\\)\"") (setq url (match-string 1))) - (nnweb-remove-markup) + (mm-url-remove-markup) (goto-char (point-min)) (while (search-forward "\t" nil t) (replace-match " ")) @@ -547,7 +553,7 @@ and `altavista'.") (let ((body (point-marker))) (search-forward "" nil t) (delete-region (point) (point-max)) - (nnweb-remove-markup) + (mm-url-remove-markup) (goto-char (point-min)) (while (looking-at " *$") (gnus-delete-line)) @@ -574,14 +580,15 @@ and `altavista'.") (while (search-forward "," nil t) (replace-match " " t t))) (widen) + (mm-url-decode-entities) (set-marker body nil)))) (defun nnweb-reference-search (search) - (url-insert-file-contents + (mm-url-insert (concat (nnweb-definition 'address) "?" - (nnweb-encode-www-form-urlencoded + (mm-url-encode-www-form-urlencoded `(("search" . "advanced") ("querytext" . ,search) ("subj" . "") @@ -633,7 +640,7 @@ and `altavista'.") (goto-char (point-min)) (while (search-forward "
" nil t) (replace-match "\n")) - (nnweb-decode-entities) + (mm-url-decode-entities) (goto-char (point-min)) (while (re-search-forward ".*href=\"\\([^\"]+\\)\">\\([^>]*\\)
\\([^-]+\\)- \\([^<]+\\)<.*href=\"news:\\([^\"]+\\)\">.*\">\\(.+\\)

" nil t) @@ -678,14 +685,15 @@ and `altavista'.") (while (re-search-forward "[0-9]+" nil t) (replace-match "<\\1> " t))) (widen) - (nnweb-remove-markup))) + (mm-url-remove-markup) + (mm-url-decode-entities))) (defun nnweb-altavista-search (search &optional part) - (url-insert-file-contents + (mm-url-insert (concat (nnweb-definition 'address) "?" - (nnweb-encode-www-form-urlencoded + (mm-url-encode-www-form-urlencoded `(("pg" . "aq") ("what" . "news") ,@(when part `(("stq" . ,(int-to-string (* part 30))))) @@ -715,7 +723,7 @@ and `altavista'.") (goto-char (point-min)) (while (search-forward "
" nil t) (replace-match "\n")) - (nnweb-remove-markup) + (mm-url-remove-markup) (goto-char (point-min)) (while (re-search-forward "^[ \t]*\n" nil t) (replace-match "")) @@ -725,7 +733,7 @@ and `altavista'.") (narrow-to-region (point) (point-max)) (search-forward "" nil t) (delete-region (point) (point-max)) - (nnweb-remove-markup) + (mm-url-remove-markup) (widen))) (defun nnweb-google-parse-1 (&optional Message-ID) @@ -733,7 +741,7 @@ and `altavista'.") (case-fold-search t) (active (cadr (assoc nnweb-group nnweb-group-alist))) Subject Score Date Newsgroups From - map url) + map url mid) (unless active (push (list nnweb-group (setq active (cons 1 0)) nnweb-type nnweb-search) @@ -741,26 +749,26 @@ and `altavista'.") ;; Go through all the article hits on this page. (goto-char (point-min)) (while (re-search-forward - "a href=/groups\\(\\?[^ \">]*selm=[^ \">]*\\)" nil t) - (setq url - (concat (nnweb-definition 'address) - (match-string 1))) + "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t) + (setq mid (match-string 2) + url (format + "http://groups.google.com/groups?selm=%s&output=gplain" mid)) (narrow-to-region (search-forward ">" nil t) (search-forward "" nil t)) - (nnweb-remove-markup) - (nnweb-decode-entities) + (mm-url-remove-markup) + (mm-url-decode-entities) (setq Subject (buffer-string)) (goto-char (point-max)) (widen) - (forward-line 2) + (forward-line 1) (when (looking-at "
]+>") (goto-char (match-end 0))) (if (not (looking-at "]+>")) (skip-chars-forward " \t") (narrow-to-region (point) (search-forward "" nil t)) - (nnweb-remove-markup) - (nnweb-decode-entities) + (mm-url-remove-markup) + (mm-url-decode-entities) (setq Newsgroups (buffer-string)) (goto-char (point-max)) (widen) @@ -779,7 +787,7 @@ and `altavista'.") (cdr active) (if Newsgroups (concat "(" Newsgroups ") " Subject) Subject) - From Date Message-ID + From Date (or Message-ID mid) nil 0 0 url)) map) (nnweb-set-hashtb (cadar map) (car map)))) @@ -791,7 +799,7 @@ and `altavista'.") (nconc nnweb-articles map)) (when (setq header (cadar map)) (mm-with-unibyte-current-buffer - (nnweb-fetch-url (mail-header-xref header))) + (mm-url-insert (mail-header-xref header))) (caar map)))) (defun nnweb-google-create-mapping () @@ -800,22 +808,22 @@ and `altavista'.") (set-buffer nnweb-buffer) (erase-buffer) (when (funcall (nnweb-definition 'search) nnweb-search) - (let ((more t)) - (while more - (setq nnweb-articles - (nconc nnweb-articles (nnweb-google-parse-1))) - ;; FIXME: There is more. - (setq more nil)) - ;; Return the articles in the right order. + (let ((more t)) + (while more (setq nnweb-articles - (sort nnweb-articles 'car-less-than-car)))))) + (nconc nnweb-articles (nnweb-google-parse-1))) + ;; FIXME: There is more. + (setq more nil)) + ;; Return the articles in the right order. + (setq nnweb-articles + (sort nnweb-articles 'car-less-than-car)))))) (defun nnweb-google-search (search) - (nnweb-insert + (mm-url-insert (concat (nnweb-definition 'address) "?" - (nnweb-encode-www-form-urlencoded + (mm-url-encode-www-form-urlencoded `(("q" . ,search) ("num". "100") ("hq" . "") @@ -827,7 +835,7 @@ and `altavista'.") (defun nnweb-google-identity (url) "Return an unique identifier based on URL." - (if (string-match "seld=\\([0-9]+\\)" url) + (if (string-match "selm=\\([^ &>]+\\)" url) (match-string 1 url) url)) @@ -853,75 +861,6 @@ and `altavista'.") (mapcar 'nnweb-insert-html (nth 2 parse)) (insert "\n"))) -(defun nnweb-encode-www-form-urlencoded (pairs) - "Return PAIRS encoded for forms." - (mapconcat - (function - (lambda (data) - (concat (w3-form-encode-xwfu (car data)) "=" - (w3-form-encode-xwfu (cdr data))))) - pairs "&")) - -(defun nnweb-fetch-form (url pairs) - "Fetch a form from URL with PAIRS as the data using the POST method." - (let ((url-request-data (nnweb-encode-www-form-urlencoded pairs)) - (url-request-method "POST") - (url-request-extra-headers - '(("Content-type" . "application/x-www-form-urlencoded")))) - (url-insert-file-contents url) - (setq buffer-file-name nil)) - t) - -(defun nnweb-decode-entities () - "Decode all HTML entities." - (goto-char (point-min)) - (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t) - (let ((elem (if (eq (aref (match-string 1) 0) ?\#) - (let ((c - (string-to-number (substring - (match-string 1) 1)))) - (if (mm-char-or-char-int-p c) c 32)) - (or (cdr (assq (intern (match-string 1)) - w3-html-entities)) - ?#)))) - (unless (stringp elem) - (setq elem (char-to-string elem))) - (replace-match elem t t)))) - -(defun nnweb-decode-entities-string (string) - (with-temp-buffer - (insert string) - (nnweb-decode-entities) - (buffer-substring (point-min) (point-max)))) - -(defun nnweb-remove-markup () - "Remove all HTML markup, leaving just plain text." - (goto-char (point-min)) - (while (search-forward "" nil t) - (point-max)))) - (goto-char (point-min)) - (while (re-search-forward "<[^>]+>" nil t) - (replace-match "" t t))) - -(defun nnweb-insert (url &optional follow-refresh) - "Insert the contents from an URL in the current buffer. -If FOLLOW-REFRESH is non-nil, redirect refresh url in META." - (let ((name buffer-file-name)) - (if follow-refresh - (save-restriction - (narrow-to-region (point) (point)) - (url-insert-file-contents url) - (goto-char (point-min)) - (when (re-search-forward - "]*URL=\\([^\"]+\\)\"" nil t) - (let ((url (match-string 1))) - (delete-region (point-min) (point-max)) - (nnweb-insert url t)))) - (url-insert-file-contents url)) - (setq buffer-file-name name))) - (defun nnweb-parse-find (type parse &optional maxdepth) "Find the element of TYPE in PARSE." (catch 'found @@ -971,11 +910,6 @@ If FOLLOW-REFRESH is non-nil, redirect refresh url in META." (listp (cdr element))) (nnweb-text-1 element))))) -(defun nnweb-replace-in-string (string match newtext) - (while (string-match match string) - (setq string (replace-match newtext t t string))) - string) - (provide 'nnweb) ;;; nnweb.el ends here