+;;;
+;;; Deja bought by google.com
+;;;
+
+(defun nnweb-google-wash-article ()
+  "Turn the Google Groups article page in the current buffer into text.
+The buffer up to and including the first \"<pre>\" at the start of a
+line is treated as the header area: everything before the second
+\"<table \" found searching backward from there is deleted, the
+\"Search Result N\" and \"View complete thread\" labels are dropped,
+\"<br>\" becomes a newline, markup is removed and blank lines are
+squeezed out.  A newline is then inserted, and the rest of the buffer
+is treated as the body: whatever follows \"</pre>\" is deleted and the
+remaining markup is removed.  If the body has no \"</pre>\" it is kept
+in full rather than deleted."
+  (let ((case-fold-search t))
+    (goto-char (point-min))
+    ;; Header area: from the start of the buffer through "<pre>".
+    (re-search-forward "^<pre>" nil t)
+    (narrow-to-region (point-min) (point))
+    (search-backward "<table " nil t 2)
+    (delete-region (point-min) (point))
+    (if (re-search-forward "Search Result [0-9]+" nil t)
+        (replace-match ""))
+    (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t)
+        (replace-match ""))
+    (goto-char (point-min))
+    (while (search-forward "<br>" nil t)
+      (replace-match "\n"))
+    (nnweb-remove-markup)
+    (goto-char (point-min))
+    (while (re-search-forward "^[ \t]*\n" nil t)
+      (replace-match ""))
+    (goto-char (point-max))
+    (insert "\n")
+    (widen)
+    ;; Body area: everything after the washed header.
+    (narrow-to-region (point) (point-max))
+    ;; Trim the trailer only when the closing tag is really there.  On a
+    ;; failed search point stays at the start of the body, so deleting
+    ;; unconditionally would throw the whole body away.
+    (when (search-forward "</pre>" nil t)
+      (delete-region (point) (point-max)))
+    (nnweb-remove-markup)
+    (widen)))
+
+(defun nnweb-google-parse-1 (&optional Message-ID)
+  "Collect the article hits on the Google result page in the current buffer.
+Every link to /groups that carries a selm= parameter counts as one hit.
+The link text becomes the subject, prefixed with the newsgroup name in
+parentheses when a newsgroup link follows, and the date and author are
+taken from the \"DATE by AUTHOR - \" text after it.  A hit is skipped
+when `nnweb-get-hashtb' returns non-nil for its article URL; otherwise
+it gets the next article number of the group and is registered with
+`nnweb-set-hashtb'.  An entry for `nnweb-group' is added to
+`nnweb-group-alist' if there is none yet.
+
+The return value is a list of two-element lists, article number and
+mail header, with the hit found last coming first.  MESSAGE-ID, when
+non-nil, is used as the Message-ID of every header instead of the
+selm value."
+  (let ((case-fold-search t)
+        (active (cadr (assoc nnweb-group nnweb-group-alist)))
+        Subject Date Newsgroups From
+        map url mid)
+    (unless active
+      (push (list nnweb-group (setq active (cons 1 0))
+                  nnweb-type nnweb-search)
+            nnweb-group-alist))
+    ;; Go through all the article hits on this page.
+    (goto-char (point-min))
+    (while (re-search-forward
+            "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
+      (setq mid (match-string 2)
+            url (format
+                 "http://groups.google.com/groups?selm=%s&output=gplain" mid)
+            ;; Start every hit from a clean slate, so that a field which
+            ;; cannot be parsed for this hit is left empty instead of
+            ;; silently inheriting the previous hit's value.
+            Newsgroups nil
+            From nil
+            Date nil)
+      ;; The link text is the subject.
+      (narrow-to-region (search-forward ">" nil t)
+                        (search-forward "</a>" nil t))
+      (nnweb-remove-markup)
+      (nnweb-decode-entities)
+      (setq Subject (buffer-string))
+      (goto-char (point-max))
+      (widen)
+      (forward-line 2)
+      (when (looking-at "<br><font[^>]+>")
+        (goto-char (match-end 0)))
+      ;; An optional link at this position holds the newsgroup name.
+      (if (not (looking-at "<a[^>]+>"))
+          (skip-chars-forward " \t")
+        (narrow-to-region (point)
+                          (search-forward "</a>" nil t))
+        (nnweb-remove-markup)
+        (nnweb-decode-entities)
+        (setq Newsgroups (buffer-string))
+        (goto-char (point-max))
+        (widen)
+        (skip-chars-forward "- \t"))
+      (when (looking-at
+             "\\([0-9]+[/ ][A-Za-z]+[/ ][0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - <a")
+        (setq From (match-string 2)
+              Date (match-string 1)))
+      (forward-line 1)
+      (unless (nnweb-get-hashtb url)
+        (push
+         (list
+          (incf (cdr active))
+          (make-full-mail-header
+           (cdr active) (if Newsgroups
+                            (concat "(" Newsgroups ") " Subject)
+                          Subject)
+           From Date (or Message-ID mid)
+           nil 0 0 url))
+         map)
+        (nnweb-set-hashtb (cadar map) (car map))))
+    map))
+
+(defun nnweb-google-reference (id)
+  "Parse the result page in the current buffer and fetch its leading hit.
+ID is handed to `nnweb-google-parse-1' as the Message-ID for the parsed
+headers, and the entries it returns are appended to `nnweb-articles'.
+If the returned list has a leading entry with a header, the value of
+that header's xref field is passed to `nnweb-fetch-url' and the entry's
+article number is returned; otherwise return nil."
+  (let* ((map (nnweb-google-parse-1 id))
+         (header (cadar map)))
+    (setq nnweb-articles (nconc nnweb-articles map))
+    (and header
+         (progn
+           (mm-with-unibyte-current-buffer
+             (nnweb-fetch-url (mail-header-xref header)))
+           (caar map)))))
+
+(defun nnweb-google-create-mapping ()
+  "Perform the search and create a number-to-url alist.
+The function returned by (nnweb-definition 'search) is called with
+`nnweb-search' in the emptied `nnweb-buffer'.  When it returns non-nil,
+the hits parsed by `nnweb-google-parse-1' are appended to
+`nnweb-articles', which is then sorted with `car-less-than-car'."
+  (save-excursion
+    (set-buffer nnweb-buffer)
+    (erase-buffer)
+    (if (funcall (nnweb-definition 'search) nnweb-search)
+        (let ((more t))
+          (while more
+            (setq nnweb-articles (nconc nnweb-articles
+                                        (nnweb-google-parse-1))
+                  ;; FIXME: There is more.
+                  more nil))
+          ;; Return the articles in the right order.
+          (setq nnweb-articles
+                (sort nnweb-articles 'car-less-than-car))))))
+
+(defun nnweb-google-search (search)
+  "Insert the Google Groups result page for SEARCH with `nnweb-insert'.
+The request URL is the backend's address followed by \"?\" and the
+form-encoded query parameters; SEARCH is sent as the \"q\" parameter.
+Always return t."
+  (let* ((base-url (nnweb-definition 'address))
+         (params (nnweb-encode-www-form-urlencoded
+                  (list (cons "q" search)
+                        (cons "num" "100")
+                        (cons "hq" "")
+                        (cons "hl" "")
+                        (cons "lr" "")
+                        (cons "safe" "off")
+                        (cons "sites" "groups")))))
+    (nnweb-insert (concat base-url "?" params)))
+  t)
+
+(defun nnweb-google-identity (url)
+  "Return a unique identifier based on URL.
+This is the value of the selm= parameter when URL has one, and URL
+itself otherwise."
+  (cond
+   ((string-match "selm=\\([^ &>]+\\)" url)
+    (match-string 1 url))
+   (t url)))
+
+;;;
+;;; General web/w3 interface utility functions
+;;;
+
+(defun nnweb-insert-html (parse)
+  "Insert HTML based on a w3 parse tree.
+A string PARSE is inserted after conversion with
+`nnheader-string-as-multibyte'.  Otherwise PARSE is read as a list
+whose first element is the tag symbol, whose second element is an
+alist of attributes and whose third element is a list of child nodes:
+the opening tag with its attributes is inserted, then each child
+recursively, then the closing tag."
+  (cond
+   ((stringp parse)
+    (insert (nnheader-string-as-multibyte parse)))
+   (t
+    (let ((tag (symbol-name (car parse))))
+      (insert "<" tag " ")
+      (insert (mapconcat
+               (lambda (param)
+                 ;; An attribute value is either the cdr itself or the
+                 ;; first element of a list in the cdr.
+                 (let ((value (cdr param)))
+                   (concat (symbol-name (car param)) "="
+                           (prin1-to-string
+                            (if (consp value) (car value) value)))))
+               (nth 1 parse)
+               " "))
+      (insert ">\n")
+      (dolist (child (nth 2 parse))
+        (nnweb-insert-html child))
+      (insert "</" tag ">\n")))))
+
+(defun nnweb-encode-www-form-urlencoded (pairs)
+  "Return PAIRS encoded for forms.
+PAIRS is an alist of name/value conses.  Both sides of each pair are
+passed through `w3-form-encode-xwfu' and joined with \"=\"; the pairs
+are joined with \"&\"."
+  (mapconcat
+   (lambda (pair)
+     (let ((name (w3-form-encode-xwfu (car pair)))
+           (value (w3-form-encode-xwfu (cdr pair))))
+       (concat name "=" value)))
+   pairs "&"))
+
+(defun nnweb-fetch-form (url pairs)
+  "Fetch a form from URL with PAIRS as the data using the POST method.
+PAIRS is encoded with `nnweb-encode-www-form-urlencoded' and the
+request is made through `url-insert-file-contents'.  Afterwards
+`buffer-file-name' is set to nil.  Always return t."
+  (let ((url-request-method "POST")
+        (url-request-extra-headers
+         '(("Content-type" . "application/x-www-form-urlencoded")))
+        (url-request-data (nnweb-encode-www-form-urlencoded pairs)))
+    (url-insert-file-contents url)
+    (setq buffer-file-name nil))
+  t)
+
+(defun nnweb-decode-entities ()
+  "Decode all HTML entities.
+Starting at the beginning of the accessible part of the buffer, each
+numeric reference such as \"&#38;\" and each named entity such as
+\"&amp;\" or \"&sup2;\" is replaced in place.  A numeric reference
+that `mm-char-or-char-int-p' rejects becomes a space.  A name that is
+not in `w3-html-entities' becomes \"#\"."
+  (goto-char (point-min))
+  ;; Entity names may end in digits (sup2, frac12, there4), so allow
+  ;; trailing digits after the letters.
+  (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+[0-9]*\\);" nil t)
+    (let* ((entity (match-string 1))
+           (elem (if (eq (aref entity 0) ?\#)
+                     (let ((c (string-to-number (substring entity 1))))
+                       (if (mm-char-or-char-int-p c) c 32))
+                   ;; `intern-soft' finds the same symbols `intern'
+                   ;; would for names present in the table, without
+                   ;; creating a new symbol for every unknown name
+                   ;; that shows up in fetched text.
+                   (or (cdr (assq (intern-soft entity)
+                                  w3-html-entities))
+                       ?#))))
+      (unless (stringp elem)
+        (setq elem (char-to-string elem)))
+      (replace-match elem t t))))
+
+(defun nnweb-decode-entities-string (string)
+  "Return a copy of STRING with its HTML entities decoded.
+The decoding is done by `nnweb-decode-entities' in a temporary buffer."
+  (with-temp-buffer
+    (insert string)
+    (nnweb-decode-entities)
+    (buffer-string)))
+
+(defun nnweb-remove-markup ()
+  "Remove all HTML markup, leaving just plain text.
+Comments are deleted first, from \"<!--\" through the next \"-->\" or
+through the end of the accessible region when the comment is not
+closed.  Then every remaining \"<...>\" tag is deleted."
+  (goto-char (point-min))
+  (while (search-forward "<!--" nil t)
+    (let ((comment-start (match-beginning 0))
+          (comment-end (if (search-forward "-->" nil t)
+                           (point)
+                         (point-max))))
+      (delete-region comment-start comment-end)))
+  (goto-char (point-min))
+  (while (re-search-forward "<[^>]+>" nil t)
+    (delete-region (match-beginning 0) (match-end 0))))
+
+(defun nnweb-insert (url &optional follow-refresh)
+  "Insert the contents of URL in the current buffer.
+If FOLLOW-REFRESH is non-nil and the inserted text contains a META
+http-equiv=\"Refresh\" tag, replace that text with the contents of the
+URL the tag names, following further refreshes the same way.
+`buffer-file-name' is set back to its previous value afterwards."
+  (let ((name buffer-file-name))
+    (cond
+     ((not follow-refresh)
+      (url-insert-file-contents url))
+     (t
+      (save-restriction
+        ;; Confine the search and the deletion to the inserted text.
+        (narrow-to-region (point) (point))
+        (url-insert-file-contents url)
+        (goto-char (point-min))
+        (if (re-search-forward
+             "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t)
+            (let ((target (match-string 1)))
+              (delete-region (point-min) (point-max))
+              (nnweb-insert target t))))))
+    (setq buffer-file-name name)))
+
+(defun nnweb-parse-find (type parse &optional maxdepth)
+  "Find the element of TYPE in PARSE.
+Return the first list found by `nnweb-parse-find-1' whose car is TYPE,
+or nil when there is none.  A non-nil MAXDEPTH limits how many levels
+of nesting are examined."
+  ;; The helper reports a hit by throwing it to `found'.
+  (catch 'found
+    (nnweb-parse-find-1 type parse maxdepth)))
+
+(defun nnweb-parse-find-1 (type contents maxdepth)
+  "Throw CONTENTS to `found' if it is a list whose car is TYPE.
+Otherwise search the cons elements of CONTENTS recursively.  MAXDEPTH
+is nil for no limit, or the number of levels still to be examined;
+nothing is examined once it reaches zero.  Return nil when nothing was
+thrown."
+  (unless (and maxdepth (zerop maxdepth))
+    (when (consp contents)
+      (if (eq (car contents) type)
+          (throw 'found contents))
+      (when (listp (cdr contents))
+        (dolist (element contents)
+          (if (consp element)
+              (nnweb-parse-find-1 type element
+                                  (and maxdepth (1- maxdepth)))))))))
+
+(defun nnweb-parse-find-all (type parse)
+  "Find all elements of TYPE in PARSE.
+Return the list of matching elements built by
+`nnweb-parse-find-all-1', or nil when there are none."
+  ;; No `catch' is needed here: the helper accumulates its matches and
+  ;; returns them; it never throws.
+  (nnweb-parse-find-all-1 type parse))
+
+(defun nnweb-parse-find-all-1 (type contents)
+  "Return a list of the lists within CONTENTS whose car is TYPE.
+When CONTENTS itself matches, it is the only result and its elements
+are not searched.  Otherwise the cons elements of CONTENTS are searched
+recursively and their results concatenated in order."
+  (cond
+   ((not (consp contents)) nil)
+   ((eq (car contents) type) (list contents))
+   ((listp (cdr contents))
+    (let (found)
+      (dolist (child contents)
+        (when (consp child)
+          (setq found
+                (nconc found (nnweb-parse-find-all-1 type child)))))
+      found))))
+
+;; Accumulator shared by `nnweb-text' and `nnweb-text-1': bound
+;; dynamically by the former and pushed onto by the latter.
+(defvar nnweb-text)
+(defun nnweb-text (parse)
+  "Return a list of text contents in PARSE.
+The strings are gathered by `nnweb-text-1' and returned in the order
+in which it came across them."
+  (let (nnweb-text)
+    (nnweb-text-1 parse)
+    (nreverse nnweb-text)))
+
+(defun nnweb-text-1 (contents)
+  "Push every string found in CONTENTS onto the variable `nnweb-text'.
+Elements of CONTENTS that are conses with a list in their cdr are
+searched recursively; all other non-string elements are ignored."
+  (dolist (element contents)
+    (cond
+     ((stringp element)
+      (push element nnweb-text))
+     ((and (consp element)
+           (listp (cdr element)))
+      (nnweb-text-1 element)))))
+
+(defun nnweb-replace-in-string (string match newtext)
+  "Return STRING with every match of regexp MATCH replaced by NEWTEXT.
+NEWTEXT is inserted literally and with its case unchanged.  The search
+restarts from the beginning of the string after each replacement and
+goes on until MATCH no longer matches, so text produced by one
+replacement can be matched again.
+NOTE(review): this also means the loop never ends if NEWTEXT itself
+matches MATCH, or if MATCH can match the empty string."
+  (let ((result string))
+    (while (string-match match result)
+      (setq result (replace-match newtext t t result)))
+    result))
+