Synch with Gnus.
[elisp/gnus.git-] / lisp / nnshimbun.el
index ed60a93..d30091e 100644 (file)
@@ -1,7 +1,8 @@
-;;; -*- mode: Emacs-Lisp; coding: junet -*-
+;;; nnshimbun.el --- interfacing with web newspapers -*- coding: junet; -*-
 
-;;; Author: TSUCHIYA Masatoshi <tsuchiya@pine.kuee.kyoto-u.ac.jp>
-;;; Keywords: news
+;; Authors: TSUCHIYA Masatoshi <tsuchiya@pine.kuee.kyoto-u.ac.jp>
+;;          Akihiro Arisawa    <ari@atesoft.advantest.co.jp>
+;; Keywords: news
 
 ;;; Copyright:
 
 
 (defvar nnshimbun-check-interval 300)
 
+(defconst nnshimbun-mew-groups
+  '(("meadow-develop" "meadow-develop" nil t)
+    ("meadow-users-jp" "meadow-users-jp")
+    ("mule-win32" "mule-win32")
+    ("mew-win32" "mew-win32")
+    ("mew-dist" "mew-dist/3300" t)
+    ("mgp-users-jp" "mgp-users-jp/A" t t)))
+
 (defvar nnshimbun-type-definition
   `(("asahi"
      (url . "http://spin.asahi.com/")
      (contents-start . "\n<!--  honbun start  -->\n")
      (contents-end   . "\n<!--  honbun end  -->\n"))
     ("zdnet"
-     (url . "http://zdseek.pub.softbank.co.jp/news/")
+     (url . "http://www.zdnet.co.jp/news/")
      (groups "comp")
      (coding-system  . ,(if (boundp 'MULE) '*sjis* 'shift_jis))
      (generate-nov   . nnshimbun-generate-nov-for-each-group)
      (index-url      . nnshimbun-url)
      (from-address   . "zdnn@softbank.co.jp")
      (make-contents  . nnshimbun-make-html-contents)
-     (contents-start . "<!--BODY-->")
-     (contents-end   . "<!--BODYEND-->"))
+     (contents-start . "\\(<!--BODY-->\\|<!--DATE-->\\)")
+     (contents-end   . "\\(<!--BODYEND-->\\|<!--BYLINEEND-->\\)"))
+    ("mew"
+     (url . "http://www.mew.org/archive/")
+     (groups ,@(mapcar #'car nnshimbun-mew-groups))
+     (coding-system . ,(if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
+     (generate-nov  . nnshimbun-generate-nov-for-each-group)
+     (get-headers   . nnshimbun-mew-get-headers)
+     (index-url     . (nnshimbun-mew-concat-url "index.html"))
+     (make-contents . nnshimbun-make-mhonarc-contents))
+    ("xemacs"
+     (url . "http://www.xemacs.org/list-archives/")
+     (groups "xemacs-announce" "xemacs-beta-ja" "xemacs-beta"
+            "xemacs-build-reports" "xemacs-cvs" "xemacs-mule"
+            "xemacs-nt" "xemacs-patches" "xemacs-users-ja" "xemacs")
+     (coding-system . ,(if (boundp 'MULE) '*euc-japan* 'euc-jp))
+     (generate-nov  . nnshimbun-generate-nov-for-each-group)
+     (get-headers   . nnshimbun-xemacs-get-headers)
+     (index-url     . (nnshimbun-xemacs-concat-url nil))
+     (make-contents . nnshimbun-make-mhonarc-contents))
+    ("netbsd"
+     (url . "http://www.jp.netbsd.org/ja/JP/ml/")
+     (groups "announce-ja" "junk-ja" "tech-misc-ja" "tech-pkg-ja"
+            "port-arm32-ja" "port-hpcmips-ja" "port-mac68k-ja"
+            "port-mips-ja" "port-powerpc-ja" "hpcmips-changes-ja"
+            "members-ja" "admin-ja" "www-changes-ja")
+     (coding-system  . ,(if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
+     (generate-nov   . nnshimbun-generate-nov-for-each-group)
+     (get-headers    . nnshimbun-netbsd-get-headers)
+     (index-url      . (format "%s%s/index.html" nnshimbun-url nnshimbun-current-group))
+     (make-contents  . nnshimbun-make-mhonarc-contents))
     ))
 
 (defvar nnshimbun-x-face-alist
       (nnheader-report 'nnml "Couldn't retrieve article: %s" (prin1-to-string article))
       nil)))
 
+(defsubst nnshimbun-header-xref (x)
+  (if (and (setq x (mail-header-xref x))
+          (string-match "^Xref: " x))
+      (substring x 6)
+    x))
+
 (defun nnshimbun-request-article-1 (article &optional group server to-buffer)
   (if (nnshimbun-backlog
        (gnus-backlog-request-article
                           (set-buffer (nnshimbun-open-nov group))
                           (and (nnheader-find-nov-line article)
                                (nnheader-parse-nov))))
-       (let* ((xref (substring (mail-header-xref header) 6))
-              (x-faces (cdr (or (assoc server nnshimbun-x-face-alist)
+       (let* ((xref (nnshimbun-header-xref header))
+              (x-faces (cdr (or (assoc (or server
+                                           (nnoo-current-server 'nnshimbun))
+                                       nnshimbun-x-face-alist)
                                 (assoc "default" nnshimbun-x-face-alist))))
               (x-face (cdr (or (assoc group x-faces)
                                (assoc "default" x-faces)))))
        (insert "Subject: " (or (mime-entity-fetch-field header 'Subject) "(none)") "\n"
                "From: " (or (mime-entity-fetch-field header 'From) "(nobody)") "\n"
                "Date: " (or (mail-header-date header) "") "\n"
-               "Message-ID: " (or (mail-header-id header) (nnmail-message-id)) "\n"
-               "References: " (or (mail-header-references header) "") "\n"
-               "Lines: ")
-       (princ (or (mail-header-lines header) 0) (current-buffer))
-       (insert "\n")
-       (if (mail-header-xref header)
-           (insert (mail-header-xref header) "\n")))
+               "Message-ID: " (or (mail-header-id header) (nnmail-message-id)) "\n")
+       (let ((refs (mail-header-references header)))
+         (and refs
+              (string< "" refs)
+              (insert "References: " refs "\n")))
+       (insert "Lines: " (number-to-string (or (mail-header-lines header) 0)) "\n"
+               "Xref: " (nnshimbun-header-xref header) "\n"))
     ;; For pure Gnus.
     (defun nnshimbun-insert-header (header)
       (nnheader-insert-header header)
       (delete-char -1)
-      (if (mail-header-xref header)
-         (insert (mail-header-xref header) "\n")))))
+      (insert "Xref: " (nnshimbun-header-xref header) "\n"))))
 
 (deffoo nnshimbun-retrieve-headers (articles &optional group server fetch-old)
   (when (nnshimbun-possibly-change-group group server)
          (setq found t)))
       (unless found
        (goto-char (point-min))
-       (when (search-forward (concat "X-Nnshimbun-Original-Id: " id) nil t)
+       (when (search-forward (concat "X-Nnshimbun-Id: " id) nil t)
          (forward-line 0)
          (setq found t)))
       (if found
     (when (nnheader-find-nov-line (mail-header-number header))
       (dolist (arg args)
        (if (eq (car arg) 'id)
-           (let ((extra (mail-header-extra header)) x)
-             (when (setq x (assq 'X-Nnshimbun-Original-Id extra))
-               (setq extra (delq x extra)))
-             (mail-header-set-extra
-              header
-              (cons (cons 'X-Nnshimbun-Original-Id (cdr arg)) extra)))
+           (let ((extra (mail-header-extra header)))
+             (unless (assq 'X-Nnshimbun-Id extra)
+               (mail-header-set-extra
+                header
+                (cons (cons 'X-Nnshimbun-Id (mail-header-id header))
+                      extra)))
+             (mail-header-set-id header (cdr arg)))
          (let ((func (intern (concat "mail-header-set-" (symbol-name (car arg))))))
            (if (cdr arg) (eval (list func header (cdr arg)))))))
-      (let ((xref (mail-header-xref header)))
-       (when (string-match "^Xref: " xref)
-         (mail-header-set-xref header (substring xref 6))))
+      (mail-header-set-xref header (nnshimbun-header-xref header))
       (delete-region (point) (progn (forward-line 1) (point)))
       (nnheader-insert-nov header))))
 
@@ -719,11 +763,11 @@ is enclosed by at least one regexp grouping construct."
 
 (defun nnshimbun-make-text-or-html-contents (header &optional x-face)
   (let ((case-fold-search t) (html t) (start))
-    (when (and (search-forward nnshimbun-contents-start nil t)
+    (when (and (re-search-forward nnshimbun-contents-start nil t)
               (setq start (point))
-              (search-forward nnshimbun-contents-end nil t))
+              (re-search-forward nnshimbun-contents-end nil t))
+      (delete-region (match-beginning 0) (point-max))
       (delete-region (point-min) start)
-      (delete-region (- (point) (length nnshimbun-contents-end)) (point-max))
       (nnshimbun-shallow-rendering)
       (setq html nil))
     (goto-char (point-min))
@@ -740,11 +784,11 @@ is enclosed by at least one regexp grouping construct."
 
 (defun nnshimbun-make-html-contents (header &optional x-face)
   (let (start)
-    (when (and (search-forward nnshimbun-contents-start nil t)
+    (when (and (re-search-forward nnshimbun-contents-start nil t)
               (setq start (point))
-              (search-forward nnshimbun-contents-end nil t))
-      (delete-region (point-min) start)
-      (delete-region (- (point) (length nnshimbun-contents-end)) (point-max)))
+              (re-search-forward nnshimbun-contents-end nil t))
+      (delete-region (match-beginning 0) (point-max))
+      (delete-region (point-min) start))
     (goto-char (point-min))
     (nnshimbun-insert-header header)
     (insert "Content-Type: text/html; charset=ISO-2022-JP\n"
@@ -757,6 +801,75 @@ is enclosed by at least one regexp grouping construct."
     (encode-coding-string (buffer-string)
                          (mime-charset-to-coding-system "ISO-2022-JP"))))
 
+(defun nnshimbun-make-mhonarc-contents (header &rest args)
+  (require 'mml)
+  (if (search-forward "<!--X-Head-End-->" nil t)
+      (progn
+       (forward-line 0)
+       ;; Processing headers.
+       (save-restriction
+         (narrow-to-region (point-min) (point))
+         (nnweb-decode-entities)
+         (goto-char (point-min))
+         (while (search-forward "\n<!--X-" nil t)
+           (replace-match "\n"))
+         (goto-char (point-min))
+         (while (search-forward " -->\n" nil t)
+           (replace-match "\n"))
+         (goto-char (point-min))
+         (while (search-forward "\t" nil t)
+           (replace-match " "))
+         (goto-char (point-min))
+         (let (buf refs)
+           (while (not (eobp))
+             (cond
+              ((looking-at "<!--")
+               (delete-region (point) (progn (forward-line 1) (point))))
+              ((looking-at "Subject: +")
+               (push (cons 'subject (nnheader-header-value)) buf)
+               (delete-region (point) (progn (forward-line 1) (point))))
+              ((looking-at "From: +")
+               (push (cons 'from (nnheader-header-value)) buf)
+               (delete-region (point) (progn (forward-line 1) (point))))
+              ((looking-at "Date: +")
+               (push (cons 'date (nnheader-header-value)) buf)
+               (delete-region (point) (progn (forward-line 1) (point))))
+              ((looking-at "Message-Id: +")
+               (push (cons 'id (concat "<" (nnheader-header-value) ">")) buf)
+               (delete-region (point) (progn (forward-line 1) (point))))
+              ((looking-at "Reference: +")
+               (push (concat "<" (nnheader-header-value) ">") refs)
+               (delete-region (point) (progn (forward-line 1) (point))))
+              ((looking-at "Content-Type: ")
+               (unless (search-forward "charset" (gnus-point-at-eol) t)
+                 (end-of-line)
+                 (insert "; charset=ISO-2022-JP"))
+               (forward-line 1))
+              (t (forward-line 1))))
+           (insert "MIME-Version: 1.0\n")
+           (if refs (push (cons 'references (mapconcat 'identity refs " ")) buf))
+           (nnshimbun-nov-fix-header nnshimbun-current-group header buf)
+           (goto-char (point-min))
+           (nnshimbun-insert-header header))
+         (goto-char (point-max)))
+       ;; Processing body.
+       (save-restriction
+         (narrow-to-region (point) (point-max))
+         (delete-region
+          (point)
+          (progn
+            (search-forward "\n<!--X-Body-of-Message-->\n" nil t)
+            (point)))
+         (when (search-forward "\n<!--X-Body-of-Message-End-->\n" nil t)
+           (forward-line -1)
+           (delete-region (point) (point-max)))
+         (nnweb-remove-markup)
+         (nnweb-decode-entities)))
+    (goto-char (point-min))
+    (nnshimbun-insert-header header)
+    (insert "Content-Type: text/html; charset=ISO-2022-JP\nMIME-Version: 1.0\n\n"))
+  (encode-coding-string (buffer-string)
+                       (mime-charset-to-coding-system "ISO-2022-JP")))
 
 
 ;;; www.asahi.com
@@ -785,7 +898,7 @@ is enclosed by at least one regexp grouping construct."
                                (buffer-substring
                                 (match-end 0)
                                 (progn (search-forward "<br>" nil t) (point)))
-                               "<[^>]+>")
+                               "\\(<[^>]+>\\|\r\\)")
                               ""))
                   nnshimbun-from-address
                   "" id "" 0 0 (concat nnshimbun-url url))
@@ -993,18 +1106,18 @@ is enclosed by at least one regexp grouping construct."
        (delete-region start (point))))
     (goto-char (point-min))
     (while (re-search-forward
-           "<a href=\"\\(\\([0-9][0-9]\\)\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([^\\.]+\\).html\\)\"><font size=\"4\"><strong>"
+           "<a href=\"\\(/news/\\)?\\(\\([0-9][0-9]\\)\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([^\\.]+\\).html\\)\"><font size=\"4\"><strong>"
            nil t)
-      (let ((year  (+ 2000 (string-to-number (match-string 2))))
-           (month (string-to-number (match-string 3)))
-           (day   (string-to-number (match-string 4)))
+      (let ((year  (+ 2000 (string-to-number (match-string 3))))
+           (month (string-to-number (match-string 4)))
+           (day   (string-to-number (match-string 5)))
            (id    (format "<%s%s%s%s%%%s>"
-                          (match-string 2)
                           (match-string 3)
                           (match-string 4)
                           (match-string 5)
+                          (match-string 6)
                           nnshimbun-current-group))
-           (url (match-string 1)))
+           (url (match-string 2)))
        (push (make-full-mail-header
               0
               (nnshimbun-mime-encode-string
@@ -1023,5 +1136,189 @@ is enclosed by at least one regexp grouping construct."
 
 
 
+;;; MLs on www.mew.org
+
+(defmacro nnshimbun-mew-concat-url (url)
+  `(concat nnshimbun-url
+          (nth 1 (assoc nnshimbun-current-group nnshimbun-mew-groups))
+          "/"
+          ,url))
+
+(defmacro nnshimbun-mew-reverse-order-p ()
+  `(nth 2 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
+
+(defmacro nnshimbun-mew-spew-p ()
+  `(nth 3 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
+
+(defsubst nnshimbun-mew-retrieve-xover (aux)
+  (erase-buffer)
+  (nnshimbun-retrieve-url
+   (nnshimbun-mew-concat-url (if (= aux 1) "index.html" (format "mail%d.html" aux)))
+   t))
+
+(defconst nnshimbun-mew-regexp "<A[^>]*HREF=\"\\(msg\\([0-9]+\\).html\\)\">\\([^<]+\\)<")
+
+(defmacro nnshimbun-mew-extract-header-values ()
+  `(progn
+     (setq url (nnshimbun-mew-concat-url (match-string 1))
+          id (format "<%05d%%%s>"
+                     (1- (string-to-number (match-string 2)))
+                     nnshimbun-current-group)
+          subject (match-string 3))
+     (forward-line 1)
+     (if (nnshimbun-search-id nnshimbun-current-group id)
+        (throw 'stop headers)
+       (push (make-full-mail-header
+             0
+             (nnshimbun-mime-encode-string subject)
+             (if (looking-at "<EM>\\([^<]+\\)<")
+                 (nnshimbun-mime-encode-string (match-string 1))
+               "")
+             "" id "" 0 0 url)
+            headers))))
+
+(eval-and-compile
+  (if (fboundp 'mime-entity-fetch-field)
+      ;; For Semi-Gnus.
+      (defmacro nnshimbun-mew-mail-header-subject (header)
+       `(mime-entity-fetch-field ,header 'Subject))
+    ;; For pure Gnus.
+    (defalias 'nnshimbun-mew-mail-header-subject 'mail-header-subject)))
+
+(defun nnshimbun-mew-get-headers ()
+  (if (nnshimbun-mew-spew-p)
+      (let ((headers (nnshimbun-mew-get-headers-1)))
+       (erase-buffer)
+       (insert-buffer-substring (nnshimbun-open-nov nnshimbun-current-group))
+       (delq nil
+             (mapcar
+              (lambda (header)
+                (goto-char (point-min))
+                (let ((subject (nnshimbun-mew-mail-header-subject header))
+                      (found))
+                  (while (and (not found)
+                              (search-forward subject nil t))
+                    (if (not (and (search-backward "\t" nil t)
+                                  (not (search-backward "\t" (gnus-point-at-bol) t))))
+                        (forward-line 1)
+                      (setq found t)))
+                  (if found
+                      nil
+                    (goto-char (point-max))
+                    (nnheader-insert-nov header)
+                    header)))
+              headers)))
+    (nnshimbun-mew-get-headers-1)))
+
+(defun nnshimbun-mew-get-headers-1 ()
+  (let (headers)
+    (when (re-search-forward
+          "<A[^>]*HREF=\"mail\\([0-9]+\\)\\.html\">\\[?Last Page\\]?</A>" nil t)
+      (let ((limit (string-to-number (match-string 1))))
+       (catch 'stop
+         (if (nnshimbun-mew-reverse-order-p)
+             (let ((aux 1))
+               (while (let (id url subject)
+                        (while (re-search-forward nnshimbun-mew-regexp nil t)
+                          (nnshimbun-mew-extract-header-values))
+                        (< aux limit))
+                 (nnshimbun-mew-retrieve-xover (setq aux (1+ aux)))))
+           (while (> limit 0)
+             (nnshimbun-mew-retrieve-xover limit)
+             (setq limit (1- limit))
+             (let (id url subject)
+               (goto-char (point-max))
+               (while (re-search-backward nnshimbun-mew-regexp nil t)
+                 (nnshimbun-mew-extract-header-values)
+                 (forward-line -2)))))
+         headers)))))
+
+
+
+;;; MLs on www.xemacs.org
+
+(defmacro nnshimbun-xemacs-concat-url (url)
+  `(concat nnshimbun-url nnshimbun-current-group "/" ,url))
+
+(defun nnshimbun-xemacs-get-headers ()
+  (let (headers auxs aux)
+    (catch 'stop
+      (while (re-search-forward
+             (concat "<A HREF=\"/list-archives/" nnshimbun-current-group
+                     "/\\([12][0-9][0-9][0-9][0-1][0-9]\\)/\">\\[Index\\]")
+             nil t)
+       (setq auxs (append auxs (list (match-string 1)))))
+      (while auxs
+       (erase-buffer)
+       (nnshimbun-retrieve-url
+        (nnshimbun-xemacs-concat-url (concat (setq aux (car auxs)) "/")))
+       (let (id url subject)
+         (goto-char (point-max))
+         (while (re-search-backward
+                 "<A[^>]*HREF=\"\\(msg\\([0-9]+\\).html\\)\">\\([^<]+\\)<"
+                 nil t)
+           (setq url (nnshimbun-xemacs-concat-url
+                      (concat aux "/" (match-string 1)))
+                 id (format "<%s%05d%%%s>"
+                            aux
+                            (string-to-number (match-string 2))
+                            nnshimbun-current-group)
+                 subject (match-string 3))
+           (forward-line 1)
+           (if (nnshimbun-search-id nnshimbun-current-group id)
+               (throw 'stop headers)
+             (push (make-full-mail-header
+                    0
+                    (nnshimbun-mime-encode-string subject)
+                    (if (looking-at "<td><em>\\([^<]+\\)<")
+                        (match-string 1)
+                      "")
+                    "" id "" 0 0 url)
+                   headers))
+           (message "%s" id)
+           (forward-line -2)))
+       (setq auxs (cdr auxs))))
+    headers))
+
+;;; MLs on www.jp.netbsd.org
+
+(defun nnshimbun-netbsd-get-headers ()
+  (let ((case-fold-search t) headers months)
+    (goto-char (point-min))
+    (while (re-search-forward "<A HREF=\"\\([0-9]+\\)/\\(threads.html\\)?\">" nil t)
+      (push (match-string 1) months))
+    (setq months (nreverse months))
+    (catch 'exit
+      (dolist (month months)
+       (erase-buffer)
+       (nnshimbun-retrieve-url
+        (format "%s%s/%s/maillist.html" nnshimbun-url nnshimbun-current-group month)
+        t)
+       (let (id url subject)
+         (while (re-search-forward
+                 "<A[^>]*HREF=\"\\(msg\\([0-9]+\\)\\.html\\)\">\\([^<]+\\)</A>"
+                 nil t)
+           (setq url (format "%s%s/%s/%s"
+                             nnshimbun-url
+                             nnshimbun-current-group
+                             month
+                             (match-string 1))
+                 id (format "<%s%05d%%%s>"
+                            month
+                            (string-to-number (match-string 2))
+                            nnshimbun-current-group)
+                 subject (match-string 3))
+           (if (nnshimbun-search-id nnshimbun-current-group id)
+               (throw 'exit headers)
+             (push (make-full-mail-header
+                    0
+                    (nnshimbun-mime-encode-string subject)
+                    (if (looking-at "</STRONG> *<EM>\\([^<]+\\)<")
+                        (nnshimbun-mime-encode-string (match-string 1))
+                      "")
+                    "" id "" 0 0 url)
+                   headers)))))
+      headers)))
+
 (provide 'nnshimbun)
 ;;; nnshimbun.el ends here.