-;;; -*- mode: Emacs-Lisp; coding: junet -*-
+;;; nnshimbun.el --- interfacing with web newspapers -*- coding: junet; -*-
-;;; Author: TSUCHIYA Masatoshi <tsuchiya@pine.kuee.kyoto-u.ac.jp>
-;;; Keywords: news
+;; Authors: TSUCHIYA Masatoshi <tsuchiya@pine.kuee.kyoto-u.ac.jp>
+;; Akihiro Arisawa <ari@atesoft.advantest.co.jp>
+;; Keywords: news
;;; Copyright:
(defvar nnshimbun-check-interval 300)
+(defconst nnshimbun-mew-groups
+ '(("meadow-develop" "meadow-develop" nil t)
+ ("meadow-users-jp" "meadow-users-jp")
+ ("mule-win32" "mule-win32")
+ ("mew-win32" "mew-win32")
+ ("mew-dist" "mew-dist/3300" t)
+ ("mgp-users-jp" "mgp-users-jp/A" t t)))
+
(defvar nnshimbun-type-definition
`(("asahi"
(url . "http://spin.asahi.com/")
(contents-start . "\n<!-- honbun start -->\n")
(contents-end . "\n<!-- honbun end -->\n"))
("zdnet"
- (url . "http://zdseek.pub.softbank.co.jp/news/")
+ (url . "http://www.zdnet.co.jp/news/")
(groups "comp")
(coding-system . ,(if (boundp 'MULE) '*sjis* 'shift_jis))
(generate-nov . nnshimbun-generate-nov-for-each-group)
(index-url . nnshimbun-url)
(from-address . "zdnn@softbank.co.jp")
(make-contents . nnshimbun-make-html-contents)
- (contents-start . "<!--BODY-->")
- (contents-end . "<!--BODYEND-->"))
+ (contents-start . "\\(<!--BODY-->\\|<!--DATE-->\\)")
+ (contents-end . "\\(<!--BODYEND-->\\|<!--BYLINEEND-->\\)"))
+ ("mew"
+ (url . "http://www.mew.org/archive/")
+ (groups ,@(mapcar #'car nnshimbun-mew-groups))
+ (coding-system . ,(if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
+ (generate-nov . nnshimbun-generate-nov-for-each-group)
+ (get-headers . nnshimbun-mew-get-headers)
+ (index-url . (nnshimbun-mew-concat-url "index.html"))
+ (make-contents . nnshimbun-make-mhonarc-contents))
+ ("xemacs"
+ (url . "http://www.xemacs.org/list-archives/")
+ (groups "xemacs-announce" "xemacs-beta-ja" "xemacs-beta"
+ "xemacs-build-reports" "xemacs-cvs" "xemacs-mule"
+ "xemacs-nt" "xemacs-patches" "xemacs-users-ja" "xemacs")
+ (coding-system . ,(if (boundp 'MULE) '*euc-japan* 'euc-jp))
+ (generate-nov . nnshimbun-generate-nov-for-each-group)
+ (get-headers . nnshimbun-xemacs-get-headers)
+ (index-url . (nnshimbun-xemacs-concat-url nil))
+ (make-contents . nnshimbun-make-mhonarc-contents))
+ ("netbsd"
+ (url . "http://www.jp.netbsd.org/ja/JP/ml/")
+ (groups "announce-ja" "junk-ja" "tech-misc-ja" "tech-pkg-ja"
+ "port-arm32-ja" "port-hpcmips-ja" "port-mac68k-ja"
+ "port-mips-ja" "port-powerpc-ja" "hpcmips-changes-ja"
+ "members-ja" "admin-ja" "www-changes-ja")
+ (coding-system . ,(if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
+ (generate-nov . nnshimbun-generate-nov-for-each-group)
+ (get-headers . nnshimbun-netbsd-get-headers)
+ (index-url . (format "%s%s/index.html" nnshimbun-url nnshimbun-current-group))
+ (make-contents . nnshimbun-make-mhonarc-contents))
))
(defvar nnshimbun-x-face-alist
(nnheader-report 'nnml "Couldn't retrieve article: %s" (prin1-to-string article))
nil)))
+(defsubst nnshimbun-header-xref (x)
+ (if (and (setq x (mail-header-xref x))
+ (string-match "^Xref: " x))
+ (substring x 6)
+ x))
+
(defun nnshimbun-request-article-1 (article &optional group server to-buffer)
(if (nnshimbun-backlog
(gnus-backlog-request-article
(set-buffer (nnshimbun-open-nov group))
(and (nnheader-find-nov-line article)
(nnheader-parse-nov))))
- (let* ((xref (substring (mail-header-xref header) 6))
- (x-faces (cdr (or (assoc server nnshimbun-x-face-alist)
+ (let* ((xref (nnshimbun-header-xref header))
+ (x-faces (cdr (or (assoc (or server
+ (nnoo-current-server 'nnshimbun))
+ nnshimbun-x-face-alist)
(assoc "default" nnshimbun-x-face-alist))))
(x-face (cdr (or (assoc group x-faces)
(assoc "default" x-faces)))))
(insert "Subject: " (or (mime-entity-fetch-field header 'Subject) "(none)") "\n"
"From: " (or (mime-entity-fetch-field header 'From) "(nobody)") "\n"
"Date: " (or (mail-header-date header) "") "\n"
- "Message-ID: " (or (mail-header-id header) (nnmail-message-id)) "\n"
- "References: " (or (mail-header-references header) "") "\n"
- "Lines: ")
- (princ (or (mail-header-lines header) 0) (current-buffer))
- (insert "\n")
- (if (mail-header-xref header)
- (insert (mail-header-xref header) "\n")))
+ "Message-ID: " (or (mail-header-id header) (nnmail-message-id)) "\n")
+ (let ((refs (mail-header-references header)))
+ (and refs
+ (string< "" refs)
+ (insert "References: " refs "\n")))
+ (insert "Lines: " (number-to-string (or (mail-header-lines header) 0)) "\n"
+ "Xref: " (nnshimbun-header-xref header) "\n"))
;; For pure Gnus.
(defun nnshimbun-insert-header (header)
(nnheader-insert-header header)
(delete-char -1)
- (if (mail-header-xref header)
- (insert (mail-header-xref header) "\n")))))
+ (insert "Xref: " (nnshimbun-header-xref header) "\n"))))
(deffoo nnshimbun-retrieve-headers (articles &optional group server fetch-old)
(when (nnshimbun-possibly-change-group group server)
(setq found t)))
(unless found
(goto-char (point-min))
- (when (search-forward (concat "X-Nnshimbun-Original-Id: " id) nil t)
+ (when (search-forward (concat "X-Nnshimbun-Id: " id) nil t)
(forward-line 0)
(setq found t)))
(if found
(when (nnheader-find-nov-line (mail-header-number header))
(dolist (arg args)
(if (eq (car arg) 'id)
- (let ((extra (mail-header-extra header)) x)
- (when (setq x (assq 'X-Nnshimbun-Original-Id extra))
- (setq extra (delq x extra)))
- (mail-header-set-extra
- header
- (cons (cons 'X-Nnshimbun-Original-Id (cdr arg)) extra)))
+ (let ((extra (mail-header-extra header)))
+ (unless (assq 'X-Nnshimbun-Id extra)
+ (mail-header-set-extra
+ header
+ (cons (cons 'X-Nnshimbun-Id (mail-header-id header))
+ extra)))
+ (mail-header-set-id header (cdr arg)))
(let ((func (intern (concat "mail-header-set-" (symbol-name (car arg))))))
(if (cdr arg) (eval (list func header (cdr arg)))))))
- (let ((xref (mail-header-xref header)))
- (when (string-match "^Xref: " xref)
- (mail-header-set-xref header (substring xref 6))))
+ (mail-header-set-xref header (nnshimbun-header-xref header))
(delete-region (point) (progn (forward-line 1) (point)))
(nnheader-insert-nov header))))
(defun nnshimbun-make-text-or-html-contents (header &optional x-face)
(let ((case-fold-search t) (html t) (start))
- (when (and (search-forward nnshimbun-contents-start nil t)
+ (when (and (re-search-forward nnshimbun-contents-start nil t)
(setq start (point))
- (search-forward nnshimbun-contents-end nil t))
+ (re-search-forward nnshimbun-contents-end nil t))
+ (delete-region (match-beginning 0) (point-max))
(delete-region (point-min) start)
- (delete-region (- (point) (length nnshimbun-contents-end)) (point-max))
(nnshimbun-shallow-rendering)
(setq html nil))
(goto-char (point-min))
(defun nnshimbun-make-html-contents (header &optional x-face)
(let (start)
- (when (and (search-forward nnshimbun-contents-start nil t)
+ (when (and (re-search-forward nnshimbun-contents-start nil t)
(setq start (point))
- (search-forward nnshimbun-contents-end nil t))
- (delete-region (point-min) start)
- (delete-region (- (point) (length nnshimbun-contents-end)) (point-max)))
+ (re-search-forward nnshimbun-contents-end nil t))
+ (delete-region (match-beginning 0) (point-max))
+ (delete-region (point-min) start))
(goto-char (point-min))
(nnshimbun-insert-header header)
(insert "Content-Type: text/html; charset=ISO-2022-JP\n"
(encode-coding-string (buffer-string)
(mime-charset-to-coding-system "ISO-2022-JP"))))
+(defun nnshimbun-make-mhonarc-contents (header &rest args)
+ (require 'mml)
+ (if (search-forward "<!--X-Head-End-->" nil t)
+ (progn
+ (forward-line 0)
+ ;; Processing headers.
+ (save-restriction
+ (narrow-to-region (point-min) (point))
+ (nnweb-decode-entities)
+ (goto-char (point-min))
+ (while (search-forward "\n<!--X-" nil t)
+ (replace-match "\n"))
+ (goto-char (point-min))
+ (while (search-forward " -->\n" nil t)
+ (replace-match "\n"))
+ (goto-char (point-min))
+ (while (search-forward "\t" nil t)
+ (replace-match " "))
+ (goto-char (point-min))
+ (let (buf refs)
+ (while (not (eobp))
+ (cond
+ ((looking-at "<!--")
+ (delete-region (point) (progn (forward-line 1) (point))))
+ ((looking-at "Subject: +")
+ (push (cons 'subject (nnheader-header-value)) buf)
+ (delete-region (point) (progn (forward-line 1) (point))))
+ ((looking-at "From: +")
+ (push (cons 'from (nnheader-header-value)) buf)
+ (delete-region (point) (progn (forward-line 1) (point))))
+ ((looking-at "Date: +")
+ (push (cons 'date (nnheader-header-value)) buf)
+ (delete-region (point) (progn (forward-line 1) (point))))
+ ((looking-at "Message-Id: +")
+ (push (cons 'id (concat "<" (nnheader-header-value) ">")) buf)
+ (delete-region (point) (progn (forward-line 1) (point))))
+ ((looking-at "Reference: +")
+ (push (concat "<" (nnheader-header-value) ">") refs)
+ (delete-region (point) (progn (forward-line 1) (point))))
+ ((looking-at "Content-Type: ")
+ (unless (search-forward "charset" (gnus-point-at-eol) t)
+ (end-of-line)
+ (insert "; charset=ISO-2022-JP"))
+ (forward-line 1))
+ (t (forward-line 1))))
+ (insert "MIME-Version: 1.0\n")
+ (if refs (push (cons 'references (mapconcat 'identity refs " ")) buf))
+ (nnshimbun-nov-fix-header nnshimbun-current-group header buf)
+ (goto-char (point-min))
+ (nnshimbun-insert-header header))
+ (goto-char (point-max)))
+ ;; Processing body.
+ (save-restriction
+ (narrow-to-region (point) (point-max))
+ (delete-region
+ (point)
+ (progn
+ (search-forward "\n<!--X-Body-of-Message-->\n" nil t)
+ (point)))
+ (when (search-forward "\n<!--X-Body-of-Message-End-->\n" nil t)
+ (forward-line -1)
+ (delete-region (point) (point-max)))
+ (nnweb-remove-markup)
+ (nnweb-decode-entities)))
+ (goto-char (point-min))
+ (nnshimbun-insert-header header)
+ (insert "Content-Type: text/html; charset=ISO-2022-JP\nMIME-Version: 1.0\n\n"))
+ (encode-coding-string (buffer-string)
+ (mime-charset-to-coding-system "ISO-2022-JP")))
;;; www.asahi.com
(buffer-substring
(match-end 0)
(progn (search-forward "<br>" nil t) (point)))
- "<[^>]+>")
+ "\\(<[^>]+>\\|\r\\)")
""))
nnshimbun-from-address
"" id "" 0 0 (concat nnshimbun-url url))
(delete-region start (point))))
(goto-char (point-min))
(while (re-search-forward
- "<a href=\"\\(\\([0-9][0-9]\\)\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([^\\.]+\\).html\\)\"><font size=\"4\"><strong>"
+ "<a href=\"\\(/news/\\)?\\(\\([0-9][0-9]\\)\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([^\\.]+\\).html\\)\"><font size=\"4\"><strong>"
nil t)
- (let ((year (+ 2000 (string-to-number (match-string 2))))
- (month (string-to-number (match-string 3)))
- (day (string-to-number (match-string 4)))
+ (let ((year (+ 2000 (string-to-number (match-string 3))))
+ (month (string-to-number (match-string 4)))
+ (day (string-to-number (match-string 5)))
(id (format "<%s%s%s%s%%%s>"
- (match-string 2)
(match-string 3)
(match-string 4)
(match-string 5)
+ (match-string 6)
nnshimbun-current-group))
- (url (match-string 1)))
+ (url (match-string 2)))
(push (make-full-mail-header
0
(nnshimbun-mime-encode-string
+;;; MLs on www.mew.org
+
+(defmacro nnshimbun-mew-concat-url (url)
+ `(concat nnshimbun-url
+ (nth 1 (assoc nnshimbun-current-group nnshimbun-mew-groups))
+ "/"
+ ,url))
+
+(defmacro nnshimbun-mew-reverse-order-p ()
+ `(nth 2 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
+
+(defmacro nnshimbun-mew-spew-p ()
+ `(nth 3 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
+
+(defsubst nnshimbun-mew-retrieve-xover (aux)
+ (erase-buffer)
+ (nnshimbun-retrieve-url
+ (nnshimbun-mew-concat-url (if (= aux 1) "index.html" (format "mail%d.html" aux)))
+ t))
+
+(defconst nnshimbun-mew-regexp "<A[^>]*HREF=\"\\(msg\\([0-9]+\\).html\\)\">\\([^<]+\\)<")
+
+(defmacro nnshimbun-mew-extract-header-values ()
+ `(progn
+ (setq url (nnshimbun-mew-concat-url (match-string 1))
+ id (format "<%05d%%%s>"
+ (1- (string-to-number (match-string 2)))
+ nnshimbun-current-group)
+ subject (match-string 3))
+ (forward-line 1)
+ (if (nnshimbun-search-id nnshimbun-current-group id)
+ (throw 'stop headers)
+ (push (make-full-mail-header
+ 0
+ (nnshimbun-mime-encode-string subject)
+ (if (looking-at "<EM>\\([^<]+\\)<")
+ (nnshimbun-mime-encode-string (match-string 1))
+ "")
+ "" id "" 0 0 url)
+ headers))))
+
+(eval-and-compile
+ (if (fboundp 'mime-entity-fetch-field)
+ ;; For Semi-Gnus.
+ (defmacro nnshimbun-mew-mail-header-subject (header)
+ `(mime-entity-fetch-field ,header 'Subject))
+ ;; For pure Gnus.
+ (defalias 'nnshimbun-mew-mail-header-subject 'mail-header-subject)))
+
+(defun nnshimbun-mew-get-headers ()
+ (if (nnshimbun-mew-spew-p)
+ (let ((headers (nnshimbun-mew-get-headers-1)))
+ (erase-buffer)
+ (insert-buffer-substring (nnshimbun-open-nov nnshimbun-current-group))
+ (delq nil
+ (mapcar
+ (lambda (header)
+ (goto-char (point-min))
+ (let ((subject (nnshimbun-mew-mail-header-subject header))
+ (found))
+ (while (and (not found)
+ (search-forward subject nil t))
+ (if (not (and (search-backward "\t" nil t)
+ (not (search-backward "\t" (gnus-point-at-bol) t))))
+ (forward-line 1)
+ (setq found t)))
+ (if found
+ nil
+ (goto-char (point-max))
+ (nnheader-insert-nov header)
+ header)))
+ headers)))
+ (nnshimbun-mew-get-headers-1)))
+
+(defun nnshimbun-mew-get-headers-1 ()
+ (let (headers)
+ (when (re-search-forward
+ "<A[^>]*HREF=\"mail\\([0-9]+\\)\\.html\">\\[?Last Page\\]?</A>" nil t)
+ (let ((limit (string-to-number (match-string 1))))
+ (catch 'stop
+ (if (nnshimbun-mew-reverse-order-p)
+ (let ((aux 1))
+ (while (let (id url subject)
+ (while (re-search-forward nnshimbun-mew-regexp nil t)
+ (nnshimbun-mew-extract-header-values))
+ (< aux limit))
+ (nnshimbun-mew-retrieve-xover (setq aux (1+ aux)))))
+ (while (> limit 0)
+ (nnshimbun-mew-retrieve-xover limit)
+ (setq limit (1- limit))
+ (let (id url subject)
+ (goto-char (point-max))
+ (while (re-search-backward nnshimbun-mew-regexp nil t)
+ (nnshimbun-mew-extract-header-values)
+ (forward-line -2)))))
+ headers)))))
+
+
+
+;;; MLs on www.xemacs.org
+
+(defmacro nnshimbun-xemacs-concat-url (url)
+ `(concat nnshimbun-url nnshimbun-current-group "/" ,url))
+
+(defun nnshimbun-xemacs-get-headers ()
+ (let (headers auxs aux)
+ (catch 'stop
+ (while (re-search-forward
+ (concat "<A HREF=\"/list-archives/" nnshimbun-current-group
+ "/\\([12][0-9][0-9][0-9][0-1][0-9]\\)/\">\\[Index\\]")
+ nil t)
+ (setq auxs (append auxs (list (match-string 1)))))
+ (while auxs
+ (erase-buffer)
+ (nnshimbun-retrieve-url
+ (nnshimbun-xemacs-concat-url (concat (setq aux (car auxs)) "/")))
+ (let (id url subject)
+ (goto-char (point-max))
+ (while (re-search-backward
+ "<A[^>]*HREF=\"\\(msg\\([0-9]+\\).html\\)\">\\([^<]+\\)<"
+ nil t)
+ (setq url (nnshimbun-xemacs-concat-url
+ (concat aux "/" (match-string 1)))
+ id (format "<%s%05d%%%s>"
+ aux
+ (string-to-number (match-string 2))
+ nnshimbun-current-group)
+ subject (match-string 3))
+ (forward-line 1)
+ (if (nnshimbun-search-id nnshimbun-current-group id)
+ (throw 'stop headers)
+ (push (make-full-mail-header
+ 0
+ (nnshimbun-mime-encode-string subject)
+ (if (looking-at "<td><em>\\([^<]+\\)<")
+ (match-string 1)
+ "")
+ "" id "" 0 0 url)
+ headers))
+ (message "%s" id)
+ (forward-line -2)))
+ (setq auxs (cdr auxs))))
+ headers))
+
+;;; MLs on www.jp.netbsd.org
+
+(defun nnshimbun-netbsd-get-headers ()
+ (let ((case-fold-search t) headers months)
+ (goto-char (point-min))
+ (while (re-search-forward "<A HREF=\"\\([0-9]+\\)/\\(threads.html\\)?\">" nil t)
+ (push (match-string 1) months))
+ (setq months (nreverse months))
+ (catch 'exit
+ (dolist (month months)
+ (erase-buffer)
+ (nnshimbun-retrieve-url
+ (format "%s%s/%s/maillist.html" nnshimbun-url nnshimbun-current-group month)
+ t)
+ (let (id url subject)
+ (while (re-search-forward
+ "<A[^>]*HREF=\"\\(msg\\([0-9]+\\)\\.html\\)\">\\([^<]+\\)</A>"
+ nil t)
+ (setq url (format "%s%s/%s/%s"
+ nnshimbun-url
+ nnshimbun-current-group
+ month
+ (match-string 1))
+ id (format "<%s%05d%%%s>"
+ month
+ (string-to-number (match-string 2))
+ nnshimbun-current-group)
+ subject (match-string 3))
+ (if (nnshimbun-search-id nnshimbun-current-group id)
+ (throw 'exit headers)
+ (push (make-full-mail-header
+ 0
+ (nnshimbun-mime-encode-string subject)
+ (if (looking-at "</STRONG> *<EM>\\([^<]+\\)<")
+ (nnshimbun-mime-encode-string (match-string 1))
+ "")
+ "" id "" 0 0 url)
+ headers)))))
+ headers)))
+
(provide 'nnshimbun)
;;; nnshimbun.el ends here.