* nnshimbun.el: Enclose w3m stuff with `eval-and-compile';
[elisp/gnus.git-] / lisp / nnshimbun.el
1 ;;; nnshimbun.el --- interfacing with web newspapers -*- coding: junet; -*-
2
3 ;; Authors: TSUCHIYA Masatoshi <tsuchiya@pine.kuee.kyoto-u.ac.jp>
4 ;;          Akihiro Arisawa    <ari@atesoft.advantest.co.jp>
5 ;; Keywords: news
6
7 ;;; Copyright:
8
9 ;; This file is a part of Semi-Gnus.
10
11 ;; This program is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; This program is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with this program; if not, you can either send email to this
23 ;; program's maintainer or write to: The Free Software Foundation,
24 ;; Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;; Gnus backend to read newspapers on WEB.
29
30
31 ;;; Defintinos:
32
33 (gnus-declare-backend "nnshimbun" 'address)
34
35 (eval-when-compile (require 'cl))
36 (eval-when-compile (require 'gnus-clfns))
37 (eval-when-compile (require 'static))
38
39 (require 'nnheader)
40 (require 'nnmail)
41 (require 'nnoo)
42 (require 'gnus-bcklg)
43 (eval-when-compile (ignore-errors (require 'nnweb)))
44 ;; Report failure to find w3 at load time if appropriate.
45 (eval '(require 'nnweb))
46 (require 'mcharset)
47
48
49 (nnoo-declare nnshimbun)
50
51 (defvar nnshimbun-check-interval 300)
52
53 (defconst nnshimbun-mew-groups
54   '(("meadow-develop" "meadow-develop" nil t)
55     ("meadow-users-jp" "meadow-users-jp")
56     ("mule-win32" "mule-win32")
57     ("mew-win32" "mew-win32")
58     ("mew-dist" "mew-dist/3300" t)
59     ("mgp-users-jp" "mgp-users-jp/A" t t)))
60
61 (defvar nnshimbun-type-definition
62   `(("asahi"
63      (url . "http://spin.asahi.com/")
64      (groups "national" "business" "politics" "international" "sports")
65      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
66      (generate-nov   . nnshimbun-generate-nov-for-each-group)
67      (get-headers    . nnshimbun-asahi-get-headers)
68      (index-url      . (format "%s%s/update/list.html" nnshimbun-url nnshimbun-current-group))
69      (from-address   . "webmaster@www.asahi.com")
70      (make-contents  . nnshimbun-make-text-or-html-contents)
71      (contents-start . "\n<!-- Start of kiji -->\n")
72      (contents-end   . "\n<!-- End of kiji -->\n"))
73     ("sponichi"
74      (url . "http://www.sponichi.co.jp/")
75      (groups "baseball" "soccer" "usa" "others" "society" "entertainment" "horseracing")
76      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
77      (generate-nov   . nnshimbun-generate-nov-for-each-group)
78      (get-headers    . nnshimbun-sponichi-get-headers)
79      (index-url      . (format "%s%s/index.html" nnshimbun-url nnshimbun-current-group))
80      (from-address   . "webmaster@www.sponichi.co.jp")
81      (make-contents  . nnshimbun-make-text-or-html-contents)
82      (contents-start . "\n<span class=\"text\">\e$B!!\e(B")
83      (contents-end   . "\n"))
84     ("cnet"
85      (url . "http://cnet.sphere.ne.jp/")
86      (groups "comp")
87      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
88      (generate-nov   . nnshimbun-generate-nov-for-each-group)
89      (get-headers    . nnshimbun-cnet-get-headers)
90      (index-url      . (format "%s/News/Oneweek/" nnshimbun-url))
91      (from-address   . "cnet@sphere.ad.jp")
92      (make-contents  . nnshimbun-make-html-contents)
93      (contents-start . "\n<!--KIJI-->\n")
94      (contents-end   . "\n<!--/KIJI-->\n"))
95     ("wired"
96      (url . "http://www.hotwired.co.jp/")
97      (groups "business" "culture" "technology")
98      (coding-system  . ,(static-if (boundp 'MULE) '*euc-japan* 'euc-jp))
99      (generate-nov   . nnshimbun-generate-nov-for-all-groups)
100      (get-headers    . nnshimbun-wired-get-all-headers)
101      (index-url)
102      (from-address   . "webmaster@www.hotwired.co.jp")
103      (make-contents  . nnshimbun-make-html-contents)
104      (contents-start . "\n<!-- START_OF_BODY -->\n")
105      (contents-end   . "\n<!-- END_OF_BODY -->\n"))
106     ("yomiuri"
107      (url . "http://www.yomiuri.co.jp/")
108      (groups "shakai" "sports" "seiji" "keizai" "kokusai" "fuho")
109      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
110      (generate-nov   . nnshimbun-generate-nov-for-all-groups)
111      (get-headers    . nnshimbun-yomiuri-get-all-headers)
112      (index-url      . (concat nnshimbun-url "main.htm"))
113      (from-address   . "webmaster@www.yomiuri.co.jp")
114      (make-contents  . nnshimbun-make-text-or-html-contents)
115      (contents-start . "\n<!--  honbun start  -->\n")
116      (contents-end   . "\n<!--  honbun end  -->\n"))
117     ("zdnet"
118      (url . "http://www.zdnet.co.jp/news/")
119      (groups "comp")
120      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
121      (generate-nov   . nnshimbun-generate-nov-for-each-group)
122      (get-headers    . nnshimbun-zdnet-get-headers)
123      (index-url      . nnshimbun-url)
124      (from-address   . "zdnn@softbank.co.jp")
125      (make-contents  . nnshimbun-make-html-contents)
126      (contents-start . "\\(<!--BODY-->\\|<!--DATE-->\\)")
127      (contents-end   . "\\(<!--BODYEND-->\\|<!--BYLINEEND-->\\)"))
128     ("mew"
129      (url . "http://www.mew.org/archive/")
130      (groups ,@(mapcar #'car nnshimbun-mew-groups))
131      (coding-system . ,(static-if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
132      (generate-nov  . nnshimbun-generate-nov-for-each-group)
133      (get-headers   . nnshimbun-mew-get-headers)
134      (index-url     . (nnshimbun-mew-concat-url "index.html"))
135      (make-contents . nnshimbun-make-mhonarc-contents))
136     ("xemacs"
137      (url . "http://list-archives.xemacs.org/")
138      (groups "xemacs-announce" "xemacs-beta-ja" "xemacs-beta"
139              "xemacs-build-reports" "xemacs-cvs" "xemacs-mule"
140              "xemacs-nt" "xemacs-patches" "xemacs-users-ja" "xemacs")
141      (coding-system . ,(static-if (boundp 'MULE) '*euc-japan* 'euc-jp))
142      (generate-nov  . nnshimbun-generate-nov-for-each-group)
143      (get-headers   . nnshimbun-xemacs-get-headers)
144      (index-url     . (nnshimbun-xemacs-concat-url nil))
145      (make-contents . nnshimbun-make-mhonarc-contents))
146     ("netbsd"
147      (url . "http://www.jp.netbsd.org/ja/JP/ml/")
148      (groups "announce-ja" "junk-ja" "tech-misc-ja" "tech-pkg-ja"
149              "port-arm32-ja" "port-hpcmips-ja" "port-mac68k-ja"
150              "port-mips-ja" "port-powerpc-ja" "hpcmips-changes-ja"
151              "members-ja" "admin-ja" "www-changes-ja")
152      (coding-system  . ,(static-if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
153      (generate-nov   . nnshimbun-generate-nov-for-each-group)
154      (get-headers    . nnshimbun-netbsd-get-headers)
155      (index-url      . (format "%s%s/index.html" nnshimbun-url nnshimbun-current-group))
156      (make-contents  . nnshimbun-make-mhonarc-contents))
157     ("bbdb-ml"
158      (url . "http://www.rc.tutrp.tut.ac.jp/bbdb-ml/")
159      (groups "bbdb-ml")
160      (coding-system . ,(static-if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
161      (generate-nov . nnshimbun-generate-nov-for-each-group)
162      (get-headers . nnshimbun-fml-get-headers)
163      (index-url . nnshimbun-url)
164      (make-contents . nnshimbun-make-fml-contents))
165     ))
166
167 (defvar nnshimbun-x-face-alist
168   '(("default" .
169      (("default" .
170        "X-Face: Ygq$6P.,%Xt$U)DS)cRY@k$VkW!7(X'X'?U{{osjjFG\"E]hND;SPJ-J?O?R|a?L
171         g2$0rVng=O3Lt}?~IId8Jj&vP^3*o=LKUyk(`t%0c!;t6REk=JbpsEn9MrN7gZ%"))))
172   "Alist of server vs. alist of group vs. X-Face field.  It looks like:
173
174 \((\"asahi\" . ((\"national\" . \"X-face: ***\")
175              (\"business\" . \"X-Face: ***\")
176                 ;;
177                 ;;
178              (\"default\" . \"X-face: ***\")))
179  (\"sponichi\" . ((\"baseball\" . \"X-face: ***\")
180                 (\"soccer\" . \"X-Face: ***\")
181                 ;;
182                 ;;
183                 (\"default\" . \"X-face: ***\")))
184                 ;;
185  (\"default\" . ((\"default\" . \"X-face: ***\")))")
186
187 (defvoo nnshimbun-directory (nnheader-concat gnus-directory "shimbun/")
188   "Where nnshimbun will save its files.")
189
190 (defvoo nnshimbun-nov-is-evil nil
191   "*Non-nil means that nnshimbun will never retrieve NOV headers.")
192
193 (defvoo nnshimbun-nov-file-name ".overview")
194
195 (defvoo nnshimbun-pre-fetch-article nil
196   "*Non nil means that nnshimbun fetch unread articles when scanning groups.")
197
198 ;; set by nnshimbun-possibly-change-group
199 (defvoo nnshimbun-buffer nil)
200 (defvoo nnshimbun-current-directory nil)
201 (defvoo nnshimbun-current-group nil)
202
203 ;; set by nnshimbun-open-server
204 (defvoo nnshimbun-url nil)
205 (defvoo nnshimbun-coding-system nil)
206 (defvoo nnshimbun-groups nil)
207 (defvoo nnshimbun-generate-nov nil)
208 (defvoo nnshimbun-get-headers nil)
209 (defvoo nnshimbun-index-url nil)
210 (defvoo nnshimbun-from-address nil)
211 (defvoo nnshimbun-make-contents nil)
212 (defvoo nnshimbun-contents-start nil)
213 (defvoo nnshimbun-contents-end nil)
214 (defvoo nnshimbun-server-directory nil)
215
216 (defvoo nnshimbun-status-string "")
217 (defvoo nnshimbun-nov-last-check nil)
218 (defvoo nnshimbun-nov-buffer-alist nil)
219 (defvoo nnshimbun-nov-buffer-file-name nil)
220
221 (defvoo nnshimbun-keep-backlog 300)
222 (defvoo nnshimbun-backlog-articles nil)
223 (defvoo nnshimbun-backlog-hashtb nil)
224
225 (defconst nnshimbun-meta-content-type-charset-regexp
226   (eval-when-compile
227     (concat "<meta[ \t]+http-equiv=\"?Content-type\"?[ \t]+content=\"\\([^;]+\\)"
228             ";[ \t]*charset=\"?\\([^\"]+\\)\"?"
229             ">"))
230   "Regexp used in parsing `<META HTTP-EQUIV=\"Content-Type\" content=\"...;charset=...\">
231 for a charset indication")
232
233 (defconst nnshimbun-meta-charset-content-type-regexp
234   (eval-when-compile
235     (concat "<meta[ \t]+content=\"\\([^;]+\\)"
236             ";[ \t]*charset=\"?\\([^\"]+\\)\"?"
237             "[ \t]+http-equiv=\"?Content-type\"?>"))
238   "Regexp used in parsing `<META content=\"...;charset=...\" HTTP-EQUIV=\"Content-Type\">
239 for a charset indication")
240
241
242
243 ;;; backlog
244 (defmacro nnshimbun-backlog (&rest form)
245   `(let ((gnus-keep-backlog nnshimbun-keep-backlog)
246          (gnus-backlog-buffer (format " *nnshimbun backlog %s*" (nnoo-current-server 'nnshimbun)))
247          (gnus-backlog-articles nnshimbun-backlog-articles)
248          (gnus-backlog-hashtb nnshimbun-backlog-hashtb))
249      (unwind-protect
250          (progn ,@form)
251        (setq nnshimbun-backlog-articles gnus-backlog-articles
252              nnshimbun-backlog-hashtb gnus-backlog-hashtb))))
253 (put 'nnshimbun-backlog 'lisp-indent-function 0)
254 (put 'nnshimbun-backlog 'edebug-form-spec '(form body))
255
256
257
258 ;;; Interface Functions
259 (nnoo-define-basics nnshimbun)
260
261 (deffoo nnshimbun-open-server (server &optional defs)
262   ;; Set default values.
263   (dolist (default (cdr (assoc server nnshimbun-type-definition)))
264     (let ((symbol (intern (concat "nnshimbun-" (symbol-name (car default))))))
265       (unless (assq symbol defs)
266         (push (list symbol (cdr default)) defs))))
267   ;; Set directory for server working files.
268   (push (list 'nnshimbun-server-directory
269               (file-name-as-directory
270                (expand-file-name server nnshimbun-directory)))
271         defs)
272   (nnoo-change-server 'nnshimbun server defs)
273   (nnshimbun-possibly-change-group nil server)
274   ;; Make directories.
275   (unless (file-exists-p nnshimbun-directory)
276     (ignore-errors (make-directory nnshimbun-directory t)))
277   (cond
278    ((not (file-exists-p nnshimbun-directory))
279     (nnshimbun-close-server)
280     (nnheader-report 'nnshimbun "Couldn't create directory: %s" nnshimbun-directory))
281    ((not (file-directory-p (file-truename nnshimbun-directory)))
282     (nnshimbun-close-server)
283     (nnheader-report 'nnshimbun "Not a directory: %s" nnshimbun-directory))
284    (t
285     (unless (file-exists-p nnshimbun-server-directory)
286       (ignore-errors (make-directory nnshimbun-server-directory t)))
287     (cond
288      ((not (file-exists-p nnshimbun-server-directory))
289       (nnshimbun-close-server)
290       (nnheader-report 'nnshimbun "Couldn't create directory: %s" nnshimbun-server-directory))
291      ((not (file-directory-p (file-truename nnshimbun-server-directory)))
292       (nnshimbun-close-server)
293       (nnheader-report 'nnshimbun "Not a directory: %s" nnshimbun-server-directory))
294      (t
295       (nnheader-report 'nnshimbun "Opened server %s using directory %s"
296                        server nnshimbun-server-directory)
297       t)))))
298
299 (deffoo nnshimbun-close-server (&optional server)
300   (and (nnshimbun-server-opened server)
301        (gnus-buffer-live-p nnshimbun-buffer)
302        (kill-buffer nnshimbun-buffer))
303   (nnshimbun-backlog (gnus-backlog-shutdown))
304   (nnshimbun-save-nov)
305   (nnoo-close-server 'nnshimbun server)
306   t)
307
308 (static-when (boundp 'MULE)
309   (unless (coding-system-p 'euc-japan)
310     (copy-coding-system '*euc-japan* 'euc-japan))
311   (unless (coding-system-p 'shift_jis)
312     (copy-coding-system '*sjis* 'shift_jis))
313   (eval-and-compile
314     (defalias-maybe 'coding-system-category 'get-code-mnemonic)))
315
316 (eval-when-compile
317   (defvar w3m-work-buffer-name)
318   (autoload 'w3m-retrieve "w3m"))
319 (eval-and-compile
320   (if (and (ignore-errors (require 'w3m))
321            (fboundp 'w3m-retrieve))
322 ;; When w3m.el is available.
323 (defun nnshimbun-retrieve-url (url &optional no-cache)
324   "Rertrieve URL contents and insert to current buffer."
325   (when (w3m-retrieve url nil no-cache)
326     (insert-buffer w3m-work-buffer-name)))
327 ;; Otherwise.
328 (defun nnshimbun-retrieve-url (url &optional no-cache)
329   "Rertrieve URL contents and insert to current buffer."
330   (let ((buf (current-buffer))
331         (url-working-buffer url-working-buffer))
332     (let ((old-asynch (default-value 'url-be-asynchronous))
333           (old-caching (default-value 'url-automatic-caching))
334           (old-mode (default-value 'url-standalone-mode)))
335       (setq-default url-be-asynchronous nil)
336       (when no-cache
337         (setq-default url-automatic-caching nil)
338         (setq-default url-standalone-mode nil))
339       (unwind-protect
340           (let ((coding-system-for-read 'binary)
341                 (coding-system-for-write 'binary)
342                 (input-coding-system 'binary)
343                 (output-coding-system 'binary)
344                 (default-enable-multibyte-characters nil))
345             (set-buffer
346              (setq url-working-buffer
347                    (cdr (url-retrieve url no-cache))))
348             (url-uncompress))
349         (setq-default url-be-asynchronous old-asynch)
350         (setq-default url-automatic-caching old-caching)
351         (setq-default url-standalone-mode old-mode)))
352     (let ((charset
353            (or (and (boundp 'url-current-mime-charset)
354                     (symbol-value 'url-current-mime-charset))
355                (let ((case-fold-search t))
356                  (goto-char (point-min))
357                  (if (or (re-search-forward
358                           nnshimbun-meta-content-type-charset-regexp nil t)
359                          (re-search-forward
360                           nnshimbun-meta-charset-content-type-regexp nil t))
361                      (buffer-substring-no-properties (match-beginning 2)
362                                                      (match-end 2)))))))
363       (decode-coding-region
364        (point-min) (point-max)
365        (if charset
366            (let ((mime-charset-coding-system-alist
367                   (append '((euc-jp . euc-japan)
368                             (shift-jis . shift_jis)
369                             (shift_jis . shift_jis)
370                             (sjis . shift_jis)
371                             (x-euc-jp . euc-japan)
372                             (x-shift-jis . shift_jis)
373                             (x-shift_jis . shift_jis)
374                             (x-sjis . shift_jis))
375                           mime-charset-coding-system-alist)))
376              (mime-charset-to-coding-system charset))
377          (let ((default (condition-case nil
378                             (coding-system-category nnshimbun-coding-system)
379                           (error nil)))
380                (candidate (detect-coding-region (point-min) (point-max))))
381            (unless (listp candidate)
382              (setq candidate (list candidate)))
383            (catch 'coding
384              (dolist (coding candidate)
385                (if (eq default (coding-system-category coding))
386                    (throw 'coding coding)))
387              (if (eq (coding-system-category 'binary)
388                      (coding-system-category (car candidate)))
389                  nnshimbun-coding-system
390                (car candidate)))))))
391     (set-buffer-multibyte t)
392     (set-buffer buf)
393     (insert-buffer url-working-buffer)
394     (kill-buffer url-working-buffer)))
395 ))
396
397 (deffoo nnshimbun-request-article (article &optional group server to-buffer)
398   (when (nnshimbun-possibly-change-group group server)
399     (if (stringp article)
400         (setq article (nnshimbun-search-id group article)))
401     (if (integerp article)
402         (nnshimbun-request-article-1 article group server to-buffer)
403       (nnheader-report 'nnml "Couldn't retrieve article: %s" (prin1-to-string article))
404       nil)))
405
406 (defsubst nnshimbun-header-xref (x)
407   (if (and (setq x (mail-header-xref x))
408            (string-match "^Xref: " x))
409       (substring x 6)
410     x))
411
412 (defun nnshimbun-request-article-1 (article &optional group server to-buffer)
413   (if (nnshimbun-backlog
414         (gnus-backlog-request-article
415          group article (or to-buffer nntp-server-buffer)))
416       (cons group article)
417     (let (header contents)
418       (when (setq header (save-excursion
419                            (set-buffer (nnshimbun-open-nov group))
420                            (and (nnheader-find-nov-line article)
421                                 (nnheader-parse-nov))))
422         (let* ((xref (nnshimbun-header-xref header))
423                (x-faces (cdr (or (assoc (or server
424                                             (nnoo-current-server 'nnshimbun))
425                                         nnshimbun-x-face-alist)
426                                  (assoc "default" nnshimbun-x-face-alist))))
427                (x-face (cdr (or (assoc group x-faces)
428                                 (assoc "default" x-faces)))))
429           (save-excursion
430             (set-buffer nnshimbun-buffer)
431             (erase-buffer)
432             (nnshimbun-retrieve-url xref)
433             (nnheader-message 6 "nnshimbun: Make contents...")
434             (goto-char (point-min))
435             (setq contents (funcall nnshimbun-make-contents header x-face))
436             (nnheader-message 6 "nnshimbun: Make contents...done"))))
437       (when contents
438         (save-excursion
439           (set-buffer (or to-buffer nntp-server-buffer))
440           (erase-buffer)
441           (insert contents)
442           (nnshimbun-backlog
443             (gnus-backlog-enter-article group article (current-buffer)))
444           (nnheader-report 'nnshimbun "Article %s retrieved" (mail-header-id header))
445           (cons group (mail-header-number header)))))))
446
447 (deffoo nnshimbun-request-group (group &optional server dont-check)
448   (let ((pathname-coding-system 'binary))
449     (cond
450      ((not (nnshimbun-possibly-change-group group server))
451       (nnheader-report 'nnshimbun "Invalid group (no such directory)"))
452      ((not (file-exists-p nnshimbun-current-directory))
453       (nnheader-report 'nnshimbun "Directory %s does not exist"
454                        nnshimbun-current-directory))
455      ((not (file-directory-p nnshimbun-current-directory))
456       (nnheader-report 'nnshimbun "%s is not a directory" nnshimbun-current-directory))
457      (dont-check
458       (nnheader-report 'nnshimbun "Group %s selected" group)
459       t)
460      (t
461       (let (beg end lines)
462         (save-excursion
463           (set-buffer (nnshimbun-open-nov group))
464           (goto-char (point-min))
465           (setq beg (ignore-errors (read (current-buffer))))
466           (goto-char (point-max))
467           (forward-line -1)
468           (setq end (ignore-errors (read (current-buffer)))
469                 lines (count-lines (point-min) (point-max))))
470         (nnheader-report 'nnshimbunw "Selected group %s" group)
471         (nnheader-insert "211 %d %d %d %s\n"
472                          lines (or beg 0) (or end 0) group))))))
473
474 (deffoo nnshimbun-request-scan (&optional group server)
475   (nnshimbun-possibly-change-group group server)
476   (nnshimbun-generate-nov-database group))
477
478 (deffoo nnshimbun-close-group (group &optional server)
479   (nnshimbun-write-nov group)
480   t)
481
482 (deffoo nnshimbun-request-list (&optional server)
483   (save-excursion
484     (set-buffer nntp-server-buffer)
485     (erase-buffer)
486     (dolist (group nnshimbun-groups)
487       (when (nnshimbun-possibly-change-group group server)
488         (let (beg end)
489           (save-excursion
490             (set-buffer (nnshimbun-open-nov group))
491             (goto-char (point-min))
492             (setq beg (ignore-errors (read (current-buffer))))
493             (goto-char (point-max))
494             (forward-line -1)
495             (setq end (ignore-errors (read (current-buffer)))))
496           (insert (format "%s %d %d n\n" group (or end 0) (or beg 0)))))))
497   t) ; return value
498
499 (eval-and-compile
500   (if (fboundp 'mime-entity-fetch-field)
501       ;; For Semi-Gnus.
502       (defun nnshimbun-insert-header (header)
503         (insert "Subject: " (or (mime-entity-fetch-field header 'Subject) "(none)") "\n"
504                 "From: " (or (mime-entity-fetch-field header 'From) "(nobody)") "\n"
505                 "Date: " (or (mail-header-date header) "") "\n"
506                 "Message-ID: " (or (mail-header-id header) (nnmail-message-id)) "\n")
507         (let ((refs (mail-header-references header)))
508           (and refs
509                (string< "" refs)
510                (insert "References: " refs "\n")))
511         (insert "Lines: " (number-to-string (or (mail-header-lines header) 0)) "\n"
512                 "Xref: " (nnshimbun-header-xref header) "\n"))
513     ;; For pure Gnus.
514     (defun nnshimbun-insert-header (header)
515       (nnheader-insert-header header)
516       (delete-char -1)
517       (insert "Xref: " (nnshimbun-header-xref header) "\n"))))
518
519 (deffoo nnshimbun-retrieve-headers (articles &optional group server fetch-old)
520   (when (nnshimbun-possibly-change-group group server)
521     (if (nnshimbun-retrieve-headers-with-nov articles fetch-old)
522         'nov
523       (save-excursion
524         (set-buffer nntp-server-buffer)
525         (erase-buffer)
526         (let (header)
527           (dolist (art articles)
528             (if (stringp art)
529                 (setq art (nnshimbun-search-id group art)))
530             (if (integerp art)
531                 (when (setq header
532                             (save-excursion
533                               (set-buffer (nnshimbun-open-nov group))
534                               (and (nnheader-find-nov-line art)
535                                    (nnheader-parse-nov))))
536                   (insert (format "220 %d Article retrieved.\n" art))
537                   (nnshimbun-insert-header header)
538                   (insert ".\n")
539                   (delete-region (point) (point-max))))))
540         'header))))
541
542 (defun nnshimbun-retrieve-headers-with-nov (articles &optional fetch-old)
543   (if (or gnus-nov-is-evil nnshimbun-nov-is-evil)
544       nil
545     (let ((nov (expand-file-name nnshimbun-nov-file-name nnshimbun-current-directory)))
546       (when (file-exists-p nov)
547         (save-excursion
548           (set-buffer nntp-server-buffer)
549           (erase-buffer)
550           (nnheader-insert-file-contents nov)
551           (if (and fetch-old (not (numberp fetch-old)))
552               t                         ; Don't remove anything.
553             (nnheader-nov-delete-outside-range
554              (if fetch-old (max 1 (- (car articles) fetch-old))
555                (car articles))
556              (car (last articles)))
557             t))))))
558
559
560
561 ;;; Nov Database Operations
562
563 (defun nnshimbun-generate-nov-database (group)
564   (prog1 (funcall nnshimbun-generate-nov group)
565     (nnshimbun-write-nov group)))
566
567 (defun nnshimbun-generate-nov-for-each-group (group)
568   (nnshimbun-possibly-change-group group)
569   (save-excursion
570     (set-buffer (nnshimbun-open-nov group))
571     (let (i)
572       (goto-char (point-max))
573       (forward-line -1)
574       (setq i (or (ignore-errors (read (current-buffer))) 0))
575       (dolist (header (save-excursion
576                         (set-buffer nnshimbun-buffer)
577                         (erase-buffer)
578                         (nnshimbun-retrieve-url (eval nnshimbun-index-url) t)
579                         (goto-char (point-min))
580                         (funcall nnshimbun-get-headers)))
581         (unless (nnshimbun-search-id group (mail-header-id header))
582           (mail-header-set-number header (setq i (1+ i)))
583           (goto-char (point-max))
584           (nnheader-insert-nov header)
585           (if nnshimbun-pre-fetch-article
586               (nnshimbun-request-article-1 i group nil nnshimbun-buffer)))))))
587
588 (defun nnshimbun-generate-nov-for-all-groups (&rest args)
589   (unless (and nnshimbun-nov-last-check
590                (< (nnshimbun-lapse-seconds nnshimbun-nov-last-check)
591                   nnshimbun-check-interval))
592     (save-excursion
593       (dolist (list (funcall nnshimbun-get-headers))
594         (let ((group (car list)))
595           (nnshimbun-possibly-change-group group)
596           (when (cdr list)
597             (set-buffer (nnshimbun-open-nov group))
598             (let (i)
599               (goto-char (point-max))
600               (forward-line -1)
601               (setq i (or (ignore-errors (read (current-buffer))) 0))
602               (dolist (header (cdr list))
603                 (unless (nnshimbun-search-id group (mail-header-id header))
604                   (mail-header-set-number header (setq i (1+ i)))
605                   (goto-char (point-max))
606                   (nnheader-insert-nov header)
607                   (if nnshimbun-pre-fetch-article
608                       (nnshimbun-request-article-1 i group nil nnshimbun-buffer))))))))
609       (nnshimbun-save-nov)
610       (setq nnshimbun-nov-last-check (current-time)))))
611
612 (defun nnshimbun-search-id (group id &optional nov)
613   (save-excursion
614     (set-buffer (nnshimbun-open-nov group))
615     (goto-char (point-min))
616     (let (found)
617       (while (and (not found)
618                   (search-forward id nil t)) ; We find the ID.
619         ;; And the id is in the fourth field.
620         (if (not (and (search-backward "\t" nil t 4)
621                       (not (search-backward "\t" (gnus-point-at-bol) t))))
622             (forward-line 1)
623           (forward-line 0)
624           (setq found t)))
625       (unless found
626         (goto-char (point-min))
627         (when (search-forward (concat "X-Nnshimbun-Id: " id) nil t)
628           (forward-line 0)
629           (setq found t)))
630       (if found
631           (if nov
632               (nnheader-parse-nov)
633             ;; We return the article number.
634             (ignore-errors (read (current-buffer))))))))
635
636 (defun nnshimbun-nov-fix-header (group header args)
637   (save-excursion
638     (set-buffer (nnshimbun-open-nov group))
639     (when (nnheader-find-nov-line (mail-header-number header))
640       (dolist (arg args)
641         (if (eq (car arg) 'id)
642             (let ((extra (mail-header-extra header)))
643               (unless (assq 'X-Nnshimbun-Id extra)
644                 (mail-header-set-extra
645                  header
646                  (cons (cons 'X-Nnshimbun-Id (mail-header-id header))
647                        extra)))
648               (mail-header-set-id header (cdr arg)))
649           (let ((func (intern (concat "mail-header-set-" (symbol-name (car arg))))))
650             (if (cdr arg) (eval (list func header (cdr arg)))))))
651       (mail-header-set-xref header (nnshimbun-header-xref header))
652       (delete-region (point) (progn (forward-line 1) (point)))
653       (nnheader-insert-nov header))))
654
655 (defun nnshimbun-open-nov (group)
656   (let ((buffer (cdr (assoc group nnshimbun-nov-buffer-alist))))
657     (if (buffer-live-p buffer)
658         buffer
659       (setq buffer (gnus-get-buffer-create
660                     (format " *nnshimbun overview %s %s*"
661                             (nnoo-current-server 'nnshimbun) group)))
662       (save-excursion
663         (set-buffer buffer)
664         (set (make-local-variable 'nnshimbun-nov-buffer-file-name)
665              (expand-file-name
666               nnshimbun-nov-file-name
667               (nnmail-group-pathname group nnshimbun-server-directory)))
668         (erase-buffer)
669         (when (file-exists-p nnshimbun-nov-buffer-file-name)
670           (nnheader-insert-file-contents nnshimbun-nov-buffer-file-name))
671         (set-buffer-modified-p nil))
672       (push (cons group buffer) nnshimbun-nov-buffer-alist)
673       buffer)))
674
675 (defun nnshimbun-write-nov (group)
676   (let ((buffer (cdr (assoc group nnshimbun-nov-buffer-alist))))
677     (when (buffer-live-p buffer)
678       (save-excursion
679         (set-buffer buffer)
680         (buffer-modified-p)
681         (nnmail-write-region 1 (point-max) nnshimbun-nov-buffer-file-name
682                              nil 'nomesg)))))
683
684 (defun nnshimbun-save-nov ()
685   (save-excursion
686     (while nnshimbun-nov-buffer-alist
687       (when (buffer-name (cdar nnshimbun-nov-buffer-alist))
688         (set-buffer (cdar nnshimbun-nov-buffer-alist))
689         (when (buffer-modified-p)
690           (nnmail-write-region 1 (point-max) nnshimbun-nov-buffer-file-name
691                                nil 'nomesg))
692         (set-buffer-modified-p nil)
693         (kill-buffer (current-buffer)))
694       (setq nnshimbun-nov-buffer-alist (cdr nnshimbun-nov-buffer-alist)))))
695
696
697
698 ;;; Server Initialize
699 (defun nnshimbun-possibly-change-group (group &optional server)
700   (when server
701     (unless (nnshimbun-server-opened server)
702       (nnshimbun-open-server server)))
703   (unless (gnus-buffer-live-p nnshimbun-buffer)
704     (setq nnshimbun-buffer
705           (save-excursion
706             (nnheader-set-temp-buffer
707              (format " *nnshimbun %s*" (nnoo-current-server 'nnshimbun))))))
708   (if (not group)
709       t
710     (let ((pathname (nnmail-group-pathname group nnshimbun-server-directory))
711           (pathname-coding-system 'binary))
712       (unless (equal pathname nnshimbun-current-directory)
713         (setq nnshimbun-current-directory pathname
714               nnshimbun-current-group group))
715       (unless (file-exists-p nnshimbun-current-directory)
716         (ignore-errors (make-directory nnshimbun-current-directory t)))
717       (cond
718        ((not (file-exists-p nnshimbun-current-directory))
719         (nnheader-report 'nnshimbun "Couldn't create directory: %s" nnshimbun-current-directory))
720        ((not (file-directory-p (file-truename nnshimbun-current-directory)))
721         (nnheader-report 'nnshimbun "Not a directory: %s" nnshimbun-current-directory))
722        (t t)))))
723
724
725
726 ;;; Misc Functions
727
728 (eval-and-compile
729   (if (fboundp 'eword-encode-string)
730       ;; For Semi-Gnus.
731       (defun nnshimbun-mime-encode-string (string)
732         (mapconcat
733          #'identity
734          (split-string (eword-encode-string (nnweb-decode-entities-string string)) "\n")
735          ""))
736     ;; For pure Gnus.
737     (defun nnshimbun-mime-encode-string (string)
738       (mapconcat
739        #'identity
740        (split-string
741         (with-temp-buffer
742           (insert (nnweb-decode-entities-string string))
743           (rfc2047-encode-region (point-min) (point-max))
744           (buffer-substring (point-min) (point-max)))
745         "\n")
746        ""))))
747
748 (defun nnshimbun-lapse-seconds (time)
749   (let ((now (current-time)))
750     (+ (* (- (car now) (car time)) 65536)
751        (- (nth 1 now) (nth 1 time)))))
752
753 (defun nnshimbun-make-date-string (year month day &optional time)
754   (format "%02d %s %04d %s +0900"
755           day
756           (aref [nil "Jan" "Feb" "Mar" "Apr" "May" "Jun"
757                      "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"]
758                 month)
759           (cond ((< year 69)
760                  (+ year 2000))
761                 ((< year 100)
762                  (+ year 1900))
763                 ((< year 1000)  ; possible 3-digit years.
764                  (+ year 1900))
765                 (t year))
766           (or time "00:00")))
767
768 (if (fboundp 'regexp-opt)
769     (defalias 'nnshimbun-regexp-opt 'regexp-opt)
770   (defun nnshimbun-regexp-opt (strings &optional paren)
771     "Return a regexp to match a string in STRINGS.
772 Each string should be unique in STRINGS and should not contain any regexps,
773 quoted or not.  If optional PAREN is non-nil, ensure that the returned regexp
774 is enclosed by at least one regexp grouping construct."
775     (let ((open-paren (if paren "\\(" "")) (close-paren (if paren "\\)" "")))
776       (concat open-paren (mapconcat 'regexp-quote strings "\\|") close-paren))))
777
778
779 ;; Fast fill-region function
780
781 (defvar nnshimbun-fill-column (min 80 (- (frame-width) 4)))
782
783 (defconst nnshimbun-kinsoku-bol-list
784   (append "!)-_~}]:;',.?\e$B!"!#!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2!3!4!5!6!7\e(B\
785 \e$B!8!9!:!;!<!=!>!?!@!A!B!C!D!E!G!I!K!M!O!Q!S!U!W!Y![!k!l!m!n$!$#$%$'$)\e(B\
786 \e$B$C$c$e$g$n%!%#%%%'%)%C%c%e%g%n%u%v\e(B" nil))
787
788 (defconst nnshimbun-kinsoku-eol-list
789   (append "({[`\e$B!F!H!J!L!N!P!R!T!V!X!Z!k!l!m!x\e(B" nil))
790
791 (defun nnshimbun-fill-line ()
792   (forward-line 0)
793   (let ((top (point)) chr)
794     (while (if (>= (move-to-column nnshimbun-fill-column)
795                    nnshimbun-fill-column)
796                (not (progn
797                       (if (memq (preceding-char) nnshimbun-kinsoku-eol-list)
798                           (progn
799                             (backward-char)
800                             (while (memq (preceding-char) nnshimbun-kinsoku-eol-list)
801                               (backward-char))
802                             (insert "\n"))
803                         (while (memq (setq chr (following-char)) nnshimbun-kinsoku-bol-list)
804                           (forward-char))
805                         (if (looking-at "\\s-+")
806                             (or (eolp) (delete-region (point) (match-end 0)))
807                           (or (> (char-width chr) 1)
808                               (re-search-backward "\\<" top t)
809                               (end-of-line)))
810                         (or (eolp) (insert "\n"))))))
811       (setq top (point))))
812   (forward-line 1)
813   (not (eobp)))
814
815 (defsubst nnshimbun-shallow-rendering ()
816   (goto-char (point-min))
817   (while (search-forward "<p>" nil t)
818     (insert "\n\n"))
819   (goto-char (point-min))
820   (while (search-forward "<br>" nil t)
821     (insert "\n"))
822   (nnweb-remove-markup)
823   (nnweb-decode-entities)
824   (goto-char (point-min))
825   (while (nnshimbun-fill-line))
826   (goto-char (point-min))
827   (when (skip-chars-forward "\n")
828     (delete-region (point-min) (point)))
829   (while (search-forward "\n\n" nil t)
830     (let ((p (point)))
831       (when (skip-chars-forward "\n")
832         (delete-region p (point)))))
833   (goto-char (point-max))
834   (when (skip-chars-backward "\n")
835     (delete-region (point) (point-max)))
836   (insert "\n"))
837
838 (defun nnshimbun-make-text-or-html-contents (header &optional x-face)
839   (let ((case-fold-search t) (html t) (start))
840     (when (and (re-search-forward nnshimbun-contents-start nil t)
841                (setq start (point))
842                (re-search-forward nnshimbun-contents-end nil t))
843       (delete-region (match-beginning 0) (point-max))
844       (delete-region (point-min) start)
845       (nnshimbun-shallow-rendering)
846       (setq html nil))
847     (goto-char (point-min))
848     (nnshimbun-insert-header header)
849     (insert "Content-Type: " (if html "text/html" "text/plain")
850             "; charset=ISO-2022-JP\nMIME-Version: 1.0\n")
851     (when x-face
852       (insert x-face)
853       (unless (bolp)
854         (insert "\n")))
855     (insert "\n")
856     (encode-coding-string (buffer-string)
857                           (mime-charset-to-coding-system "ISO-2022-JP"))))
858
859 (defun nnshimbun-make-html-contents (header &optional x-face)
860   (let (start)
861     (when (and (re-search-forward nnshimbun-contents-start nil t)
862                (setq start (point))
863                (re-search-forward nnshimbun-contents-end nil t))
864       (delete-region (match-beginning 0) (point-max))
865       (delete-region (point-min) start))
866     (goto-char (point-min))
867     (nnshimbun-insert-header header)
868     (insert "Content-Type: text/html; charset=ISO-2022-JP\n"
869             "MIME-Version: 1.0\n")
870     (when x-face
871       (insert x-face)
872       (unless (bolp)
873         (insert "\n")))
874     (insert "\n")
875     (encode-coding-string (buffer-string)
876                           (mime-charset-to-coding-system "ISO-2022-JP"))))
877
878 (defun nnshimbun-make-mhonarc-contents (header &rest args)
879   (require 'mml)
880   (if (search-forward "<!--X-Head-End-->" nil t)
881       (progn
882         (forward-line 0)
883         ;; Processing headers.
884         (save-restriction
885           (narrow-to-region (point-min) (point))
886           (nnweb-decode-entities)
887           (goto-char (point-min))
888           (while (search-forward "\n<!--X-" nil t)
889             (replace-match "\n"))
890           (goto-char (point-min))
891           (while (search-forward " -->\n" nil t)
892             (replace-match "\n"))
893           (goto-char (point-min))
894           (while (search-forward "\t" nil t)
895             (replace-match " "))
896           (goto-char (point-min))
897           (let (buf refs)
898             (while (not (eobp))
899               (cond
900                ((looking-at "<!--")
901                 (delete-region (point) (progn (forward-line 1) (point))))
902                ((looking-at "Subject: +")
903                 (push (cons 'subject (nnheader-header-value)) buf)
904                 (delete-region (point) (progn (forward-line 1) (point))))
905                ((looking-at "From: +")
906                 (push (cons 'from (nnheader-header-value)) buf)
907                 (delete-region (point) (progn (forward-line 1) (point))))
908                ((looking-at "Date: +")
909                 (push (cons 'date (nnheader-header-value)) buf)
910                 (delete-region (point) (progn (forward-line 1) (point))))
911                ((looking-at "Message-Id: +")
912                 (push (cons 'id (concat "<" (nnheader-header-value) ">")) buf)
913                 (delete-region (point) (progn (forward-line 1) (point))))
914                ((looking-at "Reference: +")
915                 (push (concat "<" (nnheader-header-value) ">") refs)
916                 (delete-region (point) (progn (forward-line 1) (point))))
917                ((looking-at "Content-Type: ")
918                 (unless (search-forward "charset" (gnus-point-at-eol) t)
919                   (end-of-line)
920                   (insert "; charset=ISO-2022-JP"))
921                 (forward-line 1))
922                (t (forward-line 1))))
923             (insert "MIME-Version: 1.0\n")
924             (if refs (push (cons 'references (mapconcat 'identity refs " ")) buf))
925             (nnshimbun-nov-fix-header nnshimbun-current-group header buf)
926             (goto-char (point-min))
927             (nnshimbun-insert-header header))
928           (goto-char (point-max)))
929         ;; Processing body.
930         (save-restriction
931           (narrow-to-region (point) (point-max))
932           (delete-region
933            (point)
934            (progn
935              (search-forward "\n<!--X-Body-of-Message-->\n" nil t)
936              (point)))
937           (when (search-forward "\n<!--X-Body-of-Message-End-->\n" nil t)
938             (forward-line -1)
939             (delete-region (point) (point-max)))
940           (nnweb-remove-markup)
941           (nnweb-decode-entities)))
942     (goto-char (point-min))
943     (nnshimbun-insert-header header)
944     (insert "Content-Type: text/html; charset=ISO-2022-JP\nMIME-Version: 1.0\n\n"))
945   (encode-coding-string (buffer-string)
946                         (mime-charset-to-coding-system "ISO-2022-JP")))
947
948 (defun nnshimbun-make-fml-contents (header &rest args)
949   (require 'mml)
950   (catch 'stop
951     (if (search-forward "<SPAN CLASS=mailheaders>" nil t)
952         (delete-region (point-min) (point))
953       (throw 'stop nil))
954     (if (search-forward "</PRE>")
955         (progn
956           (beginning-of-line)
957           (delete-region (point) (point-max)))
958       (throw 'stop nil))
959     (if (search-backward "</SPAN>")
960         (progn
961           (beginning-of-line)
962           (kill-line))
963       (throw 'stop nil))
964     (save-restriction
965       (narrow-to-region (point-min) (point))
966       (subst-char-in-region (point-min) (point-max) ?\t ?  t)
967       (nnweb-decode-entities)
968       (goto-char (point-min))
969       (let (buf field value start value-beg end)
970         (while (and (setq start (point))
971                     (re-search-forward "<SPAN CLASS=\\(.*\\)>\\(.*\\)</SPAN>:"
972                                        nil t)
973                     (setq field (match-string 2))
974                     (re-search-forward 
975                      (concat "<SPAN CLASS=" (match-string 1) "-value>") nil t)
976                     (setq value-beg (point))
977                     (search-forward "</SPAN>" nil t)
978                     (setq end (point)))
979           (setq value (buffer-substring value-beg
980                                         (progn (search-backward "</SPAN>")
981                                                (point))))
982           (delete-region start end)
983           (cond ((string= field "Date")
984                  (push (cons 'date value) buf))
985                 ((string= field "From")
986                  (push (cons 'from value) buf))
987                 ((string= field "Subject")
988                  (push (cons 'subject value) buf))
989                 ((string= field "Message-Id")
990                  (push (cons 'id value) buf))
991                 ((string= field "References")
992                  (push (cons 'references value) buf))
993                 (t
994                  (insert (concat field ": " value "\n")))))
995         (nnshimbun-nov-fix-header nnshimbun-current-group header buf)
996         (goto-char (point-min))
997         (nnshimbun-insert-header header))
998       (goto-char (point-max)))
999     ;; Processing body.
1000     (save-restriction
1001       (narrow-to-region (point) (point-max))
1002       (nnweb-remove-markup)
1003       (nnweb-decode-entities)))
1004   (encode-coding-string (buffer-string)
1005                         (mime-charset-to-coding-system "ISO-2022-JP")))
1006
1007 ;;; www.asahi.com
1008
1009 (defun nnshimbun-asahi-get-headers ()
1010   (when (search-forward "\n<!-- Start of past -->\n" nil t)
1011     (delete-region (point-min) (point))
1012     (when (search-forward "\n<!-- End of past -->\n" nil t)
1013       (forward-line -1)
1014       (delete-region (point) (point-max))
1015       (goto-char (point-min))
1016       (let (headers)
1017         (while (re-search-forward
1018                 "^<a href=\"\\(\\([0-9][0-9][0-9][0-9]\\)/\\([A-z]*[0-9]*\\)\\.html\\)\">[ \t\r\f\n]*"
1019                 nil t)
1020           (let ((id (format "<%s%s%%%s>"
1021                             (match-string 2)
1022                             (match-string 3)
1023                             nnshimbun-current-group))
1024                 (url (match-string 1)))
1025             (push (make-full-mail-header
1026                    0
1027                    (nnshimbun-mime-encode-string
1028                     (mapconcat 'identity
1029                                (split-string
1030                                 (buffer-substring
1031                                  (match-end 0)
1032                                  (progn (search-forward "<br>" nil t) (point)))
1033                                 "\\(<[^>]+>\\|\r\\)")
1034                                ""))
1035                    nnshimbun-from-address
1036                    "" id "" 0 0
1037                    (format "%s%s/update/%s" nnshimbun-url nnshimbun-current-group url))
1038                   headers)))
1039         (setq headers (nreverse headers))
1040         (let ((i 0))
1041           (while (and (nth i headers)
1042                       (re-search-forward
1043                        "^(\\([0-9][0-9]\\)/\\([0-9][0-9]\\) \\([0-9][0-9]:[0-9][0-9]\\))"
1044                        nil t))
1045             (let ((month (string-to-number (match-string 1)))
1046                   (date (decode-time (current-time))))
1047               (mail-header-set-date
1048                (nth i headers)
1049                (nnshimbun-make-date-string
1050                 (if (and (eq 12 month) (eq 1 (nth 4 date)))
1051                     (1- (nth 5 date))
1052                   (nth 5 date))
1053                 month
1054                 (string-to-number (match-string 2))
1055                 (match-string 3))))
1056             (setq i (1+ i))))
1057         (nreverse headers)))))
1058
1059
1060
1061 ;;; www.sponichi.co.jp
1062
1063 (defun nnshimbun-sponichi-get-headers ()
1064   (when (search-forward "\e$B%K%e!<%9%$%s%G%C%/%9\e(B" nil t)
1065     (delete-region (point-min) (point))
1066     (when (search-forward "\e$B%"%I%?%0\e(B" nil t)
1067       (forward-line 2)
1068       (delete-region (point) (point-max))
1069       (goto-char (point-min))
1070       (let ((case-fold-search t) headers)
1071         (while (re-search-forward
1072                 "^<a href=\"/\\(\\([A-z]*\\)/kiji/\\([0-9][0-9][0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([0-9][0-9]\\)\\.html\\)\">"
1073                 nil t)
1074           (let ((url (match-string 1))
1075                 (id (format "<%s%s%s%s%%%s>"
1076                             (match-string 3)
1077                             (match-string 4)
1078                             (match-string 5)
1079                             (match-string 6)
1080                             nnshimbun-current-group))
1081                 (date (nnshimbun-make-date-string
1082                        (string-to-number (match-string 3))
1083                        (string-to-number (match-string 4))
1084                        (string-to-number (match-string 5)))))
1085             (push (make-full-mail-header
1086                    0
1087                    (nnshimbun-mime-encode-string
1088                     (mapconcat 'identity
1089                                (split-string
1090                                 (buffer-substring
1091                                  (match-end 0)
1092                                  (progn (search-forward "<br>" nil t) (point)))
1093                                 "<[^>]+>")
1094                                ""))
1095                    nnshimbun-from-address
1096                    date id "" 0 0 (concat nnshimbun-url url))
1097                   headers)))
1098         headers))))
1099
1100
1101
1102 ;;; CNET Japan
1103
1104 (defun nnshimbun-cnet-get-headers ()
1105   (let ((case-fold-search t) headers)
1106     (while (search-forward "\n<!--*****\e$B8+=P$7\e(B*****-->\n" nil t)
1107       (let ((subject (buffer-substring (point) (gnus-point-at-eol)))
1108             (point (point)))
1109         (forward-line -2)
1110         (when (looking-at "<a href=\"/\\(News/\\([0-9][0-9][0-9][0-9]\\)/Item/\\([0-9][0-9]\\([0-9][0-9]\\)\\([0-9][0-9]\\)-[0-9]+\\).html\\)\">")
1111           (let ((url (match-string 1))
1112                 (id  (format "<%s%s%%%s>"
1113                              (match-string 2)
1114                              (match-string 3)
1115                              nnshimbun-current-group))
1116                 (date (nnshimbun-make-date-string
1117                        (string-to-number (match-string 2))
1118                        (string-to-number (match-string 4))
1119                        (string-to-number (match-string 5)))))
1120             (push (make-full-mail-header
1121                    0
1122                    (nnshimbun-mime-encode-string subject)
1123                    nnshimbun-from-address
1124                    date id "" 0 0 (concat nnshimbun-url url))
1125                   headers)))
1126         (goto-char point)))
1127     headers))
1128
1129
1130
1131 ;;; Wired
1132
1133 (defun nnshimbun-wired-get-all-headers ()
1134   (save-excursion
1135     (set-buffer nnshimbun-buffer)
1136     (let ((group-header-alist (mapcar (lambda (g) (cons g nil)) nnshimbun-groups))
1137           (case-fold-search t)
1138           (regexp (format
1139                    "<a href=\"\\(%s\\|/\\)\\(news/news/\\(%s\\)/story/\\(\\([0-9][0-9][0-9][0-9]\\)\\([0-9][0-9]\\)\\([0-9][0-9]\\)[0-9]+\\)\\.html\\)\"><b>"
1140                    (regexp-quote nnshimbun-url)
1141                    (nnshimbun-regexp-opt nnshimbun-groups))))
1142       (dolist (xover (list (concat nnshimbun-url "news/news/index.html")
1143                            (concat nnshimbun-url "news/news/last_seven.html")))
1144         (erase-buffer)
1145         (nnshimbun-retrieve-url xover t)
1146         (goto-char (point-min))
1147         (while (re-search-forward regexp nil t)
1148           (let* ((url   (concat nnshimbun-url (match-string 2)))
1149                  (group (downcase (match-string 3)))
1150                  (id    (format "<%s%%%s>" (match-string 4) group))
1151                  (date  (nnshimbun-make-date-string
1152                          (string-to-number (match-string 5))
1153                          (string-to-number (match-string 6))
1154                          (string-to-number (match-string 7))))
1155                  (header (make-full-mail-header
1156                           0
1157                           (nnshimbun-mime-encode-string
1158                            (mapconcat 'identity
1159                                       (split-string
1160                                        (buffer-substring
1161                                         (match-end 0)
1162                                         (progn (search-forward "</b>" nil t) (point)))
1163                                        "<[^>]+>")
1164                                       ""))
1165                           nnshimbun-from-address
1166                           date id "" 0 0 url))
1167                  (x (assoc group group-header-alist)))
1168             (setcdr x (cons header (cdr x))))))
1169       group-header-alist)))
1170
1171
1172
1173 ;;; www.yomiuri.co.jp
1174
1175 (defun nnshimbun-yomiuri-get-all-headers ()
1176   (save-excursion
1177     (set-buffer nnshimbun-buffer)
1178     (erase-buffer)
1179     (nnshimbun-retrieve-url (eval nnshimbun-index-url) t)
1180     (let ((case-fold-search t)
1181           (group-header-alist (mapcar (lambda (g) (cons g nil)) nnshimbun-groups)))
1182       (dolist (group nnshimbun-groups)
1183         (let (start)
1184           (goto-char (point-min))
1185           (when (and (search-forward (format "\n<!-- /news/%s=start -->\n" group) nil t)
1186                      (setq start (point))
1187                      (search-forward (format "\n<!-- /news/%s=end -->\n" group) nil t))
1188             (forward-line -1)
1189             (save-restriction
1190               (narrow-to-region start (point))
1191               (goto-char start)
1192               (while (re-search-forward
1193                       "<a href=\"/\\([0-9]+\\)/\\(\\(\\([0-9][0-9][0-9][0-9]\\)\\([0-9][0-9]\\)\\([0-9][0-9]\\)[A-z0-9]+\\)\\.htm\\)\"[^>]*>"
1194                       nil t)
1195                 (let ((url   (concat (match-string 1) "a/" (match-string 2)))
1196                       (id    (format "<%s%s%%%s>"
1197                                      (match-string 1)
1198                                      (match-string 3)
1199                                      group))
1200                       (year  (string-to-number (match-string 4)))
1201                       (month (string-to-number (match-string 5)))
1202                       (day   (string-to-number (match-string 6)))
1203                       (subject (mapconcat
1204                                 'identity
1205                                 (split-string
1206                                  (buffer-substring
1207                                   (match-end 0)
1208                                   (progn (search-forward "<br>" nil t) (point)))
1209                                  "<[^>]+>")
1210                                 ""))
1211                       date x)
1212                   (when (string-match "^\e$B"!\e(B" subject)
1213                     (setq subject (substring subject (match-end 0))))
1214                   (if (string-match "(\\([0-9][0-9]:[0-9][0-9]\\))$" subject)
1215                       (setq date (nnshimbun-make-date-string
1216                                   year month day (match-string 1 subject))
1217                             subject (substring subject 0 (match-beginning 0)))
1218                     (setq date (nnshimbun-make-date-string year month day)))
1219                   (setcdr (setq x (assoc group group-header-alist))
1220                           (cons (make-full-mail-header
1221                                  0
1222                                  (nnshimbun-mime-encode-string subject)
1223                                  nnshimbun-from-address
1224                                  date id "" 0 0 (concat nnshimbun-url url))
1225                                 (cdr x)))))))))
1226       group-header-alist)))
1227
1228
1229
1230 ;;; Zdnet Japan
1231
1232 (defun nnshimbun-zdnet-get-headers ()
1233   (let ((case-fold-search t) headers)
1234     (goto-char (point-min))
1235     (let (start)
1236       (while (and (search-forward "<!--" nil t)
1237                   (setq start (- (point) 4))
1238                   (search-forward "-->" nil t))
1239         (delete-region start (point))))
1240     (goto-char (point-min))
1241     (while (re-search-forward
1242             "<a href=\"\\(/news/\\)?\\(\\([0-9][0-9]\\)\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([^\\.]+\\).html\\)\"><font size=\"4\"><strong>"
1243             nil t)
1244       (let ((year  (+ 2000 (string-to-number (match-string 3))))
1245             (month (string-to-number (match-string 4)))
1246             (day   (string-to-number (match-string 5)))
1247             (id    (format "<%s%s%s%s%%%s>"
1248                            (match-string 3)
1249                            (match-string 4)
1250                            (match-string 5)
1251                            (match-string 6)
1252                            nnshimbun-current-group))
1253             (url (match-string 2)))
1254         (push (make-full-mail-header
1255                0
1256                (nnshimbun-mime-encode-string
1257                 (mapconcat 'identity
1258                            (split-string
1259                             (buffer-substring
1260                              (match-end 0)
1261                              (progn (search-forward "</a>" nil t) (point)))
1262                             "<[^>]+>")
1263                            ""))
1264                nnshimbun-from-address
1265                (nnshimbun-make-date-string year month day)
1266                id  "" 0 0 (concat nnshimbun-url url))
1267               headers)))
1268     (nreverse headers)))
1269
1270
1271
1272 ;;; MLs on www.mew.org
1273
1274 (defmacro nnshimbun-mew-concat-url (url)
1275   `(concat nnshimbun-url
1276            (nth 1 (assoc nnshimbun-current-group nnshimbun-mew-groups))
1277            "/"
1278            ,url))
1279
1280 (defmacro nnshimbun-mew-reverse-order-p ()
1281   `(nth 2 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
1282
1283 (defmacro nnshimbun-mew-spew-p ()
1284   `(nth 3 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
1285
1286 (defsubst nnshimbun-mew-retrieve-xover (aux)
1287   (erase-buffer)
1288   (nnshimbun-retrieve-url
1289    (nnshimbun-mew-concat-url (if (= aux 1) "index.html" (format "mail%d.html" aux)))
1290    t))
1291
1292 (defconst nnshimbun-mew-regexp "<A[^>]*HREF=\"\\(msg\\([0-9]+\\).html\\)\">\\([^<]+\\)<")
1293
1294 (defmacro nnshimbun-mew-extract-header-values ()
1295   `(progn
1296      (setq url (nnshimbun-mew-concat-url (match-string 1))
1297            id (format "<%05d%%%s>"
1298                       (1- (string-to-number (match-string 2)))
1299                       nnshimbun-current-group)
1300            subject (match-string 3))
1301      (forward-line 1)
1302      (if (nnshimbun-search-id nnshimbun-current-group id)
1303          (throw 'stop headers)
1304        (push (make-full-mail-header
1305               0
1306               (nnshimbun-mime-encode-string subject)
1307               (if (looking-at "<EM>\\([^<]+\\)<")
1308                   (nnshimbun-mime-encode-string (match-string 1))
1309                 "")
1310               "" id "" 0 0 url)
1311              headers))))
1312
1313 (eval-and-compile
1314   (if (fboundp 'mime-entity-fetch-field)
1315       ;; For Semi-Gnus.
1316       (defmacro nnshimbun-mew-mail-header-subject (header)
1317         `(mime-entity-fetch-field ,header 'Subject))
1318     ;; For pure Gnus.
1319     (defalias 'nnshimbun-mew-mail-header-subject 'mail-header-subject)))
1320
1321 (defun nnshimbun-mew-get-headers ()
1322   (if (nnshimbun-mew-spew-p)
1323       (let ((headers (nnshimbun-mew-get-headers-1)))
1324         (erase-buffer)
1325         (insert-buffer-substring (nnshimbun-open-nov nnshimbun-current-group))
1326         (delq nil
1327               (mapcar
1328                (lambda (header)
1329                  (goto-char (point-min))
1330                  (let ((subject (nnshimbun-mew-mail-header-subject header))
1331                        (found))
1332                    (while (and (not found)
1333                                (search-forward subject nil t))
1334                      (if (not (and (search-backward "\t" nil t)
1335                                    (not (search-backward "\t" (gnus-point-at-bol) t))))
1336                          (forward-line 1)
1337                        (setq found t)))
1338                    (if found
1339                        nil
1340                      (goto-char (point-max))
1341                      (nnheader-insert-nov header)
1342                      header)))
1343                headers)))
1344     (nnshimbun-mew-get-headers-1)))
1345
1346 (defun nnshimbun-mew-get-headers-1 ()
1347   (let (headers)
1348     (when (re-search-forward
1349            "<A[^>]*HREF=\"mail\\([0-9]+\\)\\.html\">\\[?Last Page\\]?</A>" nil t)
1350       (let ((limit (string-to-number (match-string 1))))
1351         (catch 'stop
1352           (if (nnshimbun-mew-reverse-order-p)
1353               (let ((aux 1))
1354                 (while (let (id url subject)
1355                          (while (re-search-forward nnshimbun-mew-regexp nil t)
1356                            (nnshimbun-mew-extract-header-values))
1357                          (< aux limit))
1358                   (nnshimbun-mew-retrieve-xover (setq aux (1+ aux)))))
1359             (while (> limit 0)
1360               (nnshimbun-mew-retrieve-xover limit)
1361               (setq limit (1- limit))
1362               (let (id url subject)
1363                 (goto-char (point-max))
1364                 (while (re-search-backward nnshimbun-mew-regexp nil t)
1365                   (nnshimbun-mew-extract-header-values)
1366                   (forward-line -2)))))
1367           headers)))))
1368
1369
1370
1371 ;;; MLs on www.xemacs.org
1372
1373 (defmacro nnshimbun-xemacs-concat-url (url)
1374   `(concat nnshimbun-url nnshimbun-current-group "/" ,url))
1375
1376 (defun nnshimbun-xemacs-get-headers ()
1377   (let (headers auxs aux)
1378     (catch 'stop
1379       (while (re-search-forward
1380               (concat "<A HREF=\"/" nnshimbun-current-group
1381                       "/\\([12][0-9][0-9][0-9][0-1][0-9]\\)/\">\\[Index\\]")
1382               nil t)
1383         (setq auxs (append auxs (list (match-string 1)))))
1384       (while auxs
1385         (erase-buffer)
1386         (nnshimbun-retrieve-url
1387          (nnshimbun-xemacs-concat-url (concat (setq aux (car auxs)) "/")))
1388         (let (id url subject)
1389           (goto-char (point-max))
1390           (while (re-search-backward
1391                   "<A[^>]*HREF=\"\\(msg\\([0-9]+\\).html\\)\">\\([^<]+\\)<"
1392                   nil t)
1393             (setq url (nnshimbun-xemacs-concat-url
1394                        (concat aux "/" (match-string 1)))
1395                   id (format "<%s%05d%%%s>"
1396                              aux
1397                              (string-to-number (match-string 2))
1398                              nnshimbun-current-group)
1399                   subject (match-string 3))
1400             (forward-line 1)
1401             (if (nnshimbun-search-id nnshimbun-current-group id)
1402                 (throw 'stop headers)
1403               (push (make-full-mail-header
1404                      0
1405                      (nnshimbun-mime-encode-string subject)
1406                      (if (looking-at "<td><em>\\([^<]+\\)<")
1407                          (match-string 1)
1408                        "")
1409                      "" id "" 0 0 url)
1410                     headers))
1411             (message "%s" id)
1412             (forward-line -2)))
1413         (setq auxs (cdr auxs))))
1414     headers))
1415
1416 ;;; MLs on www.jp.netbsd.org
1417
1418 (defun nnshimbun-netbsd-get-headers ()
1419   (let ((case-fold-search t) headers months)
1420     (goto-char (point-min))
1421     (while (re-search-forward "<A HREF=\"\\([0-9]+\\)/\\(threads.html\\)?\">" nil t)
1422       (push (match-string 1) months))
1423     (setq months (nreverse months))
1424     (catch 'exit
1425       (dolist (month months)
1426         (erase-buffer)
1427         (nnshimbun-retrieve-url
1428          (format "%s%s/%s/maillist.html" nnshimbun-url nnshimbun-current-group month)
1429          t)
1430         (let (id url subject)
1431           (while (re-search-forward
1432                   "<A[^>]*HREF=\"\\(msg\\([0-9]+\\)\\.html\\)\">\\([^<]+\\)</A>"
1433                   nil t)
1434             (setq url (format "%s%s/%s/%s"
1435                               nnshimbun-url
1436                               nnshimbun-current-group
1437                               month
1438                               (match-string 1))
1439                   id (format "<%s%05d%%%s>"
1440                              month
1441                              (string-to-number (match-string 2))
1442                              nnshimbun-current-group)
1443                   subject (match-string 3))
1444             (if (nnshimbun-search-id nnshimbun-current-group id)
1445                 (throw 'exit headers)
1446               (push (make-full-mail-header
1447                      0
1448                      (nnshimbun-mime-encode-string subject)
1449                      (if (looking-at "</STRONG> *<EM>\\([^<]+\\)<")
1450                          (nnshimbun-mime-encode-string (match-string 1))
1451                        "")
1452                      "" id "" 0 0 url)
1453                     headers)))))
1454       headers)))
1455
1456 ;;; MLs using fml
1457 (defun nnshimbun-fml-get-headers ()
1458   (let (headers auxs aux)
1459     (catch 'stop
1460       (while (re-search-forward "<a href=\"\\([0-9]+\\(\\.week\\|\\.month\\)?\\)/index.html\">" nil t)
1461         (setq auxs (append auxs (list (match-string 1)))))
1462       (while auxs
1463         (erase-buffer)
1464         (nnshimbun-retrieve-url
1465          (concat nnshimbun-url (setq aux (car auxs)) "/"))
1466         (subst-char-in-region (point-min) (point-max) ?\t ?  t)
1467         (let (id url date subject from)
1468           (goto-char (point-min))
1469           (while (re-search-forward
1470                   "<LI><A HREF=\"\\([0-9]+\\.html\\)\">Article .*</A> <DIV><SPAN CLASS=article>Article <SPAN CLASS=article-value>\\([0-9]+\\)</SPAN></SPAN> at <SPAN CLASS=Date-value>\\([^<]*\\)</SPAN> <SPAN CLASS=Subject>Subject: <SPAN CLASS=Subject-value>\\([^<]*\\)</SPAN></SPAN></DIV><DIV><SPAN CLASS=From>From: <SPAN CLASS=From-value>\\([^<]*\\)</SPAN></SPAN></DIV>"
1471                   nil t)
1472             (setq url (concat nnshimbun-url aux "/" (match-string 1))
1473                   id (format "<%s%05d%%%s>"
1474                              aux
1475                              (string-to-number (match-string 2))
1476                              nnshimbun-current-group)
1477                   date (match-string 3)
1478                   subject (match-string 4)
1479                   from (match-string 5))
1480             (forward-line 1)
1481             (if (nnshimbun-search-id nnshimbun-current-group id)
1482                 (throw 'stop headers)
1483               (push (make-full-mail-header
1484                      0
1485                      (nnshimbun-mime-encode-string subject)
1486                      from date id "" 0 0 url)
1487                     headers))
1488             ;;(message "%s" id)
1489             ))
1490         (setq auxs (cdr auxs))))
1491     headers))
1492
1493 (provide 'nnshimbun)
1494 ;;; nnshimbun.el ends here.