* gnus-vers.el (gnus-revision-number): Increment to 04.
[elisp/gnus.git-] / lisp / nnshimbun.el
1 ;;; nnshimbun.el --- interfacing with web newspapers -*- coding: junet; -*-
2
3 ;; Authors: TSUCHIYA Masatoshi <tsuchiya@pine.kuee.kyoto-u.ac.jp>
4 ;;          Akihiro Arisawa    <ari@atesoft.advantest.co.jp>
5 ;; Keywords: news
6
7 ;;; Copyright:
8
9 ;; This file is a part of Semi-Gnus.
10
11 ;; This program is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; This program is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with this program; if not, you can either send email to this
23 ;; program's maintainer or write to: The Free Software Foundation,
24 ;; Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;; Gnus backend to read newspapers on WEB.
29
30
31 ;;; Definitions:
32
33 (gnus-declare-backend "nnshimbun" 'address)
34
35 (eval-when-compile (require 'cl))
36 (eval-when-compile (require 'gnus-clfns))
37 (eval-when-compile (require 'static))
38
39 (require 'nnheader)
40 (require 'nnmail)
41 (require 'nnoo)
42 (require 'gnus-bcklg)
43 (eval-when-compile (ignore-errors (require 'nnweb)))
44 ;; Report failure to find w3 at load time if appropriate.
45 (eval '(require 'nnweb))
46 (require 'mcharset)
47
48
;; Declare the nnshimbun backend through `nnoo-declare'.
49 (nnoo-declare nnshimbun)
50
;; Number of seconds compared against the time elapsed since the last
;; index check by `nnshimbun-generate-nov-for-all-groups'; while less than
;; this has passed, that function does not scan the index again.
51 (defvar nnshimbun-check-interval 300)
52
;; Table of the groups offered by the "mew" entry of
;; `nnshimbun-type-definition', which takes its group names from the car
;; of each element.
;; NOTE(review): the second element looks like a path below the archive
;; URL and the trailing elements look like flags; their consumers are not
;; visible in this part of the file -- confirm before relying on that.
53 (defconst nnshimbun-mew-groups
54   '(("meadow-develop" "meadow-develop" nil t)
55     ("meadow-users-jp" "meadow-users-jp")
56     ("mule-win32" "mule-win32")
57     ("mew-win32" "mew-win32")
58     ("mew-dist" "mew-dist/3300" t)
59     ("mgp-users-jp" "mgp-users-jp/A" t t)))
60
;; Built-in definitions of the supported sites, keyed by server name.
;; `nnshimbun-open-server' turns every (KEY . VALUE) pair of the selected
;; entry into a default for the server variable `nnshimbun-KEY'.
;; The `index-url' value is passed through `eval' when
;; `nnshimbun-generate-nov-for-each-group' fetches an index, which is why
;; it may be a form referring to `nnshimbun-url' and
;; `nnshimbun-current-group' rather than a plain string.
;; The coding system names are chosen at compile time depending on
;; whether `MULE' is bound.
61 (defvar nnshimbun-type-definition
62   `(("asahi"
63      (url . "http://spin.asahi.com/")
64      (groups "national" "business" "politics" "international" "sports" "personal" "feneral")
65      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
66      (generate-nov   . nnshimbun-generate-nov-for-each-group)
67      (get-headers    . nnshimbun-asahi-get-headers)
68      (index-url      . (format "%sp%s.html" nnshimbun-url nnshimbun-current-group))
69      (from-address   . "webmaster@www.asahi.com")
70      (make-contents  . nnshimbun-make-text-or-html-contents)
71      (contents-start . "\n<!-- Start of kiji -->\n")
72      (contents-end   . "\n<!-- End of kiji -->\n"))
73     ("sponichi"
74      (url . "http://www.sponichi.co.jp/")
75      (groups "baseball" "soccer" "usa" "others" "society" "entertainment" "horseracing")
76      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
77      (generate-nov   . nnshimbun-generate-nov-for-each-group)
78      (get-headers    . nnshimbun-sponichi-get-headers)
79      (index-url      . (format "%s%s/index.html" nnshimbun-url nnshimbun-current-group))
80      (from-address   . "webmaster@www.sponichi.co.jp")
81      (make-contents  . nnshimbun-make-text-or-html-contents)
82      (contents-start . "\n<span class=\"text\">\e$B!!\e(B")
83      (contents-end   . "\n"))
84     ("cnet"
85      (url . "http://cnet.sphere.ne.jp/")
86      (groups "comp")
87      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
88      (generate-nov   . nnshimbun-generate-nov-for-each-group)
89      (get-headers    . nnshimbun-cnet-get-headers)
90      (index-url      . (format "%s/News/Oneweek/" nnshimbun-url))
91      (from-address   . "cnet@sphere.ad.jp")
92      (make-contents  . nnshimbun-make-html-contents)
93      (contents-start . "\n<!--KIJI-->\n")
94      (contents-end   . "\n<!--/KIJI-->\n"))
95     ("wired"
96      (url . "http://www.hotwired.co.jp/")
97      (groups "business" "culture" "technology")
98      (coding-system  . ,(static-if (boundp 'MULE) '*euc-japan* 'euc-jp))
99      (generate-nov   . nnshimbun-generate-nov-for-all-groups)
100      (get-headers    . nnshimbun-wired-get-all-headers)
101      (index-url)
102      (from-address   . "webmaster@www.hotwired.co.jp")
103      (make-contents  . nnshimbun-make-html-contents)
104      (contents-start . "\n<!-- START_OF_BODY -->\n")
105      (contents-end   . "\n<!-- END_OF_BODY -->\n"))
106     ("yomiuri"
107      (url . "http://www.yomiuri.co.jp/")
108      (groups "shakai" "sports" "seiji" "keizai" "kokusai" "fuho")
109      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
110      (generate-nov   . nnshimbun-generate-nov-for-all-groups)
111      (get-headers    . nnshimbun-yomiuri-get-all-headers)
112      (index-url      . (concat nnshimbun-url "main.htm"))
113      (from-address   . "webmaster@www.yomiuri.co.jp")
114      (make-contents  . nnshimbun-make-text-or-html-contents)
115      (contents-start . "\n<!--  honbun start  -->\n")
116      (contents-end   . "\n<!--  honbun end  -->\n"))
117     ("zdnet"
118      (url . "http://www.zdnet.co.jp/news/")
119      (groups "comp")
120      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
121      (generate-nov   . nnshimbun-generate-nov-for-each-group)
122      (get-headers    . nnshimbun-zdnet-get-headers)
123      (index-url      . nnshimbun-url)
124      (from-address   . "zdnn@softbank.co.jp")
125      (make-contents  . nnshimbun-make-html-contents)
126      (contents-start . "\\(<!--BODY-->\\|<!--DATE-->\\)")
127      (contents-end   . "\\(<!--BODYEND-->\\|<!--BYLINEEND-->\\)"))
128     ("mew"
129      (url . "http://www.mew.org/archive/")
130      (groups ,@(mapcar #'car nnshimbun-mew-groups))
131      (coding-system . ,(static-if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
132      (generate-nov  . nnshimbun-generate-nov-for-each-group)
133      (get-headers   . nnshimbun-mew-get-headers)
134      (index-url     . (nnshimbun-mew-concat-url "index.html"))
135      (make-contents . nnshimbun-make-mhonarc-contents))
136     ("xemacs"
137      (url . "http://list-archives.xemacs.org/")
138      (groups "xemacs-announce" "xemacs-beta-ja" "xemacs-beta"
139              "xemacs-build-reports" "xemacs-cvs" "xemacs-mule"
140              "xemacs-nt" "xemacs-patches" "xemacs-users-ja" "xemacs")
141      (coding-system . ,(static-if (boundp 'MULE) '*euc-japan* 'euc-jp))
142      (generate-nov  . nnshimbun-generate-nov-for-each-group)
143      (get-headers   . nnshimbun-xemacs-get-headers)
144      (index-url     . (nnshimbun-xemacs-concat-url nil))
145      (make-contents . nnshimbun-make-mhonarc-contents))
146     ("netbsd"
147      (url . "http://www.jp.netbsd.org/ja/JP/ml/")
148      (groups "announce-ja" "junk-ja" "tech-misc-ja" "tech-pkg-ja"
149              "port-arm32-ja" "port-hpcmips-ja" "port-mac68k-ja"
150              "port-mips-ja" "port-powerpc-ja" "hpcmips-changes-ja"
151              "members-ja" "admin-ja" "www-changes-ja")
152      (coding-system  . ,(static-if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
153      (generate-nov   . nnshimbun-generate-nov-for-each-group)
154      (get-headers    . nnshimbun-netbsd-get-headers)
155      (index-url      . (format "%s%s/index.html" nnshimbun-url nnshimbun-current-group))
156      (make-contents  . nnshimbun-make-mhonarc-contents))
157     ("bbdb-ml"
158      (url . "http://www.rc.tutrp.tut.ac.jp/bbdb-ml/")
159      (groups "bbdb-ml")
160      (coding-system . ,(static-if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
161      (generate-nov . nnshimbun-generate-nov-for-each-group)
162      (get-headers . nnshimbun-fml-get-headers)
163      (index-url . nnshimbun-url)
164      (make-contents . nnshimbun-make-fml-contents))
165     ))
166
;; Consulted by `nnshimbun-request-article-1': the server name is looked
;; up first and then the group name, falling back to the "default" key at
;; either level when there is no specific entry.
167 (defvar nnshimbun-x-face-alist
168   '(("default" .
169      (("default" .
170        "X-Face: Ygq$6P.,%Xt$U)DS)cRY@k$VkW!7(X'X'?U{{osjjFG\"E]hND;SPJ-J?O?R|a?L
171         g2$0rVng=O3Lt}?~IId8Jj&vP^3*o=LKUyk(`t%0c!;t6REk=JbpsEn9MrN7gZ%"))))
172   "Alist of server vs. alist of group vs. X-Face field.  It looks like:
173
174 \((\"asahi\" . ((\"national\" . \"X-face: ***\")
175              (\"business\" . \"X-Face: ***\")
176                 ;;
177                 ;;
178              (\"default\" . \"X-face: ***\")))
179  (\"sponichi\" . ((\"baseball\" . \"X-face: ***\")
180                 (\"soccer\" . \"X-Face: ***\")
181                 ;;
182                 ;;
183                 (\"default\" . \"X-face: ***\")))
184                 ;;
185  (\"default\" . ((\"default\" . \"X-face: ***\")))")
186
;; Root directory; `nnshimbun-open-server' creates one sub-directory per
;; server below it (stored in `nnshimbun-server-directory').
187 (defvoo nnshimbun-directory (nnheader-concat gnus-directory "shimbun/")
188   "Where nnshimbun will save its files.")
189
190 (defvoo nnshimbun-nov-is-evil nil
191   "*Non-nil means that nnshimbun will never retrieve NOV headers.")
192
;; File name of the per-group overview (NOV) file, expanded relative to
;; the group directory by `nnshimbun-open-nov' and
;; `nnshimbun-retrieve-headers-with-nov'.
193 (defvoo nnshimbun-nov-file-name ".overview")
194
;; When non-nil, the NOV generators call `nnshimbun-request-article-1' for
;; every article they have just numbered.
195 (defvoo nnshimbun-pre-fetch-article nil
196   "*Non nil means that nnshimbun fetch unread articles when scanning groups.")
197
198 ;; set by nnshimbun-possibly-change-group
199 (defvoo nnshimbun-buffer nil)
200 (defvoo nnshimbun-current-directory nil)
201 (defvoo nnshimbun-current-group nil)
202
;; Each of the following receives its default from the matching key of the
;; server's entry in `nnshimbun-type-definition'.
203 ;; set by nnshimbun-open-server
204 (defvoo nnshimbun-url nil)
205 (defvoo nnshimbun-coding-system nil)
206 (defvoo nnshimbun-groups nil)
207 (defvoo nnshimbun-generate-nov nil)
208 (defvoo nnshimbun-get-headers nil)
209 (defvoo nnshimbun-index-url nil)
210 (defvoo nnshimbun-from-address nil)
211 (defvoo nnshimbun-make-contents nil)
212 (defvoo nnshimbun-contents-start nil)
213 (defvoo nnshimbun-contents-end nil)
214 (defvoo nnshimbun-server-directory nil)
215
216 (defvoo nnshimbun-status-string "")
;; Time of the last scan done by `nnshimbun-generate-nov-for-all-groups'.
217 (defvoo nnshimbun-nov-last-check nil)
;; Alist of (GROUP . BUFFER) for the overview buffers opened by
;; `nnshimbun-open-nov'.
218 (defvoo nnshimbun-nov-buffer-alist nil)
;; Made buffer-local in every overview buffer; holds the file that buffer
;; is written to.
219 (defvoo nnshimbun-nov-buffer-file-name nil)
220
;; The three variables below are bound to their `gnus-backlog' counterparts
;; by the `nnshimbun-backlog' macro.
221 (defvoo nnshimbun-keep-backlog 300)
222 (defvoo nnshimbun-backlog-articles nil)
223 (defvoo nnshimbun-backlog-hashtb nil)
224
(defconst nnshimbun-meta-content-type-charset-regexp
  (eval-when-compile
    (concat "<meta[ \t]+http-equiv=\"?Content-type\"?[ \t]+content=\"\\([^;]+\\)"
            ";[ \t]*charset=\"?\\([^\"]+\\)\"?"
            ">"))
  "Regexp matching `<META HTTP-EQUIV=\"Content-Type\" content=\"...;charset=...\">'.
Subexpression 1 is the media type and subexpression 2 is the charset name.
The pattern is written in lower case, so bind `case-fold-search' to t when
searching with it, as `nnshimbun-retrieve-url' does.")
232
(defconst nnshimbun-meta-charset-content-type-regexp
  (eval-when-compile
    (concat "<meta[ \t]+content=\"\\([^;]+\\)"
            ";[ \t]*charset=\"?\\([^\"]+\\)\"?"
            "[ \t]+http-equiv=\"?Content-type\"?>"))
  "Regexp matching `<META content=\"...;charset=...\" HTTP-EQUIV=\"Content-Type\">'.
This is the variant in which the CONTENT attribute precedes HTTP-EQUIV.
Subexpression 1 is the media type and subexpression 2 is the charset name.
The pattern is written in lower case, so bind `case-fold-search' to t when
searching with it, as `nnshimbun-retrieve-url' does.")
240
241
242
243 ;;; backlog
;; Evaluate FORM with the Gnus backlog variables bound to this server's
;; backlog state (`nnshimbun-keep-backlog', `nnshimbun-backlog-articles',
;; `nnshimbun-backlog-hashtb') and with a backlog buffer name derived from
;; the current nnshimbun server.  Whatever FORM leaves in
;; `gnus-backlog-articles' and `gnus-backlog-hashtb' is copied back into
;; the server variables by the `unwind-protect' cleanup, so it also
;; happens on a non-local exit.  The value is that of the last FORM.
244 (defmacro nnshimbun-backlog (&rest form)
245   `(let ((gnus-keep-backlog nnshimbun-keep-backlog)
246          (gnus-backlog-buffer (format " *nnshimbun backlog %s*" (nnoo-current-server 'nnshimbun)))
247          (gnus-backlog-articles nnshimbun-backlog-articles)
248          (gnus-backlog-hashtb nnshimbun-backlog-hashtb))
249      (unwind-protect
250          (progn ,@form)
251        (setq nnshimbun-backlog-articles gnus-backlog-articles
252              nnshimbun-backlog-hashtb gnus-backlog-hashtb))))
;; Indent the body like `progn' and tell edebug how to instrument it.
253 (put 'nnshimbun-backlog 'lisp-indent-function 0)
254 (put 'nnshimbun-backlog 'edebug-form-spec '(form body))
255
256
257
258 ;;; Interface Functions
;; Let nnoo generate the basic backend functions for nnshimbun.
;; NOTE(review): `nnshimbun-server-opened', which is called below but not
;; defined in the visible part of this file, presumably comes from here --
;; confirm against nnoo.el.
259 (nnoo-define-basics nnshimbun)
260
(deffoo nnshimbun-open-server (server &optional defs)
  "Open SERVER, creating its working directories when necessary.
DEFS is a list of (VARIABLE VALUE) server variable settings.  Every key of
SERVER's entry in `nnshimbun-type-definition' that DEFS does not already
set is added to it as `nnshimbun-KEY'.  Return t on success; otherwise
close the server, record the reason with `nnheader-report' and return nil."
  ;; Fill in the built-in defaults; explicit settings in DEFS take priority.
  (let ((builtin (cdr (assoc server nnshimbun-type-definition)))
        entry variable)
    (while builtin
      (setq entry (car builtin)
            builtin (cdr builtin)
            variable (intern (concat "nnshimbun-" (symbol-name (car entry)))))
      (or (assq variable defs)
          (setq defs (cons (list variable (cdr entry)) defs)))))
  ;; Every server keeps its working files in its own sub-directory.
  (setq defs (cons (list 'nnshimbun-server-directory
                         (file-name-as-directory
                          (expand-file-name server nnshimbun-directory)))
                   defs))
  (nnoo-change-server 'nnshimbun server defs)
  (nnshimbun-possibly-change-group nil server)
  ;; Create the root directory, then validate it and the server directory.
  (or (file-exists-p nnshimbun-directory)
      (ignore-errors (make-directory nnshimbun-directory t)))
  (cond
   ((not (file-exists-p nnshimbun-directory))
    (nnshimbun-close-server)
    (nnheader-report 'nnshimbun "Couldn't create directory: %s" nnshimbun-directory))
   ((not (file-directory-p (file-truename nnshimbun-directory)))
    (nnshimbun-close-server)
    (nnheader-report 'nnshimbun "Not a directory: %s" nnshimbun-directory))
   ;; The root is fine: try to create the server directory before testing it.
   ((progn
      (or (file-exists-p nnshimbun-server-directory)
          (ignore-errors (make-directory nnshimbun-server-directory t)))
      (not (file-exists-p nnshimbun-server-directory)))
    (nnshimbun-close-server)
    (nnheader-report 'nnshimbun "Couldn't create directory: %s" nnshimbun-server-directory))
   ((not (file-directory-p (file-truename nnshimbun-server-directory)))
    (nnshimbun-close-server)
    (nnheader-report 'nnshimbun "Not a directory: %s" nnshimbun-server-directory))
   (t
    (nnheader-report 'nnshimbun "Opened server %s using directory %s"
                     server nnshimbun-server-directory)
    t)))
298
(deffoo nnshimbun-close-server (&optional server)
  "Close SERVER and release the resources held for it.
Kill the working buffer if SERVER is open and the buffer is alive, shut
down the article backlog, save and kill the overview buffers, and finally
let nnoo close the server.  Always return t."
  (when (and (nnshimbun-server-opened server)
             (gnus-buffer-live-p nnshimbun-buffer))
    (kill-buffer nnshimbun-buffer))
  (nnshimbun-backlog
    (gnus-backlog-shutdown))
  (nnshimbun-save-nov)
  (nnoo-close-server 'nnshimbun server)
  t)
307
;; Compatibility definitions used only when `MULE' is bound: make the
;; coding system names `euc-japan' and `shift_jis' available as copies of
;; `*euc-japan*' and `*sjis*', and alias `coding-system-category' to
;; `get-code-mnemonic'.
;; NOTE(review): `defalias-maybe' presumably defines the alias only when
;; the function is not already defined -- confirm against its definition.
308 (static-when (boundp 'MULE)
309   (unless (coding-system-p 'euc-japan)
310     (copy-coding-system '*euc-japan* 'euc-japan))
311   (unless (coding-system-p 'shift_jis)
312     (copy-coding-system '*sjis* 'shift_jis))
313   (eval-and-compile
314     (defalias-maybe 'coding-system-category 'get-code-mnemonic)))
315
;; Outline: fetch URL into the `url' package's working buffer with all
;; coding conversion disabled, decide which coding system the page uses,
;; decode the working buffer in place, then insert it into the buffer that
;; was current on entry and kill the working buffer.
;; When NO-CACHE is non-nil, `url-automatic-caching' and
;; `url-standalone-mode' are switched off for the duration of the fetch
;; and NO-CACHE is also handed to `url-retrieve'.
316 (defun nnshimbun-retrieve-url (url &optional no-cache)
317   "Retrieve URL contents and insert them into the current buffer."
318   (let ((buf (current-buffer))
319         (url-working-buffer url-working-buffer))
      ;; Remember the global defaults so that the cleanup form below can
      ;; restore them whatever happens during the fetch.
320     (let ((old-asynch (default-value 'url-be-asynchronous))
321           (old-caching (default-value 'url-automatic-caching))
322           (old-mode (default-value 'url-standalone-mode)))
323       (setq-default url-be-asynchronous nil)
324       (when no-cache
325         (setq-default url-automatic-caching nil)
326         (setq-default url-standalone-mode nil))
327       (unwind-protect
            ;; Read the raw bytes; decoding is done explicitly further down.
328           (let ((coding-system-for-read 'binary)
329                 (coding-system-for-write 'binary)
330                 (input-coding-system 'binary)
331                 (output-coding-system 'binary)
332                 (default-enable-multibyte-characters nil))
333             (set-buffer
334              (setq url-working-buffer
335                    (cdr (url-retrieve url no-cache))))
336             (url-uncompress))
337         (setq-default url-be-asynchronous old-asynch)
338         (setq-default url-automatic-caching old-caching)
339         (setq-default url-standalone-mode old-mode)))
      ;; The charset comes from `url-current-mime-charset' when that is
      ;; set, otherwise from a <META> tag found in the page, if any.
340     (let ((charset
341            (or url-current-mime-charset
342                (let ((case-fold-search t))
343                  (goto-char (point-min))
344                  (if (or (re-search-forward
345                           nnshimbun-meta-content-type-charset-regexp nil t)
346                          (re-search-forward
347                           nnshimbun-meta-charset-content-type-regexp nil t))
348                      (buffer-substring-no-properties (match-beginning 2)
349                                                      (match-end 2)))))))
350       (decode-coding-region
351        (point-min) (point-max)
352        (if charset
             ;; Known charset: map it to a coding system, with a few extra
             ;; spellings of the Japanese charsets put in front of the
             ;; regular table.
353            (let ((mime-charset-coding-system-alist
354                   (append '((euc-jp . euc-japan)
355                             (shift-jis . shift_jis)
356                             (shift_jis . shift_jis)
357                             (sjis . shift_jis)
358                             (x-euc-jp . euc-japan)
359                             (x-shift-jis . shift_jis)
360                             (x-shift_jis . shift_jis)
361                             (x-sjis . shift_jis))
362                           mime-charset-coding-system-alist)))
363              (mime-charset-to-coding-system charset))
           ;; No charset: prefer a detected coding system of the same
           ;; category as the server's `nnshimbun-coding-system'.  If none
           ;; matches, use the server's coding system when the first
           ;; candidate is in the same category as `binary', else that
           ;; first candidate.
364          (let ((default (condition-case nil
365                             (coding-system-category nnshimbun-coding-system)
366                           (error nil)))
367                (candidate (detect-coding-region (point-min) (point-max))))
368            (unless (listp candidate)
369              (setq candidate (list candidate)))
370            (catch 'coding
371              (dolist (coding candidate)
372                (if (eq default (coding-system-category coding))
373                    (throw 'coding coding)))
374              (if (eq (coding-system-category 'binary)
375                      (coding-system-category (car candidate)))
376                  nnshimbun-coding-system
377                (car candidate)))))))
      ;; Still in the working buffer here; switch back to the caller's
      ;; buffer only after making the decoded text multibyte.
378     (set-buffer-multibyte t)
379     (set-buffer buf)
380     (insert-buffer url-working-buffer)
381     (kill-buffer url-working-buffer)))
382
(deffoo nnshimbun-request-article (article &optional group server to-buffer)
  "Retrieve ARTICLE from GROUP on SERVER into TO-BUFFER.
ARTICLE is either an article number or a Message-ID string; a string is
first translated into a number through the overview database of GROUP.
Return the value of `nnshimbun-request-article-1' on success.  Return nil
when the group cannot be selected, or when ARTICLE cannot be resolved to a
number, in which case the failure is recorded with `nnheader-report'."
  (when (nnshimbun-possibly-change-group group server)
    (if (stringp article)
        (setq article (nnshimbun-search-id group article)))
    (if (integerp article)
        (nnshimbun-request-article-1 article group server to-buffer)
      ;; Report under this backend's own name so that the message ends up
      ;; in nnshimbun's status string rather than in nnml's.
      (nnheader-report 'nnshimbun "Couldn't retrieve article: %s" (prin1-to-string article))
      nil)))
391
(defsubst nnshimbun-header-xref (x)
  "Return the Xref field of header X without a leading \"Xref: \" label.
The field is returned unchanged when it does not start with that label,
and nil is returned when header X has no Xref field."
  (let ((xref (mail-header-xref x)))
    (cond
     ((null xref) nil)
     ((string-match "^Xref: " xref) (substring xref 6))
     (t xref))))
397
(defun nnshimbun-request-article-1 (article &optional group server to-buffer)
  "Put article number ARTICLE of GROUP into TO-BUFFER.
TO-BUFFER defaults to `nntp-server-buffer'.  The article is taken from the
backlog when it is there; otherwise the URL stored in the Xref field of its
overview line is fetched, the server's `nnshimbun-make-contents' function
builds the article text, and the result is entered into the backlog.
SERVER is only used to choose the X-Face from `nnshimbun-x-face-alist'.
Return (GROUP . NUMBER), or nil when the article is unknown or no contents
could be produced."
  (let ((destination (or to-buffer nntp-server-buffer)))
    (if (nnshimbun-backlog
          (gnus-backlog-request-article group article destination))
        (cons group article)
      (let* ((header (save-excursion
                       (set-buffer (nnshimbun-open-nov group))
                       (and (nnheader-find-nov-line article)
                            (nnheader-parse-nov))))
             (contents
              (when header
                (let* ((xref (nnshimbun-header-xref header))
                       ;; Per-server table first, then the "default" one.
                       (server-faces
                        (cdr (or (assoc (or server
                                            (nnoo-current-server 'nnshimbun))
                                        nnshimbun-x-face-alist)
                                 (assoc "default" nnshimbun-x-face-alist))))
                       ;; Per-group entry first, then the "default" one.
                       (x-face (cdr (or (assoc group server-faces)
                                        (assoc "default" server-faces)))))
                  (save-excursion
                    (set-buffer nnshimbun-buffer)
                    (erase-buffer)
                    (nnshimbun-retrieve-url xref)
                    (nnheader-message 6 "nnshimbun: Make contents...")
                    (goto-char (point-min))
                    (prog1 (funcall nnshimbun-make-contents header x-face)
                      (nnheader-message 6 "nnshimbun: Make contents...done")))))))
        (when contents
          (save-excursion
            (set-buffer destination)
            (erase-buffer)
            (insert contents)
            (nnshimbun-backlog
              (gnus-backlog-enter-article group article (current-buffer)))
            (nnheader-report 'nnshimbun "Article %s retrieved" (mail-header-id header))
            (cons group (mail-header-number header))))))))
432
(deffoo nnshimbun-request-group (group &optional server dont-check)
  "Select GROUP on SERVER.
When DONT-CHECK is non-nil only the selection is reported and t is
returned.  Otherwise a \"211 COUNT FIRST LAST GROUP\" line is inserted with
`nnheader-insert', where COUNT is the number of lines in the overview
buffer of GROUP and FIRST and LAST are the article numbers read from its
first and last lines (0 when they cannot be read).  When GROUP cannot be
selected or its directory is unusable, the reason is recorded with
`nnheader-report'."
  (let ((pathname-coding-system 'binary))
    (cond
     ((not (nnshimbun-possibly-change-group group server))
      (nnheader-report 'nnshimbun "Invalid group (no such directory)"))
     ((not (file-exists-p nnshimbun-current-directory))
      (nnheader-report 'nnshimbun "Directory %s does not exist"
                       nnshimbun-current-directory))
     ((not (file-directory-p nnshimbun-current-directory))
      (nnheader-report 'nnshimbun "%s is not a directory" nnshimbun-current-directory))
     (dont-check
      (nnheader-report 'nnshimbun "Group %s selected" group)
      t)
     (t
      (let (beg end lines)
        (save-excursion
          (set-buffer (nnshimbun-open-nov group))
          (goto-char (point-min))
          (setq beg (ignore-errors (read (current-buffer))))
          (goto-char (point-max))
          (forward-line -1)
          (setq end (ignore-errors (read (current-buffer)))
                lines (count-lines (point-min) (point-max))))
        ;; The backend symbol must be spelled `nnshimbun'; a misspelled one
        ;; stores the message under a status variable nobody reads.
        (nnheader-report 'nnshimbun "Selected group %s" group)
        (nnheader-insert "211 %d %d %d %s\n"
                         lines (or beg 0) (or end 0) group))))))
459
(deffoo nnshimbun-request-scan (&optional group server)
  "Make GROUP of SERVER current and regenerate its overview database.
Return the value of `nnshimbun-generate-nov-database'."
  (prog2
      (nnshimbun-possibly-change-group group server)
      (nnshimbun-generate-nov-database group)))
463
(deffoo nnshimbun-close-group (group &optional server)
  "Write the overview buffer of GROUP to its file and return t.
SERVER is accepted for the backend interface but is not used."
  (progn
    (nnshimbun-write-nov group)
    t))
467
(deffoo nnshimbun-request-list (&optional server)
  "Fill `nntp-server-buffer' with one active line per group of SERVER.
Each line reads \"GROUP LAST FIRST n\", where FIRST and LAST are the
article numbers read from the first and last lines of the overview buffer
of GROUP (0 when they cannot be read).  Groups that cannot be selected are
left out.  Always return t."
  (save-excursion
    (set-buffer nntp-server-buffer)
    (erase-buffer)
    (dolist (group nnshimbun-groups)
      (when (nnshimbun-possibly-change-group group server)
        ;; RANGE is (FIRST . LAST) as read from the overview buffer.
        (let ((range (save-excursion
                       (set-buffer (nnshimbun-open-nov group))
                       (goto-char (point-min))
                       (let ((first (ignore-errors (read (current-buffer)))))
                         (goto-char (point-max))
                         (forward-line -1)
                         (cons first
                               (ignore-errors (read (current-buffer))))))))
          (insert (format "%s %d %d n\n"
                          group
                          (or (cdr range) 0)
                          (or (car range) 0)))))))
  t)
484
;; `nnshimbun-insert-header' inserts HEADER at point as article header
;; lines.  Which of the two definitions is used depends on whether
;; `mime-entity-fetch-field' is available when this form is evaluated.
(eval-and-compile
  (if (not (fboundp 'mime-entity-fetch-field))
      ;; For pure Gnus.
      (defun nnshimbun-insert-header (header)
        "Insert HEADER at point, with its Xref field as the last line."
        (nnheader-insert-header header)
        (delete-char -1)
        (insert "Xref: " (nnshimbun-header-xref header) "\n"))
    ;; For Semi-Gnus.
    (defun nnshimbun-insert-header (header)
      "Insert HEADER at point, with its Xref field as the last line."
      (let ((subject (or (mime-entity-fetch-field header 'Subject) "(none)"))
            (from (or (mime-entity-fetch-field header 'From) "(nobody)"))
            (date (or (mail-header-date header) ""))
            (id (or (mail-header-id header) (nnmail-message-id))))
        (insert "Subject: " subject "\n"
                "From: " from "\n"
                "Date: " date "\n"
                "Message-ID: " id "\n"))
      ;; A References line is written only for a non-empty value.
      (let ((refs (mail-header-references header)))
        (when (and refs (string< "" refs))
          (insert "References: " refs "\n")))
      (insert "Lines: " (number-to-string (or (mail-header-lines header) 0)) "\n"
              "Xref: " (nnshimbun-header-xref header) "\n"))))
504
(deffoo nnshimbun-retrieve-headers (articles &optional group server fetch-old)
  "Retrieve the headers of ARTICLES in GROUP on SERVER.
ARTICLES is a list of article numbers and/or Message-ID strings.  When the
overview file can be used, `nntp-server-buffer' is filled with NOV lines
and the symbol `nov' is returned; FETCH-OLD is passed on to
`nnshimbun-retrieve-headers-with-nov'.  Otherwise one \"220\" response
with full header lines is inserted per known article and the symbol
`headers' is returned.  Return nil when GROUP cannot be selected."
  (when (nnshimbun-possibly-change-group group server)
    (if (nnshimbun-retrieve-headers-with-nov articles fetch-old)
        'nov
      (save-excursion
        (set-buffer nntp-server-buffer)
        (erase-buffer)
        (let (header)
          (dolist (art articles)
            ;; Message-IDs are translated into article numbers first.
            (if (stringp art)
                (setq art (nnshimbun-search-id group art)))
            (if (integerp art)
                (when (setq header
                            (save-excursion
                              (set-buffer (nnshimbun-open-nov group))
                              (and (nnheader-find-nov-line art)
                                   (nnheader-parse-nov))))
                  (insert (format "220 %d Article retrieved.\n" art))
                  (nnshimbun-insert-header header)
                  (insert ".\n")
                  (delete-region (point) (point-max))))))
        ;; The Gnus backend interface names this result `headers' (plural).
        'headers))))
527
(defun nnshimbun-retrieve-headers-with-nov (articles &optional fetch-old)
  "Load the overview lines for ARTICLES into `nntp-server-buffer'.
Return nil without touching the buffer when `gnus-nov-is-evil' or
`nnshimbun-nov-is-evil' is non-nil, or when the current group has no
overview file.  Otherwise insert the file and return t.  Lines outside the
range of ARTICLES are deleted, unless FETCH-OLD is non-nil and not a
number; a numeric FETCH-OLD extends the range downwards by that many
articles, but not below 1."
  (unless (or gnus-nov-is-evil nnshimbun-nov-is-evil)
    (let ((overview (expand-file-name nnshimbun-nov-file-name
                                      nnshimbun-current-directory)))
      (when (file-exists-p overview)
        (save-excursion
          (set-buffer nntp-server-buffer)
          (erase-buffer)
          (nnheader-insert-file-contents overview)
          ;; A non-numeric FETCH-OLD means: keep everything.
          (unless (and fetch-old (not (numberp fetch-old)))
            (nnheader-nov-delete-outside-range
             (if fetch-old
                 (max 1 (- (car articles) fetch-old))
               (car articles))
             (car (last articles))))
          t)))))
544
545
546
547 ;;; Nov Database Operations
548
(defun nnshimbun-generate-nov-database (group)
  "Update the overview database of GROUP and write it to its file.
The work is done by the function stored in `nnshimbun-generate-nov', whose
value is returned."
  (let ((result (funcall nnshimbun-generate-nov group)))
    (nnshimbun-write-nov group)
    result))
552
;; NOV generator for servers whose index page is fetched per group.
;; Fetch the page named by evaluating `nnshimbun-index-url' (bypassing the
;; cache), let the server's `nnshimbun-get-headers' function parse it, and
;; append every header whose Message-ID is not yet in the overview buffer
;; of GROUP, numbering the new articles upwards from the last number found
;; in that buffer.
553 (defun nnshimbun-generate-nov-for-each-group (group)
554   (nnshimbun-possibly-change-group group)
555   (save-excursion
556     (set-buffer (nnshimbun-open-nov group))
557     (let (i)
        ;; I starts as the article number read from the last overview
        ;; line, or 0 when nothing can be read there.
558       (goto-char (point-max))
559       (forward-line -1)
560       (setq i (or (ignore-errors (read (current-buffer))) 0))
561       (dolist (header (save-excursion
562                         (set-buffer nnshimbun-buffer)
563                         (erase-buffer)
564                         (nnshimbun-retrieve-url (eval nnshimbun-index-url) t)
565                         (goto-char (point-min))
566                         (funcall nnshimbun-get-headers)))
567         (unless (nnshimbun-search-id group (mail-header-id header))
568           (mail-header-set-number header (setq i (1+ i)))
569           (goto-char (point-max))
570           (nnheader-insert-nov header)
            ;; Optionally fetch the article body right away.
571           (if nnshimbun-pre-fetch-article
572               (nnshimbun-request-article-1 i group nil nnshimbun-buffer)))))))
573
;; NOV generator for servers whose `nnshimbun-get-headers' function
;; returns the headers of all groups at once, as a list of
;; (GROUP . HEADERS) elements.  ARGS is ignored.  Nothing is done while
;; less than `nnshimbun-check-interval' seconds have passed since the
;; previous scan.  After a scan all overview buffers are saved and the
;; time of the scan is recorded in `nnshimbun-nov-last-check'.
574 (defun nnshimbun-generate-nov-for-all-groups (&rest args)
575   (unless (and nnshimbun-nov-last-check
576                (< (nnshimbun-lapse-seconds nnshimbun-nov-last-check)
577                   nnshimbun-check-interval))
578     (save-excursion
579       (dolist (list (funcall nnshimbun-get-headers))
580         (let ((group (car list)))
581           (nnshimbun-possibly-change-group group)
582           (when (cdr list)
583             (set-buffer (nnshimbun-open-nov group))
584             (let (i)
                ;; I starts as the article number read from the last
                ;; overview line, or 0 when nothing can be read there.
585               (goto-char (point-max))
586               (forward-line -1)
587               (setq i (or (ignore-errors (read (current-buffer))) 0))
588               (dolist (header (cdr list))
                  ;; Only headers with a still unknown Message-ID are added.
589                 (unless (nnshimbun-search-id group (mail-header-id header))
590                   (mail-header-set-number header (setq i (1+ i)))
591                   (goto-char (point-max))
592                   (nnheader-insert-nov header)
593                   (if nnshimbun-pre-fetch-article
594                       (nnshimbun-request-article-1 i group nil nnshimbun-buffer))))))))
595       (nnshimbun-save-nov)
596       (setq nnshimbun-nov-last-check (current-time)))))
597
;; Look up Message-ID string ID in the overview buffer of GROUP.
;; Return nil when it is not found.  When it is found, return the parsed
;; header if NOV is non-nil, otherwise the article number read from the
;; start of the matching line.
;; Two passes are made: the first accepts ID only where exactly four tabs
;; precede it on its line; the second searches for "X-Nnshimbun-Id: "
;; followed by ID, which is the extra field under which
;; `nnshimbun-nov-fix-header' keeps the previous ID of an article.
598 (defun nnshimbun-search-id (group id &optional nov)
599   (save-excursion
600     (set-buffer (nnshimbun-open-nov group))
601     (goto-char (point-min))
602     (let (found)
603       (while (and (not found)
604                   (search-forward id nil t)) ; We find the ID.
605         ;; And the id is in the fourth field.
606         (if (not (and (search-backward "\t" nil t 4)
607                       (not (search-backward "\t" (gnus-point-at-bol) t))))
              ;; Wrong field: continue searching from the next line.
608             (forward-line 1)
609           (forward-line 0)
610           (setq found t)))
611       (unless found
612         (goto-char (point-min))
613         (when (search-forward (concat "X-Nnshimbun-Id: " id) nil t)
614           (forward-line 0)
615           (setq found t)))
        ;; Point is at the beginning of the matching line here.
616       (if found
617           (if nov
618               (nnheader-parse-nov)
619             ;; We return the article number.
620             (ignore-errors (read (current-buffer))))))))
621
;; Update the overview line of HEADER in GROUP with corrected fields.
;; ARGS is an alist of (FIELD . VALUE).  For the field `id', the current
;; Message-ID is first saved as an `X-Nnshimbun-Id' extra field (unless one
;; is already there) and then replaced by VALUE.  For any other FIELD,
;; `mail-header-set-FIELD' is applied to HEADER when VALUE is non-nil.
;; Finally the Xref field is normalized with `nnshimbun-header-xref' and
;; the old overview line is replaced by the updated one.  Nothing happens
;; when the article number of HEADER has no overview line.
622 (defun nnshimbun-nov-fix-header (group header args)
623   (save-excursion
624     (set-buffer (nnshimbun-open-nov group))
625     (when (nnheader-find-nov-line (mail-header-number header))
626       (dolist (arg args)
627         (if (eq (car arg) 'id)
628             (let ((extra (mail-header-extra header)))
629               (unless (assq 'X-Nnshimbun-Id extra)
630                 (mail-header-set-extra
631                  header
632                  (cons (cons 'X-Nnshimbun-Id (mail-header-id header))
633                        extra)))
634               (mail-header-set-id header (cdr arg)))
            ;; NOTE(review): the setter is applied through `eval',
            ;; presumably because the `mail-header-set-*' setters are
            ;; macros -- confirm against nnheader.el.
635           (let ((func (intern (concat "mail-header-set-" (symbol-name (car arg))))))
636             (if (cdr arg) (eval (list func header (cdr arg)))))))
637       (mail-header-set-xref header (nnshimbun-header-xref header))
638       (delete-region (point) (progn (forward-line 1) (point)))
639       (nnheader-insert-nov header))))
640
(defun nnshimbun-open-nov (group)
  "Return the overview buffer of GROUP, creating it when necessary.
A new buffer is filled from the overview file of GROUP if that file exists,
is marked unmodified, remembers its file in the buffer-local
`nnshimbun-nov-buffer-file-name', and is registered in
`nnshimbun-nov-buffer-alist'."
  (let ((buffer (cdr (assoc group nnshimbun-nov-buffer-alist))))
    (unless (buffer-live-p buffer)
      (setq buffer (gnus-get-buffer-create
                    (format " *nnshimbun overview %s %s*"
                            (nnoo-current-server 'nnshimbun) group)))
      (save-excursion
        (set-buffer buffer)
        (set (make-local-variable 'nnshimbun-nov-buffer-file-name)
             (expand-file-name
              nnshimbun-nov-file-name
              (nnmail-group-pathname group nnshimbun-server-directory)))
        (erase-buffer)
        (if (file-exists-p nnshimbun-nov-buffer-file-name)
            (nnheader-insert-file-contents nnshimbun-nov-buffer-file-name))
        (set-buffer-modified-p nil))
      (setq nnshimbun-nov-buffer-alist
            (cons (cons group buffer) nnshimbun-nov-buffer-alist)))
    buffer))
660
(defun nnshimbun-write-nov (group)
  "Write the overview buffer of GROUP to its file if it has been modified.
Nothing is done when GROUP has no live overview buffer or when the buffer
is unmodified.  After a successful write the buffer is marked unmodified,
so that neither this function nor `nnshimbun-save-nov' writes the same
contents again."
  (let ((buffer (cdr (assoc group nnshimbun-nov-buffer-alist))))
    (when (buffer-live-p buffer)
      (save-excursion
        (set-buffer buffer)
        ;; The modification test has to guard the write; evaluating it for
        ;; its value alone rewrote the file on every call.
        (when (buffer-modified-p)
          (nnmail-write-region 1 (point-max) nnshimbun-nov-buffer-file-name
                               nil 'nomesg)
          (set-buffer-modified-p nil))))))
669
(defun nnshimbun-save-nov ()
  "Save and kill every overview buffer in `nnshimbun-nov-buffer-alist'.
Modified buffers are written to their files first.  Entries are removed
from the alist one by one as they are processed, leaving it empty."
  (save-excursion
    (let (buffer)
      (while nnshimbun-nov-buffer-alist
        (setq buffer (cdr (car nnshimbun-nov-buffer-alist)))
        ;; `buffer-name' is nil for a buffer that has already been killed.
        (when (buffer-name buffer)
          (set-buffer buffer)
          (if (buffer-modified-p)
              (nnmail-write-region 1 (point-max) nnshimbun-nov-buffer-file-name
                                   nil 'nomesg))
          (set-buffer-modified-p nil)
          (kill-buffer (current-buffer)))
        (setq nnshimbun-nov-buffer-alist (cdr nnshimbun-nov-buffer-alist))))))
681
682
683
684 ;;; Server Initialize
(defun nnshimbun-possibly-change-group (group &optional server)
  "Make GROUP on SERVER the current group.
Open SERVER first when it is given and not open yet, and make sure that
the working buffer `nnshimbun-buffer' exists.  With a nil GROUP that is
all, and t is returned.  Otherwise set `nnshimbun-current-directory' and
`nnshimbun-current-group', create the group directory when it is missing,
and return t if the directory is usable; if not, record the reason with
`nnheader-report' and return nil."
  (and server
       (not (nnshimbun-server-opened server))
       (nnshimbun-open-server server))
  (or (gnus-buffer-live-p nnshimbun-buffer)
      (setq nnshimbun-buffer
            (save-excursion
              (nnheader-set-temp-buffer
               (format " *nnshimbun %s*" (nnoo-current-server 'nnshimbun))))))
  (or (not group)
      (let ((directory (nnmail-group-pathname group nnshimbun-server-directory))
            (pathname-coding-system 'binary))
        (if (not (equal directory nnshimbun-current-directory))
            (setq nnshimbun-current-directory directory
                  nnshimbun-current-group group))
        (or (file-exists-p nnshimbun-current-directory)
            (ignore-errors (make-directory nnshimbun-current-directory t)))
        (cond
         ((not (file-exists-p nnshimbun-current-directory))
          (nnheader-report 'nnshimbun "Couldn't create directory: %s" nnshimbun-current-directory))
         ((not (file-directory-p (file-truename nnshimbun-current-directory)))
          (nnheader-report 'nnshimbun "Not a directory: %s" nnshimbun-current-directory))
         (t t)))))
709
710
711
712 ;;; Misc Functions
713
(eval-and-compile
  (if (fboundp 'eword-encode-string)
      ;; For Semi-Gnus.
      (defun nnshimbun-mime-encode-string (string)
        "Return STRING encoded for use in a header, on a single line.
STRING is passed through `nnweb-decode-entities-string' and
`eword-encode-string'; newlines in the result are removed."
        (let ((encoded (eword-encode-string
                        (nnweb-decode-entities-string string))))
          (mapconcat #'identity (split-string encoded "\n") "")))
    ;; For pure Gnus.
    (defun nnshimbun-mime-encode-string (string)
      "Return STRING encoded for use in a header, on a single line.
STRING is passed through `nnweb-decode-entities-string' and encoded
with `rfc2047-encode-region'; newlines in the result are removed."
      (let ((encoded (with-temp-buffer
                       (insert (nnweb-decode-entities-string string))
                       (rfc2047-encode-region (point-min) (point-max))
                       (buffer-string))))
        (mapconcat #'identity (split-string encoded "\n") "")))))
733
(defun nnshimbun-lapse-seconds (time)
  "Return the number of seconds between TIME and now.
TIME is a list like the value of `current-time'; only its first two
elements (the high and the low 16-bit word of the seconds) are used."
  (let* ((now (current-time))
         (high-diff (- (car now) (car time)))
         (low-diff (- (nth 1 now) (nth 1 time))))
    (+ (* high-diff 65536) low-diff)))
738
(defun nnshimbun-make-date-string (year month day &optional time)
  "Return a date string \"DD Mon YYYY TIME +0900\".
YEAR, MONTH (1-12) and DAY are numbers.  A YEAR below 69 has 2000 added,
any other YEAR below 1000 has 1900 added.  TIME is a string such as
\"12:34\" and defaults to \"00:00\"."
  (let ((month-name (aref [nil "Jan" "Feb" "Mar" "Apr" "May" "Jun"
                               "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"]
                          month))
        (full-year (cond ((< year 69) (+ year 2000))
                         ;; Two-digit years and possible 3-digit years.
                         ((< year 1000) (+ year 1900))
                         (t year))))
    (format "%02d %s %04d %s +0900"
            day month-name full-year (or time "00:00"))))
753
;; Use the native `regexp-opt' when there is one; otherwise fall back to
;; a plain alternation of the quoted strings.
(if (fboundp 'regexp-opt)
    (defalias 'nnshimbun-regexp-opt 'regexp-opt)
  (defun nnshimbun-regexp-opt (strings &optional paren)
    "Return a regexp that matches any one of STRINGS.
Every element of STRINGS is matched literally.  If optional PAREN is
non-nil, the whole regexp is wrapped in a \\\\( ... \\\\) group."
    (let ((alternatives (mapconcat 'regexp-quote strings "\\|")))
      (if paren
          (concat "\\(" alternatives "\\)")
        alternatives))))
763
764
765 ;; Fast fill-region function
766
;; Column that `nnshimbun-fill-line' moves to when looking for a place to
;; break a line: at most 80, and four columns less than the width that
;; `frame-width' reports at the time this form is evaluated.
767 (defvar nnshimbun-fill-column (min 80 (- (frame-width) 4)))
768
;; List of characters that `nnshimbun-fill-line' skips over before it
;; inserts a line break, so that none of them starts a line.  The list is
;; made from the string below with `string-to-char-list' when that
;; function exists and with `string-to-list' otherwise.
;; NOTE(review): the `\e$B ... \e(B' runs look like ISO-2022-JP text (the
;; file is tagged `coding: junet') -- confirm against the decoded file.
769 (defconst nnshimbun-kinsoku-bol-list
770   (funcall
771    (if (fboundp 'string-to-char-list)
772        'string-to-char-list
773      'string-to-list) "\
774 !)-_~}]:;',.?\e$B!"!#!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A\e(B\
775 \e$B!B!C!D!E!G!I!K!M!O!Q!S!U!W!Y![!k!l!m!n$!$#$%$'$)$C$c$e$g$n%!%#%%%'%)%C%c%e%g%n%u%v\e(B"))
776
;; List of characters that `nnshimbun-fill-line' backs up over before it
;; inserts a line break, so that none of them ends a line.  Built from the
;; string below in the same way as `nnshimbun-kinsoku-bol-list'.
;; NOTE(review): the `\e$B ... \e(B' run looks like ISO-2022-JP text --
;; confirm against the decoded file.
777 (defconst nnshimbun-kinsoku-eol-list
778   (funcall
779    (if (fboundp 'string-to-char-list)
780        'string-to-char-list
781      'string-to-list)
782    "({[`\e$B!F!H!J!L!N!P!R!T!V!X!Z!k!l!m!x\e(B"))
783
;; Fold the line at point: while the line reaches `nnshimbun-fill-column',
;; insert a newline near that column and continue with the remainder.
;; Afterwards move to the following line and return non-nil unless that
;; is the end of the buffer, so callers can loop with
;; (while (nnshimbun-fill-line)).
784 (defun nnshimbun-fill-line ()
785   (forward-line 0)
786   (let ((top (point)) chr)
      ;; The loop goes on only while the line is long enough AND the inner
      ;; `progn' returned nil, i.e. a newline was inserted by its last form.
787     (while (if (>= (move-to-column nnshimbun-fill-column)
788                    nnshimbun-fill-column)
789                (not (progn
                        ;; A character that must not end a line precedes
                        ;; the break column: back up over all such
                        ;; characters and break in front of them.
790                       (if (memq (preceding-char) nnshimbun-kinsoku-eol-list)
791                           (progn
792                             (backward-char)
793                             (while (memq (preceding-char) nnshimbun-kinsoku-eol-list)
794                               (backward-char))
795                             (insert "\n"))
                          ;; Otherwise step over characters that must not
                          ;; begin a line; CHR ends up as the first
                          ;; character that may.
796                         (while (memq (setq chr (following-char)) nnshimbun-kinsoku-bol-list)
797                           (forward-char))
                          ;; Whitespace at the break is deleted.  Failing
                          ;; that, unless CHR is wider than one column,
                          ;; back up to a word start after TOP, or give up
                          ;; and go to the end of the line.
798                         (if (looking-at "\\s-+")
799                             (or (eolp) (delete-region (point) (match-end 0)))
800                           (or (> (char-width chr) 1)
801                               (re-search-backward "\\<" top t)
802                               (end-of-line)))
803                         (or (eolp) (insert "\n"))))))
804       (setq top (point))))
805   (forward-line 1)
806   (not (eobp)))
807
(defsubst nnshimbun-shallow-rendering ()
  "Convert the HTML text in the current buffer to folded plain text.
Newlines are inserted after \"<p>\" and \"<br>\" tags, the markup is
removed with `nnweb-remove-markup' and `nnweb-decode-entities', and all
lines are folded with `nnshimbun-fill-line'.  Finally leading newlines
are removed, runs of newlines are reduced to two, and the text is made
to end in exactly one newline."
  ;; Tags that stand for line breaks, with the text inserted after them.
  (let ((breaks '(("<p>" . "\n\n") ("<br>" . "\n")))
        rule)
    (while breaks
      (setq rule (car breaks)
            breaks (cdr breaks))
      (goto-char (point-min))
      (while (search-forward (car rule) nil t)
        (insert (cdr rule)))))
  (nnweb-remove-markup)
  (nnweb-decode-entities)
  (goto-char (point-min))
  (while (nnshimbun-fill-line))
  ;; Leading newlines.
  (goto-char (point-min))
  (skip-chars-forward "\n")
  (delete-region (point-min) (point))
  ;; Newlines beyond the second one of each run.
  (let (run-start)
    (while (search-forward "\n\n" nil t)
      (setq run-start (point))
      (skip-chars-forward "\n")
      (delete-region run-start (point))))
  ;; Trailing newlines.
  (goto-char (point-max))
  (skip-chars-backward "\n")
  (delete-region (point) (point-max))
  (insert "\n"))
830
(defun nnshimbun-make-text-or-html-contents (header &optional x-face)
  "Turn the page in the current buffer into an article and return it.
When the text between `nnshimbun-contents-start' and
`nnshimbun-contents-end' can be located (searching from point, ignoring
case), only that text is kept, it is rendered with
`nnshimbun-shallow-rendering' and the article is labelled text/plain.
Otherwise the whole page is kept and labelled text/html.  HEADER is
inserted with `nnshimbun-insert-header'; X-FACE, if non-nil, is a string
inserted after the MIME header lines.  The value is the buffer text
encoded with the coding system of ISO-2022-JP."
  (let ((case-fold-search t)
        (content-type "text/html")
        body-start)
    (if (and (re-search-forward nnshimbun-contents-start nil t)
             (setq body-start (point))
             (re-search-forward nnshimbun-contents-end nil t))
        (progn
          (delete-region (match-beginning 0) (point-max))
          (delete-region (point-min) body-start)
          (nnshimbun-shallow-rendering)
          (setq content-type "text/plain")))
    (goto-char (point-min))
    (nnshimbun-insert-header header)
    (insert "Content-Type: " content-type
            "; charset=ISO-2022-JP\nMIME-Version: 1.0\n")
    (if x-face
        (progn
          (insert x-face)
          ;; Make sure the X-Face text ends its line.
          (or (bolp) (insert "\n"))))
    (insert "\n")
    (encode-coding-string (buffer-string)
                          (mime-charset-to-coding-system "ISO-2022-JP"))))
851
(defun nnshimbun-make-html-contents (header &optional x-face)
  "Turn the page in the current buffer into a text/html article.
When the text between `nnshimbun-contents-start' and
`nnshimbun-contents-end' can be located (searching from point), only
that text is kept; otherwise the whole page is kept.  HEADER is inserted
with `nnshimbun-insert-header'; X-FACE, if non-nil, is a string inserted
after the MIME header lines.  The value is the buffer text encoded with
the coding system of ISO-2022-JP."
  (let (body-start)
    (if (and (re-search-forward nnshimbun-contents-start nil t)
             (setq body-start (point))
             (re-search-forward nnshimbun-contents-end nil t))
        (progn
          (delete-region (match-beginning 0) (point-max))
          (delete-region (point-min) body-start))))
  (goto-char (point-min))
  (nnshimbun-insert-header header)
  (insert "Content-Type: text/html; charset=ISO-2022-JP\n"
          "MIME-Version: 1.0\n")
  (if x-face
      (progn
        (insert x-face)
        ;; Make sure the X-Face text ends its line.
        (or (bolp) (insert "\n"))))
  (insert "\n")
  (encode-coding-string (buffer-string)
                        (mime-charset-to-coding-system "ISO-2022-JP")))
870
;; Turn the page in the current buffer into an article and return the
;; buffer text encoded with the coding system of ISO-2022-JP.
;; HEADER is the header object that gets updated and inserted; ARGS is
;; not used.  If the page has a "<!--X-Head-End-->" marker, the header
;; fields are recovered from the "<!--X-...-->" comment lines before it
;; and the body is what lies between the X-Body-of-Message markers, with
;; the markup removed.  Without the marker the page is kept as it is and
;; labelled text/html.
871 (defun nnshimbun-make-mhonarc-contents (header &rest args)
872   (require 'mml)
873   (if (search-forward "<!--X-Head-End-->" nil t)
874       (progn
875         (forward-line 0)
876         ;; Processing headers.
877         (save-restriction
878           (narrow-to-region (point-min) (point))
879           (nnweb-decode-entities)
            ;; Strip the comment wrapper from "<!--X-Field: value -->"
            ;; lines, leaving "Field: value".
880           (goto-char (point-min))
881           (while (search-forward "\n<!--X-" nil t)
882             (replace-match "\n"))
883           (goto-char (point-min))
884           (while (search-forward " -->\n" nil t)
885             (replace-match "\n"))
886           (goto-char (point-min))
887           (while (search-forward "\t" nil t)
888             (replace-match " "))
889           (goto-char (point-min))
            ;; BUF collects (FIELD . VALUE) pairs for
            ;; `nnshimbun-nov-fix-header'; REFS collects the Reference
            ;; values, most recently seen first.
890           (let (buf refs)
891             (while (not (eobp))
892               (cond
893                ((looking-at "<!--")
894                 (delete-region (point) (progn (forward-line 1) (point))))
895                ((looking-at "Subject: +")
896                 (push (cons 'subject (nnheader-header-value)) buf)
897                 (delete-region (point) (progn (forward-line 1) (point))))
898                ((looking-at "From: +")
899                 (push (cons 'from (nnheader-header-value)) buf)
900                 (delete-region (point) (progn (forward-line 1) (point))))
901                ((looking-at "Date: +")
902                 (push (cons 'date (nnheader-header-value)) buf)
903                 (delete-region (point) (progn (forward-line 1) (point))))
904                ((looking-at "Message-Id: +")
905                 (push (cons 'id (concat "<" (nnheader-header-value) ">")) buf)
906                 (delete-region (point) (progn (forward-line 1) (point))))
907                ((looking-at "Reference: +")
908                 (push (concat "<" (nnheader-header-value) ">") refs)
909                 (delete-region (point) (progn (forward-line 1) (point))))
                 ;; A Content-Type line is kept; a charset parameter is
                 ;; appended when the line has none.
910                ((looking-at "Content-Type: ")
911                 (unless (search-forward "charset" (gnus-point-at-eol) t)
912                   (end-of-line)
913                   (insert "; charset=ISO-2022-JP"))
914                 (forward-line 1))
915                (t (forward-line 1))))
916             (insert "MIME-Version: 1.0\n")
917             (if refs (push (cons 'references (mapconcat 'identity refs " ")) buf))
918             (nnshimbun-nov-fix-header nnshimbun-current-group header buf)
919             (goto-char (point-min))
920             (nnshimbun-insert-header header))
921           (goto-char (point-max)))
922         ;; Processing body.
923         (save-restriction
924           (narrow-to-region (point) (point-max))
            ;; Drop everything up to and including the body start marker
            ;; (nothing is deleted when the marker is missing).
925           (delete-region
926            (point)
927            (progn
928              (search-forward "\n<!--X-Body-of-Message-->\n" nil t)
929              (point)))
930           (when (search-forward "\n<!--X-Body-of-Message-End-->\n" nil t)
931             (forward-line -1)
932             (delete-region (point) (point-max)))
933           (nnweb-remove-markup)
934           (nnweb-decode-entities)))
      ;; No X-Head-End marker: keep the page and label it text/html.
935     (goto-char (point-min))
936     (nnshimbun-insert-header header)
937     (insert "Content-Type: text/html; charset=ISO-2022-JP\nMIME-Version: 1.0\n\n"))
938   (encode-coding-string (buffer-string)
939                         (mime-charset-to-coding-system "ISO-2022-JP")))
940
(defun nnshimbun-make-fml-contents (header &rest args)
  "Turn the fml-generated page in the current buffer into an article.
HEADER is the header object that gets updated and inserted; ARGS is not
used.  The header fields are read from the <SPAN> elements that follow
\"<SPAN CLASS=mailheaders>\": Date, From, Subject, Message-Id and
References are handed to `nnshimbun-nov-fix-header', every other field
is written back as a plain \"Field: value\" line.  The markup of the
remaining text is removed.  When one of the expected markers is missing
the conversion stops early.  In every case the value is the buffer text
encoded with the coding system of ISO-2022-JP."
  (require 'mml)
  (catch 'stop
    (if (search-forward "<SPAN CLASS=mailheaders>" nil t)
        (delete-region (point-min) (point))
      (throw 'stop nil))
    ;; The searches below need a non-nil NOERROR argument: without it a
    ;; missing marker signals `search-failed' and the `throw' that is
    ;; meant to handle that case can never run.
    (if (search-forward "</PRE>" nil t)
        (progn
          (beginning-of-line)
          (delete-region (point) (point-max)))
      (throw 'stop nil))
    (if (search-backward "</SPAN>" nil t)
        (progn
          (beginning-of-line)
          ;; Delete the rest of the line without touching the kill ring
          ;; and without depending on the user's `kill-whole-line'.
          (delete-region (point) (gnus-point-at-eol)))
      (throw 'stop nil))
    ;; Processing headers.
    (save-restriction
      (narrow-to-region (point-min) (point))
      (subst-char-in-region (point-min) (point-max) ?\t ?  t)
      (nnweb-decode-entities)
      (goto-char (point-min))
      (let (buf field value start value-beg end)
        ;; Each round consumes one "<SPAN CLASS=x>Field</SPAN>:" label
        ;; together with its "<SPAN CLASS=x-value>...</SPAN>" value.
        (while (and (setq start (point))
                    (re-search-forward "<SPAN CLASS=\\(.*\\)>\\(.*\\)</SPAN>:"
                                       nil t)
                    (setq field (match-string 2))
                    (re-search-forward
                     (concat "<SPAN CLASS=" (match-string 1) "-value>") nil t)
                    (setq value-beg (point))
                    (search-forward "</SPAN>" nil t)
                    (setq end (point)))
          (setq value (buffer-substring value-beg
                                        (progn (search-backward "</SPAN>")
                                               (point))))
          (delete-region start end)
          (cond ((string= field "Date")
                 (push (cons 'date value) buf))
                ((string= field "From")
                 (push (cons 'from value) buf))
                ((string= field "Subject")
                 (push (cons 'subject value) buf))
                ((string= field "Message-Id")
                 (push (cons 'id value) buf))
                ((string= field "References")
                 (push (cons 'references value) buf))
                (t
                 (insert (concat field ": " value "\n")))))
        (nnshimbun-nov-fix-header nnshimbun-current-group header buf)
        (goto-char (point-min))
        (nnshimbun-insert-header header))
      (goto-char (point-max)))
    ;; Processing body.
    (save-restriction
      (narrow-to-region (point) (point-max))
      (nnweb-remove-markup)
      (nnweb-decode-entities)))
  (encode-coding-string (buffer-string)
                        (mime-charset-to-coding-system "ISO-2022-JP")))
999
1000 ;;; www.asahi.com
1001
;; Parse the index page in the current buffer and return a list of
;; headers made with `make-full-mail-header', or nil when the
;; "Start of past" / "End of past" markers are not both found.  The buffer
;; is cut down to the text between the two markers.
;; NOTE(review): the `\e$B"#\e(B' in the regexp below looks like one
;; ISO-2022-JP character -- confirm against the decoded file.
1002 (defun nnshimbun-asahi-get-headers ()
1003   (when (search-forward "\n<!-- Start of past -->\n" nil t)
1004     (delete-region (point-min) (point))
1005     (when (search-forward "\n<!-- End of past -->\n" nil t)
1006       (forward-line -1)
1007       (delete-region (point) (point-max))
1008       (goto-char (point-min))
1009       (let (headers)
          ;; First pass: one header per article link.  The message id is
          ;; built from match groups 2 and 3 plus the group name; the
          ;; subject is the text up to the next "<br>" with tags and
          ;; carriage returns removed.  The date is left empty for now.
1010         (while (re-search-forward
1011                 "^\e$B"#\e(B<a href=\"\\(\\([0-9][0-9][0-9][0-9]\\)/past/\\([A-z]*[0-9]*\\)\\.html\\)\"> *"
1012                 nil t)
1013           (let ((id (format "<%s%s%%%s>"
1014                             (match-string 2)
1015                             (match-string 3)
1016                             nnshimbun-current-group))
1017                 (url (match-string 1)))
1018             (push (make-full-mail-header
1019                    0
1020                    (nnshimbun-mime-encode-string
1021                     (mapconcat 'identity
1022                                (split-string
1023                                 (buffer-substring
1024                                  (match-end 0)
1025                                  (progn (search-forward "<br>" nil t) (point)))
1026                                 "\\(<[^>]+>\\|\r\\)")
1027                                ""))
1028                    nnshimbun-from-address
1029                    "" id "" 0 0 (concat nnshimbun-url url))
1030                   headers)))
          ;; HEADERS was built by pushing; put it into page order so that
          ;; the Nth header meets the Nth "[MM/DD HH:MM]" stamp found from
          ;; here on.
1031         (setq headers (nreverse headers))
1032         (let ((i 0))
1033           (while (and (nth i headers)
1034                       (re-search-forward
1035                        "^\\[\\([0-9][0-9]\\)/\\([0-9][0-9]\\) \\([0-9][0-9]:[0-9][0-9]\\)\\]"
1036                        nil t))
1037             (let ((month (string-to-number (match-string 1)))
1038                   (date (decode-time (current-time))))
1039               (mail-header-set-date
1040                (nth i headers)
1041                (nnshimbun-make-date-string
                   ;; The stamp has no year: a December stamp seen in
                   ;; January is taken to be from the previous year.
1042                 (if (and (eq 12 month) (eq 1 (nth 4 date)))
1043                     (1- (nth 5 date))
1044                   (nth 5 date))
1045                 month
1046                 (string-to-number (match-string 2))
1047                 (match-string 3))))
1048             (setq i (1+ i))))
          ;; Return the headers in reverse page order again.
1049         (nreverse headers)))))
1050
1051
1052
1053 ;;; www.sponichi.co.jp
1054
;; Parse the index page in the current buffer and return a list of
;; headers made with `make-full-mail-header', in reverse page order, or
;; nil when the two marker strings are not both found.  The buffer is cut
;; down to the text between the first marker and the second line after
;; the second marker.
;; NOTE(review): both markers are escaped ISO-2022-JP strings -- confirm
;; their text against the decoded file.
1055 (defun nnshimbun-sponichi-get-headers ()
1056   (when (search-forward "\e$B%K%e!<%9%$%s%G%C%/%9\e(B" nil t)
1057     (delete-region (point-min) (point))
1058     (when (search-forward "\e$B%"%I%?%0\e(B" nil t)
1059       (forward-line 2)
1060       (delete-region (point) (point-max))
1061       (goto-char (point-min))
1062       (let ((case-fold-search t) headers)
          ;; Match groups: 1 = relative URL, 2 = section name,
          ;; 3/4/5 = year/month/day, 6 = two-digit article number.
1063         (while (re-search-forward
1064                 "^<a href=\"/\\(\\([A-z]*\\)/kiji/\\([0-9][0-9][0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([0-9][0-9]\\)\\.html\\)\">"
1065                 nil t)
1066           (let ((url (match-string 1))
1067                 (id (format "<%s%s%s%s%%%s>"
1068                             (match-string 3)
1069                             (match-string 4)
1070                             (match-string 5)
1071                             (match-string 6)
1072                             nnshimbun-current-group))
1073                 (date (nnshimbun-make-date-string
1074                        (string-to-number (match-string 3))
1075                        (string-to-number (match-string 4))
1076                        (string-to-number (match-string 5)))))
              ;; The subject is the text up to the next "<br>" with the
              ;; tags removed.
1077             (push (make-full-mail-header
1078                    0
1079                    (nnshimbun-mime-encode-string
1080                     (mapconcat 'identity
1081                                (split-string
1082                                 (buffer-substring
1083                                  (match-end 0)
1084                                  (progn (search-forward "<br>" nil t) (point)))
1085                                 "<[^>]+>")
1086                                ""))
1087                    nnshimbun-from-address
1088                    date id "" 0 0 (concat nnshimbun-url url))
1089                   headers)))
1090         headers))))
1091
1092
1093
1094 ;;; CNET Japan
1095
;; Parse the index page in the current buffer, starting at point, and
;; return a list of headers made with `make-full-mail-header', in reverse
;; page order.  Every article is found through a marker comment line: the
;; subject is the line after the marker and the link is expected two
;; lines above that subject line.
;; NOTE(review): the marker contains an escaped ISO-2022-JP string --
;; confirm its text against the decoded file.
1096 (defun nnshimbun-cnet-get-headers ()
1097   (let ((case-fold-search t) headers)
1098     (while (search-forward "\n<!--*****\e$B8+=P$7\e(B*****-->\n" nil t)
1099       (let ((subject (buffer-substring (point) (gnus-point-at-eol)))
1100             (point (point)))
1101         (forward-line -2)
          ;; Match groups: 1 = relative URL, 2 = four digits of the path,
          ;; 3 = article name, 4 and 5 = month and day taken from it.
1102         (when (looking-at "<a href=\"/\\(News/\\([0-9][0-9][0-9][0-9]\\)/Item/\\([0-9][0-9]\\([0-9][0-9]\\)\\([0-9][0-9]\\)-[0-9]+\\).html\\)\">")
1103           (let ((url (match-string 1))
1104                 (id  (format "<%s%s%%%s>"
1105                              (match-string 2)
1106                              (match-string 3)
1107                              nnshimbun-current-group))
1108                 (date (nnshimbun-make-date-string
1109                        (string-to-number (match-string 2))
1110                        (string-to-number (match-string 4))
1111                        (string-to-number (match-string 5)))))
1112             (push (make-full-mail-header
1113                    0
1114                    (nnshimbun-mime-encode-string subject)
1115                    nnshimbun-from-address
1116                    date id "" 0 0 (concat nnshimbun-url url))
1117                   headers)))
          ;; Go back to the subject line so that the next search starts
          ;; after this marker.
1118         (goto-char point)))
1119     headers))
1120
1121
1122
1123 ;;; Wired
1124
(defun nnshimbun-wired-get-all-headers ()
  "Fetch the Wired index pages and return the headers found, per group.
The value is an alist with one element (GROUP . HEADERS) for every
group in `nnshimbun-groups'; HEADERS holds the headers of the article
links found for GROUP, the most recently found one first.  A link whose
group name is not a key of the alist is ignored."
  (save-excursion
    (set-buffer nnshimbun-buffer)
    (let ((group-header-alist (mapcar (lambda (g) (cons g nil)) nnshimbun-groups))
          (case-fold-search t)
          ;; Match groups: 2 = path below `nnshimbun-url', 3 = group,
          ;; 4 = article number, 5/6/7 = year/month/day taken from it.
          (regexp (format
                   "<a href=\"\\(%s\\|/\\)\\(news/news/\\(%s\\)/story/\\(\\([0-9][0-9][0-9][0-9]\\)\\([0-9][0-9]\\)\\([0-9][0-9]\\)[0-9]+\\)\\.html\\)\"><b>"
                   (regexp-quote nnshimbun-url)
                   (nnshimbun-regexp-opt nnshimbun-groups))))
      (dolist (xover (list (concat nnshimbun-url "news/news/index.html")
                           (concat nnshimbun-url "news/news/last_seven.html")))
        (erase-buffer)
        (nnshimbun-retrieve-url xover t)
        (goto-char (point-min))
        (while (re-search-forward regexp nil t)
          (let* ((url   (concat nnshimbun-url (match-string 2)))
                 (group (downcase (match-string 3)))
                 (id    (format "<%s%%%s>" (match-string 4) group))
                 (date  (nnshimbun-make-date-string
                         (string-to-number (match-string 5))
                         (string-to-number (match-string 6))
                         (string-to-number (match-string 7))))
                 ;; The subject is the text up to "</b>" without tags.
                 (header (make-full-mail-header
                          0
                          (nnshimbun-mime-encode-string
                           (mapconcat 'identity
                                      (split-string
                                       (buffer-substring
                                        (match-end 0)
                                        (progn (search-forward "</b>" nil t) (point)))
                                       "<[^>]+>")
                                      ""))
                          nnshimbun-from-address
                          date id "" 0 0 url))
                 (x (assoc group group-header-alist)))
            ;; The match ignores case and GROUP is downcased, so it is
            ;; not necessarily a key of the alist; `setcdr' on nil would
            ;; signal an error and lose every header collected so far.
            (when x
              (setcdr x (cons header (cdr x)))))))
      group-header-alist)))
1162
1163
1164
1165 ;;; www.yomiuri.co.jp
1166
;; Fetch the index page named by `nnshimbun-index-url' into
;; `nnshimbun-buffer' and return an alist with one element
;; (GROUP . HEADERS) for every group in `nnshimbun-groups'.  The links of
;; a group are looked for between its "<!-- /news/GROUP=start -->" and
;; "<!-- /news/GROUP=end -->" lines; HEADERS lists them with the most
;; recently found one first.
1167 (defun nnshimbun-yomiuri-get-all-headers ()
1168   (save-excursion
1169     (set-buffer nnshimbun-buffer)
1170     (erase-buffer)
1171     (nnshimbun-retrieve-url (eval nnshimbun-index-url) t)
1172     (let ((case-fold-search t)
1173           (group-header-alist (mapcar (lambda (g) (cons g nil)) nnshimbun-groups)))
1174       (dolist (group nnshimbun-groups)
1175         (let (start)
1176           (goto-char (point-min))
1177           (when (and (search-forward (format "\n<!-- /news/%s=start -->\n" group) nil t)
1178                      (setq start (point))
1179                      (search-forward (format "\n<!-- /news/%s=end -->\n" group) nil t))
1180             (forward-line -1)
1181             (save-restriction
1182               (narrow-to-region start (point))
1183               (goto-char start)
                ;; Match groups: 1 = leading digits of the path, 2 = file
                ;; name, 3 = file name without ".htm", 4/5/6 =
                ;; year/month/day taken from it.
1184               (while (re-search-forward
1185                       "<a href=\"/\\([0-9]+\\)/\\(\\(\\([0-9][0-9][0-9][0-9]\\)\\([0-9][0-9]\\)\\([0-9][0-9]\\)[A-z0-9]+\\)\\.htm\\)\"[^>]*>"
1186                       nil t)
1187                 (let ((url   (concat (match-string 1) "a/" (match-string 2)))
1188                       (id    (format "<%s%s%%%s>"
1189                                      (match-string 1)
1190                                      (match-string 3)
1191                                      group))
1192                       (year  (string-to-number (match-string 4)))
1193                       (month (string-to-number (match-string 5)))
1194                       (day   (string-to-number (match-string 6)))
1195                       (subject (mapconcat
1196                                 'identity
1197                                 (split-string
1198                                  (buffer-substring
1199                                   (match-end 0)
1200                                   (progn (search-forward "<br>" nil t) (point)))
1201                                  "<[^>]+>")
1202                                 ""))
1203                       date x)
                    ;; Strip a leading marker from the subject.
                    ;; NOTE(review): the pattern is an escaped ISO-2022-JP
                    ;; character -- confirm against the decoded file.
1204                   (when (string-match "^\e$B"!\e(B" subject)
1205                     (setq subject (substring subject (match-end 0))))
                    ;; A trailing "(HH:MM)" is moved from the subject into
                    ;; the date.
1206                   (if (string-match "(\\([0-9][0-9]:[0-9][0-9]\\))$" subject)
1207                       (setq date (nnshimbun-make-date-string
1208                                   year month day (match-string 1 subject))
1209                             subject (substring subject 0 (match-beginning 0)))
1210                     (setq date (nnshimbun-make-date-string year month day)))
1211                   (setcdr (setq x (assoc group group-header-alist))
1212                           (cons (make-full-mail-header
1213                                  0
1214                                  (nnshimbun-mime-encode-string subject)
1215                                  nnshimbun-from-address
1216                                  date id "" 0 0 (concat nnshimbun-url url))
1217                                 (cdr x)))))))))
1218       group-header-alist)))
1219
1220
1221
1222 ;;; Zdnet Japan
1223
(defun nnshimbun-zdnet-get-headers ()
  "Parse the index page in the current buffer and return its headers.
HTML comments are deleted from the buffer first.  The value is a list of
headers made with `make-full-mail-header', in page order.  The date of
an article comes from its URL, whose two-digit year has 2000 added."
  (let ((case-fold-search t)
        headers)
    ;; Remove the comments so that links inside them are not seen.
    (goto-char (point-min))
    (let (comment-start)
      (while (and (search-forward "<!--" nil t)
                  (setq comment-start (match-beginning 0))
                  (search-forward "-->" nil t))
        (delete-region comment-start (point))))
    (goto-char (point-min))
    (while (re-search-forward
            "<a href=\"\\(/news/\\)?\\(\\([0-9][0-9]\\)\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([^\\.]+\\).html\\)\"><font size=\"4\"><strong>"
            nil t)
      (let* ((yy (match-string 3))
             (mm (match-string 4))
             (dd (match-string 5))
             (id (format "<%s%s%s%s%%%s>"
                         yy mm dd (match-string 6)
                         nnshimbun-current-group))
             (url (match-string 2))
             (date (nnshimbun-make-date-string
                    (+ 2000 (string-to-number yy))
                    (string-to-number mm)
                    (string-to-number dd)))
             ;; The subject is the text up to "</a>" without tags.
             (raw-subject (buffer-substring
                           (match-end 0)
                           (progn (search-forward "</a>" nil t) (point))))
             (subject (nnshimbun-mime-encode-string
                       (mapconcat 'identity
                                  (split-string raw-subject "<[^>]+>")
                                  ""))))
        (setq headers
              (cons (make-full-mail-header
                     0 subject nnshimbun-from-address date
                     id "" 0 0 (concat nnshimbun-url url))
                    headers))))
    (nreverse headers)))
1261
1262
1263
1264 ;;; MLs on www.mew.org
1265
(defmacro nnshimbun-mew-concat-url (url)
  "Expand to code that returns the absolute URL of URL.
The result is `nnshimbun-url', followed by the second element of the
entry of the current group in `nnshimbun-mew-groups', a slash and URL."
  (list 'concat
        'nnshimbun-url
        '(nth 1 (assoc nnshimbun-current-group nnshimbun-mew-groups))
        "/"
        url))
1271
(defmacro nnshimbun-mew-reverse-order-p ()
  "Expand to code that returns the third element of the current group's entry.
The entry is looked up in `nnshimbun-mew-groups'."
  '(nth 2 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
1274
(defmacro nnshimbun-mew-spew-p ()
  "Expand to code that returns the fourth element of the current group's entry.
The entry is looked up in `nnshimbun-mew-groups'."
  '(nth 3 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
1277
(defsubst nnshimbun-mew-retrieve-xover (aux)
  "Erase the buffer and retrieve index page number AUX of the current group.
Page 1 is \"index.html\"; any other page N is \"mailN.html\"."
  (erase-buffer)
  (let ((page (if (= aux 1)
                  "index.html"
                (format "mail%d.html" aux))))
    (nnshimbun-retrieve-url (nnshimbun-mew-concat-url page) t)))
1283
(defconst nnshimbun-mew-regexp
  ;; The dot before "html" is quoted so that it matches a literal dot
  ;; only, as in the other index regexps of this file.
  "<A[^>]*HREF=\"\\(msg\\([0-9]+\\)\\.html\\)\">\\([^<]+\\)<"
  "Regexp that matches one article link of an index page.
Match group 1 is the relative URL of the article, group 2 the number in
its file name and group 3 the link text, which is used as the subject.")
1285
(defmacro nnshimbun-mew-extract-header-values ()
  "Expand to code that records the article link that was just matched.
The code uses the match data of `nnshimbun-mew-regexp' and the variables
`url', `id', `subject' and `headers' of the place where it is expanded.
It moves to the next line, throws `headers' to the tag `stop' when
`nnshimbun-search-id' finds the message id, and otherwise pushes a new
header onto `headers'.  The From field is the text matched after
\"<EM>\" at point, or the empty string."
  '(progn
     (setq url (nnshimbun-mew-concat-url (match-string 1)))
     ;; The number in the message id is one less than the number in the
     ;; file name.
     (setq id (format "<%05d%%%s>"
                      (1- (string-to-number (match-string 2)))
                      nnshimbun-current-group))
     (setq subject (match-string 3))
     (forward-line 1)
     (when (nnshimbun-search-id nnshimbun-current-group id)
       (throw 'stop headers))
     (push (make-full-mail-header
            0
            (nnshimbun-mime-encode-string subject)
            (if (looking-at "<EM>\\([^<]+\\)<")
                (nnshimbun-mime-encode-string (match-string 1))
              "")
            "" id "" 0 0 url)
           headers)))
1304
(eval-and-compile
  (if (fboundp 'mime-entity-fetch-field)
      ;; For Semi-Gnus.
      (defmacro nnshimbun-mew-mail-header-subject (header)
        "Expand to code that fetches the Subject field of HEADER."
        (list 'mime-entity-fetch-field header ''Subject))
    ;; For pure Gnus.
    (defalias 'nnshimbun-mew-mail-header-subject 'mail-header-subject)))
1312
;; Return the new headers of the current group.  For a group whose entry
;; in `nnshimbun-mew-groups' has a non-nil fourth element, the headers
;; from `nnshimbun-mew-get-headers-1' are additionally filtered by
;; subject: a header is dropped when its subject is already the subject
;; field of a line in the group's NOV data.  The current buffer is
;; overwritten with a copy of that NOV data in the process.
1313 (defun nnshimbun-mew-get-headers ()
1314   (if (nnshimbun-mew-spew-p)
1315       (let ((headers (nnshimbun-mew-get-headers-1)))
1316         (erase-buffer)
1317         (insert-buffer-substring (nnshimbun-open-nov nnshimbun-current-group))
1318         (delq nil
1319               (mapcar
1320                (lambda (header)
1321                  (goto-char (point-min))
1322                  (let ((subject (nnshimbun-mew-mail-header-subject header))
1323                        (found))
1324                    (while (and (not found)
1325                                (search-forward subject nil t))
                       ;; The hit counts only when exactly one TAB lies
                       ;; between it and the beginning of its line, i.e.
                       ;; when it is in the second field of the line.
1326                      (if (not (and (search-backward "\t" nil t)
1327                                    (not (search-backward "\t" (gnus-point-at-bol) t))))
1328                          (forward-line 1)
1329                        (setq found t)))
1330                    (if found
1331                        nil
                       ;; Add the header to the buffer copy as well, so
                       ;; that a later header with the same subject in
                       ;; this batch is dropped too.
1332                      (goto-char (point-max))
1333                      (nnheader-insert-nov header)
1334                      header)))
1335                headers)))
1336     (nnshimbun-mew-get-headers-1)))
1337
;; Collect headers from the index pages of the current group, starting
;; with the page in the current buffer, and return them.  The number of
;; index pages comes from the "Last Page" link; nil is returned when that
;; link is not found.  Collecting stops as soon as
;; `nnshimbun-mew-extract-header-values' throws to `stop', which it does
;; for an article whose id `nnshimbun-search-id' finds.
1338 (defun nnshimbun-mew-get-headers-1 ()
1339   (let (headers)
1340     (when (re-search-forward
1341            "<A[^>]*HREF=\"mail\\([0-9]+\\)\\.html\">\\[?Last Page\\]?</A>" nil t)
1342       (let ((limit (string-to-number (match-string 1))))
1343         (catch 'stop
1344           (if (nnshimbun-mew-reverse-order-p)
                ;; Read pages 1, 2, ... LIMIT, each one from top to bottom.
                ;; The condition of the outer `while' scans the page in
                ;; the buffer before the next page is retrieved.
1345               (let ((aux 1))
1346                 (while (let (id url subject)
1347                          (while (re-search-forward nnshimbun-mew-regexp nil t)
1348                            (nnshimbun-mew-extract-header-values))
1349                          (< aux limit))
1350                   (nnshimbun-mew-retrieve-xover (setq aux (1+ aux)))))
              ;; Otherwise read pages LIMIT, LIMIT-1, ... 1, each one from
              ;; bottom to top.
1351             (while (> limit 0)
1352               (nnshimbun-mew-retrieve-xover limit)
1353               (setq limit (1- limit))
1354               (let (id url subject)
1355                 (goto-char (point-max))
1356                 (while (re-search-backward nnshimbun-mew-regexp nil t)
1357                   (nnshimbun-mew-extract-header-values)
                    ;; The macro moved one line forward; go back above the
                    ;; link that was just handled.
1358                   (forward-line -2)))))
1359           headers)))))
1360
1361
1362
1363 ;;; MLs on www.xemacs.org
1364
(defmacro nnshimbun-xemacs-concat-url (url)
  "Expand to code that returns the absolute URL of URL.
The result is `nnshimbun-url', followed by the name of the current
group, a slash and URL."
  (list 'concat 'nnshimbun-url 'nnshimbun-current-group "/" url))
1367
(defun nnshimbun-xemacs-get-headers ()
  "Collect the new headers of the current group and return them.
The page in the current buffer is searched, starting at point, for the
links to the monthly indexes.  Every monthly index is then retrieved in
the order in which its link appeared and is read from bottom to top.
Collecting stops at the first article whose message id
`nnshimbun-search-id' finds.  The buffer contents are replaced by the
retrieved pages."
  (let (headers auxs aux)
    (catch 'stop
      ;; The group name is literal text, so quote it for the regexp.
      (let ((index-regexp
             (concat "<A HREF=\"/" (regexp-quote nnshimbun-current-group)
                     "/\\([12][0-9][0-9][0-9][0-1][0-9]\\)/\">\\[Index\\]")))
        (while (re-search-forward index-regexp nil t)
          (push (match-string 1) auxs)))
      (setq auxs (nreverse auxs))
      (while auxs
        (erase-buffer)
        (nnshimbun-retrieve-url
         (nnshimbun-xemacs-concat-url (concat (setq aux (car auxs)) "/")))
        (let (id url subject)
          (goto-char (point-max))
          ;; Match groups: 1 = file name, 2 = article number, 3 = subject.
          (while (re-search-backward
                  "<A[^>]*HREF=\"\\(msg\\([0-9]+\\)\\.html\\)\">\\([^<]+\\)<"
                  nil t)
            (setq url (nnshimbun-xemacs-concat-url
                       (concat aux "/" (match-string 1)))
                  id (format "<%s%05d%%%s>"
                             aux
                             (string-to-number (match-string 2))
                             nnshimbun-current-group)
                  subject (match-string 3))
            (forward-line 1)
            (if (nnshimbun-search-id nnshimbun-current-group id)
                (throw 'stop headers)
              (push (make-full-mail-header
                     0
                     (nnshimbun-mime-encode-string subject)
                     ;; The From text comes from the page as well, so it
                     ;; is encoded like the subject, as the other
                     ;; mailing-list parsers in this file do.
                     (if (looking-at "<td><em>\\([^<]+\\)<")
                         (nnshimbun-mime-encode-string (match-string 1))
                       "")
                     "" id "" 0 0 url)
                    headers))
            (message "%s" id)
            ;; Go back above the link that was just handled.
            (forward-line -2)))
        (setq auxs (cdr auxs))))
    headers))
1407
1408 ;;; MLs on www.jp.netbsd.org
1409
(defun nnshimbun-netbsd-get-headers ()
  "Collect the new headers of the current group and return them.
The page in the current buffer is searched for the links to the monthly
archives.  The \"maillist.html\" page of every month is then retrieved
in the order in which its link appeared, and a header is made for every
article link on it.  Collecting stops at the first article whose
message id `nnshimbun-search-id' finds.  The buffer contents are
replaced by the retrieved pages."
  (let ((case-fold-search t)
        headers months)
    (goto-char (point-min))
    (while (re-search-forward "<A HREF=\"\\([0-9]+\\)/\\(threads.html\\)?\">" nil t)
      (setq months (cons (match-string 1) months)))
    (setq months (nreverse months))
    (catch 'exit
      (let (month id url subject)
        (while months
          (setq month (car months)
                months (cdr months))
          (erase-buffer)
          (nnshimbun-retrieve-url
           (format "%s%s/%s/maillist.html" nnshimbun-url nnshimbun-current-group month)
           t)
          ;; Match groups: 1 = file name, 2 = article number, 3 = subject.
          (while (re-search-forward
                  "<A[^>]*HREF=\"\\(msg\\([0-9]+\\)\\.html\\)\">\\([^<]+\\)</A>"
                  nil t)
            (setq url (format "%s%s/%s/%s"
                              nnshimbun-url
                              nnshimbun-current-group
                              month
                              (match-string 1)))
            (setq id (format "<%s%05d%%%s>"
                             month
                             (string-to-number (match-string 2))
                             nnshimbun-current-group))
            (setq subject (match-string 3))
            (when (nnshimbun-search-id nnshimbun-current-group id)
              (throw 'exit headers))
            (push (make-full-mail-header
                   0
                   (nnshimbun-mime-encode-string subject)
                   ;; The sender follows the link on the same line.
                   (if (looking-at "</STRONG> *<EM>\\([^<]+\\)<")
                       (nnshimbun-mime-encode-string (match-string 1))
                     "")
                   "" id "" 0 0 url)
                  headers))))
      headers)))
1447
1448 ;;; MLs using fml
(defun nnshimbun-fml-get-headers ()
  "Collect headers of not-yet-known articles from an fml HTML archive.
Scanning of the current buffer starts at point and looks for links of
the form \"NUMBER/index.html\", \"NUMBER.week/index.html\" or
\"NUMBER.month/index.html\".  Each linked directory is then fetched
into the current buffer (whose previous contents are erased), tabs are
replaced by spaces, and the article entries are scanned from the top.
Scanning stops at the first article whose Message-ID is already known
to `nnshimbun-search-id'.

Return the list of headers built with `make-full-mail-header'; every
new header is pushed onto the front of that list."
  (let ((index-regexp
         "<a href=\"\\([0-9]+\\(\\.week\\|\\.month\\)?\\)/index\\.html\">")
        ;; One article entry.  Groups: 1 = file name, 2 = article number,
        ;; 3 = date, 4 = subject, 5 = sender.
        (article-regexp
         (concat "<LI><A HREF=\"\\([0-9]+\\.html\\)\">Article .*</A> "
                 "<DIV><SPAN CLASS=article>Article "
                 "<SPAN CLASS=article-value>\\([0-9]+\\)</SPAN></SPAN> at "
                 "<SPAN CLASS=Date-value>\\([^<]*\\)</SPAN> "
                 "<SPAN CLASS=Subject>Subject: "
                 "<SPAN CLASS=Subject-value>\\([^<]*\\)</SPAN></SPAN></DIV>"
                 "<DIV><SPAN CLASS=From>From: "
                 "<SPAN CLASS=From-value>\\([^<]*\\)</SPAN></SPAN></DIV>"))
        headers auxs aux)
    (catch 'stop
      ;; Gather the index directories in the order they appear.
      (while (re-search-forward index-regexp nil t)
        (push (match-string 1) auxs))
      (setq auxs (nreverse auxs))
      (while auxs
        (erase-buffer)
        (nnshimbun-retrieve-url
         (concat nnshimbun-url (setq aux (car auxs)) "/"))
        ;; The entry regexp only contains plain spaces.
        (subst-char-in-region (point-min) (point-max) ?\t ?  t)
        (let (id url date subject from)
          (goto-char (point-min))
          (while (re-search-forward article-regexp nil t)
            ;; Read the match data before anything else can change it.
            (setq url (concat nnshimbun-url aux "/" (match-string 1))
                  id (format "<%s%05d%%%s>"
                             aux
                             (string-to-number (match-string 2))
                             nnshimbun-current-group)
                  date (match-string 3)
                  subject (match-string 4)
                  from (match-string 5))
            (forward-line 1)
            (if (nnshimbun-search-id nnshimbun-current-group id)
                (throw 'stop headers)
              ;; NOTE(review): unlike the subject, FROM is stored as
              ;; found in the page, without going through
              ;; `nnshimbun-mime-encode-string' -- confirm this is
              ;; intended.
              (push (make-full-mail-header
                     0
                     (nnshimbun-mime-encode-string subject)
                     from date id "" 0 0 url)
                    headers))))
        (setq auxs (cdr auxs))))
    headers))
1484
1485 (provide 'nnshimbun)
1486 ;;; nnshimbun.el ends here.