Synch with Gnus.
[elisp/gnus.git-] / lisp / nnshimbun.el
1 ;;; nnshimbun.el --- interfacing with web newspapers -*- coding: junet; -*-
2
3 ;; Authors: TSUCHIYA Masatoshi <tsuchiya@pine.kuee.kyoto-u.ac.jp>
4 ;;          Akihiro Arisawa    <ari@atesoft.advantest.co.jp>
5 ;; Keywords: news
6
7 ;;; Copyright:
8
9 ;; This file is a part of Semi-Gnus.
10
11 ;; This program is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; This program is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with this program; if not, you can either send email to this
23 ;; program's maintainer or write to: The Free Software Foundation,
24 ;; Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;; Gnus backend to read newspapers on WEB.
29
30
31 ;;; Defintinos:
32
33 (gnus-declare-backend "nnshimbun" 'address)
34
35 (eval-when-compile (require 'cl))
36 (eval-when-compile (require 'static))
37
38 (require 'nnheader)
39 (require 'nnmail)
40 (require 'nnoo)
41 (require 'gnus-bcklg)
42 (eval-when-compile (ignore-errors (require 'nnweb)))
43 ;; Report failure to find w3 at load time if appropriate.
44 (eval '(require 'nnweb))
45 (require 'mcharset)
46
47
48 (nnoo-declare nnshimbun)
49
50 (defvar nnshimbun-check-interval 300)
51
52 (defconst nnshimbun-mew-groups
53   '(("meadow-develop" "meadow-develop" nil t)
54     ("meadow-users-jp" "meadow-users-jp")
55     ("mule-win32" "mule-win32")
56     ("mew-win32" "mew-win32")
57     ("mew-dist" "mew-dist/3300" t)
58     ("mgp-users-jp" "mgp-users-jp/A" t t)))
59
60 (defvar nnshimbun-type-definition
61   `(("asahi"
62      (url . "http://spin.asahi.com/")
63      (groups "national" "business" "politics" "international" "sports" "personal" "feneral")
64      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
65      (generate-nov   . nnshimbun-generate-nov-for-each-group)
66      (get-headers    . nnshimbun-asahi-get-headers)
67      (index-url      . (format "%sp%s.html" nnshimbun-url nnshimbun-current-group))
68      (from-address   . "webmaster@www.asahi.com")
69      (make-contents  . nnshimbun-make-text-or-html-contents)
70      (contents-start . "\n<!-- Start of kiji -->\n")
71      (contents-end   . "\n<!-- End of kiji -->\n"))
72     ("sponichi"
73      (url . "http://www.sponichi.co.jp/")
74      (groups "baseball" "soccer" "usa" "others" "society" "entertainment" "horseracing")
75      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
76      (generate-nov   . nnshimbun-generate-nov-for-each-group)
77      (get-headers    . nnshimbun-sponichi-get-headers)
78      (index-url      . (format "%s%s/index.html" nnshimbun-url nnshimbun-current-group))
79      (from-address   . "webmaster@www.sponichi.co.jp")
80      (make-contents  . nnshimbun-make-text-or-html-contents)
81      (contents-start . "\n<span class=\"text\">\e$B!!\e(B")
82      (contents-end   . "\n"))
83     ("cnet"
84      (url . "http://cnet.sphere.ne.jp/")
85      (groups "comp")
86      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
87      (generate-nov   . nnshimbun-generate-nov-for-each-group)
88      (get-headers    . nnshimbun-cnet-get-headers)
89      (index-url      . (format "%s/News/Oneweek/" nnshimbun-url))
90      (from-address   . "cnet@sphere.ad.jp")
91      (make-contents  . nnshimbun-make-html-contents)
92      (contents-start . "\n<!--KIJI-->\n")
93      (contents-end   . "\n<!--/KIJI-->\n"))
94     ("wired"
95      (url . "http://www.hotwired.co.jp/")
96      (groups "business" "culture" "technology")
97      (coding-system  . ,(static-if (boundp 'MULE) '*euc-japan* 'euc-jp))
98      (generate-nov   . nnshimbun-generate-nov-for-all-groups)
99      (get-headers    . nnshimbun-wired-get-all-headers)
100      (index-url)
101      (from-address   . "webmaster@www.hotwired.co.jp")
102      (make-contents  . nnshimbun-make-html-contents)
103      (contents-start . "\n<!-- START_OF_BODY -->\n")
104      (contents-end   . "\n<!-- END_OF_BODY -->\n"))
105     ("yomiuri"
106      (url . "http://www.yomiuri.co.jp/")
107      (groups "shakai" "sports" "seiji" "keizai" "kokusai" "fuho")
108      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
109      (generate-nov   . nnshimbun-generate-nov-for-all-groups)
110      (get-headers    . nnshimbun-yomiuri-get-all-headers)
111      (index-url      . (concat nnshimbun-url "main.htm"))
112      (from-address   . "webmaster@www.yomiuri.co.jp")
113      (make-contents  . nnshimbun-make-text-or-html-contents)
114      (contents-start . "\n<!--  honbun start  -->\n")
115      (contents-end   . "\n<!--  honbun end  -->\n"))
116     ("zdnet"
117      (url . "http://www.zdnet.co.jp/news/")
118      (groups "comp")
119      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
120      (generate-nov   . nnshimbun-generate-nov-for-each-group)
121      (get-headers    . nnshimbun-zdnet-get-headers)
122      (index-url      . nnshimbun-url)
123      (from-address   . "zdnn@softbank.co.jp")
124      (make-contents  . nnshimbun-make-html-contents)
125      (contents-start . "\\(<!--BODY-->\\|<!--DATE-->\\)")
126      (contents-end   . "\\(<!--BODYEND-->\\|<!--BYLINEEND-->\\)"))
127     ("mew"
128      (url . "http://www.mew.org/archive/")
129      (groups ,@(mapcar #'car nnshimbun-mew-groups))
130      (coding-system . ,(static-if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
131      (generate-nov  . nnshimbun-generate-nov-for-each-group)
132      (get-headers   . nnshimbun-mew-get-headers)
133      (index-url     . (nnshimbun-mew-concat-url "index.html"))
134      (make-contents . nnshimbun-make-mhonarc-contents))
135     ("xemacs"
136      (url . "http://www.xemacs.org/list-archives/")
137      (groups "xemacs-announce" "xemacs-beta-ja" "xemacs-beta"
138              "xemacs-build-reports" "xemacs-cvs" "xemacs-mule"
139              "xemacs-nt" "xemacs-patches" "xemacs-users-ja" "xemacs")
140      (coding-system . ,(static-if (boundp 'MULE) '*euc-japan* 'euc-jp))
141      (generate-nov  . nnshimbun-generate-nov-for-each-group)
142      (get-headers   . nnshimbun-xemacs-get-headers)
143      (index-url     . (nnshimbun-xemacs-concat-url nil))
144      (make-contents . nnshimbun-make-mhonarc-contents))
145     ("netbsd"
146      (url . "http://www.jp.netbsd.org/ja/JP/ml/")
147      (groups "announce-ja" "junk-ja" "tech-misc-ja" "tech-pkg-ja"
148              "port-arm32-ja" "port-hpcmips-ja" "port-mac68k-ja"
149              "port-mips-ja" "port-powerpc-ja" "hpcmips-changes-ja"
150              "members-ja" "admin-ja" "www-changes-ja")
151      (coding-system  . ,(static-if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
152      (generate-nov   . nnshimbun-generate-nov-for-each-group)
153      (get-headers    . nnshimbun-netbsd-get-headers)
154      (index-url      . (format "%s%s/index.html" nnshimbun-url nnshimbun-current-group))
155      (make-contents  . nnshimbun-make-mhonarc-contents))
156     ))
157
158 (defvar nnshimbun-x-face-alist
159   '(("default" .
160      (("default" .
161        "X-Face: Ygq$6P.,%Xt$U)DS)cRY@k$VkW!7(X'X'?U{{osjjFG\"E]hND;SPJ-J?O?R|a?L
162         g2$0rVng=O3Lt}?~IId8Jj&vP^3*o=LKUyk(`t%0c!;t6REk=JbpsEn9MrN7gZ%"))))
163   "Alist of server vs. alist of group vs. X-Face field.  It looks like:
164
165 \((\"asahi\" . ((\"national\" . \"X-face: ***\")
166              (\"business\" . \"X-Face: ***\")
167                 ;;
168                 ;;
169              (\"default\" . \"X-face: ***\")))
170  (\"sponichi\" . ((\"baseball\" . \"X-face: ***\")
171                 (\"soccer\" . \"X-Face: ***\")
172                 ;;
173                 ;;
174                 (\"default\" . \"X-face: ***\")))
175                 ;;
176  (\"default\" . ((\"default\" . \"X-face: ***\")))")
177
178 (defvoo nnshimbun-directory (nnheader-concat gnus-directory "shimbun/")
179   "Where nnshimbun will save its files.")
180
181 (defvoo nnshimbun-nov-is-evil nil
182   "*Non-nil means that nnshimbun will never retrieve NOV headers.")
183
184 (defvoo nnshimbun-nov-file-name ".overview")
185
186 (defvoo nnshimbun-pre-fetch-article nil
187   "*Non nil means that nnshimbun fetch unread articles when scanning groups.")
188
189 ;; set by nnshimbun-possibly-change-group
190 (defvoo nnshimbun-buffer nil)
191 (defvoo nnshimbun-current-directory nil)
192 (defvoo nnshimbun-current-group nil)
193
194 ;; set by nnshimbun-open-server
195 (defvoo nnshimbun-url nil)
196 (defvoo nnshimbun-coding-system nil)
197 (defvoo nnshimbun-groups nil)
198 (defvoo nnshimbun-generate-nov nil)
199 (defvoo nnshimbun-get-headers nil)
200 (defvoo nnshimbun-index-url nil)
201 (defvoo nnshimbun-from-address nil)
202 (defvoo nnshimbun-make-contents nil)
203 (defvoo nnshimbun-contents-start nil)
204 (defvoo nnshimbun-contents-end nil)
205 (defvoo nnshimbun-server-directory nil)
206
207 (defvoo nnshimbun-status-string "")
208 (defvoo nnshimbun-nov-last-check nil)
209 (defvoo nnshimbun-nov-buffer-alist nil)
210 (defvoo nnshimbun-nov-buffer-file-name nil)
211
212 (defvoo nnshimbun-keep-backlog 300)
213 (defvoo nnshimbun-backlog-articles nil)
214 (defvoo nnshimbun-backlog-hashtb nil)
215
216 (defconst nnshimbun-meta-content-type-charset-regexp
217   (eval-when-compile
218     (concat "<meta[ \t]+http-equiv=\"?Content-type\"?[ \t]+content=\"\\([^;]+\\)"
219             ";[ \t]*charset=\"?\\([^\"]+\\)\"?"
220             ">"))
221   "Regexp used in parsing `<META HTTP-EQUIV=\"Content-Type\" content=\"...;charset=...\">
222 for a charset indication")
223
224 (defconst nnshimbun-meta-charset-content-type-regexp
225   (eval-when-compile
226     (concat "<meta[ \t]+content=\"\\([^;]+\\)"
227             ";[ \t]*charset=\"?\\([^\"]+\\)\"?"
228             "[ \t]+http-equiv=\"?Content-type\"?>"))
229   "Regexp used in parsing `<META content=\"...;charset=...\" HTTP-EQUIV=\"Content-Type\">
230 for a charset indication")
231
232
233
234 ;;; backlog
235 (defmacro nnshimbun-backlog (&rest form)
236   `(let ((gnus-keep-backlog nnshimbun-keep-backlog)
237          (gnus-backlog-buffer (format " *nnshimbun backlog %s*" (nnoo-current-server 'nnshimbun)))
238          (gnus-backlog-articles nnshimbun-backlog-articles)
239          (gnus-backlog-hashtb nnshimbun-backlog-hashtb))
240      (unwind-protect
241          (progn ,@form)
242        (setq nnshimbun-backlog-articles gnus-backlog-articles
243              nnshimbun-backlog-hashtb gnus-backlog-hashtb))))
244 (put 'nnshimbun-backlog 'lisp-indent-function 0)
245 (put 'nnshimbun-backlog 'edebug-form-spec '(form body))
246
247
248
249 ;;; Interface Functions
250 (nnoo-define-basics nnshimbun)
251
252 (deffoo nnshimbun-open-server (server &optional defs)
253   ;; Set default values.
254   (dolist (default (cdr (assoc server nnshimbun-type-definition)))
255     (let ((symbol (intern (concat "nnshimbun-" (symbol-name (car default))))))
256       (unless (assq symbol defs)
257         (push (list symbol (cdr default)) defs))))
258   ;; Set directory for server working files.
259   (push (list 'nnshimbun-server-directory
260               (file-name-as-directory
261                (expand-file-name server nnshimbun-directory)))
262         defs)
263   (nnoo-change-server 'nnshimbun server defs)
264   (nnshimbun-possibly-change-group nil server)
265   ;; Make directories.
266   (unless (file-exists-p nnshimbun-directory)
267     (ignore-errors (make-directory nnshimbun-directory t)))
268   (cond
269    ((not (file-exists-p nnshimbun-directory))
270     (nnshimbun-close-server)
271     (nnheader-report 'nnshimbun "Couldn't create directory: %s" nnshimbun-directory))
272    ((not (file-directory-p (file-truename nnshimbun-directory)))
273     (nnshimbun-close-server)
274     (nnheader-report 'nnshimbun "Not a directory: %s" nnshimbun-directory))
275    (t
276     (unless (file-exists-p nnshimbun-server-directory)
277       (ignore-errors (make-directory nnshimbun-server-directory t)))
278     (cond
279      ((not (file-exists-p nnshimbun-server-directory))
280       (nnshimbun-close-server)
281       (nnheader-report 'nnshimbun "Couldn't create directory: %s" nnshimbun-server-directory))
282      ((not (file-directory-p (file-truename nnshimbun-server-directory)))
283       (nnshimbun-close-server)
284       (nnheader-report 'nnshimbun "Not a directory: %s" nnshimbun-server-directory))
285      (t
286       (nnheader-report 'nnshimbun "Opened server %s using directory %s"
287                        server nnshimbun-server-directory)
288       t)))))
289
290 (deffoo nnshimbun-close-server (&optional server)
291   (and (nnshimbun-server-opened server)
292        (gnus-buffer-live-p nnshimbun-buffer)
293        (kill-buffer nnshimbun-buffer))
294   (nnshimbun-backlog (gnus-backlog-shutdown))
295   (nnshimbun-save-nov)
296   (nnoo-close-server 'nnshimbun server)
297   t)
298
299 (static-when (boundp 'MULE)
300   (unless (coding-system-p 'euc-japan)
301     (copy-coding-system '*euc-japan* 'euc-japan))
302   (unless (coding-system-p 'shift_jis)
303     (copy-coding-system '*sjis* 'shift_jis))
304   (eval-and-compile
305     (defalias-maybe 'coding-system-category 'get-code-mnemonic)))
306
307 (defun nnshimbun-retrieve-url (url &optional no-cache)
308   "Rertrieve URL contents and insert to current buffer."
309   (let ((buf (current-buffer))
310         (url-working-buffer url-working-buffer))
311     (let ((old-asynch (default-value 'url-be-asynchronous))
312           (old-caching (default-value 'url-automatic-caching))
313           (old-mode (default-value 'url-standalone-mode)))
314       (setq-default url-be-asynchronous nil)
315       (when no-cache
316         (setq-default url-automatic-caching nil)
317         (setq-default url-standalone-mode nil))
318       (unwind-protect
319           (let ((coding-system-for-read 'binary)
320                 (coding-system-for-write 'binary)
321                 (input-coding-system 'binary)
322                 (output-coding-system 'binary)
323                 (default-enable-multibyte-characters nil))
324             (set-buffer
325              (setq url-working-buffer
326                    (cdr (url-retrieve url no-cache))))
327             (url-uncompress))
328         (setq-default url-be-asynchronous old-asynch)
329         (setq-default url-automatic-caching old-caching)
330         (setq-default url-standalone-mode old-mode)))
331     (let ((charset
332            (or url-current-mime-charset
333                (let ((case-fold-search t))
334                  (goto-char (point-min))
335                  (if (or (re-search-forward
336                           nnshimbun-meta-content-type-charset-regexp nil t)
337                          (re-search-forward
338                           nnshimbun-meta-charset-content-type-regexp nil t))
339                      (buffer-substring-no-properties (match-beginning 2)
340                                                      (match-end 2)))))))
341       (decode-coding-region
342        (point-min) (point-max)
343        (if charset
344            (let ((mime-charset-coding-system-alist
345                   (append '((euc-jp . euc-japan)
346                             (shift-jis . shift_jis)
347                             (shift_jis . shift_jis)
348                             (sjis . shift_jis)
349                             (x-euc-jp . euc-japan)
350                             (x-shift-jis . shift_jis)
351                             (x-shift_jis . shift_jis)
352                             (x-sjis . shift_jis))
353                           mime-charset-coding-system-alist)))
354              (mime-charset-to-coding-system charset))
355          (let ((default (condition-case nil
356                             (coding-system-category nnshimbun-coding-system)
357                           (error nil)))
358                (candidate (detect-coding-region (point-min) (point-max))))
359            (unless (listp candidate)
360              (setq candidate (list candidate)))
361            (catch 'coding
362              (dolist (coding candidate)
363                (if (eq default (coding-system-category coding))
364                    (throw 'coding coding)))
365              (if (eq (coding-system-category 'binary)
366                      (coding-system-category (car candidate)))
367                  nnshimbun-coding-system
368                (car candidate)))))))
369     (set-buffer-multibyte t)
370     (set-buffer buf)
371     (insert-buffer url-working-buffer)
372     (kill-buffer url-working-buffer)))
373
374 (deffoo nnshimbun-request-article (article &optional group server to-buffer)
375   (when (nnshimbun-possibly-change-group group server)
376     (if (stringp article)
377         (setq article (nnshimbun-search-id group article)))
378     (if (integerp article)
379         (nnshimbun-request-article-1 article group server to-buffer)
380       (nnheader-report 'nnml "Couldn't retrieve article: %s" (prin1-to-string article))
381       nil)))
382
383 (defsubst nnshimbun-header-xref (x)
384   (if (and (setq x (mail-header-xref x))
385            (string-match "^Xref: " x))
386       (substring x 6)
387     x))
388
389 (defun nnshimbun-request-article-1 (article &optional group server to-buffer)
390   (if (nnshimbun-backlog
391         (gnus-backlog-request-article
392          group article (or to-buffer nntp-server-buffer)))
393       (cons group article)
394     (let (header contents)
395       (when (setq header (save-excursion
396                            (set-buffer (nnshimbun-open-nov group))
397                            (and (nnheader-find-nov-line article)
398                                 (nnheader-parse-nov))))
399         (let* ((xref (nnshimbun-header-xref header))
400                (x-faces (cdr (or (assoc (or server
401                                             (nnoo-current-server 'nnshimbun))
402                                         nnshimbun-x-face-alist)
403                                  (assoc "default" nnshimbun-x-face-alist))))
404                (x-face (cdr (or (assoc group x-faces)
405                                 (assoc "default" x-faces)))))
406           (save-excursion
407             (set-buffer nnshimbun-buffer)
408             (erase-buffer)
409             (nnshimbun-retrieve-url xref)
410             (nnheader-message 6 "nnshimbun: Make contents...")
411             (goto-char (point-min))
412             (setq contents (funcall nnshimbun-make-contents header x-face))
413             (nnheader-message 6 "nnshimbun: Make contents...done"))))
414       (when contents
415         (save-excursion
416           (set-buffer (or to-buffer nntp-server-buffer))
417           (erase-buffer)
418           (insert contents)
419           (nnshimbun-backlog
420             (gnus-backlog-enter-article group article (current-buffer)))
421           (nnheader-report 'nnshimbun "Article %s retrieved" (mail-header-id header))
422           (cons group (mail-header-number header)))))))
423
424 (deffoo nnshimbun-request-group (group &optional server dont-check)
425   (let ((pathname-coding-system 'binary))
426     (cond
427      ((not (nnshimbun-possibly-change-group group server))
428       (nnheader-report 'nnshimbun "Invalid group (no such directory)"))
429      ((not (file-exists-p nnshimbun-current-directory))
430       (nnheader-report 'nnshimbun "Directory %s does not exist"
431                        nnshimbun-current-directory))
432      ((not (file-directory-p nnshimbun-current-directory))
433       (nnheader-report 'nnshimbun "%s is not a directory" nnshimbun-current-directory))
434      (dont-check
435       (nnheader-report 'nnshimbun "Group %s selected" group)
436       t)
437      (t
438       (let (beg end lines)
439         (save-excursion
440           (set-buffer (nnshimbun-open-nov group))
441           (goto-char (point-min))
442           (setq beg (ignore-errors (read (current-buffer))))
443           (goto-char (point-max))
444           (forward-line -1)
445           (setq end (ignore-errors (read (current-buffer)))
446                 lines (count-lines (point-min) (point-max))))
447         (nnheader-report 'nnshimbunw "Selected group %s" group)
448         (nnheader-insert "211 %d %d %d %s\n"
449                          lines (or beg 0) (or end 0) group))))))
450
451 (deffoo nnshimbun-request-scan (&optional group server)
452   (nnshimbun-possibly-change-group group server)
453   (nnshimbun-generate-nov-database group))
454
455 (deffoo nnshimbun-close-group (group &optional server)
456   (nnshimbun-write-nov group)
457   t)
458
459 (deffoo nnshimbun-request-list (&optional server)
460   (save-excursion
461     (set-buffer nntp-server-buffer)
462     (erase-buffer)
463     (dolist (group nnshimbun-groups)
464       (when (nnshimbun-possibly-change-group group server)
465         (let (beg end)
466           (save-excursion
467             (set-buffer (nnshimbun-open-nov group))
468             (goto-char (point-min))
469             (setq beg (ignore-errors (read (current-buffer))))
470             (goto-char (point-max))
471             (forward-line -1)
472             (setq end (ignore-errors (read (current-buffer)))))
473           (insert (format "%s %d %d n\n" group (or end 0) (or beg 0)))))))
474   t) ; return value
475
476 (eval-and-compile
477   (if (fboundp 'mime-entity-fetch-field)
478       ;; For Semi-Gnus.
479       (defun nnshimbun-insert-header (header)
480         (insert "Subject: " (or (mime-entity-fetch-field header 'Subject) "(none)") "\n"
481                 "From: " (or (mime-entity-fetch-field header 'From) "(nobody)") "\n"
482                 "Date: " (or (mail-header-date header) "") "\n"
483                 "Message-ID: " (or (mail-header-id header) (nnmail-message-id)) "\n")
484         (let ((refs (mail-header-references header)))
485           (and refs
486                (string< "" refs)
487                (insert "References: " refs "\n")))
488         (insert "Lines: " (number-to-string (or (mail-header-lines header) 0)) "\n"
489                 "Xref: " (nnshimbun-header-xref header) "\n"))
490     ;; For pure Gnus.
491     (defun nnshimbun-insert-header (header)
492       (nnheader-insert-header header)
493       (delete-char -1)
494       (insert "Xref: " (nnshimbun-header-xref header) "\n"))))
495
496 (deffoo nnshimbun-retrieve-headers (articles &optional group server fetch-old)
497   (when (nnshimbun-possibly-change-group group server)
498     (if (nnshimbun-retrieve-headers-with-nov articles fetch-old)
499         'nov
500       (save-excursion
501         (set-buffer nntp-server-buffer)
502         (erase-buffer)
503         (let (header)
504           (dolist (art articles)
505             (if (stringp art)
506                 (setq art (nnshimbun-search-id group art)))
507             (if (integerp art)
508                 (when (setq header
509                             (save-excursion
510                               (set-buffer (nnshimbun-open-nov group))
511                               (and (nnheader-find-nov-line art)
512                                    (nnheader-parse-nov))))
513                   (insert (format "220 %d Article retrieved.\n" art))
514                   (nnshimbun-insert-header header)
515                   (insert ".\n")
516                   (delete-region (point) (point-max))))))
517         'header))))
518
519 (defun nnshimbun-retrieve-headers-with-nov (articles &optional fetch-old)
520   (if (or gnus-nov-is-evil nnshimbun-nov-is-evil)
521       nil
522     (let ((nov (expand-file-name nnshimbun-nov-file-name nnshimbun-current-directory)))
523       (when (file-exists-p nov)
524         (save-excursion
525           (set-buffer nntp-server-buffer)
526           (erase-buffer)
527           (nnheader-insert-file-contents nov)
528           (if (and fetch-old (not (numberp fetch-old)))
529               t                         ; Don't remove anything.
530             (nnheader-nov-delete-outside-range
531              (if fetch-old (max 1 (- (car articles) fetch-old))
532                (car articles))
533              (car (last articles)))
534             t))))))
535
536
537
538 ;;; Nov Database Operations
539
540 (defun nnshimbun-generate-nov-database (group)
541   (prog1 (funcall nnshimbun-generate-nov group)
542     (nnshimbun-write-nov group)))
543
544 (defun nnshimbun-generate-nov-for-each-group (group)
545   (nnshimbun-possibly-change-group group)
546   (save-excursion
547     (set-buffer (nnshimbun-open-nov group))
548     (let (i)
549       (goto-char (point-max))
550       (forward-line -1)
551       (setq i (or (ignore-errors (read (current-buffer))) 0))
552       (dolist (header (save-excursion
553                         (set-buffer nnshimbun-buffer)
554                         (erase-buffer)
555                         (nnshimbun-retrieve-url (eval nnshimbun-index-url) t)
556                         (goto-char (point-min))
557                         (funcall nnshimbun-get-headers)))
558         (unless (nnshimbun-search-id group (mail-header-id header))
559           (mail-header-set-number header (setq i (1+ i)))
560           (goto-char (point-max))
561           (nnheader-insert-nov header)
562           (if nnshimbun-pre-fetch-article
563               (nnshimbun-request-article-1 i group nil nnshimbun-buffer)))))))
564
565 (defun nnshimbun-generate-nov-for-all-groups (&rest args)
566   (unless (and nnshimbun-nov-last-check
567                (< (nnshimbun-lapse-seconds nnshimbun-nov-last-check)
568                   nnshimbun-check-interval))
569     (save-excursion
570       (dolist (list (funcall nnshimbun-get-headers))
571         (let ((group (car list)))
572           (nnshimbun-possibly-change-group group)
573           (when (cdr list)
574             (set-buffer (nnshimbun-open-nov group))
575             (let (i)
576               (goto-char (point-max))
577               (forward-line -1)
578               (setq i (or (ignore-errors (read (current-buffer))) 0))
579               (dolist (header (cdr list))
580                 (unless (nnshimbun-search-id group (mail-header-id header))
581                   (mail-header-set-number header (setq i (1+ i)))
582                   (goto-char (point-max))
583                   (nnheader-insert-nov header)
584                   (if nnshimbun-pre-fetch-article
585                       (nnshimbun-request-article-1 i group nil nnshimbun-buffer))))))))
586       (nnshimbun-save-nov)
587       (setq nnshimbun-nov-last-check (current-time)))))
588
589 (defun nnshimbun-search-id (group id &optional nov)
590   (save-excursion
591     (set-buffer (nnshimbun-open-nov group))
592     (goto-char (point-min))
593     (let (found)
594       (while (and (not found)
595                   (search-forward id nil t)) ; We find the ID.
596         ;; And the id is in the fourth field.
597         (if (not (and (search-backward "\t" nil t 4)
598                       (not (search-backward "\t" (gnus-point-at-bol) t))))
599             (forward-line 1)
600           (forward-line 0)
601           (setq found t)))
602       (unless found
603         (goto-char (point-min))
604         (when (search-forward (concat "X-Nnshimbun-Id: " id) nil t)
605           (forward-line 0)
606           (setq found t)))
607       (if found
608           (if nov
609               (nnheader-parse-nov)
610             ;; We return the article number.
611             (ignore-errors (read (current-buffer))))))))
612
613 (defun nnshimbun-nov-fix-header (group header args)
614   (save-excursion
615     (set-buffer (nnshimbun-open-nov group))
616     (when (nnheader-find-nov-line (mail-header-number header))
617       (dolist (arg args)
618         (if (eq (car arg) 'id)
619             (let ((extra (mail-header-extra header)))
620               (unless (assq 'X-Nnshimbun-Id extra)
621                 (mail-header-set-extra
622                  header
623                  (cons (cons 'X-Nnshimbun-Id (mail-header-id header))
624                        extra)))
625               (mail-header-set-id header (cdr arg)))
626           (let ((func (intern (concat "mail-header-set-" (symbol-name (car arg))))))
627             (if (cdr arg) (eval (list func header (cdr arg)))))))
628       (mail-header-set-xref header (nnshimbun-header-xref header))
629       (delete-region (point) (progn (forward-line 1) (point)))
630       (nnheader-insert-nov header))))
631
632 (defun nnshimbun-open-nov (group)
633   (let ((buffer (cdr (assoc group nnshimbun-nov-buffer-alist))))
634     (if (buffer-live-p buffer)
635         buffer
636       (setq buffer (gnus-get-buffer-create
637                     (format " *nnshimbun overview %s %s*"
638                             (nnoo-current-server 'nnshimbun) group)))
639       (save-excursion
640         (set-buffer buffer)
641         (set (make-local-variable 'nnshimbun-nov-buffer-file-name)
642              (expand-file-name
643               nnshimbun-nov-file-name
644               (nnmail-group-pathname group nnshimbun-server-directory)))
645         (erase-buffer)
646         (when (file-exists-p nnshimbun-nov-buffer-file-name)
647           (nnheader-insert-file-contents nnshimbun-nov-buffer-file-name))
648         (set-buffer-modified-p nil))
649       (push (cons group buffer) nnshimbun-nov-buffer-alist)
650       buffer)))
651
652 (defun nnshimbun-write-nov (group)
653   (let ((buffer (cdr (assoc group nnshimbun-nov-buffer-alist))))
654     (when (buffer-live-p buffer)
655       (save-excursion
656         (set-buffer buffer)
657         (buffer-modified-p)
658         (nnmail-write-region 1 (point-max) nnshimbun-nov-buffer-file-name
659                              nil 'nomesg)))))
660
661 (defun nnshimbun-save-nov ()
662   (save-excursion
663     (while nnshimbun-nov-buffer-alist
664       (when (buffer-name (cdar nnshimbun-nov-buffer-alist))
665         (set-buffer (cdar nnshimbun-nov-buffer-alist))
666         (when (buffer-modified-p)
667           (nnmail-write-region 1 (point-max) nnshimbun-nov-buffer-file-name
668                                nil 'nomesg))
669         (set-buffer-modified-p nil)
670         (kill-buffer (current-buffer)))
671       (setq nnshimbun-nov-buffer-alist (cdr nnshimbun-nov-buffer-alist)))))
672
673
674
675 ;;; Server Initialize
676 (defun nnshimbun-possibly-change-group (group &optional server)
677   (when server
678     (unless (nnshimbun-server-opened server)
679       (nnshimbun-open-server server)))
680   (unless (gnus-buffer-live-p nnshimbun-buffer)
681     (setq nnshimbun-buffer
682           (save-excursion
683             (nnheader-set-temp-buffer
684              (format " *nnshimbun %s*" (nnoo-current-server 'nnshimbun))))))
685   (if (not group)
686       t
687     (let ((pathname (nnmail-group-pathname group nnshimbun-server-directory))
688           (pathname-coding-system 'binary))
689       (unless (equal pathname nnshimbun-current-directory)
690         (setq nnshimbun-current-directory pathname
691               nnshimbun-current-group group))
692       (unless (file-exists-p nnshimbun-current-directory)
693         (ignore-errors (make-directory nnshimbun-current-directory t)))
694       (cond
695        ((not (file-exists-p nnshimbun-current-directory))
696         (nnheader-report 'nnshimbun "Couldn't create directory: %s" nnshimbun-current-directory))
697        ((not (file-directory-p (file-truename nnshimbun-current-directory)))
698         (nnheader-report 'nnshimbun "Not a directory: %s" nnshimbun-current-directory))
699        (t t)))))
700
701
702
703 ;;; Misc Functions
704
705 (eval-and-compile
706   (if (fboundp 'eword-encode-string)
707       ;; For Semi-Gnus.
708       (defun nnshimbun-mime-encode-string (string)
709         (mapconcat
710          #'identity
711          (split-string (eword-encode-string (nnweb-decode-entities-string string)) "\n")
712          ""))
713     ;; For pure Gnus.
714     (defun nnshimbun-mime-encode-string (string)
715       (mapconcat
716        #'identity
717        (split-string
718         (with-temp-buffer
719           (insert (nnweb-decode-entities-string string))
720           (rfc2047-encode-region (point-min) (point-max))
721           (buffer-substring (point-min) (point-max)))
722         "\n")
723        ""))))
724
725 (defun nnshimbun-lapse-seconds (time)
726   (let ((now (current-time)))
727     (+ (* (- (car now) (car time)) 65536)
728        (- (nth 1 now) (nth 1 time)))))
729
730 (defun nnshimbun-make-date-string (year month day &optional time)
731   (format "%02d %s %04d %s +0900"
732           day
733           (aref [nil "Jan" "Feb" "Mar" "Apr" "May" "Jun"
734                      "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"]
735                 month)
736           (cond ((< year 69)
737                  (+ year 2000))
738                 ((< year 100)
739                  (+ year 1900))
740                 ((< year 1000)  ; possible 3-digit years.
741                  (+ year 1900))
742                 (t year))
743           (or time "00:00")))
744
745 (if (fboundp 'regexp-opt)
746     (defalias 'nnshimbun-regexp-opt 'regexp-opt)
747   (defun nnshimbun-regexp-opt (strings &optional paren)
748     "Return a regexp to match a string in STRINGS.
749 Each string should be unique in STRINGS and should not contain any regexps,
750 quoted or not.  If optional PAREN is non-nil, ensure that the returned regexp
751 is enclosed by at least one regexp grouping construct."
752     (let ((open-paren (if paren "\\(" "")) (close-paren (if paren "\\)" "")))
753       (concat open-paren (mapconcat 'regexp-quote strings "\\|") close-paren))))
754
755
756 ;; Fast fill-region function
757
758 (defvar nnshimbun-fill-column (min 80 (- (frame-width) 4)))
759
760 (defconst nnshimbun-kinsoku-bol-list
761   (funcall
762    (if (fboundp 'string-to-char-list)
763        'string-to-char-list
764      'string-to-list) "\
765 !)-_~}]:;',.?\e$B!"!#!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A\e(B\
766 \e$B!B!C!D!E!G!I!K!M!O!Q!S!U!W!Y![!k!l!m!n$!$#$%$'$)$C$c$e$g$n%!%#%%%'%)%C%c%e%g%n%u%v\e(B"))
767
768 (defconst nnshimbun-kinsoku-eol-list
769   (funcall
770    (if (fboundp 'string-to-char-list)
771        'string-to-char-list
772      'string-to-list)
773    "({[`\e$B!F!H!J!L!N!P!R!T!V!X!Z!k!l!m!x\e(B"))
774
775 (defun nnshimbun-fill-line ()
776   (forward-line 0)
777   (let ((top (point)) chr)
778     (while (if (>= (move-to-column nnshimbun-fill-column)
779                    nnshimbun-fill-column)
780                (not (progn
781                       (if (memq (preceding-char) nnshimbun-kinsoku-eol-list)
782                           (progn
783                             (backward-char)
784                             (while (memq (preceding-char) nnshimbun-kinsoku-eol-list)
785                               (backward-char))
786                             (insert "\n"))
787                         (while (memq (setq chr (following-char)) nnshimbun-kinsoku-bol-list)
788                           (forward-char))
789                         (if (looking-at "\\s-+")
790                             (or (eolp) (delete-region (point) (match-end 0)))
791                           (or (> (char-width chr) 1)
792                               (re-search-backward "\\<" top t)
793                               (end-of-line)))
794                         (or (eolp) (insert "\n"))))))
795       (setq top (point))))
796   (forward-line 1)
797   (not (eobp)))
798
799 (defsubst nnshimbun-shallow-rendering ()
800   (goto-char (point-min))
801   (while (search-forward "<p>" nil t)
802     (insert "\n\n"))
803   (goto-char (point-min))
804   (while (search-forward "<br>" nil t)
805     (insert "\n"))
806   (nnweb-remove-markup)
807   (nnweb-decode-entities)
808   (goto-char (point-min))
809   (while (nnshimbun-fill-line))
810   (goto-char (point-min))
811   (when (skip-chars-forward "\n")
812     (delete-region (point-min) (point)))
813   (while (search-forward "\n\n" nil t)
814     (let ((p (point)))
815       (when (skip-chars-forward "\n")
816         (delete-region p (point)))))
817   (goto-char (point-max))
818   (when (skip-chars-backward "\n")
819     (delete-region (point) (point-max)))
820   (insert "\n"))
821
822 (defun nnshimbun-make-text-or-html-contents (header &optional x-face)
823   (let ((case-fold-search t) (html t) (start))
824     (when (and (re-search-forward nnshimbun-contents-start nil t)
825                (setq start (point))
826                (re-search-forward nnshimbun-contents-end nil t))
827       (delete-region (match-beginning 0) (point-max))
828       (delete-region (point-min) start)
829       (nnshimbun-shallow-rendering)
830       (setq html nil))
831     (goto-char (point-min))
832     (nnshimbun-insert-header header)
833     (insert "Content-Type: " (if html "text/html" "text/plain")
834             "; charset=ISO-2022-JP\nMIME-Version: 1.0\n")
835     (when x-face
836       (insert x-face)
837       (unless (bolp)
838         (insert "\n")))
839     (insert "\n")
840     (encode-coding-string (buffer-string)
841                           (mime-charset-to-coding-system "ISO-2022-JP"))))
842
843 (defun nnshimbun-make-html-contents (header &optional x-face)
844   (let (start)
845     (when (and (re-search-forward nnshimbun-contents-start nil t)
846                (setq start (point))
847                (re-search-forward nnshimbun-contents-end nil t))
848       (delete-region (match-beginning 0) (point-max))
849       (delete-region (point-min) start))
850     (goto-char (point-min))
851     (nnshimbun-insert-header header)
852     (insert "Content-Type: text/html; charset=ISO-2022-JP\n"
853             "MIME-Version: 1.0\n")
854     (when x-face
855       (insert x-face)
856       (unless (bolp)
857         (insert "\n")))
858     (insert "\n")
859     (encode-coding-string (buffer-string)
860                           (mime-charset-to-coding-system "ISO-2022-JP"))))
861
862 (defun nnshimbun-make-mhonarc-contents (header &rest args)
863   (require 'mml)
864   (if (search-forward "<!--X-Head-End-->" nil t)
865       (progn
866         (forward-line 0)
867         ;; Processing headers.
868         (save-restriction
869           (narrow-to-region (point-min) (point))
870           (nnweb-decode-entities)
871           (goto-char (point-min))
872           (while (search-forward "\n<!--X-" nil t)
873             (replace-match "\n"))
874           (goto-char (point-min))
875           (while (search-forward " -->\n" nil t)
876             (replace-match "\n"))
877           (goto-char (point-min))
878           (while (search-forward "\t" nil t)
879             (replace-match " "))
880           (goto-char (point-min))
881           (let (buf refs)
882             (while (not (eobp))
883               (cond
884                ((looking-at "<!--")
885                 (delete-region (point) (progn (forward-line 1) (point))))
886                ((looking-at "Subject: +")
887                 (push (cons 'subject (nnheader-header-value)) buf)
888                 (delete-region (point) (progn (forward-line 1) (point))))
889                ((looking-at "From: +")
890                 (push (cons 'from (nnheader-header-value)) buf)
891                 (delete-region (point) (progn (forward-line 1) (point))))
892                ((looking-at "Date: +")
893                 (push (cons 'date (nnheader-header-value)) buf)
894                 (delete-region (point) (progn (forward-line 1) (point))))
895                ((looking-at "Message-Id: +")
896                 (push (cons 'id (concat "<" (nnheader-header-value) ">")) buf)
897                 (delete-region (point) (progn (forward-line 1) (point))))
898                ((looking-at "Reference: +")
899                 (push (concat "<" (nnheader-header-value) ">") refs)
900                 (delete-region (point) (progn (forward-line 1) (point))))
901                ((looking-at "Content-Type: ")
902                 (unless (search-forward "charset" (gnus-point-at-eol) t)
903                   (end-of-line)
904                   (insert "; charset=ISO-2022-JP"))
905                 (forward-line 1))
906                (t (forward-line 1))))
907             (insert "MIME-Version: 1.0\n")
908             (if refs (push (cons 'references (mapconcat 'identity refs " ")) buf))
909             (nnshimbun-nov-fix-header nnshimbun-current-group header buf)
910             (goto-char (point-min))
911             (nnshimbun-insert-header header))
912           (goto-char (point-max)))
913         ;; Processing body.
914         (save-restriction
915           (narrow-to-region (point) (point-max))
916           (delete-region
917            (point)
918            (progn
919              (search-forward "\n<!--X-Body-of-Message-->\n" nil t)
920              (point)))
921           (when (search-forward "\n<!--X-Body-of-Message-End-->\n" nil t)
922             (forward-line -1)
923             (delete-region (point) (point-max)))
924           (nnweb-remove-markup)
925           (nnweb-decode-entities)))
926     (goto-char (point-min))
927     (nnshimbun-insert-header header)
928     (insert "Content-Type: text/html; charset=ISO-2022-JP\nMIME-Version: 1.0\n\n"))
929   (encode-coding-string (buffer-string)
930                         (mime-charset-to-coding-system "ISO-2022-JP")))
931
932
933 ;;; www.asahi.com
934
935 (defun nnshimbun-asahi-get-headers ()
936   (when (search-forward "\n<!-- Start of past -->\n" nil t)
937     (delete-region (point-min) (point))
938     (when (search-forward "\n<!-- End of past -->\n" nil t)
939       (forward-line -1)
940       (delete-region (point) (point-max))
941       (goto-char (point-min))
942       (let (headers)
943         (while (re-search-forward
944                 "^\e$B"#\e(B<a href=\"\\(\\([0-9][0-9][0-9][0-9]\\)/past/\\([A-z]*[0-9]*\\)\\.html\\)\"> *"
945                 nil t)
946           (let ((id (format "<%s%s%%%s>"
947                             (match-string 2)
948                             (match-string 3)
949                             nnshimbun-current-group))
950                 (url (match-string 1)))
951             (push (make-full-mail-header
952                    0
953                    (nnshimbun-mime-encode-string
954                     (mapconcat 'identity
955                                (split-string
956                                 (buffer-substring
957                                  (match-end 0)
958                                  (progn (search-forward "<br>" nil t) (point)))
959                                 "\\(<[^>]+>\\|\r\\)")
960                                ""))
961                    nnshimbun-from-address
962                    "" id "" 0 0 (concat nnshimbun-url url))
963                   headers)))
964         (setq headers (nreverse headers))
965         (let ((i 0))
966           (while (and (nth i headers)
967                       (re-search-forward
968                        "^\\[\\([0-9][0-9]\\)/\\([0-9][0-9]\\) \\([0-9][0-9]:[0-9][0-9]\\)\\]"
969                        nil t))
970             (let ((month (string-to-number (match-string 1)))
971                   (date (decode-time (current-time))))
972               (mail-header-set-date
973                (nth i headers)
974                (nnshimbun-make-date-string
975                 (if (and (eq 12 month) (eq 1 (nth 4 date)))
976                     (1- (nth 5 date))
977                   (nth 5 date))
978                 month
979                 (string-to-number (match-string 2))
980                 (match-string 3))))
981             (setq i (1+ i))))
982         (nreverse headers)))))
983
984
985
986 ;;; www.sponichi.co.jp
987
988 (defun nnshimbun-sponichi-get-headers ()
989   (when (search-forward "\e$B%K%e!<%9%$%s%G%C%/%9\e(B" nil t)
990     (delete-region (point-min) (point))
991     (when (search-forward "\e$B%"%I%?%0\e(B" nil t)
992       (forward-line 2)
993       (delete-region (point) (point-max))
994       (goto-char (point-min))
995       (let ((case-fold-search t) headers)
996         (while (re-search-forward
997                 "^<a href=\"/\\(\\([A-z]*\\)/kiji/\\([0-9][0-9][0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([0-9][0-9]\\)\\.html\\)\">"
998                 nil t)
999           (let ((url (match-string 1))
1000                 (id (format "<%s%s%s%s%%%s>"
1001                             (match-string 3)
1002                             (match-string 4)
1003                             (match-string 5)
1004                             (match-string 6)
1005                             nnshimbun-current-group))
1006                 (date (nnshimbun-make-date-string
1007                        (string-to-number (match-string 3))
1008                        (string-to-number (match-string 4))
1009                        (string-to-number (match-string 5)))))
1010             (push (make-full-mail-header
1011                    0
1012                    (nnshimbun-mime-encode-string
1013                     (mapconcat 'identity
1014                                (split-string
1015                                 (buffer-substring
1016                                  (match-end 0)
1017                                  (progn (search-forward "<br>" nil t) (point)))
1018                                 "<[^>]+>")
1019                                ""))
1020                    nnshimbun-from-address
1021                    date id "" 0 0 (concat nnshimbun-url url))
1022                   headers)))
1023         headers))))
1024
1025
1026
1027 ;;; CNET Japan
1028
1029 (defun nnshimbun-cnet-get-headers ()
1030   (let ((case-fold-search t) headers)
1031     (while (search-forward "\n<!--*****\e$B8+=P$7\e(B*****-->\n" nil t)
1032       (let ((subject (buffer-substring (point) (gnus-point-at-eol)))
1033             (point (point)))
1034         (forward-line -2)
1035         (when (looking-at "<a href=\"/\\(News/\\([0-9][0-9][0-9][0-9]\\)/Item/\\([0-9][0-9]\\([0-9][0-9]\\)\\([0-9][0-9]\\)-[0-9]+\\).html\\)\">")
1036           (let ((url (match-string 1))
1037                 (id  (format "<%s%s%%%s>"
1038                              (match-string 2)
1039                              (match-string 3)
1040                              nnshimbun-current-group))
1041                 (date (nnshimbun-make-date-string
1042                        (string-to-number (match-string 2))
1043                        (string-to-number (match-string 4))
1044                        (string-to-number (match-string 5)))))
1045             (push (make-full-mail-header
1046                    0
1047                    (nnshimbun-mime-encode-string subject)
1048                    nnshimbun-from-address
1049                    date id "" 0 0 (concat nnshimbun-url url))
1050                   headers)))
1051         (goto-char point)))
1052     headers))
1053
1054
1055
1056 ;;; Wired
1057
1058 (defun nnshimbun-wired-get-all-headers ()
1059   (save-excursion
1060     (set-buffer nnshimbun-buffer)
1061     (let ((group-header-alist (mapcar (lambda (g) (cons g nil)) nnshimbun-groups))
1062           (case-fold-search t)
1063           (regexp (format
1064                    "<a href=\"\\(%s\\|/\\)\\(news/news/\\(%s\\)/story/\\(\\([0-9][0-9][0-9][0-9]\\)\\([0-9][0-9]\\)\\([0-9][0-9]\\)[0-9]+\\)\\.html\\)\"><b>"
1065                    (regexp-quote nnshimbun-url)
1066                    (nnshimbun-regexp-opt nnshimbun-groups))))
1067       (dolist (xover (list (concat nnshimbun-url "news/news/index.html")
1068                            (concat nnshimbun-url "news/news/last_seven.html")))
1069         (erase-buffer)
1070         (nnshimbun-retrieve-url xover t)
1071         (goto-char (point-min))
1072         (while (re-search-forward regexp nil t)
1073           (let* ((url   (concat nnshimbun-url (match-string 2)))
1074                  (group (downcase (match-string 3)))
1075                  (id    (format "<%s%%%s>" (match-string 4) group))
1076                  (date  (nnshimbun-make-date-string
1077                          (string-to-number (match-string 5))
1078                          (string-to-number (match-string 6))
1079                          (string-to-number (match-string 7))))
1080                  (header (make-full-mail-header
1081                           0
1082                           (nnshimbun-mime-encode-string
1083                            (mapconcat 'identity
1084                                       (split-string
1085                                        (buffer-substring
1086                                         (match-end 0)
1087                                         (progn (search-forward "</b>" nil t) (point)))
1088                                        "<[^>]+>")
1089                                       ""))
1090                           nnshimbun-from-address
1091                           date id "" 0 0 url))
1092                  (x (assoc group group-header-alist)))
1093             (setcdr x (cons header (cdr x))))))
1094       group-header-alist)))
1095
1096
1097
1098 ;;; www.yomiuri.co.jp
1099
1100 (defun nnshimbun-yomiuri-get-all-headers ()
1101   (save-excursion
1102     (set-buffer nnshimbun-buffer)
1103     (erase-buffer)
1104     (nnshimbun-retrieve-url (eval nnshimbun-index-url) t)
1105     (let ((case-fold-search t)
1106           (group-header-alist (mapcar (lambda (g) (cons g nil)) nnshimbun-groups)))
1107       (dolist (group nnshimbun-groups)
1108         (let (start)
1109           (goto-char (point-min))
1110           (when (and (search-forward (format "\n<!-- /news/%s=start -->\n" group) nil t)
1111                      (setq start (point))
1112                      (search-forward (format "\n<!-- /news/%s=end -->\n" group) nil t))
1113             (forward-line -1)
1114             (save-restriction
1115               (narrow-to-region start (point))
1116               (goto-char start)
1117               (while (re-search-forward
1118                       "<a href=\"/\\([0-9]+\\)/\\(\\(\\([0-9][0-9][0-9][0-9]\\)\\([0-9][0-9]\\)\\([0-9][0-9]\\)[A-z0-9]+\\)\\.htm\\)\"[^>]*>"
1119                       nil t)
1120                 (let ((url   (concat (match-string 1) "a/" (match-string 2)))
1121                       (id    (format "<%s%s%%%s>"
1122                                      (match-string 1)
1123                                      (match-string 3)
1124                                      group))
1125                       (year  (string-to-number (match-string 4)))
1126                       (month (string-to-number (match-string 5)))
1127                       (day   (string-to-number (match-string 6)))
1128                       (subject (mapconcat
1129                                 'identity
1130                                 (split-string
1131                                  (buffer-substring
1132                                   (match-end 0)
1133                                   (progn (search-forward "<br>" nil t) (point)))
1134                                  "<[^>]+>")
1135                                 ""))
1136                       date x)
1137                   (when (string-match "^\e$B"!\e(B" subject)
1138                     (setq subject (substring subject (match-end 0))))
1139                   (if (string-match "(\\([0-9][0-9]:[0-9][0-9]\\))$" subject)
1140                       (setq date (nnshimbun-make-date-string
1141                                   year month day (match-string 1 subject))
1142                             subject (substring subject 0 (match-beginning 0)))
1143                     (setq date (nnshimbun-make-date-string year month day)))
1144                   (setcdr (setq x (assoc group group-header-alist))
1145                           (cons (make-full-mail-header
1146                                  0
1147                                  (nnshimbun-mime-encode-string subject)
1148                                  nnshimbun-from-address
1149                                  date id "" 0 0 (concat nnshimbun-url url))
1150                                 (cdr x)))))))))
1151       group-header-alist)))
1152
1153
1154
1155 ;;; Zdnet Japan
1156
1157 (defun nnshimbun-zdnet-get-headers ()
1158   (let ((case-fold-search t) headers)
1159     (goto-char (point-min))
1160     (let (start)
1161       (while (and (search-forward "<!--" nil t)
1162                   (setq start (- (point) 4))
1163                   (search-forward "-->" nil t))
1164         (delete-region start (point))))
1165     (goto-char (point-min))
1166     (while (re-search-forward
1167             "<a href=\"\\(/news/\\)?\\(\\([0-9][0-9]\\)\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([^\\.]+\\).html\\)\"><font size=\"4\"><strong>"
1168             nil t)
1169       (let ((year  (+ 2000 (string-to-number (match-string 3))))
1170             (month (string-to-number (match-string 4)))
1171             (day   (string-to-number (match-string 5)))
1172             (id    (format "<%s%s%s%s%%%s>"
1173                            (match-string 3)
1174                            (match-string 4)
1175                            (match-string 5)
1176                            (match-string 6)
1177                            nnshimbun-current-group))
1178             (url (match-string 2)))
1179         (push (make-full-mail-header
1180                0
1181                (nnshimbun-mime-encode-string
1182                 (mapconcat 'identity
1183                            (split-string
1184                             (buffer-substring
1185                              (match-end 0)
1186                              (progn (search-forward "</a>" nil t) (point)))
1187                             "<[^>]+>")
1188                            ""))
1189                nnshimbun-from-address
1190                (nnshimbun-make-date-string year month day)
1191                id  "" 0 0 (concat nnshimbun-url url))
1192               headers)))
1193     (nreverse headers)))
1194
1195
1196
1197 ;;; MLs on www.mew.org
1198
1199 (defmacro nnshimbun-mew-concat-url (url)
1200   `(concat nnshimbun-url
1201            (nth 1 (assoc nnshimbun-current-group nnshimbun-mew-groups))
1202            "/"
1203            ,url))
1204
1205 (defmacro nnshimbun-mew-reverse-order-p ()
1206   `(nth 2 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
1207
1208 (defmacro nnshimbun-mew-spew-p ()
1209   `(nth 3 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
1210
1211 (defsubst nnshimbun-mew-retrieve-xover (aux)
1212   (erase-buffer)
1213   (nnshimbun-retrieve-url
1214    (nnshimbun-mew-concat-url (if (= aux 1) "index.html" (format "mail%d.html" aux)))
1215    t))
1216
1217 (defconst nnshimbun-mew-regexp "<A[^>]*HREF=\"\\(msg\\([0-9]+\\).html\\)\">\\([^<]+\\)<")
1218
1219 (defmacro nnshimbun-mew-extract-header-values ()
1220   `(progn
1221      (setq url (nnshimbun-mew-concat-url (match-string 1))
1222            id (format "<%05d%%%s>"
1223                       (1- (string-to-number (match-string 2)))
1224                       nnshimbun-current-group)
1225            subject (match-string 3))
1226      (forward-line 1)
1227      (if (nnshimbun-search-id nnshimbun-current-group id)
1228          (throw 'stop headers)
1229        (push (make-full-mail-header
1230               0
1231               (nnshimbun-mime-encode-string subject)
1232               (if (looking-at "<EM>\\([^<]+\\)<")
1233                   (nnshimbun-mime-encode-string (match-string 1))
1234                 "")
1235               "" id "" 0 0 url)
1236              headers))))
1237
1238 (eval-and-compile
1239   (if (fboundp 'mime-entity-fetch-field)
1240       ;; For Semi-Gnus.
1241       (defmacro nnshimbun-mew-mail-header-subject (header)
1242         `(mime-entity-fetch-field ,header 'Subject))
1243     ;; For pure Gnus.
1244     (defalias 'nnshimbun-mew-mail-header-subject 'mail-header-subject)))
1245
1246 (defun nnshimbun-mew-get-headers ()
1247   (if (nnshimbun-mew-spew-p)
1248       (let ((headers (nnshimbun-mew-get-headers-1)))
1249         (erase-buffer)
1250         (insert-buffer-substring (nnshimbun-open-nov nnshimbun-current-group))
1251         (delq nil
1252               (mapcar
1253                (lambda (header)
1254                  (goto-char (point-min))
1255                  (let ((subject (nnshimbun-mew-mail-header-subject header))
1256                        (found))
1257                    (while (and (not found)
1258                                (search-forward subject nil t))
1259                      (if (not (and (search-backward "\t" nil t)
1260                                    (not (search-backward "\t" (gnus-point-at-bol) t))))
1261                          (forward-line 1)
1262                        (setq found t)))
1263                    (if found
1264                        nil
1265                      (goto-char (point-max))
1266                      (nnheader-insert-nov header)
1267                      header)))
1268                headers)))
1269     (nnshimbun-mew-get-headers-1)))
1270
1271 (defun nnshimbun-mew-get-headers-1 ()
1272   (let (headers)
1273     (when (re-search-forward
1274            "<A[^>]*HREF=\"mail\\([0-9]+\\)\\.html\">\\[?Last Page\\]?</A>" nil t)
1275       (let ((limit (string-to-number (match-string 1))))
1276         (catch 'stop
1277           (if (nnshimbun-mew-reverse-order-p)
1278               (let ((aux 1))
1279                 (while (let (id url subject)
1280                          (while (re-search-forward nnshimbun-mew-regexp nil t)
1281                            (nnshimbun-mew-extract-header-values))
1282                          (< aux limit))
1283                   (nnshimbun-mew-retrieve-xover (setq aux (1+ aux)))))
1284             (while (> limit 0)
1285               (nnshimbun-mew-retrieve-xover limit)
1286               (setq limit (1- limit))
1287               (let (id url subject)
1288                 (goto-char (point-max))
1289                 (while (re-search-backward nnshimbun-mew-regexp nil t)
1290                   (nnshimbun-mew-extract-header-values)
1291                   (forward-line -2)))))
1292           headers)))))
1293
1294
1295
1296 ;;; MLs on www.xemacs.org
1297
1298 (defmacro nnshimbun-xemacs-concat-url (url)
1299   `(concat nnshimbun-url nnshimbun-current-group "/" ,url))
1300
1301 (defun nnshimbun-xemacs-get-headers ()
1302   (let (headers auxs aux)
1303     (catch 'stop
1304       (while (re-search-forward
1305               (concat "<A HREF=\"/list-archives/" nnshimbun-current-group
1306                       "/\\([12][0-9][0-9][0-9][0-1][0-9]\\)/\">\\[Index\\]")
1307               nil t)
1308         (setq auxs (append auxs (list (match-string 1)))))
1309       (while auxs
1310         (erase-buffer)
1311         (nnshimbun-retrieve-url
1312          (nnshimbun-xemacs-concat-url (concat (setq aux (car auxs)) "/")))
1313         (let (id url subject)
1314           (goto-char (point-max))
1315           (while (re-search-backward
1316                   "<A[^>]*HREF=\"\\(msg\\([0-9]+\\).html\\)\">\\([^<]+\\)<"
1317                   nil t)
1318             (setq url (nnshimbun-xemacs-concat-url
1319                        (concat aux "/" (match-string 1)))
1320                   id (format "<%s%05d%%%s>"
1321                              aux
1322                              (string-to-number (match-string 2))
1323                              nnshimbun-current-group)
1324                   subject (match-string 3))
1325             (forward-line 1)
1326             (if (nnshimbun-search-id nnshimbun-current-group id)
1327                 (throw 'stop headers)
1328               (push (make-full-mail-header
1329                      0
1330                      (nnshimbun-mime-encode-string subject)
1331                      (if (looking-at "<td><em>\\([^<]+\\)<")
1332                          (match-string 1)
1333                        "")
1334                      "" id "" 0 0 url)
1335                     headers))
1336             (message "%s" id)
1337             (forward-line -2)))
1338         (setq auxs (cdr auxs))))
1339     headers))
1340
1341 ;;; MLs on www.jp.netbsd.org
1342
1343 (defun nnshimbun-netbsd-get-headers ()
1344   (let ((case-fold-search t) headers months)
1345     (goto-char (point-min))
1346     (while (re-search-forward "<A HREF=\"\\([0-9]+\\)/\\(threads.html\\)?\">" nil t)
1347       (push (match-string 1) months))
1348     (setq months (nreverse months))
1349     (catch 'exit
1350       (dolist (month months)
1351         (erase-buffer)
1352         (nnshimbun-retrieve-url
1353          (format "%s%s/%s/maillist.html" nnshimbun-url nnshimbun-current-group month)
1354          t)
1355         (let (id url subject)
1356           (while (re-search-forward
1357                   "<A[^>]*HREF=\"\\(msg\\([0-9]+\\)\\.html\\)\">\\([^<]+\\)</A>"
1358                   nil t)
1359             (setq url (format "%s%s/%s/%s"
1360                               nnshimbun-url
1361                               nnshimbun-current-group
1362                               month
1363                               (match-string 1))
1364                   id (format "<%s%05d%%%s>"
1365                              month
1366                              (string-to-number (match-string 2))
1367                              nnshimbun-current-group)
1368                   subject (match-string 3))
1369             (if (nnshimbun-search-id nnshimbun-current-group id)
1370                 (throw 'exit headers)
1371               (push (make-full-mail-header
1372                      0
1373                      (nnshimbun-mime-encode-string subject)
1374                      (if (looking-at "</STRONG> *<EM>\\([^<]+\\)<")
1375                          (nnshimbun-mime-encode-string (match-string 1))
1376                        "")
1377                      "" id "" 0 0 url)
1378                     headers)))))
1379       headers)))
1380
1381 (provide 'nnshimbun)
1382 ;;; nnshimbun.el ends here.