* lisp/nnshimbun.el (nnshimbun-retrieve-url): coding detection is improved.
[elisp/gnus.git-] / lisp / nnshimbun.el
1 ;;; nnshimbun.el --- interfacing with web newspapers -*- coding: junet; -*-
2
3 ;; Authors: TSUCHIYA Masatoshi <tsuchiya@pine.kuee.kyoto-u.ac.jp>
4 ;;          Akihiro Arisawa    <ari@atesoft.advantest.co.jp>
5 ;; Keywords: news
6
7 ;;; Copyright:
8
9 ;; This file is a part of Semi-Gnus.
10
11 ;; This program is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; This program is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with this program; if not, you can either send email to this
23 ;; program's maintainer or write to: The Free Software Foundation,
24 ;; Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;; Gnus backend to read newspapers on WEB.
29
30
31 ;;; Defintinos:
32
33 (gnus-declare-backend "nnshimbun" 'address)
34
35 (eval-when-compile (require 'cl))
36
37 (require 'nnheader)
38 (require 'nnmail)
39 (require 'nnoo)
40 (require 'gnus-bcklg)
41 (eval-when-compile (ignore-errors (require 'nnweb)))
42 ;; Report failure to find w3 at load time if appropriate.
43 (eval '(require 'nnweb))
44 (require 'mcharset)
45
46
47 (nnoo-declare nnshimbun)
48
49 (defvar nnshimbun-check-interval 300)
50
51 (defconst nnshimbun-mew-groups
52   '(("meadow-develop" "meadow-develop" nil t)
53     ("meadow-users-jp" "meadow-users-jp")
54     ("mule-win32" "mule-win32")
55     ("mew-win32" "mew-win32")
56     ("mew-dist" "mew-dist/3300" t)
57     ("mgp-users-jp" "mgp-users-jp/A" t t)))
58
59 (defvar nnshimbun-type-definition
60   `(("asahi"
61      (url . "http://spin.asahi.com/")
62      (groups "national" "business" "politics" "international" "sports" "personal" "feneral")
63      (coding-system  . ,(if (boundp 'MULE) '*sjis* 'shift_jis))
64      (generate-nov   . nnshimbun-generate-nov-for-each-group)
65      (get-headers    . nnshimbun-asahi-get-headers)
66      (index-url      . (format "%sp%s.html" nnshimbun-url nnshimbun-current-group))
67      (from-address   . "webmaster@www.asahi.com")
68      (make-contents  . nnshimbun-make-text-or-html-contents)
69      (contents-start . "\n<!-- Start of kiji -->\n")
70      (contents-end   . "\n<!-- End of kiji -->\n"))
71     ("sponichi"
72      (url . "http://www.sponichi.co.jp/")
73      (groups "baseball" "soccer" "usa" "others" "society" "entertainment" "horseracing")
74      (coding-system  . ,(if (boundp 'MULE) '*sjis* 'shift_jis))
75      (generate-nov   . nnshimbun-generate-nov-for-each-group)
76      (get-headers    . nnshimbun-sponichi-get-headers)
77      (index-url      . (format "%s%s/index.html" nnshimbun-url nnshimbun-current-group))
78      (from-address   . "webmaster@www.sponichi.co.jp")
79      (make-contents  . nnshimbun-make-text-or-html-contents)
80      (contents-start . "\n<span class=\"text\">\e$B!!\e(B")
81      (contents-end   . "\n"))
82     ("cnet"
83      (url . "http://cnet.sphere.ne.jp/")
84      (groups "comp")
85      (coding-system  . ,(if (boundp 'MULE) '*sjis* 'shift_jis))
86      (generate-nov   . nnshimbun-generate-nov-for-each-group)
87      (get-headers    . nnshimbun-cnet-get-headers)
88      (index-url      . (format "%s/News/Oneweek/" nnshimbun-url))
89      (from-address   . "cnet@sphere.ad.jp")
90      (make-contents  . nnshimbun-make-html-contents)
91      (contents-start . "\n<!--KIJI-->\n")
92      (contents-end   . "\n<!--/KIJI-->\n"))
93     ("wired"
94      (url . "http://www.hotwired.co.jp/")
95      (groups "business" "culture" "technology")
96      (coding-system  . ,(if (boundp 'MULE) '*euc-japan* 'euc-jp))
97      (generate-nov   . nnshimbun-generate-nov-for-all-groups)
98      (get-headers    . nnshimbun-wired-get-all-headers)
99      (index-url)
100      (from-address   . "webmaster@www.hotwired.co.jp")
101      (make-contents  . nnshimbun-make-html-contents)
102      (contents-start . "\n<!-- START_OF_BODY -->\n")
103      (contents-end   . "\n<!-- END_OF_BODY -->\n"))
104     ("yomiuri"
105      (url . "http://www.yomiuri.co.jp/")
106      (groups "shakai" "sports" "seiji" "keizai" "kokusai" "fuho")
107      (coding-system  . ,(if (boundp 'MULE) '*sjis* 'shift_jis))
108      (generate-nov   . nnshimbun-generate-nov-for-all-groups)
109      (get-headers    . nnshimbun-yomiuri-get-all-headers)
110      (index-url      . (concat nnshimbun-url "main.htm"))
111      (from-address   . "webmaster@www.yomiuri.co.jp")
112      (make-contents  . nnshimbun-make-text-or-html-contents)
113      (contents-start . "\n<!--  honbun start  -->\n")
114      (contents-end   . "\n<!--  honbun end  -->\n"))
115     ("zdnet"
116      (url . "http://www.zdnet.co.jp/news/")
117      (groups "comp")
118      (coding-system  . ,(if (boundp 'MULE) '*sjis* 'shift_jis))
119      (generate-nov   . nnshimbun-generate-nov-for-each-group)
120      (get-headers    . nnshimbun-zdnet-get-headers)
121      (index-url      . nnshimbun-url)
122      (from-address   . "zdnn@softbank.co.jp")
123      (make-contents  . nnshimbun-make-html-contents)
124      (contents-start . "\\(<!--BODY-->\\|<!--DATE-->\\)")
125      (contents-end   . "\\(<!--BODYEND-->\\|<!--BYLINEEND-->\\)"))
126     ("mew"
127      (url . "http://www.mew.org/archive/")
128      (groups ,@(mapcar #'car nnshimbun-mew-groups))
129      (coding-system . ,(if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
130      (generate-nov  . nnshimbun-generate-nov-for-each-group)
131      (get-headers   . nnshimbun-mew-get-headers)
132      (index-url     . (nnshimbun-mew-concat-url "index.html"))
133      (make-contents . nnshimbun-make-mhonarc-contents))
134     ("xemacs"
135      (url . "http://www.xemacs.org/list-archives/")
136      (groups "xemacs-announce" "xemacs-beta-ja" "xemacs-beta"
137              "xemacs-build-reports" "xemacs-cvs" "xemacs-mule"
138              "xemacs-nt" "xemacs-patches" "xemacs-users-ja" "xemacs")
139      (coding-system . ,(if (boundp 'MULE) '*euc-japan* 'euc-jp))
140      (generate-nov  . nnshimbun-generate-nov-for-each-group)
141      (get-headers   . nnshimbun-xemacs-get-headers)
142      (index-url     . (nnshimbun-xemacs-concat-url nil))
143      (make-contents . nnshimbun-make-mhonarc-contents))
144     ("netbsd"
145      (url . "http://www.jp.netbsd.org/ja/JP/ml/")
146      (groups "announce-ja" "junk-ja" "tech-misc-ja" "tech-pkg-ja"
147              "port-arm32-ja" "port-hpcmips-ja" "port-mac68k-ja"
148              "port-mips-ja" "port-powerpc-ja" "hpcmips-changes-ja"
149              "members-ja" "admin-ja" "www-changes-ja")
150      (coding-system  . ,(if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
151      (generate-nov   . nnshimbun-generate-nov-for-each-group)
152      (get-headers    . nnshimbun-netbsd-get-headers)
153      (index-url      . (format "%s%s/index.html" nnshimbun-url nnshimbun-current-group))
154      (make-contents  . nnshimbun-make-mhonarc-contents))
155     ))
156
157 (defvar nnshimbun-x-face-alist
158   '(("default" .
159      (("default" .
160        "X-Face: Ygq$6P.,%Xt$U)DS)cRY@k$VkW!7(X'X'?U{{osjjFG\"E]hND;SPJ-J?O?R|a?L
161         g2$0rVng=O3Lt}?~IId8Jj&vP^3*o=LKUyk(`t%0c!;t6REk=JbpsEn9MrN7gZ%"))))
162   "Alist of server vs. alist of group vs. X-Face field.  It looks like:
163
164 \((\"asahi\" . ((\"national\" . \"X-face: ***\")
165              (\"business\" . \"X-Face: ***\")
166                 ;;
167                 ;;
168              (\"default\" . \"X-face: ***\")))
169  (\"sponichi\" . ((\"baseball\" . \"X-face: ***\")
170                 (\"soccer\" . \"X-Face: ***\")
171                 ;;
172                 ;;
173                 (\"default\" . \"X-face: ***\")))
174                 ;;
175  (\"default\" . ((\"default\" . \"X-face: ***\")))")
176
177 (defvoo nnshimbun-directory (nnheader-concat gnus-directory "shimbun/")
178   "Where nnshimbun will save its files.")
179
180 (defvoo nnshimbun-nov-is-evil nil
181   "*Non-nil means that nnshimbun will never retrieve NOV headers.")
182
183 (defvoo nnshimbun-nov-file-name ".overview")
184
185 (defvoo nnshimbun-pre-fetch-article nil
186   "*Non nil means that nnshimbun fetch unread articles when scanning groups.")
187
188 ;; set by nnshimbun-possibly-change-group
189 (defvoo nnshimbun-buffer nil)
190 (defvoo nnshimbun-current-directory nil)
191 (defvoo nnshimbun-current-group nil)
192
193 ;; set by nnshimbun-open-server
194 (defvoo nnshimbun-url nil)
195 (defvoo nnshimbun-coding-system nil)
196 (defvoo nnshimbun-groups nil)
197 (defvoo nnshimbun-generate-nov nil)
198 (defvoo nnshimbun-get-headers nil)
199 (defvoo nnshimbun-index-url nil)
200 (defvoo nnshimbun-from-address nil)
201 (defvoo nnshimbun-make-contents nil)
202 (defvoo nnshimbun-contents-start nil)
203 (defvoo nnshimbun-contents-end nil)
204 (defvoo nnshimbun-server-directory nil)
205
206 (defvoo nnshimbun-status-string "")
207 (defvoo nnshimbun-nov-last-check nil)
208 (defvoo nnshimbun-nov-buffer-alist nil)
209 (defvoo nnshimbun-nov-buffer-file-name nil)
210
211 (defvoo nnshimbun-keep-backlog 300)
212 (defvoo nnshimbun-backlog-articles nil)
213 (defvoo nnshimbun-backlog-hashtb nil)
214
215 (defconst nnshimbun-meta-content-type-charset-regexp
216   (eval-when-compile
217     (concat "<meta[ \t]+http-equiv=\"?Content-type\"?[ \t]+content=\"\\([^;]+\\)"
218             ";[ \t]*charset=\"?\\([^\"]+\\)\"?"
219             ">"))
220   "Regexp used in parsing `<META HTTP-EQUIV=\"Content-Type\" content=\"...;charset=...\">
221 for a charset indication")
222
223 (defconst nnshimbun-meta-charset-content-type-regexp
224   (eval-when-compile
225     (concat "<meta[ \t]+content=\"\\([^;]+\\)"
226             ";[ \t]*charset=\"?\\([^\"]+\\)\"?"
227             "[ \t]+http-equiv=\"?Content-type\"?>"))
228   "Regexp used in parsing `<META content=\"...;charset=...\" HTTP-EQUIV=\"Content-Type\">
229 for a charset indication")
230
231
232
233 ;;; backlog
234 (defmacro nnshimbun-backlog (&rest form)
235   `(let ((gnus-keep-backlog nnshimbun-keep-backlog)
236          (gnus-backlog-buffer (format " *nnshimbun backlog %s*" (nnoo-current-server 'nnshimbun)))
237          (gnus-backlog-articles nnshimbun-backlog-articles)
238          (gnus-backlog-hashtb nnshimbun-backlog-hashtb))
239      (unwind-protect
240          (progn ,@form)
241        (setq nnshimbun-backlog-articles gnus-backlog-articles
242              nnshimbun-backlog-hashtb gnus-backlog-hashtb))))
243 (put 'nnshimbun-backlog 'lisp-indent-function 0)
244 (put 'nnshimbun-backlog 'edebug-form-spec '(form body))
245
246
247
248 ;;; Interface Functions
249 (nnoo-define-basics nnshimbun)
250
251 (deffoo nnshimbun-open-server (server &optional defs)
252   ;; Set default values.
253   (dolist (default (cdr (assoc server nnshimbun-type-definition)))
254     (let ((symbol (intern (concat "nnshimbun-" (symbol-name (car default))))))
255       (unless (assq symbol defs)
256         (push (list symbol (cdr default)) defs))))
257   ;; Set directory for server working files.
258   (push (list 'nnshimbun-server-directory
259               (file-name-as-directory
260                (expand-file-name server nnshimbun-directory)))
261         defs)
262   (nnoo-change-server 'nnshimbun server defs)
263   (nnshimbun-possibly-change-group nil server)
264   ;; Make directories.
265   (unless (file-exists-p nnshimbun-directory)
266     (ignore-errors (make-directory nnshimbun-directory t)))
267   (cond
268    ((not (file-exists-p nnshimbun-directory))
269     (nnshimbun-close-server)
270     (nnheader-report 'nnshimbun "Couldn't create directory: %s" nnshimbun-directory))
271    ((not (file-directory-p (file-truename nnshimbun-directory)))
272     (nnshimbun-close-server)
273     (nnheader-report 'nnshimbun "Not a directory: %s" nnshimbun-directory))
274    (t
275     (unless (file-exists-p nnshimbun-server-directory)
276       (ignore-errors (make-directory nnshimbun-server-directory t)))
277     (cond
278      ((not (file-exists-p nnshimbun-server-directory))
279       (nnshimbun-close-server)
280       (nnheader-report 'nnshimbun "Couldn't create directory: %s" nnshimbun-server-directory))
281      ((not (file-directory-p (file-truename nnshimbun-server-directory)))
282       (nnshimbun-close-server)
283       (nnheader-report 'nnshimbun "Not a directory: %s" nnshimbun-server-directory))
284      (t
285       (nnheader-report 'nnshimbun "Opened server %s using directory %s"
286                        server nnshimbun-server-directory)
287       t)))))
288
289 (deffoo nnshimbun-close-server (&optional server)
290   (and (nnshimbun-server-opened server)
291        (gnus-buffer-live-p nnshimbun-buffer)
292        (kill-buffer nnshimbun-buffer))
293   (nnshimbun-backlog (gnus-backlog-shutdown))
294   (nnshimbun-save-nov)
295   (nnoo-close-server 'nnshimbun server)
296   t)
297
298 (defun nnshimbun-retrieve-url (url &optional no-cache)
299   "Rertrieve URL contents and insert to current buffer."
300   (let ((buf (current-buffer))
301         (url-working-buffer url-working-buffer))
302     (let ((old-asynch (default-value 'url-be-asynchronous))
303           (old-caching (default-value 'url-automatic-caching))
304           (old-mode (default-value 'url-standalone-mode)))
305       (setq-default url-be-asynchronous nil)
306       (when no-cache
307         (setq-default url-automatic-caching nil)
308         (setq-default url-standalone-mode nil))
309       (unwind-protect
310           (let ((coding-system-for-read 'binary)
311                 (coding-system-for-write 'binary)
312                 (input-coding-system 'binary)
313                 (output-coding-system 'binary)
314                 (default-enable-multibyte-characters nil))
315             (set-buffer
316              (setq url-working-buffer
317                    (cdr (url-retrieve url no-cache))))
318             (url-uncompress))
319         (setq-default url-be-asynchronous old-asynch)
320         (setq-default url-automatic-caching old-caching)
321         (setq-default url-standalone-mode old-mode)))
322     (let ((charset
323            (or url-current-mime-charset
324                (let ((case-fold-search t))
325                  (goto-char (point-min))
326                  (if (or (re-search-forward nnshimbun-meta-content-type-charset-regexp nil t)
327                          (re-search-forward nnshimbun-meta-charset-content-type-regexp nil t))
328                      (buffer-substring-no-properties (match-beginning 2) (match-end 2)))))))
329       (decode-coding-region
330        (point-min) (point-max)
331        (if charset
332            (let ((mime-charset-coding-system-alist
333                   (append '((euc-jp . euc-japan)
334                             (shift-jis . shift_jis)
335                             (shift_jis . shift_jis)
336                             (sjis . shift_jis)
337                             (x-euc-jp . euc-japan)
338                             (x-shift-jis . shift_jis)
339                             (x-shift_jis . shift_jis)
340                             (x-sjis . shift_jis))
341                           mime-charset-coding-system-alist)))
342              (mime-charset-to-coding-system charset))
343          (let ((default (condition-case nil
344                             (coding-system-category nnshimbun-coding-system)
345                           (error nil)))
346                (candidate (detect-coding-region (point-min) (point-max))))
347            (unless (listp candidate)
348              (setq candidate (list candidate)))
349            (catch 'coding
350              (dolist (coding candidate)
351                (if (eq default (coding-system-category coding))
352                    (throw 'coding coding)))
353              (if (eq (coding-system-category 'binary)
354                      (coding-system-category (car candidate)))
355                  nnshimbun-coding-system
356                (car candidate)))))))
357     (set-buffer-multibyte t)
358     (set-buffer buf)
359     (insert-buffer url-working-buffer)
360     (kill-buffer url-working-buffer)))
361
362 (deffoo nnshimbun-request-article (article &optional group server to-buffer)
363   (when (nnshimbun-possibly-change-group group server)
364     (if (stringp article)
365         (setq article (nnshimbun-search-id group article)))
366     (if (integerp article)
367         (nnshimbun-request-article-1 article group server to-buffer)
368       (nnheader-report 'nnml "Couldn't retrieve article: %s" (prin1-to-string article))
369       nil)))
370
371 (defsubst nnshimbun-header-xref (x)
372   (if (and (setq x (mail-header-xref x))
373            (string-match "^Xref: " x))
374       (substring x 6)
375     x))
376
377 (defun nnshimbun-request-article-1 (article &optional group server to-buffer)
378   (if (nnshimbun-backlog
379         (gnus-backlog-request-article
380          group article (or to-buffer nntp-server-buffer)))
381       (cons group article)
382     (let (header contents)
383       (when (setq header (save-excursion
384                            (set-buffer (nnshimbun-open-nov group))
385                            (and (nnheader-find-nov-line article)
386                                 (nnheader-parse-nov))))
387         (let* ((xref (nnshimbun-header-xref header))
388                (x-faces (cdr (or (assoc (or server
389                                             (nnoo-current-server 'nnshimbun))
390                                         nnshimbun-x-face-alist)
391                                  (assoc "default" nnshimbun-x-face-alist))))
392                (x-face (cdr (or (assoc group x-faces)
393                                 (assoc "default" x-faces)))))
394           (save-excursion
395             (set-buffer nnshimbun-buffer)
396             (erase-buffer)
397             (nnshimbun-retrieve-url xref)
398             (nnheader-message 6 "nnshimbun: Make contents...")
399             (goto-char (point-min))
400             (setq contents (funcall nnshimbun-make-contents header x-face))
401             (nnheader-message 6 "nnshimbun: Make contents...done"))))
402       (when contents
403         (save-excursion
404           (set-buffer (or to-buffer nntp-server-buffer))
405           (erase-buffer)
406           (insert contents)
407           (nnshimbun-backlog
408             (gnus-backlog-enter-article group article (current-buffer)))
409           (nnheader-report 'nnshimbun "Article %s retrieved" (mail-header-id header))
410           (cons group (mail-header-number header)))))))
411
412 (deffoo nnshimbun-request-group (group &optional server dont-check)
413   (let ((pathname-coding-system 'binary))
414     (cond
415      ((not (nnshimbun-possibly-change-group group server))
416       (nnheader-report 'nnshimbun "Invalid group (no such directory)"))
417      ((not (file-exists-p nnshimbun-current-directory))
418       (nnheader-report 'nnshimbun "Directory %s does not exist"
419                        nnshimbun-current-directory))
420      ((not (file-directory-p nnshimbun-current-directory))
421       (nnheader-report 'nnshimbun "%s is not a directory" nnshimbun-current-directory))
422      (dont-check
423       (nnheader-report 'nnshimbun "Group %s selected" group)
424       t)
425      (t
426       (let (beg end lines)
427         (save-excursion
428           (set-buffer (nnshimbun-open-nov group))
429           (goto-char (point-min))
430           (setq beg (ignore-errors (read (current-buffer))))
431           (goto-char (point-max))
432           (forward-line -1)
433           (setq end (ignore-errors (read (current-buffer)))
434                 lines (count-lines (point-min) (point-max))))
435         (nnheader-report 'nnshimbunw "Selected group %s" group)
436         (nnheader-insert "211 %d %d %d %s\n"
437                          lines (or beg 0) (or end 0) group))))))
438
439 (deffoo nnshimbun-request-scan (&optional group server)
440   (nnshimbun-possibly-change-group group server)
441   (nnshimbun-generate-nov-database group))
442
443 (deffoo nnshimbun-close-group (group &optional server)
444   (nnshimbun-write-nov group)
445   t)
446
447 (deffoo nnshimbun-request-list (&optional server)
448   (save-excursion
449     (set-buffer nntp-server-buffer)
450     (erase-buffer)
451     (dolist (group nnshimbun-groups)
452       (when (nnshimbun-possibly-change-group group server)
453         (let (beg end)
454           (save-excursion
455             (set-buffer (nnshimbun-open-nov group))
456             (goto-char (point-min))
457             (setq beg (ignore-errors (read (current-buffer))))
458             (goto-char (point-max))
459             (forward-line -1)
460             (setq end (ignore-errors (read (current-buffer)))))
461           (insert (format "%s %d %d n\n" group (or end 0) (or beg 0)))))))
462   t) ; return value
463
464 (eval-and-compile
465   (if (fboundp 'mime-entity-fetch-field)
466       ;; For Semi-Gnus.
467       (defun nnshimbun-insert-header (header)
468         (insert "Subject: " (or (mime-entity-fetch-field header 'Subject) "(none)") "\n"
469                 "From: " (or (mime-entity-fetch-field header 'From) "(nobody)") "\n"
470                 "Date: " (or (mail-header-date header) "") "\n"
471                 "Message-ID: " (or (mail-header-id header) (nnmail-message-id)) "\n")
472         (let ((refs (mail-header-references header)))
473           (and refs
474                (string< "" refs)
475                (insert "References: " refs "\n")))
476         (insert "Lines: " (number-to-string (or (mail-header-lines header) 0)) "\n"
477                 "Xref: " (nnshimbun-header-xref header) "\n"))
478     ;; For pure Gnus.
479     (defun nnshimbun-insert-header (header)
480       (nnheader-insert-header header)
481       (delete-char -1)
482       (insert "Xref: " (nnshimbun-header-xref header) "\n"))))
483
484 (deffoo nnshimbun-retrieve-headers (articles &optional group server fetch-old)
485   (when (nnshimbun-possibly-change-group group server)
486     (if (nnshimbun-retrieve-headers-with-nov articles fetch-old)
487         'nov
488       (save-excursion
489         (set-buffer nntp-server-buffer)
490         (erase-buffer)
491         (let (header)
492           (dolist (art articles)
493             (if (stringp art)
494                 (setq art (nnshimbun-search-id group art)))
495             (if (integerp art)
496                 (when (setq header
497                             (save-excursion
498                               (set-buffer (nnshimbun-open-nov group))
499                               (and (nnheader-find-nov-line art)
500                                    (nnheader-parse-nov))))
501                   (insert (format "220 %d Article retrieved.\n" art))
502                   (nnshimbun-insert-header header)
503                   (insert ".\n")
504                   (delete-region (point) (point-max))))))
505         'header))))
506
507 (defun nnshimbun-retrieve-headers-with-nov (articles &optional fetch-old)
508   (if (or gnus-nov-is-evil nnshimbun-nov-is-evil)
509       nil
510     (let ((nov (expand-file-name nnshimbun-nov-file-name nnshimbun-current-directory)))
511       (when (file-exists-p nov)
512         (save-excursion
513           (set-buffer nntp-server-buffer)
514           (erase-buffer)
515           (nnheader-insert-file-contents nov)
516           (if (and fetch-old (not (numberp fetch-old)))
517               t                         ; Don't remove anything.
518             (nnheader-nov-delete-outside-range
519              (if fetch-old (max 1 (- (car articles) fetch-old))
520                (car articles))
521              (car (last articles)))
522             t))))))
523
524
525
526 ;;; Nov Database Operations
527
528 (defun nnshimbun-generate-nov-database (group)
529   (prog1 (funcall nnshimbun-generate-nov group)
530     (nnshimbun-write-nov group)))
531
532 (defun nnshimbun-generate-nov-for-each-group (group)
533   (nnshimbun-possibly-change-group group)
534   (save-excursion
535     (set-buffer (nnshimbun-open-nov group))
536     (let (i)
537       (goto-char (point-max))
538       (forward-line -1)
539       (setq i (or (ignore-errors (read (current-buffer))) 0))
540       (dolist (header (save-excursion
541                         (set-buffer nnshimbun-buffer)
542                         (erase-buffer)
543                         (nnshimbun-retrieve-url (eval nnshimbun-index-url) t)
544                         (goto-char (point-min))
545                         (funcall nnshimbun-get-headers)))
546         (unless (nnshimbun-search-id group (mail-header-id header))
547           (mail-header-set-number header (setq i (1+ i)))
548           (goto-char (point-max))
549           (nnheader-insert-nov header)
550           (if nnshimbun-pre-fetch-article
551               (nnshimbun-request-article-1 i group nil nnshimbun-buffer)))))))
552
553 (defun nnshimbun-generate-nov-for-all-groups (&rest args)
554   (unless (and nnshimbun-nov-last-check
555                (< (nnshimbun-lapse-seconds nnshimbun-nov-last-check)
556                   nnshimbun-check-interval))
557     (save-excursion
558       (dolist (list (funcall nnshimbun-get-headers))
559         (let ((group (car list)))
560           (nnshimbun-possibly-change-group group)
561           (when (cdr list)
562             (set-buffer (nnshimbun-open-nov group))
563             (let (i)
564               (goto-char (point-max))
565               (forward-line -1)
566               (setq i (or (ignore-errors (read (current-buffer))) 0))
567               (dolist (header (cdr list))
568                 (unless (nnshimbun-search-id group (mail-header-id header))
569                   (mail-header-set-number header (setq i (1+ i)))
570                   (goto-char (point-max))
571                   (nnheader-insert-nov header)
572                   (if nnshimbun-pre-fetch-article
573                       (nnshimbun-request-article-1 i group nil nnshimbun-buffer))))))))
574       (nnshimbun-save-nov)
575       (setq nnshimbun-nov-last-check (current-time)))))
576
577 (defun nnshimbun-search-id (group id &optional nov)
578   (save-excursion
579     (set-buffer (nnshimbun-open-nov group))
580     (goto-char (point-min))
581     (let (found)
582       (while (and (not found)
583                   (search-forward id nil t)) ; We find the ID.
584         ;; And the id is in the fourth field.
585         (if (not (and (search-backward "\t" nil t 4)
586                       (not (search-backward "\t" (gnus-point-at-bol) t))))
587             (forward-line 1)
588           (forward-line 0)
589           (setq found t)))
590       (unless found
591         (goto-char (point-min))
592         (when (search-forward (concat "X-Nnshimbun-Id: " id) nil t)
593           (forward-line 0)
594           (setq found t)))
595       (if found
596           (if nov
597               (nnheader-parse-nov)
598             ;; We return the article number.
599             (ignore-errors (read (current-buffer))))))))
600
601 (defun nnshimbun-nov-fix-header (group header args)
602   (save-excursion
603     (set-buffer (nnshimbun-open-nov group))
604     (when (nnheader-find-nov-line (mail-header-number header))
605       (dolist (arg args)
606         (if (eq (car arg) 'id)
607             (let ((extra (mail-header-extra header)))
608               (unless (assq 'X-Nnshimbun-Id extra)
609                 (mail-header-set-extra
610                  header
611                  (cons (cons 'X-Nnshimbun-Id (mail-header-id header))
612                        extra)))
613               (mail-header-set-id header (cdr arg)))
614           (let ((func (intern (concat "mail-header-set-" (symbol-name (car arg))))))
615             (if (cdr arg) (eval (list func header (cdr arg)))))))
616       (mail-header-set-xref header (nnshimbun-header-xref header))
617       (delete-region (point) (progn (forward-line 1) (point)))
618       (nnheader-insert-nov header))))
619
620 (defun nnshimbun-open-nov (group)
621   (let ((buffer (cdr (assoc group nnshimbun-nov-buffer-alist))))
622     (if (buffer-live-p buffer)
623         buffer
624       (setq buffer (gnus-get-buffer-create
625                     (format " *nnshimbun overview %s %s*"
626                             (nnoo-current-server 'nnshimbun) group)))
627       (save-excursion
628         (set-buffer buffer)
629         (set (make-local-variable 'nnshimbun-nov-buffer-file-name)
630              (expand-file-name
631               nnshimbun-nov-file-name
632               (nnmail-group-pathname group nnshimbun-server-directory)))
633         (erase-buffer)
634         (when (file-exists-p nnshimbun-nov-buffer-file-name)
635           (nnheader-insert-file-contents nnshimbun-nov-buffer-file-name))
636         (set-buffer-modified-p nil))
637       (push (cons group buffer) nnshimbun-nov-buffer-alist)
638       buffer)))
639
640 (defun nnshimbun-write-nov (group)
641   (let ((buffer (cdr (assoc group nnshimbun-nov-buffer-alist))))
642     (when (buffer-live-p buffer)
643       (save-excursion
644         (set-buffer buffer)
645         (buffer-modified-p)
646         (nnmail-write-region 1 (point-max) nnshimbun-nov-buffer-file-name
647                              nil 'nomesg)))))
648
649 (defun nnshimbun-save-nov ()
650   (save-excursion
651     (while nnshimbun-nov-buffer-alist
652       (when (buffer-name (cdar nnshimbun-nov-buffer-alist))
653         (set-buffer (cdar nnshimbun-nov-buffer-alist))
654         (when (buffer-modified-p)
655           (nnmail-write-region 1 (point-max) nnshimbun-nov-buffer-file-name
656                                nil 'nomesg))
657         (set-buffer-modified-p nil)
658         (kill-buffer (current-buffer)))
659       (setq nnshimbun-nov-buffer-alist (cdr nnshimbun-nov-buffer-alist)))))
660
661
662
663 ;;; Server Initialize
664 (defun nnshimbun-possibly-change-group (group &optional server)
665   (when server
666     (unless (nnshimbun-server-opened server)
667       (nnshimbun-open-server server)))
668   (unless (gnus-buffer-live-p nnshimbun-buffer)
669     (setq nnshimbun-buffer
670           (save-excursion
671             (nnheader-set-temp-buffer
672              (format " *nnshimbun %s*" (nnoo-current-server 'nnshimbun))))))
673   (if (not group)
674       t
675     (let ((pathname (nnmail-group-pathname group nnshimbun-server-directory))
676           (pathname-coding-system 'binary))
677       (unless (equal pathname nnshimbun-current-directory)
678         (setq nnshimbun-current-directory pathname
679               nnshimbun-current-group group))
680       (unless (file-exists-p nnshimbun-current-directory)
681         (ignore-errors (make-directory nnshimbun-current-directory t)))
682       (cond
683        ((not (file-exists-p nnshimbun-current-directory))
684         (nnheader-report 'nnshimbun "Couldn't create directory: %s" nnshimbun-current-directory))
685        ((not (file-directory-p (file-truename nnshimbun-current-directory)))
686         (nnheader-report 'nnshimbun "Not a directory: %s" nnshimbun-current-directory))
687        (t t)))))
688
689
690
691 ;;; Misc Functions
692
693 (eval-and-compile
694   (if (fboundp 'eword-encode-string)
695       ;; For Semi-Gnus.
696       (defun nnshimbun-mime-encode-string (string)
697         (mapconcat
698          #'identity
699          (split-string (eword-encode-string (nnweb-decode-entities-string string)) "\n")
700          ""))
701     ;; For pure Gnus.
702     (defun nnshimbun-mime-encode-string (string)
703       (mapconcat
704        #'identity
705        (split-string
706         (with-temp-buffer
707           (insert (nnweb-decode-entities-string string))
708           (rfc2047-encode-region (point-min) (point-max))
709           (buffer-substring (point-min) (point-max)))
710         "\n")
711        ""))))
712
713 (defun nnshimbun-lapse-seconds (time)
714   (let ((now (current-time)))
715     (+ (* (- (car now) (car time)) 65536)
716        (- (nth 1 now) (nth 1 time)))))
717
718 (defun nnshimbun-make-date-string (year month day &optional time)
719   (format "%02d %s %04d %s +0900"
720           day
721           (aref [nil "Jan" "Feb" "Mar" "Apr" "May" "Jun"
722                      "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"]
723                 month)
724           (cond ((< year 69)
725                  (+ year 2000))
726                 ((< year 100)
727                  (+ year 1900))
728                 ((< year 1000)  ; possible 3-digit years.
729                  (+ year 1900))
730                 (t year))
731           (or time "00:00")))
732
733 (if (fboundp 'regexp-opt)
734     (defalias 'nnshimbun-regexp-opt 'regexp-opt)
735   (defun nnshimbun-regexp-opt (strings &optional paren)
736     "Return a regexp to match a string in STRINGS.
737 Each string should be unique in STRINGS and should not contain any regexps,
738 quoted or not.  If optional PAREN is non-nil, ensure that the returned regexp
739 is enclosed by at least one regexp grouping construct."
740     (let ((open-paren (if paren "\\(" "")) (close-paren (if paren "\\)" "")))
741       (concat open-paren (mapconcat 'regexp-quote strings "\\|") close-paren))))
742
743
744 ;; Fast fill-region function
745
746 (defvar nnshimbun-fill-column (min 80 (- (frame-width) 4)))
747
748 (defconst nnshimbun-kinsoku-bol-list
749   (funcall
750    (if (fboundp 'string-to-char-list)
751        'string-to-char-list
752      'string-to-list) "\
753 !)-_~}]:;',.?\e$B!"!#!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A\e(B\
754 \e$B!B!C!D!E!G!I!K!M!O!Q!S!U!W!Y![!k!l!m!n$!$#$%$'$)$C$c$e$g$n%!%#%%%'%)%C%c%e%g%n%u%v\e(B"))
755
756 (defconst nnshimbun-kinsoku-eol-list
757   (funcall
758    (if (fboundp 'string-to-char-list)
759        'string-to-char-list
760      'string-to-list)
761    "({[`\e$B!F!H!J!L!N!P!R!T!V!X!Z!k!l!m!x\e(B"))
762
763 (defun nnshimbun-fill-line ()
764   (forward-line 0)
765   (let ((top (point)) chr)
766     (while (if (>= (move-to-column nnshimbun-fill-column)
767                    nnshimbun-fill-column)
768                (not (progn
769                       (if (memq (preceding-char) nnshimbun-kinsoku-eol-list)
770                           (progn
771                             (backward-char)
772                             (while (memq (preceding-char) nnshimbun-kinsoku-eol-list)
773                               (backward-char))
774                             (insert "\n"))
775                         (while (memq (setq chr (following-char)) nnshimbun-kinsoku-bol-list)
776                           (forward-char))
777                         (if (looking-at "\\s-+")
778                             (or (eolp) (delete-region (point) (match-end 0)))
779                           (or (> (char-width chr) 1)
780                               (re-search-backward "\\<" top t)
781                               (end-of-line)))
782                         (or (eolp) (insert "\n"))))))
783       (setq top (point))))
784   (forward-line 1)
785   (not (eobp)))
786
787 (defsubst nnshimbun-shallow-rendering ()
788   (goto-char (point-min))
789   (while (search-forward "<p>" nil t)
790     (insert "\n\n"))
791   (goto-char (point-min))
792   (while (search-forward "<br>" nil t)
793     (insert "\n"))
794   (nnweb-remove-markup)
795   (nnweb-decode-entities)
796   (goto-char (point-min))
797   (while (nnshimbun-fill-line))
798   (goto-char (point-min))
799   (when (skip-chars-forward "\n")
800     (delete-region (point-min) (point)))
801   (while (search-forward "\n\n" nil t)
802     (let ((p (point)))
803       (when (skip-chars-forward "\n")
804         (delete-region p (point)))))
805   (goto-char (point-max))
806   (when (skip-chars-backward "\n")
807     (delete-region (point) (point-max)))
808   (insert "\n"))
809
810 (defun nnshimbun-make-text-or-html-contents (header &optional x-face)
811   (let ((case-fold-search t) (html t) (start))
812     (when (and (re-search-forward nnshimbun-contents-start nil t)
813                (setq start (point))
814                (re-search-forward nnshimbun-contents-end nil t))
815       (delete-region (match-beginning 0) (point-max))
816       (delete-region (point-min) start)
817       (nnshimbun-shallow-rendering)
818       (setq html nil))
819     (goto-char (point-min))
820     (nnshimbun-insert-header header)
821     (insert "Content-Type: " (if html "text/html" "text/plain")
822             "; charset=ISO-2022-JP\nMIME-Version: 1.0\n")
823     (when x-face
824       (insert x-face)
825       (unless (bolp)
826         (insert "\n")))
827     (insert "\n")
828     (encode-coding-string (buffer-string)
829                           (mime-charset-to-coding-system "ISO-2022-JP"))))
830
831 (defun nnshimbun-make-html-contents (header &optional x-face)
832   (let (start)
833     (when (and (re-search-forward nnshimbun-contents-start nil t)
834                (setq start (point))
835                (re-search-forward nnshimbun-contents-end nil t))
836       (delete-region (match-beginning 0) (point-max))
837       (delete-region (point-min) start))
838     (goto-char (point-min))
839     (nnshimbun-insert-header header)
840     (insert "Content-Type: text/html; charset=ISO-2022-JP\n"
841             "MIME-Version: 1.0\n")
842     (when x-face
843       (insert x-face)
844       (unless (bolp)
845         (insert "\n")))
846     (insert "\n")
847     (encode-coding-string (buffer-string)
848                           (mime-charset-to-coding-system "ISO-2022-JP"))))
849
850 (defun nnshimbun-make-mhonarc-contents (header &rest args)
851   (require 'mml)
852   (if (search-forward "<!--X-Head-End-->" nil t)
853       (progn
854         (forward-line 0)
855         ;; Processing headers.
856         (save-restriction
857           (narrow-to-region (point-min) (point))
858           (nnweb-decode-entities)
859           (goto-char (point-min))
860           (while (search-forward "\n<!--X-" nil t)
861             (replace-match "\n"))
862           (goto-char (point-min))
863           (while (search-forward " -->\n" nil t)
864             (replace-match "\n"))
865           (goto-char (point-min))
866           (while (search-forward "\t" nil t)
867             (replace-match " "))
868           (goto-char (point-min))
869           (let (buf refs)
870             (while (not (eobp))
871               (cond
872                ((looking-at "<!--")
873                 (delete-region (point) (progn (forward-line 1) (point))))
874                ((looking-at "Subject: +")
875                 (push (cons 'subject (nnheader-header-value)) buf)
876                 (delete-region (point) (progn (forward-line 1) (point))))
877                ((looking-at "From: +")
878                 (push (cons 'from (nnheader-header-value)) buf)
879                 (delete-region (point) (progn (forward-line 1) (point))))
880                ((looking-at "Date: +")
881                 (push (cons 'date (nnheader-header-value)) buf)
882                 (delete-region (point) (progn (forward-line 1) (point))))
883                ((looking-at "Message-Id: +")
884                 (push (cons 'id (concat "<" (nnheader-header-value) ">")) buf)
885                 (delete-region (point) (progn (forward-line 1) (point))))
886                ((looking-at "Reference: +")
887                 (push (concat "<" (nnheader-header-value) ">") refs)
888                 (delete-region (point) (progn (forward-line 1) (point))))
889                ((looking-at "Content-Type: ")
890                 (unless (search-forward "charset" (gnus-point-at-eol) t)
891                   (end-of-line)
892                   (insert "; charset=ISO-2022-JP"))
893                 (forward-line 1))
894                (t (forward-line 1))))
895             (insert "MIME-Version: 1.0\n")
896             (if refs (push (cons 'references (mapconcat 'identity refs " ")) buf))
897             (nnshimbun-nov-fix-header nnshimbun-current-group header buf)
898             (goto-char (point-min))
899             (nnshimbun-insert-header header))
900           (goto-char (point-max)))
901         ;; Processing body.
902         (save-restriction
903           (narrow-to-region (point) (point-max))
904           (delete-region
905            (point)
906            (progn
907              (search-forward "\n<!--X-Body-of-Message-->\n" nil t)
908              (point)))
909           (when (search-forward "\n<!--X-Body-of-Message-End-->\n" nil t)
910             (forward-line -1)
911             (delete-region (point) (point-max)))
912           (nnweb-remove-markup)
913           (nnweb-decode-entities)))
914     (goto-char (point-min))
915     (nnshimbun-insert-header header)
916     (insert "Content-Type: text/html; charset=ISO-2022-JP\nMIME-Version: 1.0\n\n"))
917   (encode-coding-string (buffer-string)
918                         (mime-charset-to-coding-system "ISO-2022-JP")))
919
920
921 ;;; www.asahi.com
922
923 (defun nnshimbun-asahi-get-headers ()
924   (when (search-forward "\n<!-- Start of past -->\n" nil t)
925     (delete-region (point-min) (point))
926     (when (search-forward "\n<!-- End of past -->\n" nil t)
927       (forward-line -1)
928       (delete-region (point) (point-max))
929       (goto-char (point-min))
930       (let (headers)
931         (while (re-search-forward
932                 "^\e$B"#\e(B<a href=\"\\(\\([0-9][0-9][0-9][0-9]\\)/past/\\([A-z]*[0-9]*\\)\\.html\\)\"> *"
933                 nil t)
934           (let ((id (format "<%s%s%%%s>"
935                             (match-string 2)
936                             (match-string 3)
937                             nnshimbun-current-group))
938                 (url (match-string 1)))
939             (push (make-full-mail-header
940                    0
941                    (nnshimbun-mime-encode-string
942                     (mapconcat 'identity
943                                (split-string
944                                 (buffer-substring
945                                  (match-end 0)
946                                  (progn (search-forward "<br>" nil t) (point)))
947                                 "\\(<[^>]+>\\|\r\\)")
948                                ""))
949                    nnshimbun-from-address
950                    "" id "" 0 0 (concat nnshimbun-url url))
951                   headers)))
952         (setq headers (nreverse headers))
953         (let ((i 0))
954           (while (and (nth i headers)
955                       (re-search-forward
956                        "^\\[\\([0-9][0-9]\\)/\\([0-9][0-9]\\) \\([0-9][0-9]:[0-9][0-9]\\)\\]"
957                        nil t))
958             (let ((month (string-to-number (match-string 1)))
959                   (date (decode-time (current-time))))
960               (mail-header-set-date
961                (nth i headers)
962                (nnshimbun-make-date-string
963                 (if (and (eq 12 month) (eq 1 (nth 4 date)))
964                     (1- (nth 5 date))
965                   (nth 5 date))
966                 month
967                 (string-to-number (match-string 2))
968                 (match-string 3))))
969             (setq i (1+ i))))
970         (nreverse headers)))))
971
972
973
974 ;;; www.sponichi.co.jp
975
976 (defun nnshimbun-sponichi-get-headers ()
977   (when (search-forward "\e$B%K%e!<%9%$%s%G%C%/%9\e(B" nil t)
978     (delete-region (point-min) (point))
979     (when (search-forward "\e$B%"%I%?%0\e(B" nil t)
980       (forward-line 2)
981       (delete-region (point) (point-max))
982       (goto-char (point-min))
983       (let ((case-fold-search t) headers)
984         (while (re-search-forward
985                 "^<a href=\"/\\(\\([A-z]*\\)/kiji/\\([0-9][0-9][0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([0-9][0-9]\\)\\.html\\)\">"
986                 nil t)
987           (let ((url (match-string 1))
988                 (id (format "<%s%s%s%s%%%s>"
989                             (match-string 3)
990                             (match-string 4)
991                             (match-string 5)
992                             (match-string 6)
993                             nnshimbun-current-group))
994                 (date (nnshimbun-make-date-string
995                        (string-to-number (match-string 3))
996                        (string-to-number (match-string 4))
997                        (string-to-number (match-string 5)))))
998             (push (make-full-mail-header
999                    0
1000                    (nnshimbun-mime-encode-string
1001                     (mapconcat 'identity
1002                                (split-string
1003                                 (buffer-substring
1004                                  (match-end 0)
1005                                  (progn (search-forward "<br>" nil t) (point)))
1006                                 "<[^>]+>")
1007                                ""))
1008                    nnshimbun-from-address
1009                    date id "" 0 0 (concat nnshimbun-url url))
1010                   headers)))
1011         headers))))
1012
1013
1014
1015 ;;; CNET Japan
1016
1017 (defun nnshimbun-cnet-get-headers ()
1018   (let ((case-fold-search t) headers)
1019     (while (search-forward "\n<!--*****\e$B8+=P$7\e(B*****-->\n" nil t)
1020       (let ((subject (buffer-substring (point) (gnus-point-at-eol)))
1021             (point (point)))
1022         (forward-line -2)
1023         (when (looking-at "<a href=\"/\\(News/\\([0-9][0-9][0-9][0-9]\\)/Item/\\([0-9][0-9]\\([0-9][0-9]\\)\\([0-9][0-9]\\)-[0-9]+\\).html\\)\">")
1024           (let ((url (match-string 1))
1025                 (id  (format "<%s%s%%%s>"
1026                              (match-string 2)
1027                              (match-string 3)
1028                              nnshimbun-current-group))
1029                 (date (nnshimbun-make-date-string
1030                        (string-to-number (match-string 2))
1031                        (string-to-number (match-string 4))
1032                        (string-to-number (match-string 5)))))
1033             (push (make-full-mail-header
1034                    0
1035                    (nnshimbun-mime-encode-string subject)
1036                    nnshimbun-from-address
1037                    date id "" 0 0 (concat nnshimbun-url url))
1038                   headers)))
1039         (goto-char point)))
1040     headers))
1041
1042
1043
1044 ;;; Wired
1045
1046 (defun nnshimbun-wired-get-all-headers ()
1047   (save-excursion
1048     (set-buffer nnshimbun-buffer)
1049     (let ((group-header-alist (mapcar (lambda (g) (cons g nil)) nnshimbun-groups))
1050           (case-fold-search t)
1051           (regexp (format
1052                    "<a href=\"\\(%s\\|/\\)\\(news/news/\\(%s\\)/story/\\(\\([0-9][0-9][0-9][0-9]\\)\\([0-9][0-9]\\)\\([0-9][0-9]\\)[0-9]+\\)\\.html\\)\"><b>"
1053                    (regexp-quote nnshimbun-url)
1054                    (nnshimbun-regexp-opt nnshimbun-groups))))
1055       (dolist (xover (list (concat nnshimbun-url "news/news/index.html")
1056                            (concat nnshimbun-url "news/news/last_seven.html")))
1057         (erase-buffer)
1058         (nnshimbun-retrieve-url xover t)
1059         (goto-char (point-min))
1060         (while (re-search-forward regexp nil t)
1061           (let* ((url   (concat nnshimbun-url (match-string 2)))
1062                  (group (downcase (match-string 3)))
1063                  (id    (format "<%s%%%s>" (match-string 4) group))
1064                  (date  (nnshimbun-make-date-string
1065                          (string-to-number (match-string 5))
1066                          (string-to-number (match-string 6))
1067                          (string-to-number (match-string 7))))
1068                  (header (make-full-mail-header
1069                           0
1070                           (nnshimbun-mime-encode-string
1071                            (mapconcat 'identity
1072                                       (split-string
1073                                        (buffer-substring
1074                                         (match-end 0)
1075                                         (progn (search-forward "</b>" nil t) (point)))
1076                                        "<[^>]+>")
1077                                       ""))
1078                           nnshimbun-from-address
1079                           date id "" 0 0 url))
1080                  (x (assoc group group-header-alist)))
1081             (setcdr x (cons header (cdr x))))))
1082       group-header-alist)))
1083
1084
1085
1086 ;;; www.yomiuri.co.jp
1087
1088 (defun nnshimbun-yomiuri-get-all-headers ()
1089   (save-excursion
1090     (set-buffer nnshimbun-buffer)
1091     (erase-buffer)
1092     (nnshimbun-retrieve-url (eval nnshimbun-index-url) t)
1093     (let ((case-fold-search t)
1094           (group-header-alist (mapcar (lambda (g) (cons g nil)) nnshimbun-groups)))
1095       (dolist (group nnshimbun-groups)
1096         (let (start)
1097           (goto-char (point-min))
1098           (when (and (search-forward (format "\n<!-- /news/%s=start -->\n" group) nil t)
1099                      (setq start (point))
1100                      (search-forward (format "\n<!-- /news/%s=end -->\n" group) nil t))
1101             (forward-line -1)
1102             (save-restriction
1103               (narrow-to-region start (point))
1104               (goto-char start)
1105               (while (re-search-forward
1106                       "<a href=\"/\\([0-9]+\\)/\\(\\(\\([0-9][0-9][0-9][0-9]\\)\\([0-9][0-9]\\)\\([0-9][0-9]\\)[A-z0-9]+\\)\\.htm\\)\"[^>]*>"
1107                       nil t)
1108                 (let ((url   (concat (match-string 1) "a/" (match-string 2)))
1109                       (id    (format "<%s%s%%%s>"
1110                                      (match-string 1)
1111                                      (match-string 3)
1112                                      group))
1113                       (year  (string-to-number (match-string 4)))
1114                       (month (string-to-number (match-string 5)))
1115                       (day   (string-to-number (match-string 6)))
1116                       (subject (mapconcat
1117                                 'identity
1118                                 (split-string
1119                                  (buffer-substring
1120                                   (match-end 0)
1121                                   (progn (search-forward "<br>" nil t) (point)))
1122                                  "<[^>]+>")
1123                                 ""))
1124                       date x)
1125                   (when (string-match "^\e$B"!\e(B" subject)
1126                     (setq subject (substring subject (match-end 0))))
1127                   (if (string-match "(\\([0-9][0-9]:[0-9][0-9]\\))$" subject)
1128                       (setq date (nnshimbun-make-date-string
1129                                   year month day (match-string 1 subject))
1130                             subject (substring subject 0 (match-beginning 0)))
1131                     (setq date (nnshimbun-make-date-string year month day)))
1132                   (setcdr (setq x (assoc group group-header-alist))
1133                           (cons (make-full-mail-header
1134                                  0
1135                                  (nnshimbun-mime-encode-string subject)
1136                                  nnshimbun-from-address
1137                                  date id "" 0 0 (concat nnshimbun-url url))
1138                                 (cdr x)))))))))
1139       group-header-alist)))
1140
1141
1142
1143 ;;; Zdnet Japan
1144
1145 (defun nnshimbun-zdnet-get-headers ()
1146   (let ((case-fold-search t) headers)
1147     (goto-char (point-min))
1148     (let (start)
1149       (while (and (search-forward "<!--" nil t)
1150                   (setq start (- (point) 4))
1151                   (search-forward "-->" nil t))
1152         (delete-region start (point))))
1153     (goto-char (point-min))
1154     (while (re-search-forward
1155             "<a href=\"\\(/news/\\)?\\(\\([0-9][0-9]\\)\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([^\\.]+\\).html\\)\"><font size=\"4\"><strong>"
1156             nil t)
1157       (let ((year  (+ 2000 (string-to-number (match-string 3))))
1158             (month (string-to-number (match-string 4)))
1159             (day   (string-to-number (match-string 5)))
1160             (id    (format "<%s%s%s%s%%%s>"
1161                            (match-string 3)
1162                            (match-string 4)
1163                            (match-string 5)
1164                            (match-string 6)
1165                            nnshimbun-current-group))
1166             (url (match-string 2)))
1167         (push (make-full-mail-header
1168                0
1169                (nnshimbun-mime-encode-string
1170                 (mapconcat 'identity
1171                            (split-string
1172                             (buffer-substring
1173                              (match-end 0)
1174                              (progn (search-forward "</a>" nil t) (point)))
1175                             "<[^>]+>")
1176                            ""))
1177                nnshimbun-from-address
1178                (nnshimbun-make-date-string year month day)
1179                id  "" 0 0 (concat nnshimbun-url url))
1180               headers)))
1181     (nreverse headers)))
1182
1183
1184
1185 ;;; MLs on www.mew.org
1186
1187 (defmacro nnshimbun-mew-concat-url (url)
1188   `(concat nnshimbun-url
1189            (nth 1 (assoc nnshimbun-current-group nnshimbun-mew-groups))
1190            "/"
1191            ,url))
1192
1193 (defmacro nnshimbun-mew-reverse-order-p ()
1194   `(nth 2 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
1195
1196 (defmacro nnshimbun-mew-spew-p ()
1197   `(nth 3 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
1198
1199 (defsubst nnshimbun-mew-retrieve-xover (aux)
1200   (erase-buffer)
1201   (nnshimbun-retrieve-url
1202    (nnshimbun-mew-concat-url (if (= aux 1) "index.html" (format "mail%d.html" aux)))
1203    t))
1204
1205 (defconst nnshimbun-mew-regexp "<A[^>]*HREF=\"\\(msg\\([0-9]+\\).html\\)\">\\([^<]+\\)<")
1206
1207 (defmacro nnshimbun-mew-extract-header-values ()
1208   `(progn
1209      (setq url (nnshimbun-mew-concat-url (match-string 1))
1210            id (format "<%05d%%%s>"
1211                       (1- (string-to-number (match-string 2)))
1212                       nnshimbun-current-group)
1213            subject (match-string 3))
1214      (forward-line 1)
1215      (if (nnshimbun-search-id nnshimbun-current-group id)
1216          (throw 'stop headers)
1217        (push (make-full-mail-header
1218               0
1219               (nnshimbun-mime-encode-string subject)
1220               (if (looking-at "<EM>\\([^<]+\\)<")
1221                   (nnshimbun-mime-encode-string (match-string 1))
1222                 "")
1223               "" id "" 0 0 url)
1224              headers))))
1225
1226 (eval-and-compile
1227   (if (fboundp 'mime-entity-fetch-field)
1228       ;; For Semi-Gnus.
1229       (defmacro nnshimbun-mew-mail-header-subject (header)
1230         `(mime-entity-fetch-field ,header 'Subject))
1231     ;; For pure Gnus.
1232     (defalias 'nnshimbun-mew-mail-header-subject 'mail-header-subject)))
1233
1234 (defun nnshimbun-mew-get-headers ()
1235   (if (nnshimbun-mew-spew-p)
1236       (let ((headers (nnshimbun-mew-get-headers-1)))
1237         (erase-buffer)
1238         (insert-buffer-substring (nnshimbun-open-nov nnshimbun-current-group))
1239         (delq nil
1240               (mapcar
1241                (lambda (header)
1242                  (goto-char (point-min))
1243                  (let ((subject (nnshimbun-mew-mail-header-subject header))
1244                        (found))
1245                    (while (and (not found)
1246                                (search-forward subject nil t))
1247                      (if (not (and (search-backward "\t" nil t)
1248                                    (not (search-backward "\t" (gnus-point-at-bol) t))))
1249                          (forward-line 1)
1250                        (setq found t)))
1251                    (if found
1252                        nil
1253                      (goto-char (point-max))
1254                      (nnheader-insert-nov header)
1255                      header)))
1256                headers)))
1257     (nnshimbun-mew-get-headers-1)))
1258
1259 (defun nnshimbun-mew-get-headers-1 ()
1260   (let (headers)
1261     (when (re-search-forward
1262            "<A[^>]*HREF=\"mail\\([0-9]+\\)\\.html\">\\[?Last Page\\]?</A>" nil t)
1263       (let ((limit (string-to-number (match-string 1))))
1264         (catch 'stop
1265           (if (nnshimbun-mew-reverse-order-p)
1266               (let ((aux 1))
1267                 (while (let (id url subject)
1268                          (while (re-search-forward nnshimbun-mew-regexp nil t)
1269                            (nnshimbun-mew-extract-header-values))
1270                          (< aux limit))
1271                   (nnshimbun-mew-retrieve-xover (setq aux (1+ aux)))))
1272             (while (> limit 0)
1273               (nnshimbun-mew-retrieve-xover limit)
1274               (setq limit (1- limit))
1275               (let (id url subject)
1276                 (goto-char (point-max))
1277                 (while (re-search-backward nnshimbun-mew-regexp nil t)
1278                   (nnshimbun-mew-extract-header-values)
1279                   (forward-line -2)))))
1280           headers)))))
1281
1282
1283
1284 ;;; MLs on www.xemacs.org
1285
1286 (defmacro nnshimbun-xemacs-concat-url (url)
1287   `(concat nnshimbun-url nnshimbun-current-group "/" ,url))
1288
1289 (defun nnshimbun-xemacs-get-headers ()
1290   (let (headers auxs aux)
1291     (catch 'stop
1292       (while (re-search-forward
1293               (concat "<A HREF=\"/list-archives/" nnshimbun-current-group
1294                       "/\\([12][0-9][0-9][0-9][0-1][0-9]\\)/\">\\[Index\\]")
1295               nil t)
1296         (setq auxs (append auxs (list (match-string 1)))))
1297       (while auxs
1298         (erase-buffer)
1299         (nnshimbun-retrieve-url
1300          (nnshimbun-xemacs-concat-url (concat (setq aux (car auxs)) "/")))
1301         (let (id url subject)
1302           (goto-char (point-max))
1303           (while (re-search-backward
1304                   "<A[^>]*HREF=\"\\(msg\\([0-9]+\\).html\\)\">\\([^<]+\\)<"
1305                   nil t)
1306             (setq url (nnshimbun-xemacs-concat-url
1307                        (concat aux "/" (match-string 1)))
1308                   id (format "<%s%05d%%%s>"
1309                              aux
1310                              (string-to-number (match-string 2))
1311                              nnshimbun-current-group)
1312                   subject (match-string 3))
1313             (forward-line 1)
1314             (if (nnshimbun-search-id nnshimbun-current-group id)
1315                 (throw 'stop headers)
1316               (push (make-full-mail-header
1317                      0
1318                      (nnshimbun-mime-encode-string subject)
1319                      (if (looking-at "<td><em>\\([^<]+\\)<")
1320                          (match-string 1)
1321                        "")
1322                      "" id "" 0 0 url)
1323                     headers))
1324             (message "%s" id)
1325             (forward-line -2)))
1326         (setq auxs (cdr auxs))))
1327     headers))
1328
1329 ;;; MLs on www.jp.netbsd.org
1330
1331 (defun nnshimbun-netbsd-get-headers ()
1332   (let ((case-fold-search t) headers months)
1333     (goto-char (point-min))
1334     (while (re-search-forward "<A HREF=\"\\([0-9]+\\)/\\(threads.html\\)?\">" nil t)
1335       (push (match-string 1) months))
1336     (setq months (nreverse months))
1337     (catch 'exit
1338       (dolist (month months)
1339         (erase-buffer)
1340         (nnshimbun-retrieve-url
1341          (format "%s%s/%s/maillist.html" nnshimbun-url nnshimbun-current-group month)
1342          t)
1343         (let (id url subject)
1344           (while (re-search-forward
1345                   "<A[^>]*HREF=\"\\(msg\\([0-9]+\\)\\.html\\)\">\\([^<]+\\)</A>"
1346                   nil t)
1347             (setq url (format "%s%s/%s/%s"
1348                               nnshimbun-url
1349                               nnshimbun-current-group
1350                               month
1351                               (match-string 1))
1352                   id (format "<%s%05d%%%s>"
1353                              month
1354                              (string-to-number (match-string 2))
1355                              nnshimbun-current-group)
1356                   subject (match-string 3))
1357             (if (nnshimbun-search-id nnshimbun-current-group id)
1358                 (throw 'exit headers)
1359               (push (make-full-mail-header
1360                      0
1361                      (nnshimbun-mime-encode-string subject)
1362                      (if (looking-at "</STRONG> *<EM>\\([^<]+\\)<")
1363                          (nnshimbun-mime-encode-string (match-string 1))
1364                        "")
1365                      "" id "" 0 0 url)
1366                     headers)))))
1367       headers)))
1368
1369 (provide 'nnshimbun)
1370 ;;; nnshimbun.el ends here.