Patch from Arisawa-san.
[elisp/gnus.git-] / lisp / nnshimbun.el
1 ;;; nnshimbun.el --- interfacing with web newspapers -*- coding: junet; -*-
2
3 ;; Authors: TSUCHIYA Masatoshi <tsuchiya@pine.kuee.kyoto-u.ac.jp>
4 ;;          Akihiro Arisawa    <ari@atesoft.advantest.co.jp>
5 ;; Keywords: news
6
7 ;;; Copyright:
8
9 ;; This file is a part of Semi-Gnus.
10
11 ;; This program is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; This program is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with this program; if not, you can either send email to this
23 ;; program's maintainer or write to: The Free Software Foundation,
24 ;; Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;; Gnus backend to read newspapers on WEB.
29
30
31 ;;; Defintinos:
32
33 (gnus-declare-backend "nnshimbun" 'address)
34
35 (eval-when-compile (require 'cl))
36 (eval-when-compile (require 'static))
37
38 (require 'nnheader)
39 (require 'nnmail)
40 (require 'nnoo)
41 (require 'gnus-bcklg)
42 (eval-when-compile (ignore-errors (require 'nnweb)))
43 ;; Report failure to find w3 at load time if appropriate.
44 (eval '(require 'nnweb))
45 (require 'mcharset)
46
47
48 (nnoo-declare nnshimbun)
49
50 (defvar nnshimbun-check-interval 300)
51
52 (defconst nnshimbun-mew-groups
53   '(("meadow-develop" "meadow-develop" nil t)
54     ("meadow-users-jp" "meadow-users-jp")
55     ("mule-win32" "mule-win32")
56     ("mew-win32" "mew-win32")
57     ("mew-dist" "mew-dist/3300" t)
58     ("mgp-users-jp" "mgp-users-jp/A" t t)))
59
60 (defvar nnshimbun-type-definition
61   `(("asahi"
62      (url . "http://spin.asahi.com/")
63      (groups "national" "business" "politics" "international" "sports" "personal" "feneral")
64      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
65      (generate-nov   . nnshimbun-generate-nov-for-each-group)
66      (get-headers    . nnshimbun-asahi-get-headers)
67      (index-url      . (format "%sp%s.html" nnshimbun-url nnshimbun-current-group))
68      (from-address   . "webmaster@www.asahi.com")
69      (make-contents  . nnshimbun-make-text-or-html-contents)
70      (contents-start . "\n<!-- Start of kiji -->\n")
71      (contents-end   . "\n<!-- End of kiji -->\n"))
72     ("sponichi"
73      (url . "http://www.sponichi.co.jp/")
74      (groups "baseball" "soccer" "usa" "others" "society" "entertainment" "horseracing")
75      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
76      (generate-nov   . nnshimbun-generate-nov-for-each-group)
77      (get-headers    . nnshimbun-sponichi-get-headers)
78      (index-url      . (format "%s%s/index.html" nnshimbun-url nnshimbun-current-group))
79      (from-address   . "webmaster@www.sponichi.co.jp")
80      (make-contents  . nnshimbun-make-text-or-html-contents)
81      (contents-start . "\n<span class=\"text\">\e$B!!\e(B")
82      (contents-end   . "\n"))
83     ("cnet"
84      (url . "http://cnet.sphere.ne.jp/")
85      (groups "comp")
86      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
87      (generate-nov   . nnshimbun-generate-nov-for-each-group)
88      (get-headers    . nnshimbun-cnet-get-headers)
89      (index-url      . (format "%s/News/Oneweek/" nnshimbun-url))
90      (from-address   . "cnet@sphere.ad.jp")
91      (make-contents  . nnshimbun-make-html-contents)
92      (contents-start . "\n<!--KIJI-->\n")
93      (contents-end   . "\n<!--/KIJI-->\n"))
94     ("wired"
95      (url . "http://www.hotwired.co.jp/")
96      (groups "business" "culture" "technology")
97      (coding-system  . ,(static-if (boundp 'MULE) '*euc-japan* 'euc-jp))
98      (generate-nov   . nnshimbun-generate-nov-for-all-groups)
99      (get-headers    . nnshimbun-wired-get-all-headers)
100      (index-url)
101      (from-address   . "webmaster@www.hotwired.co.jp")
102      (make-contents  . nnshimbun-make-html-contents)
103      (contents-start . "\n<!-- START_OF_BODY -->\n")
104      (contents-end   . "\n<!-- END_OF_BODY -->\n"))
105     ("yomiuri"
106      (url . "http://www.yomiuri.co.jp/")
107      (groups "shakai" "sports" "seiji" "keizai" "kokusai" "fuho")
108      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
109      (generate-nov   . nnshimbun-generate-nov-for-all-groups)
110      (get-headers    . nnshimbun-yomiuri-get-all-headers)
111      (index-url      . (concat nnshimbun-url "main.htm"))
112      (from-address   . "webmaster@www.yomiuri.co.jp")
113      (make-contents  . nnshimbun-make-text-or-html-contents)
114      (contents-start . "\n<!--  honbun start  -->\n")
115      (contents-end   . "\n<!--  honbun end  -->\n"))
116     ("zdnet"
117      (url . "http://www.zdnet.co.jp/news/")
118      (groups "comp")
119      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
120      (generate-nov   . nnshimbun-generate-nov-for-each-group)
121      (get-headers    . nnshimbun-zdnet-get-headers)
122      (index-url      . nnshimbun-url)
123      (from-address   . "zdnn@softbank.co.jp")
124      (make-contents  . nnshimbun-make-html-contents)
125      (contents-start . "\\(<!--BODY-->\\|<!--DATE-->\\)")
126      (contents-end   . "\\(<!--BODYEND-->\\|<!--BYLINEEND-->\\)"))
127     ("mew"
128      (url . "http://www.mew.org/archive/")
129      (groups ,@(mapcar #'car nnshimbun-mew-groups))
130      (coding-system . ,(static-if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
131      (generate-nov  . nnshimbun-generate-nov-for-each-group)
132      (get-headers   . nnshimbun-mew-get-headers)
133      (index-url     . (nnshimbun-mew-concat-url "index.html"))
134      (make-contents . nnshimbun-make-mhonarc-contents))
135     ("xemacs"
136      (url . "http://list-archives.xemacs.org/")
137      (groups "xemacs-announce" "xemacs-beta-ja" "xemacs-beta"
138              "xemacs-build-reports" "xemacs-cvs" "xemacs-mule"
139              "xemacs-nt" "xemacs-patches" "xemacs-users-ja" "xemacs")
140      (coding-system . ,(static-if (boundp 'MULE) '*euc-japan* 'euc-jp))
141      (generate-nov  . nnshimbun-generate-nov-for-each-group)
142      (get-headers   . nnshimbun-xemacs-get-headers)
143      (index-url     . (nnshimbun-xemacs-concat-url nil))
144      (make-contents . nnshimbun-make-mhonarc-contents))
145     ("netbsd"
146      (url . "http://www.jp.netbsd.org/ja/JP/ml/")
147      (groups "announce-ja" "junk-ja" "tech-misc-ja" "tech-pkg-ja"
148              "port-arm32-ja" "port-hpcmips-ja" "port-mac68k-ja"
149              "port-mips-ja" "port-powerpc-ja" "hpcmips-changes-ja"
150              "members-ja" "admin-ja" "www-changes-ja")
151      (coding-system  . ,(static-if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
152      (generate-nov   . nnshimbun-generate-nov-for-each-group)
153      (get-headers    . nnshimbun-netbsd-get-headers)
154      (index-url      . (format "%s%s/index.html" nnshimbun-url nnshimbun-current-group))
155      (make-contents  . nnshimbun-make-mhonarc-contents))
156     ("bbdb-ml"
157      (url . "http://www.rc.tutrp.tut.ac.jp/bbdb-ml/")
158      (groups "bbdb-ml")
159      (coding-system . ,(static-if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
160      (generate-nov . nnshimbun-generate-nov-for-each-group)
161      (get-headers . nnshimbun-fml-get-headers)
162      (index-url . nnshimbun-url)
163      (make-contents . nnshimbun-make-fml-contents))
164     ))
165
166 (defvar nnshimbun-x-face-alist
167   '(("default" .
168      (("default" .
169        "X-Face: Ygq$6P.,%Xt$U)DS)cRY@k$VkW!7(X'X'?U{{osjjFG\"E]hND;SPJ-J?O?R|a?L
170         g2$0rVng=O3Lt}?~IId8Jj&vP^3*o=LKUyk(`t%0c!;t6REk=JbpsEn9MrN7gZ%"))))
171   "Alist of server vs. alist of group vs. X-Face field.  It looks like:
172
173 \((\"asahi\" . ((\"national\" . \"X-face: ***\")
174              (\"business\" . \"X-Face: ***\")
175                 ;;
176                 ;;
177              (\"default\" . \"X-face: ***\")))
178  (\"sponichi\" . ((\"baseball\" . \"X-face: ***\")
179                 (\"soccer\" . \"X-Face: ***\")
180                 ;;
181                 ;;
182                 (\"default\" . \"X-face: ***\")))
183                 ;;
184  (\"default\" . ((\"default\" . \"X-face: ***\")))")
185
186 (defvoo nnshimbun-directory (nnheader-concat gnus-directory "shimbun/")
187   "Where nnshimbun will save its files.")
188
189 (defvoo nnshimbun-nov-is-evil nil
190   "*Non-nil means that nnshimbun will never retrieve NOV headers.")
191
192 (defvoo nnshimbun-nov-file-name ".overview")
193
194 (defvoo nnshimbun-pre-fetch-article nil
195   "*Non nil means that nnshimbun fetch unread articles when scanning groups.")
196
197 ;; set by nnshimbun-possibly-change-group
198 (defvoo nnshimbun-buffer nil)
199 (defvoo nnshimbun-current-directory nil)
200 (defvoo nnshimbun-current-group nil)
201
202 ;; set by nnshimbun-open-server
203 (defvoo nnshimbun-url nil)
204 (defvoo nnshimbun-coding-system nil)
205 (defvoo nnshimbun-groups nil)
206 (defvoo nnshimbun-generate-nov nil)
207 (defvoo nnshimbun-get-headers nil)
208 (defvoo nnshimbun-index-url nil)
209 (defvoo nnshimbun-from-address nil)
210 (defvoo nnshimbun-make-contents nil)
211 (defvoo nnshimbun-contents-start nil)
212 (defvoo nnshimbun-contents-end nil)
213 (defvoo nnshimbun-server-directory nil)
214
215 (defvoo nnshimbun-status-string "")
216 (defvoo nnshimbun-nov-last-check nil)
217 (defvoo nnshimbun-nov-buffer-alist nil)
218 (defvoo nnshimbun-nov-buffer-file-name nil)
219
220 (defvoo nnshimbun-keep-backlog 300)
221 (defvoo nnshimbun-backlog-articles nil)
222 (defvoo nnshimbun-backlog-hashtb nil)
223
224 (defconst nnshimbun-meta-content-type-charset-regexp
225   (eval-when-compile
226     (concat "<meta[ \t]+http-equiv=\"?Content-type\"?[ \t]+content=\"\\([^;]+\\)"
227             ";[ \t]*charset=\"?\\([^\"]+\\)\"?"
228             ">"))
229   "Regexp used in parsing `<META HTTP-EQUIV=\"Content-Type\" content=\"...;charset=...\">
230 for a charset indication")
231
232 (defconst nnshimbun-meta-charset-content-type-regexp
233   (eval-when-compile
234     (concat "<meta[ \t]+content=\"\\([^;]+\\)"
235             ";[ \t]*charset=\"?\\([^\"]+\\)\"?"
236             "[ \t]+http-equiv=\"?Content-type\"?>"))
237   "Regexp used in parsing `<META content=\"...;charset=...\" HTTP-EQUIV=\"Content-Type\">
238 for a charset indication")
239
240
241
242 ;;; backlog
243 (defmacro nnshimbun-backlog (&rest form)
244   `(let ((gnus-keep-backlog nnshimbun-keep-backlog)
245          (gnus-backlog-buffer (format " *nnshimbun backlog %s*" (nnoo-current-server 'nnshimbun)))
246          (gnus-backlog-articles nnshimbun-backlog-articles)
247          (gnus-backlog-hashtb nnshimbun-backlog-hashtb))
248      (unwind-protect
249          (progn ,@form)
250        (setq nnshimbun-backlog-articles gnus-backlog-articles
251              nnshimbun-backlog-hashtb gnus-backlog-hashtb))))
252 (put 'nnshimbun-backlog 'lisp-indent-function 0)
253 (put 'nnshimbun-backlog 'edebug-form-spec '(form body))
254
255
256
257 ;;; Interface Functions
258 (nnoo-define-basics nnshimbun)
259
260 (deffoo nnshimbun-open-server (server &optional defs)
261   ;; Set default values.
262   (dolist (default (cdr (assoc server nnshimbun-type-definition)))
263     (let ((symbol (intern (concat "nnshimbun-" (symbol-name (car default))))))
264       (unless (assq symbol defs)
265         (push (list symbol (cdr default)) defs))))
266   ;; Set directory for server working files.
267   (push (list 'nnshimbun-server-directory
268               (file-name-as-directory
269                (expand-file-name server nnshimbun-directory)))
270         defs)
271   (nnoo-change-server 'nnshimbun server defs)
272   (nnshimbun-possibly-change-group nil server)
273   ;; Make directories.
274   (unless (file-exists-p nnshimbun-directory)
275     (ignore-errors (make-directory nnshimbun-directory t)))
276   (cond
277    ((not (file-exists-p nnshimbun-directory))
278     (nnshimbun-close-server)
279     (nnheader-report 'nnshimbun "Couldn't create directory: %s" nnshimbun-directory))
280    ((not (file-directory-p (file-truename nnshimbun-directory)))
281     (nnshimbun-close-server)
282     (nnheader-report 'nnshimbun "Not a directory: %s" nnshimbun-directory))
283    (t
284     (unless (file-exists-p nnshimbun-server-directory)
285       (ignore-errors (make-directory nnshimbun-server-directory t)))
286     (cond
287      ((not (file-exists-p nnshimbun-server-directory))
288       (nnshimbun-close-server)
289       (nnheader-report 'nnshimbun "Couldn't create directory: %s" nnshimbun-server-directory))
290      ((not (file-directory-p (file-truename nnshimbun-server-directory)))
291       (nnshimbun-close-server)
292       (nnheader-report 'nnshimbun "Not a directory: %s" nnshimbun-server-directory))
293      (t
294       (nnheader-report 'nnshimbun "Opened server %s using directory %s"
295                        server nnshimbun-server-directory)
296       t)))))
297
298 (deffoo nnshimbun-close-server (&optional server)
299   (and (nnshimbun-server-opened server)
300        (gnus-buffer-live-p nnshimbun-buffer)
301        (kill-buffer nnshimbun-buffer))
302   (nnshimbun-backlog (gnus-backlog-shutdown))
303   (nnshimbun-save-nov)
304   (nnoo-close-server 'nnshimbun server)
305   t)
306
307 (static-when (boundp 'MULE)
308   (unless (coding-system-p 'euc-japan)
309     (copy-coding-system '*euc-japan* 'euc-japan))
310   (unless (coding-system-p 'shift_jis)
311     (copy-coding-system '*sjis* 'shift_jis))
312   (eval-and-compile
313     (defalias-maybe 'coding-system-category 'get-code-mnemonic)))
314
315 (defun nnshimbun-retrieve-url (url &optional no-cache)
316   "Rertrieve URL contents and insert to current buffer."
317   (let ((buf (current-buffer))
318         (url-working-buffer url-working-buffer))
319     (let ((old-asynch (default-value 'url-be-asynchronous))
320           (old-caching (default-value 'url-automatic-caching))
321           (old-mode (default-value 'url-standalone-mode)))
322       (setq-default url-be-asynchronous nil)
323       (when no-cache
324         (setq-default url-automatic-caching nil)
325         (setq-default url-standalone-mode nil))
326       (unwind-protect
327           (let ((coding-system-for-read 'binary)
328                 (coding-system-for-write 'binary)
329                 (input-coding-system 'binary)
330                 (output-coding-system 'binary)
331                 (default-enable-multibyte-characters nil))
332             (set-buffer
333              (setq url-working-buffer
334                    (cdr (url-retrieve url no-cache))))
335             (url-uncompress))
336         (setq-default url-be-asynchronous old-asynch)
337         (setq-default url-automatic-caching old-caching)
338         (setq-default url-standalone-mode old-mode)))
339     (let ((charset
340            (or url-current-mime-charset
341                (let ((case-fold-search t))
342                  (goto-char (point-min))
343                  (if (or (re-search-forward
344                           nnshimbun-meta-content-type-charset-regexp nil t)
345                          (re-search-forward
346                           nnshimbun-meta-charset-content-type-regexp nil t))
347                      (buffer-substring-no-properties (match-beginning 2)
348                                                      (match-end 2)))))))
349       (decode-coding-region
350        (point-min) (point-max)
351        (if charset
352            (let ((mime-charset-coding-system-alist
353                   (append '((euc-jp . euc-japan)
354                             (shift-jis . shift_jis)
355                             (shift_jis . shift_jis)
356                             (sjis . shift_jis)
357                             (x-euc-jp . euc-japan)
358                             (x-shift-jis . shift_jis)
359                             (x-shift_jis . shift_jis)
360                             (x-sjis . shift_jis))
361                           mime-charset-coding-system-alist)))
362              (mime-charset-to-coding-system charset))
363          (let ((default (condition-case nil
364                             (coding-system-category nnshimbun-coding-system)
365                           (error nil)))
366                (candidate (detect-coding-region (point-min) (point-max))))
367            (unless (listp candidate)
368              (setq candidate (list candidate)))
369            (catch 'coding
370              (dolist (coding candidate)
371                (if (eq default (coding-system-category coding))
372                    (throw 'coding coding)))
373              (if (eq (coding-system-category 'binary)
374                      (coding-system-category (car candidate)))
375                  nnshimbun-coding-system
376                (car candidate)))))))
377     (set-buffer-multibyte t)
378     (set-buffer buf)
379     (insert-buffer url-working-buffer)
380     (kill-buffer url-working-buffer)))
381
382 (deffoo nnshimbun-request-article (article &optional group server to-buffer)
383   (when (nnshimbun-possibly-change-group group server)
384     (if (stringp article)
385         (setq article (nnshimbun-search-id group article)))
386     (if (integerp article)
387         (nnshimbun-request-article-1 article group server to-buffer)
388       (nnheader-report 'nnml "Couldn't retrieve article: %s" (prin1-to-string article))
389       nil)))
390
391 (defsubst nnshimbun-header-xref (x)
392   (if (and (setq x (mail-header-xref x))
393            (string-match "^Xref: " x))
394       (substring x 6)
395     x))
396
397 (defun nnshimbun-request-article-1 (article &optional group server to-buffer)
398   (if (nnshimbun-backlog
399         (gnus-backlog-request-article
400          group article (or to-buffer nntp-server-buffer)))
401       (cons group article)
402     (let (header contents)
403       (when (setq header (save-excursion
404                            (set-buffer (nnshimbun-open-nov group))
405                            (and (nnheader-find-nov-line article)
406                                 (nnheader-parse-nov))))
407         (let* ((xref (nnshimbun-header-xref header))
408                (x-faces (cdr (or (assoc (or server
409                                             (nnoo-current-server 'nnshimbun))
410                                         nnshimbun-x-face-alist)
411                                  (assoc "default" nnshimbun-x-face-alist))))
412                (x-face (cdr (or (assoc group x-faces)
413                                 (assoc "default" x-faces)))))
414           (save-excursion
415             (set-buffer nnshimbun-buffer)
416             (erase-buffer)
417             (nnshimbun-retrieve-url xref)
418             (nnheader-message 6 "nnshimbun: Make contents...")
419             (goto-char (point-min))
420             (setq contents (funcall nnshimbun-make-contents header x-face))
421             (nnheader-message 6 "nnshimbun: Make contents...done"))))
422       (when contents
423         (save-excursion
424           (set-buffer (or to-buffer nntp-server-buffer))
425           (erase-buffer)
426           (insert contents)
427           (nnshimbun-backlog
428             (gnus-backlog-enter-article group article (current-buffer)))
429           (nnheader-report 'nnshimbun "Article %s retrieved" (mail-header-id header))
430           (cons group (mail-header-number header)))))))
431
432 (deffoo nnshimbun-request-group (group &optional server dont-check)
433   (let ((pathname-coding-system 'binary))
434     (cond
435      ((not (nnshimbun-possibly-change-group group server))
436       (nnheader-report 'nnshimbun "Invalid group (no such directory)"))
437      ((not (file-exists-p nnshimbun-current-directory))
438       (nnheader-report 'nnshimbun "Directory %s does not exist"
439                        nnshimbun-current-directory))
440      ((not (file-directory-p nnshimbun-current-directory))
441       (nnheader-report 'nnshimbun "%s is not a directory" nnshimbun-current-directory))
442      (dont-check
443       (nnheader-report 'nnshimbun "Group %s selected" group)
444       t)
445      (t
446       (let (beg end lines)
447         (save-excursion
448           (set-buffer (nnshimbun-open-nov group))
449           (goto-char (point-min))
450           (setq beg (ignore-errors (read (current-buffer))))
451           (goto-char (point-max))
452           (forward-line -1)
453           (setq end (ignore-errors (read (current-buffer)))
454                 lines (count-lines (point-min) (point-max))))
455         (nnheader-report 'nnshimbunw "Selected group %s" group)
456         (nnheader-insert "211 %d %d %d %s\n"
457                          lines (or beg 0) (or end 0) group))))))
458
459 (deffoo nnshimbun-request-scan (&optional group server)
460   (nnshimbun-possibly-change-group group server)
461   (nnshimbun-generate-nov-database group))
462
463 (deffoo nnshimbun-close-group (group &optional server)
464   (nnshimbun-write-nov group)
465   t)
466
467 (deffoo nnshimbun-request-list (&optional server)
468   (save-excursion
469     (set-buffer nntp-server-buffer)
470     (erase-buffer)
471     (dolist (group nnshimbun-groups)
472       (when (nnshimbun-possibly-change-group group server)
473         (let (beg end)
474           (save-excursion
475             (set-buffer (nnshimbun-open-nov group))
476             (goto-char (point-min))
477             (setq beg (ignore-errors (read (current-buffer))))
478             (goto-char (point-max))
479             (forward-line -1)
480             (setq end (ignore-errors (read (current-buffer)))))
481           (insert (format "%s %d %d n\n" group (or end 0) (or beg 0)))))))
482   t) ; return value
483
484 (eval-and-compile
485   (if (fboundp 'mime-entity-fetch-field)
486       ;; For Semi-Gnus.
487       (defun nnshimbun-insert-header (header)
488         (insert "Subject: " (or (mime-entity-fetch-field header 'Subject) "(none)") "\n"
489                 "From: " (or (mime-entity-fetch-field header 'From) "(nobody)") "\n"
490                 "Date: " (or (mail-header-date header) "") "\n"
491                 "Message-ID: " (or (mail-header-id header) (nnmail-message-id)) "\n")
492         (let ((refs (mail-header-references header)))
493           (and refs
494                (string< "" refs)
495                (insert "References: " refs "\n")))
496         (insert "Lines: " (number-to-string (or (mail-header-lines header) 0)) "\n"
497                 "Xref: " (nnshimbun-header-xref header) "\n"))
498     ;; For pure Gnus.
499     (defun nnshimbun-insert-header (header)
500       (nnheader-insert-header header)
501       (delete-char -1)
502       (insert "Xref: " (nnshimbun-header-xref header) "\n"))))
503
504 (deffoo nnshimbun-retrieve-headers (articles &optional group server fetch-old)
505   (when (nnshimbun-possibly-change-group group server)
506     (if (nnshimbun-retrieve-headers-with-nov articles fetch-old)
507         'nov
508       (save-excursion
509         (set-buffer nntp-server-buffer)
510         (erase-buffer)
511         (let (header)
512           (dolist (art articles)
513             (if (stringp art)
514                 (setq art (nnshimbun-search-id group art)))
515             (if (integerp art)
516                 (when (setq header
517                             (save-excursion
518                               (set-buffer (nnshimbun-open-nov group))
519                               (and (nnheader-find-nov-line art)
520                                    (nnheader-parse-nov))))
521                   (insert (format "220 %d Article retrieved.\n" art))
522                   (nnshimbun-insert-header header)
523                   (insert ".\n")
524                   (delete-region (point) (point-max))))))
525         'header))))
526
527 (defun nnshimbun-retrieve-headers-with-nov (articles &optional fetch-old)
528   (if (or gnus-nov-is-evil nnshimbun-nov-is-evil)
529       nil
530     (let ((nov (expand-file-name nnshimbun-nov-file-name nnshimbun-current-directory)))
531       (when (file-exists-p nov)
532         (save-excursion
533           (set-buffer nntp-server-buffer)
534           (erase-buffer)
535           (nnheader-insert-file-contents nov)
536           (if (and fetch-old (not (numberp fetch-old)))
537               t                         ; Don't remove anything.
538             (nnheader-nov-delete-outside-range
539              (if fetch-old (max 1 (- (car articles) fetch-old))
540                (car articles))
541              (car (last articles)))
542             t))))))
543
544
545
546 ;;; Nov Database Operations
547
548 (defun nnshimbun-generate-nov-database (group)
549   (prog1 (funcall nnshimbun-generate-nov group)
550     (nnshimbun-write-nov group)))
551
552 (defun nnshimbun-generate-nov-for-each-group (group)
553   (nnshimbun-possibly-change-group group)
554   (save-excursion
555     (set-buffer (nnshimbun-open-nov group))
556     (let (i)
557       (goto-char (point-max))
558       (forward-line -1)
559       (setq i (or (ignore-errors (read (current-buffer))) 0))
560       (dolist (header (save-excursion
561                         (set-buffer nnshimbun-buffer)
562                         (erase-buffer)
563                         (nnshimbun-retrieve-url (eval nnshimbun-index-url) t)
564                         (goto-char (point-min))
565                         (funcall nnshimbun-get-headers)))
566         (unless (nnshimbun-search-id group (mail-header-id header))
567           (mail-header-set-number header (setq i (1+ i)))
568           (goto-char (point-max))
569           (nnheader-insert-nov header)
570           (if nnshimbun-pre-fetch-article
571               (nnshimbun-request-article-1 i group nil nnshimbun-buffer)))))))
572
573 (defun nnshimbun-generate-nov-for-all-groups (&rest args)
574   (unless (and nnshimbun-nov-last-check
575                (< (nnshimbun-lapse-seconds nnshimbun-nov-last-check)
576                   nnshimbun-check-interval))
577     (save-excursion
578       (dolist (list (funcall nnshimbun-get-headers))
579         (let ((group (car list)))
580           (nnshimbun-possibly-change-group group)
581           (when (cdr list)
582             (set-buffer (nnshimbun-open-nov group))
583             (let (i)
584               (goto-char (point-max))
585               (forward-line -1)
586               (setq i (or (ignore-errors (read (current-buffer))) 0))
587               (dolist (header (cdr list))
588                 (unless (nnshimbun-search-id group (mail-header-id header))
589                   (mail-header-set-number header (setq i (1+ i)))
590                   (goto-char (point-max))
591                   (nnheader-insert-nov header)
592                   (if nnshimbun-pre-fetch-article
593                       (nnshimbun-request-article-1 i group nil nnshimbun-buffer))))))))
594       (nnshimbun-save-nov)
595       (setq nnshimbun-nov-last-check (current-time)))))
596
597 (defun nnshimbun-search-id (group id &optional nov)
598   (save-excursion
599     (set-buffer (nnshimbun-open-nov group))
600     (goto-char (point-min))
601     (let (found)
602       (while (and (not found)
603                   (search-forward id nil t)) ; We find the ID.
604         ;; And the id is in the fourth field.
605         (if (not (and (search-backward "\t" nil t 4)
606                       (not (search-backward "\t" (gnus-point-at-bol) t))))
607             (forward-line 1)
608           (forward-line 0)
609           (setq found t)))
610       (unless found
611         (goto-char (point-min))
612         (when (search-forward (concat "X-Nnshimbun-Id: " id) nil t)
613           (forward-line 0)
614           (setq found t)))
615       (if found
616           (if nov
617               (nnheader-parse-nov)
618             ;; We return the article number.
619             (ignore-errors (read (current-buffer))))))))
620
621 (defun nnshimbun-nov-fix-header (group header args)
622   (save-excursion
623     (set-buffer (nnshimbun-open-nov group))
624     (when (nnheader-find-nov-line (mail-header-number header))
625       (dolist (arg args)
626         (if (eq (car arg) 'id)
627             (let ((extra (mail-header-extra header)))
628               (unless (assq 'X-Nnshimbun-Id extra)
629                 (mail-header-set-extra
630                  header
631                  (cons (cons 'X-Nnshimbun-Id (mail-header-id header))
632                        extra)))
633               (mail-header-set-id header (cdr arg)))
634           (let ((func (intern (concat "mail-header-set-" (symbol-name (car arg))))))
635             (if (cdr arg) (eval (list func header (cdr arg)))))))
636       (mail-header-set-xref header (nnshimbun-header-xref header))
637       (delete-region (point) (progn (forward-line 1) (point)))
638       (nnheader-insert-nov header))))
639
640 (defun nnshimbun-open-nov (group)
641   (let ((buffer (cdr (assoc group nnshimbun-nov-buffer-alist))))
642     (if (buffer-live-p buffer)
643         buffer
644       (setq buffer (gnus-get-buffer-create
645                     (format " *nnshimbun overview %s %s*"
646                             (nnoo-current-server 'nnshimbun) group)))
647       (save-excursion
648         (set-buffer buffer)
649         (set (make-local-variable 'nnshimbun-nov-buffer-file-name)
650              (expand-file-name
651               nnshimbun-nov-file-name
652               (nnmail-group-pathname group nnshimbun-server-directory)))
653         (erase-buffer)
654         (when (file-exists-p nnshimbun-nov-buffer-file-name)
655           (nnheader-insert-file-contents nnshimbun-nov-buffer-file-name))
656         (set-buffer-modified-p nil))
657       (push (cons group buffer) nnshimbun-nov-buffer-alist)
658       buffer)))
659
660 (defun nnshimbun-write-nov (group)
661   (let ((buffer (cdr (assoc group nnshimbun-nov-buffer-alist))))
662     (when (buffer-live-p buffer)
663       (save-excursion
664         (set-buffer buffer)
665         (buffer-modified-p)
666         (nnmail-write-region 1 (point-max) nnshimbun-nov-buffer-file-name
667                              nil 'nomesg)))))
668
669 (defun nnshimbun-save-nov ()
670   (save-excursion
671     (while nnshimbun-nov-buffer-alist
672       (when (buffer-name (cdar nnshimbun-nov-buffer-alist))
673         (set-buffer (cdar nnshimbun-nov-buffer-alist))
674         (when (buffer-modified-p)
675           (nnmail-write-region 1 (point-max) nnshimbun-nov-buffer-file-name
676                                nil 'nomesg))
677         (set-buffer-modified-p nil)
678         (kill-buffer (current-buffer)))
679       (setq nnshimbun-nov-buffer-alist (cdr nnshimbun-nov-buffer-alist)))))
680
681
682
683 ;;; Server Initialize
684 (defun nnshimbun-possibly-change-group (group &optional server)
685   (when server
686     (unless (nnshimbun-server-opened server)
687       (nnshimbun-open-server server)))
688   (unless (gnus-buffer-live-p nnshimbun-buffer)
689     (setq nnshimbun-buffer
690           (save-excursion
691             (nnheader-set-temp-buffer
692              (format " *nnshimbun %s*" (nnoo-current-server 'nnshimbun))))))
693   (if (not group)
694       t
695     (let ((pathname (nnmail-group-pathname group nnshimbun-server-directory))
696           (pathname-coding-system 'binary))
697       (unless (equal pathname nnshimbun-current-directory)
698         (setq nnshimbun-current-directory pathname
699               nnshimbun-current-group group))
700       (unless (file-exists-p nnshimbun-current-directory)
701         (ignore-errors (make-directory nnshimbun-current-directory t)))
702       (cond
703        ((not (file-exists-p nnshimbun-current-directory))
704         (nnheader-report 'nnshimbun "Couldn't create directory: %s" nnshimbun-current-directory))
705        ((not (file-directory-p (file-truename nnshimbun-current-directory)))
706         (nnheader-report 'nnshimbun "Not a directory: %s" nnshimbun-current-directory))
707        (t t)))))
708
709
710
711 ;;; Misc Functions
712
713 (eval-and-compile
714   (if (fboundp 'eword-encode-string)
715       ;; For Semi-Gnus.
716       (defun nnshimbun-mime-encode-string (string)
717         (mapconcat
718          #'identity
719          (split-string (eword-encode-string (nnweb-decode-entities-string string)) "\n")
720          ""))
721     ;; For pure Gnus.
722     (defun nnshimbun-mime-encode-string (string)
723       (mapconcat
724        #'identity
725        (split-string
726         (with-temp-buffer
727           (insert (nnweb-decode-entities-string string))
728           (rfc2047-encode-region (point-min) (point-max))
729           (buffer-substring (point-min) (point-max)))
730         "\n")
731        ""))))
732
733 (defun nnshimbun-lapse-seconds (time)
734   (let ((now (current-time)))
735     (+ (* (- (car now) (car time)) 65536)
736        (- (nth 1 now) (nth 1 time)))))
737
738 (defun nnshimbun-make-date-string (year month day &optional time)
739   (format "%02d %s %04d %s +0900"
740           day
741           (aref [nil "Jan" "Feb" "Mar" "Apr" "May" "Jun"
742                      "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"]
743                 month)
744           (cond ((< year 69)
745                  (+ year 2000))
746                 ((< year 100)
747                  (+ year 1900))
748                 ((< year 1000)  ; possible 3-digit years.
749                  (+ year 1900))
750                 (t year))
751           (or time "00:00")))
752
753 (if (fboundp 'regexp-opt)
754     (defalias 'nnshimbun-regexp-opt 'regexp-opt)
755   (defun nnshimbun-regexp-opt (strings &optional paren)
756     "Return a regexp to match a string in STRINGS.
757 Each string should be unique in STRINGS and should not contain any regexps,
758 quoted or not.  If optional PAREN is non-nil, ensure that the returned regexp
759 is enclosed by at least one regexp grouping construct."
760     (let ((open-paren (if paren "\\(" "")) (close-paren (if paren "\\)" "")))
761       (concat open-paren (mapconcat 'regexp-quote strings "\\|") close-paren))))
762
763
764 ;; Fast fill-region function
765
766 (defvar nnshimbun-fill-column (min 80 (- (frame-width) 4)))
767
768 (defconst nnshimbun-kinsoku-bol-list
769   (funcall
770    (if (fboundp 'string-to-char-list)
771        'string-to-char-list
772      'string-to-list) "\
773 !)-_~}]:;',.?\e$B!"!#!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2!3!4!5!6!7!8!9!:!;!<!=!>!?!@!A\e(B\
774 \e$B!B!C!D!E!G!I!K!M!O!Q!S!U!W!Y![!k!l!m!n$!$#$%$'$)$C$c$e$g$n%!%#%%%'%)%C%c%e%g%n%u%v\e(B"))
775
776 (defconst nnshimbun-kinsoku-eol-list
777   (funcall
778    (if (fboundp 'string-to-char-list)
779        'string-to-char-list
780      'string-to-list)
781    "({[`\e$B!F!H!J!L!N!P!R!T!V!X!Z!k!l!m!x\e(B"))
782
783 (defun nnshimbun-fill-line ()
784   (forward-line 0)
785   (let ((top (point)) chr)
786     (while (if (>= (move-to-column nnshimbun-fill-column)
787                    nnshimbun-fill-column)
788                (not (progn
789                       (if (memq (preceding-char) nnshimbun-kinsoku-eol-list)
790                           (progn
791                             (backward-char)
792                             (while (memq (preceding-char) nnshimbun-kinsoku-eol-list)
793                               (backward-char))
794                             (insert "\n"))
795                         (while (memq (setq chr (following-char)) nnshimbun-kinsoku-bol-list)
796                           (forward-char))
797                         (if (looking-at "\\s-+")
798                             (or (eolp) (delete-region (point) (match-end 0)))
799                           (or (> (char-width chr) 1)
800                               (re-search-backward "\\<" top t)
801                               (end-of-line)))
802                         (or (eolp) (insert "\n"))))))
803       (setq top (point))))
804   (forward-line 1)
805   (not (eobp)))
806
807 (defsubst nnshimbun-shallow-rendering ()
808   (goto-char (point-min))
809   (while (search-forward "<p>" nil t)
810     (insert "\n\n"))
811   (goto-char (point-min))
812   (while (search-forward "<br>" nil t)
813     (insert "\n"))
814   (nnweb-remove-markup)
815   (nnweb-decode-entities)
816   (goto-char (point-min))
817   (while (nnshimbun-fill-line))
818   (goto-char (point-min))
819   (when (skip-chars-forward "\n")
820     (delete-region (point-min) (point)))
821   (while (search-forward "\n\n" nil t)
822     (let ((p (point)))
823       (when (skip-chars-forward "\n")
824         (delete-region p (point)))))
825   (goto-char (point-max))
826   (when (skip-chars-backward "\n")
827     (delete-region (point) (point-max)))
828   (insert "\n"))
829
830 (defun nnshimbun-make-text-or-html-contents (header &optional x-face)
831   (let ((case-fold-search t) (html t) (start))
832     (when (and (re-search-forward nnshimbun-contents-start nil t)
833                (setq start (point))
834                (re-search-forward nnshimbun-contents-end nil t))
835       (delete-region (match-beginning 0) (point-max))
836       (delete-region (point-min) start)
837       (nnshimbun-shallow-rendering)
838       (setq html nil))
839     (goto-char (point-min))
840     (nnshimbun-insert-header header)
841     (insert "Content-Type: " (if html "text/html" "text/plain")
842             "; charset=ISO-2022-JP\nMIME-Version: 1.0\n")
843     (when x-face
844       (insert x-face)
845       (unless (bolp)
846         (insert "\n")))
847     (insert "\n")
848     (encode-coding-string (buffer-string)
849                           (mime-charset-to-coding-system "ISO-2022-JP"))))
850
851 (defun nnshimbun-make-html-contents (header &optional x-face)
852   (let (start)
853     (when (and (re-search-forward nnshimbun-contents-start nil t)
854                (setq start (point))
855                (re-search-forward nnshimbun-contents-end nil t))
856       (delete-region (match-beginning 0) (point-max))
857       (delete-region (point-min) start))
858     (goto-char (point-min))
859     (nnshimbun-insert-header header)
860     (insert "Content-Type: text/html; charset=ISO-2022-JP\n"
861             "MIME-Version: 1.0\n")
862     (when x-face
863       (insert x-face)
864       (unless (bolp)
865         (insert "\n")))
866     (insert "\n")
867     (encode-coding-string (buffer-string)
868                           (mime-charset-to-coding-system "ISO-2022-JP"))))
869
870 (defun nnshimbun-make-mhonarc-contents (header &rest args)
871   (require 'mml)
872   (if (search-forward "<!--X-Head-End-->" nil t)
873       (progn
874         (forward-line 0)
875         ;; Processing headers.
876         (save-restriction
877           (narrow-to-region (point-min) (point))
878           (nnweb-decode-entities)
879           (goto-char (point-min))
880           (while (search-forward "\n<!--X-" nil t)
881             (replace-match "\n"))
882           (goto-char (point-min))
883           (while (search-forward " -->\n" nil t)
884             (replace-match "\n"))
885           (goto-char (point-min))
886           (while (search-forward "\t" nil t)
887             (replace-match " "))
888           (goto-char (point-min))
889           (let (buf refs)
890             (while (not (eobp))
891               (cond
892                ((looking-at "<!--")
893                 (delete-region (point) (progn (forward-line 1) (point))))
894                ((looking-at "Subject: +")
895                 (push (cons 'subject (nnheader-header-value)) buf)
896                 (delete-region (point) (progn (forward-line 1) (point))))
897                ((looking-at "From: +")
898                 (push (cons 'from (nnheader-header-value)) buf)
899                 (delete-region (point) (progn (forward-line 1) (point))))
900                ((looking-at "Date: +")
901                 (push (cons 'date (nnheader-header-value)) buf)
902                 (delete-region (point) (progn (forward-line 1) (point))))
903                ((looking-at "Message-Id: +")
904                 (push (cons 'id (concat "<" (nnheader-header-value) ">")) buf)
905                 (delete-region (point) (progn (forward-line 1) (point))))
906                ((looking-at "Reference: +")
907                 (push (concat "<" (nnheader-header-value) ">") refs)
908                 (delete-region (point) (progn (forward-line 1) (point))))
909                ((looking-at "Content-Type: ")
910                 (unless (search-forward "charset" (gnus-point-at-eol) t)
911                   (end-of-line)
912                   (insert "; charset=ISO-2022-JP"))
913                 (forward-line 1))
914                (t (forward-line 1))))
915             (insert "MIME-Version: 1.0\n")
916             (if refs (push (cons 'references (mapconcat 'identity refs " ")) buf))
917             (nnshimbun-nov-fix-header nnshimbun-current-group header buf)
918             (goto-char (point-min))
919             (nnshimbun-insert-header header))
920           (goto-char (point-max)))
921         ;; Processing body.
922         (save-restriction
923           (narrow-to-region (point) (point-max))
924           (delete-region
925            (point)
926            (progn
927              (search-forward "\n<!--X-Body-of-Message-->\n" nil t)
928              (point)))
929           (when (search-forward "\n<!--X-Body-of-Message-End-->\n" nil t)
930             (forward-line -1)
931             (delete-region (point) (point-max)))
932           (nnweb-remove-markup)
933           (nnweb-decode-entities)))
934     (goto-char (point-min))
935     (nnshimbun-insert-header header)
936     (insert "Content-Type: text/html; charset=ISO-2022-JP\nMIME-Version: 1.0\n\n"))
937   (encode-coding-string (buffer-string)
938                         (mime-charset-to-coding-system "ISO-2022-JP")))
939
940 (defun nnshimbun-make-fml-contents (header &rest args)
941   (require 'mml)
942   (catch 'stop
943     (if (search-forward "<SPAN CLASS=mailheaders>" nil t)
944         (delete-region (point-min) (point))
945       (throw 'stop nil))
946     (if (search-forward "</PRE>")
947         (progn
948           (beginning-of-line)
949           (delete-region (point) (point-max)))
950       (throw 'stop nil))
951     (if (search-backward "</SPAN>")
952         (progn
953           (beginning-of-line)
954           (kill-line))
955       (throw 'stop nil))
956     (save-restriction
957       (narrow-to-region (point-min) (point))
958       (subst-char-in-region (point-min) (point-max) ?\t ?  t)
959       (nnweb-decode-entities)
960       (goto-char (point-min))
961       (let (buf field value start value-beg end)
962         (while (and (setq start (point))
963                     (re-search-forward "<SPAN CLASS=\\(.*\\)>\\(.*\\)</SPAN>:"
964                                        nil t)
965                     (setq field (match-string 2))
966                     (re-search-forward 
967                      (concat "<SPAN CLASS=" (match-string 1) "-value>") nil t)
968                     (setq value-beg (point))
969                     (search-forward "</SPAN>" nil t)
970                     (setq end (point)))
971           (setq value (buffer-substring value-beg
972                                         (progn (search-backward "</SPAN>")
973                                                (point))))
974           (delete-region start end)
975           (cond ((string= field "Date")
976                  (push (cons 'date value) buf))
977                 ((string= field "From")
978                  (push (cons 'from value) buf))
979                 ((string= field "Subject")
980                  (push (cons 'subject value) buf))
981                 ((string= field "Message-Id")
982                  (push (cons 'id value) buf))
983                 ((string= field "References")
984                  (push (cons 'references value) buf))
985                 (t
986                  (insert (concat field ": " value "\n")))))
987         (nnshimbun-nov-fix-header nnshimbun-current-group header buf)
988         (goto-char (point-min))
989         (nnshimbun-insert-header header))
990       (goto-char (point-max)))
991     ;; Processing body.
992     (save-restriction
993       (narrow-to-region (point) (point-max))
994       (nnweb-remove-markup)
995       (nnweb-decode-entities)))
996   (encode-coding-string (buffer-string)
997                         (mime-charset-to-coding-system "ISO-2022-JP")))
998
999 ;;; www.asahi.com
1000
1001 (defun nnshimbun-asahi-get-headers ()
1002   (when (search-forward "\n<!-- Start of past -->\n" nil t)
1003     (delete-region (point-min) (point))
1004     (when (search-forward "\n<!-- End of past -->\n" nil t)
1005       (forward-line -1)
1006       (delete-region (point) (point-max))
1007       (goto-char (point-min))
1008       (let (headers)
1009         (while (re-search-forward
1010                 "^\e$B"#\e(B<a href=\"\\(\\([0-9][0-9][0-9][0-9]\\)/past/\\([A-z]*[0-9]*\\)\\.html\\)\"> *"
1011                 nil t)
1012           (let ((id (format "<%s%s%%%s>"
1013                             (match-string 2)
1014                             (match-string 3)
1015                             nnshimbun-current-group))
1016                 (url (match-string 1)))
1017             (push (make-full-mail-header
1018                    0
1019                    (nnshimbun-mime-encode-string
1020                     (mapconcat 'identity
1021                                (split-string
1022                                 (buffer-substring
1023                                  (match-end 0)
1024                                  (progn (search-forward "<br>" nil t) (point)))
1025                                 "\\(<[^>]+>\\|\r\\)")
1026                                ""))
1027                    nnshimbun-from-address
1028                    "" id "" 0 0 (concat nnshimbun-url url))
1029                   headers)))
1030         (setq headers (nreverse headers))
1031         (let ((i 0))
1032           (while (and (nth i headers)
1033                       (re-search-forward
1034                        "^\\[\\([0-9][0-9]\\)/\\([0-9][0-9]\\) \\([0-9][0-9]:[0-9][0-9]\\)\\]"
1035                        nil t))
1036             (let ((month (string-to-number (match-string 1)))
1037                   (date (decode-time (current-time))))
1038               (mail-header-set-date
1039                (nth i headers)
1040                (nnshimbun-make-date-string
1041                 (if (and (eq 12 month) (eq 1 (nth 4 date)))
1042                     (1- (nth 5 date))
1043                   (nth 5 date))
1044                 month
1045                 (string-to-number (match-string 2))
1046                 (match-string 3))))
1047             (setq i (1+ i))))
1048         (nreverse headers)))))
1049
1050
1051
1052 ;;; www.sponichi.co.jp
1053
1054 (defun nnshimbun-sponichi-get-headers ()
1055   (when (search-forward "\e$B%K%e!<%9%$%s%G%C%/%9\e(B" nil t)
1056     (delete-region (point-min) (point))
1057     (when (search-forward "\e$B%"%I%?%0\e(B" nil t)
1058       (forward-line 2)
1059       (delete-region (point) (point-max))
1060       (goto-char (point-min))
1061       (let ((case-fold-search t) headers)
1062         (while (re-search-forward
1063                 "^<a href=\"/\\(\\([A-z]*\\)/kiji/\\([0-9][0-9][0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([0-9][0-9]\\)\\.html\\)\">"
1064                 nil t)
1065           (let ((url (match-string 1))
1066                 (id (format "<%s%s%s%s%%%s>"
1067                             (match-string 3)
1068                             (match-string 4)
1069                             (match-string 5)
1070                             (match-string 6)
1071                             nnshimbun-current-group))
1072                 (date (nnshimbun-make-date-string
1073                        (string-to-number (match-string 3))
1074                        (string-to-number (match-string 4))
1075                        (string-to-number (match-string 5)))))
1076             (push (make-full-mail-header
1077                    0
1078                    (nnshimbun-mime-encode-string
1079                     (mapconcat 'identity
1080                                (split-string
1081                                 (buffer-substring
1082                                  (match-end 0)
1083                                  (progn (search-forward "<br>" nil t) (point)))
1084                                 "<[^>]+>")
1085                                ""))
1086                    nnshimbun-from-address
1087                    date id "" 0 0 (concat nnshimbun-url url))
1088                   headers)))
1089         headers))))
1090
1091
1092
1093 ;;; CNET Japan
1094
1095 (defun nnshimbun-cnet-get-headers ()
1096   (let ((case-fold-search t) headers)
1097     (while (search-forward "\n<!--*****\e$B8+=P$7\e(B*****-->\n" nil t)
1098       (let ((subject (buffer-substring (point) (gnus-point-at-eol)))
1099             (point (point)))
1100         (forward-line -2)
1101         (when (looking-at "<a href=\"/\\(News/\\([0-9][0-9][0-9][0-9]\\)/Item/\\([0-9][0-9]\\([0-9][0-9]\\)\\([0-9][0-9]\\)-[0-9]+\\).html\\)\">")
1102           (let ((url (match-string 1))
1103                 (id  (format "<%s%s%%%s>"
1104                              (match-string 2)
1105                              (match-string 3)
1106                              nnshimbun-current-group))
1107                 (date (nnshimbun-make-date-string
1108                        (string-to-number (match-string 2))
1109                        (string-to-number (match-string 4))
1110                        (string-to-number (match-string 5)))))
1111             (push (make-full-mail-header
1112                    0
1113                    (nnshimbun-mime-encode-string subject)
1114                    nnshimbun-from-address
1115                    date id "" 0 0 (concat nnshimbun-url url))
1116                   headers)))
1117         (goto-char point)))
1118     headers))
1119
1120
1121
1122 ;;; Wired
1123
1124 (defun nnshimbun-wired-get-all-headers ()
1125   (save-excursion
1126     (set-buffer nnshimbun-buffer)
1127     (let ((group-header-alist (mapcar (lambda (g) (cons g nil)) nnshimbun-groups))
1128           (case-fold-search t)
1129           (regexp (format
1130                    "<a href=\"\\(%s\\|/\\)\\(news/news/\\(%s\\)/story/\\(\\([0-9][0-9][0-9][0-9]\\)\\([0-9][0-9]\\)\\([0-9][0-9]\\)[0-9]+\\)\\.html\\)\"><b>"
1131                    (regexp-quote nnshimbun-url)
1132                    (nnshimbun-regexp-opt nnshimbun-groups))))
1133       (dolist (xover (list (concat nnshimbun-url "news/news/index.html")
1134                            (concat nnshimbun-url "news/news/last_seven.html")))
1135         (erase-buffer)
1136         (nnshimbun-retrieve-url xover t)
1137         (goto-char (point-min))
1138         (while (re-search-forward regexp nil t)
1139           (let* ((url   (concat nnshimbun-url (match-string 2)))
1140                  (group (downcase (match-string 3)))
1141                  (id    (format "<%s%%%s>" (match-string 4) group))
1142                  (date  (nnshimbun-make-date-string
1143                          (string-to-number (match-string 5))
1144                          (string-to-number (match-string 6))
1145                          (string-to-number (match-string 7))))
1146                  (header (make-full-mail-header
1147                           0
1148                           (nnshimbun-mime-encode-string
1149                            (mapconcat 'identity
1150                                       (split-string
1151                                        (buffer-substring
1152                                         (match-end 0)
1153                                         (progn (search-forward "</b>" nil t) (point)))
1154                                        "<[^>]+>")
1155                                       ""))
1156                           nnshimbun-from-address
1157                           date id "" 0 0 url))
1158                  (x (assoc group group-header-alist)))
1159             (setcdr x (cons header (cdr x))))))
1160       group-header-alist)))
1161
1162
1163
1164 ;;; www.yomiuri.co.jp
1165
1166 (defun nnshimbun-yomiuri-get-all-headers ()
1167   (save-excursion
1168     (set-buffer nnshimbun-buffer)
1169     (erase-buffer)
1170     (nnshimbun-retrieve-url (eval nnshimbun-index-url) t)
1171     (let ((case-fold-search t)
1172           (group-header-alist (mapcar (lambda (g) (cons g nil)) nnshimbun-groups)))
1173       (dolist (group nnshimbun-groups)
1174         (let (start)
1175           (goto-char (point-min))
1176           (when (and (search-forward (format "\n<!-- /news/%s=start -->\n" group) nil t)
1177                      (setq start (point))
1178                      (search-forward (format "\n<!-- /news/%s=end -->\n" group) nil t))
1179             (forward-line -1)
1180             (save-restriction
1181               (narrow-to-region start (point))
1182               (goto-char start)
1183               (while (re-search-forward
1184                       "<a href=\"/\\([0-9]+\\)/\\(\\(\\([0-9][0-9][0-9][0-9]\\)\\([0-9][0-9]\\)\\([0-9][0-9]\\)[A-z0-9]+\\)\\.htm\\)\"[^>]*>"
1185                       nil t)
1186                 (let ((url   (concat (match-string 1) "a/" (match-string 2)))
1187                       (id    (format "<%s%s%%%s>"
1188                                      (match-string 1)
1189                                      (match-string 3)
1190                                      group))
1191                       (year  (string-to-number (match-string 4)))
1192                       (month (string-to-number (match-string 5)))
1193                       (day   (string-to-number (match-string 6)))
1194                       (subject (mapconcat
1195                                 'identity
1196                                 (split-string
1197                                  (buffer-substring
1198                                   (match-end 0)
1199                                   (progn (search-forward "<br>" nil t) (point)))
1200                                  "<[^>]+>")
1201                                 ""))
1202                       date x)
1203                   (when (string-match "^\e$B"!\e(B" subject)
1204                     (setq subject (substring subject (match-end 0))))
1205                   (if (string-match "(\\([0-9][0-9]:[0-9][0-9]\\))$" subject)
1206                       (setq date (nnshimbun-make-date-string
1207                                   year month day (match-string 1 subject))
1208                             subject (substring subject 0 (match-beginning 0)))
1209                     (setq date (nnshimbun-make-date-string year month day)))
1210                   (setcdr (setq x (assoc group group-header-alist))
1211                           (cons (make-full-mail-header
1212                                  0
1213                                  (nnshimbun-mime-encode-string subject)
1214                                  nnshimbun-from-address
1215                                  date id "" 0 0 (concat nnshimbun-url url))
1216                                 (cdr x)))))))))
1217       group-header-alist)))
1218
1219
1220
1221 ;;; Zdnet Japan
1222
1223 (defun nnshimbun-zdnet-get-headers ()
1224   (let ((case-fold-search t) headers)
1225     (goto-char (point-min))
1226     (let (start)
1227       (while (and (search-forward "<!--" nil t)
1228                   (setq start (- (point) 4))
1229                   (search-forward "-->" nil t))
1230         (delete-region start (point))))
1231     (goto-char (point-min))
1232     (while (re-search-forward
1233             "<a href=\"\\(/news/\\)?\\(\\([0-9][0-9]\\)\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([^\\.]+\\).html\\)\"><font size=\"4\"><strong>"
1234             nil t)
1235       (let ((year  (+ 2000 (string-to-number (match-string 3))))
1236             (month (string-to-number (match-string 4)))
1237             (day   (string-to-number (match-string 5)))
1238             (id    (format "<%s%s%s%s%%%s>"
1239                            (match-string 3)
1240                            (match-string 4)
1241                            (match-string 5)
1242                            (match-string 6)
1243                            nnshimbun-current-group))
1244             (url (match-string 2)))
1245         (push (make-full-mail-header
1246                0
1247                (nnshimbun-mime-encode-string
1248                 (mapconcat 'identity
1249                            (split-string
1250                             (buffer-substring
1251                              (match-end 0)
1252                              (progn (search-forward "</a>" nil t) (point)))
1253                             "<[^>]+>")
1254                            ""))
1255                nnshimbun-from-address
1256                (nnshimbun-make-date-string year month day)
1257                id  "" 0 0 (concat nnshimbun-url url))
1258               headers)))
1259     (nreverse headers)))
1260
1261
1262
1263 ;;; MLs on www.mew.org
1264
1265 (defmacro nnshimbun-mew-concat-url (url)
1266   `(concat nnshimbun-url
1267            (nth 1 (assoc nnshimbun-current-group nnshimbun-mew-groups))
1268            "/"
1269            ,url))
1270
1271 (defmacro nnshimbun-mew-reverse-order-p ()
1272   `(nth 2 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
1273
1274 (defmacro nnshimbun-mew-spew-p ()
1275   `(nth 3 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
1276
1277 (defsubst nnshimbun-mew-retrieve-xover (aux)
1278   (erase-buffer)
1279   (nnshimbun-retrieve-url
1280    (nnshimbun-mew-concat-url (if (= aux 1) "index.html" (format "mail%d.html" aux)))
1281    t))
1282
1283 (defconst nnshimbun-mew-regexp "<A[^>]*HREF=\"\\(msg\\([0-9]+\\).html\\)\">\\([^<]+\\)<")
1284
1285 (defmacro nnshimbun-mew-extract-header-values ()
1286   `(progn
1287      (setq url (nnshimbun-mew-concat-url (match-string 1))
1288            id (format "<%05d%%%s>"
1289                       (1- (string-to-number (match-string 2)))
1290                       nnshimbun-current-group)
1291            subject (match-string 3))
1292      (forward-line 1)
1293      (if (nnshimbun-search-id nnshimbun-current-group id)
1294          (throw 'stop headers)
1295        (push (make-full-mail-header
1296               0
1297               (nnshimbun-mime-encode-string subject)
1298               (if (looking-at "<EM>\\([^<]+\\)<")
1299                   (nnshimbun-mime-encode-string (match-string 1))
1300                 "")
1301               "" id "" 0 0 url)
1302              headers))))
1303
1304 (eval-and-compile
1305   (if (fboundp 'mime-entity-fetch-field)
1306       ;; For Semi-Gnus.
1307       (defmacro nnshimbun-mew-mail-header-subject (header)
1308         `(mime-entity-fetch-field ,header 'Subject))
1309     ;; For pure Gnus.
1310     (defalias 'nnshimbun-mew-mail-header-subject 'mail-header-subject)))
1311
1312 (defun nnshimbun-mew-get-headers ()
1313   (if (nnshimbun-mew-spew-p)
1314       (let ((headers (nnshimbun-mew-get-headers-1)))
1315         (erase-buffer)
1316         (insert-buffer-substring (nnshimbun-open-nov nnshimbun-current-group))
1317         (delq nil
1318               (mapcar
1319                (lambda (header)
1320                  (goto-char (point-min))
1321                  (let ((subject (nnshimbun-mew-mail-header-subject header))
1322                        (found))
1323                    (while (and (not found)
1324                                (search-forward subject nil t))
1325                      (if (not (and (search-backward "\t" nil t)
1326                                    (not (search-backward "\t" (gnus-point-at-bol) t))))
1327                          (forward-line 1)
1328                        (setq found t)))
1329                    (if found
1330                        nil
1331                      (goto-char (point-max))
1332                      (nnheader-insert-nov header)
1333                      header)))
1334                headers)))
1335     (nnshimbun-mew-get-headers-1)))
1336
1337 (defun nnshimbun-mew-get-headers-1 ()
1338   (let (headers)
1339     (when (re-search-forward
1340            "<A[^>]*HREF=\"mail\\([0-9]+\\)\\.html\">\\[?Last Page\\]?</A>" nil t)
1341       (let ((limit (string-to-number (match-string 1))))
1342         (catch 'stop
1343           (if (nnshimbun-mew-reverse-order-p)
1344               (let ((aux 1))
1345                 (while (let (id url subject)
1346                          (while (re-search-forward nnshimbun-mew-regexp nil t)
1347                            (nnshimbun-mew-extract-header-values))
1348                          (< aux limit))
1349                   (nnshimbun-mew-retrieve-xover (setq aux (1+ aux)))))
1350             (while (> limit 0)
1351               (nnshimbun-mew-retrieve-xover limit)
1352               (setq limit (1- limit))
1353               (let (id url subject)
1354                 (goto-char (point-max))
1355                 (while (re-search-backward nnshimbun-mew-regexp nil t)
1356                   (nnshimbun-mew-extract-header-values)
1357                   (forward-line -2)))))
1358           headers)))))
1359
1360
1361
1362 ;;; MLs on www.xemacs.org
1363
1364 (defmacro nnshimbun-xemacs-concat-url (url)
1365   `(concat nnshimbun-url nnshimbun-current-group "/" ,url))
1366
1367 (defun nnshimbun-xemacs-get-headers ()
1368   (let (headers auxs aux)
1369     (catch 'stop
1370       (while (re-search-forward
1371               (concat "<A HREF=\"/" nnshimbun-current-group
1372                       "/\\([12][0-9][0-9][0-9][0-1][0-9]\\)/\">\\[Index\\]")
1373               nil t)
1374         (setq auxs (append auxs (list (match-string 1)))))
1375       (while auxs
1376         (erase-buffer)
1377         (nnshimbun-retrieve-url
1378          (nnshimbun-xemacs-concat-url (concat (setq aux (car auxs)) "/")))
1379         (let (id url subject)
1380           (goto-char (point-max))
1381           (while (re-search-backward
1382                   "<A[^>]*HREF=\"\\(msg\\([0-9]+\\).html\\)\">\\([^<]+\\)<"
1383                   nil t)
1384             (setq url (nnshimbun-xemacs-concat-url
1385                        (concat aux "/" (match-string 1)))
1386                   id (format "<%s%05d%%%s>"
1387                              aux
1388                              (string-to-number (match-string 2))
1389                              nnshimbun-current-group)
1390                   subject (match-string 3))
1391             (forward-line 1)
1392             (if (nnshimbun-search-id nnshimbun-current-group id)
1393                 (throw 'stop headers)
1394               (push (make-full-mail-header
1395                      0
1396                      (nnshimbun-mime-encode-string subject)
1397                      (if (looking-at "<td><em>\\([^<]+\\)<")
1398                          (match-string 1)
1399                        "")
1400                      "" id "" 0 0 url)
1401                     headers))
1402             (message "%s" id)
1403             (forward-line -2)))
1404         (setq auxs (cdr auxs))))
1405     headers))
1406
1407 ;;; MLs on www.jp.netbsd.org
1408
1409 (defun nnshimbun-netbsd-get-headers ()
1410   (let ((case-fold-search t) headers months)
1411     (goto-char (point-min))
1412     (while (re-search-forward "<A HREF=\"\\([0-9]+\\)/\\(threads.html\\)?\">" nil t)
1413       (push (match-string 1) months))
1414     (setq months (nreverse months))
1415     (catch 'exit
1416       (dolist (month months)
1417         (erase-buffer)
1418         (nnshimbun-retrieve-url
1419          (format "%s%s/%s/maillist.html" nnshimbun-url nnshimbun-current-group month)
1420          t)
1421         (let (id url subject)
1422           (while (re-search-forward
1423                   "<A[^>]*HREF=\"\\(msg\\([0-9]+\\)\\.html\\)\">\\([^<]+\\)</A>"
1424                   nil t)
1425             (setq url (format "%s%s/%s/%s"
1426                               nnshimbun-url
1427                               nnshimbun-current-group
1428                               month
1429                               (match-string 1))
1430                   id (format "<%s%05d%%%s>"
1431                              month
1432                              (string-to-number (match-string 2))
1433                              nnshimbun-current-group)
1434                   subject (match-string 3))
1435             (if (nnshimbun-search-id nnshimbun-current-group id)
1436                 (throw 'exit headers)
1437               (push (make-full-mail-header
1438                      0
1439                      (nnshimbun-mime-encode-string subject)
1440                      (if (looking-at "</STRONG> *<EM>\\([^<]+\\)<")
1441                          (nnshimbun-mime-encode-string (match-string 1))
1442                        "")
1443                      "" id "" 0 0 url)
1444                     headers)))))
1445       headers)))
1446
1447 ;;; MLs using fml
1448 (defun nnshimbun-fml-get-headers ()
1449   (let (headers auxs aux)
1450     (catch 'stop
1451       (while (re-search-forward "<a href=\"\\([0-9]+\\(\\.week\\|\\.month\\)?\\)/index.html\">" nil t)
1452         (setq auxs (append auxs (list (match-string 1)))))
1453       (while auxs
1454         (erase-buffer)
1455         (nnshimbun-retrieve-url
1456          (concat nnshimbun-url (setq aux (car auxs)) "/"))
1457         (subst-char-in-region (point-min) (point-max) ?\t ?  t)
1458         (let (id url date subject from)
1459           (goto-char (point-min))
1460           (while (re-search-forward
1461                   "<LI><A HREF=\"\\([0-9]+\\.html\\)\">Article .*</A> <DIV><SPAN CLASS=article>Article <SPAN CLASS=article-value>\\([0-9]+\\)</SPAN></SPAN> at <SPAN CLASS=Date-value>\\([^<]*\\)</SPAN> <SPAN CLASS=Subject>Subject: <SPAN CLASS=Subject-value>\\([^<]*\\)</SPAN></SPAN></DIV><DIV><SPAN CLASS=From>From: <SPAN CLASS=From-value>\\([^<]*\\)</SPAN></SPAN></DIV>"
1462                   nil t)
1463             (setq url (concat nnshimbun-url aux "/" (match-string 1))
1464                   id (format "<%s%05d%%%s>"
1465                              aux
1466                              (string-to-number (match-string 2))
1467                              nnshimbun-current-group)
1468                   date (match-string 3)
1469                   subject (match-string 4)
1470                   from (match-string 5))
1471             (forward-line 1)
1472             (if (nnshimbun-search-id nnshimbun-current-group id)
1473                 (throw 'stop headers)
1474               (push (make-full-mail-header
1475                      0
1476                      (nnshimbun-mime-encode-string subject)
1477                      from date id "" 0 0 url)
1478                     headers))
1479 ;           (message "%s" id)))
1480         (setq auxs (cdr auxs))))
1481     headers))
1482
1483 (provide 'nnshimbun)
1484 ;;; nnshimbun.el ends here.