Synch with Oort Gnus.
[elisp/gnus.git-] / lisp / nnshimbun.el
1 ;;; nnshimbun.el --- interfacing with web newspapers -*- coding: junet; -*-
2
3 ;; Authors: TSUCHIYA Masatoshi <tsuchiya@pine.kuee.kyoto-u.ac.jp>
4 ;;          Akihiro Arisawa    <ari@atesoft.advantest.co.jp>
5 ;; Keywords: news
6
7 ;;; Copyright:
8
9 ;; This file is a part of Semi-Gnus.
10
11 ;; This program is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; This program is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with this program; if not, you can either send email to this
23 ;; program's maintainer or write to: The Free Software Foundation,
24 ;; Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;; Gnus backend for reading newspapers published on the Web.
29
30
31 ;;; Definitions:
32
;; Announce the "nnshimbun" backend to Gnus, declaring the `address'
;; ability for it.
33 (gnus-declare-backend "nnshimbun" 'address)
34
35 (eval-when-compile (require 'cl))
36 (eval-when-compile (require 'gnus-clfns))
37 (eval-when-compile (require 'static))
38
39 (require 'nnheader)
40 (require 'nnmail)
41 (require 'nnoo)
42 (require 'gnus-bcklg)
43 (eval-when-compile (ignore-errors (require 'nnweb)))
44 ;; Report failure to find w3 at load time if appropriate.
45 (eval '(require 'nnweb))
46 (require 'mcharset)
47
48
;; Declare nnshimbun as an nnoo backend so that the `defvoo' and
;; `deffoo' forms below can be used.
49 (nnoo-declare nnshimbun)
50
;; Minimum number of seconds between two index scans performed by
;; `nnshimbun-generate-nov-for-all-groups'.
51 (defvar nnshimbun-check-interval 300)
52
;; Groups offered by the "mew" server.  The car of each entry is used
;; as the group name in `nnshimbun-type-definition'.
;; NOTE(review): the remaining elements are consumed by code outside
;; this part of the file; their meaning should be confirmed there.
53 (defconst nnshimbun-mew-groups
54   '(("meadow-develop" "meadow-develop" nil t)
55     ("meadow-users-jp" "meadow-users-jp")
56     ("mule-win32" "mule-win32")
57     ("mew-win32" "mew-win32")
58     ("mew-dist" "mew-dist/3300" t)
59     ("mgp-users-jp" "mgp-users-jp/A" t t)))
60
;; Alist of server name vs. default settings for that server.
;;
;; `nnshimbun-open-server' looks the server name up here and, for every
;; (KEY . VALUE) pair that the caller did not override, installs VALUE
;; as the server variable `nnshimbun-KEY' (e.g. `url' becomes
;; `nnshimbun-url').
;;
;; `index-url' holds a form rather than a string: it is passed to `eval'
;; by `nnshimbun-generate-nov-for-each-group', at which point
;; `nnshimbun-url' and `nnshimbun-current-group' are set.
;; `generate-nov', `get-headers' and `make-contents' hold function
;; names that are invoked with `funcall'.
61 (defvar nnshimbun-type-definition
62   `(("asahi"
63      (url . "http://spin.asahi.com/")
64      (groups "national" "business" "politics" "international" "sports" "personal" "feneral")
65      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
66      (generate-nov   . nnshimbun-generate-nov-for-each-group)
67      (get-headers    . nnshimbun-asahi-get-headers)
68      (index-url      . (format "%sp%s.html" nnshimbun-url nnshimbun-current-group))
69      (from-address   . "webmaster@www.asahi.com")
70      (make-contents  . nnshimbun-make-text-or-html-contents)
71      (contents-start . "\n<!-- Start of kiji -->\n")
72      (contents-end   . "\n<!-- End of kiji -->\n"))
73     ("sponichi"
74      (url . "http://www.sponichi.co.jp/")
75      (groups "baseball" "soccer" "usa" "others" "society" "entertainment" "horseracing")
76      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
77      (generate-nov   . nnshimbun-generate-nov-for-each-group)
78      (get-headers    . nnshimbun-sponichi-get-headers)
79      (index-url      . (format "%s%s/index.html" nnshimbun-url nnshimbun-current-group))
80      (from-address   . "webmaster@www.sponichi.co.jp")
81      (make-contents  . nnshimbun-make-text-or-html-contents)
82      (contents-start . "\n<span class=\"text\">\e$B!!\e(B")
83      (contents-end   . "\n"))
84     ("cnet"
85      (url . "http://cnet.sphere.ne.jp/")
86      (groups "comp")
87      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
88      (generate-nov   . nnshimbun-generate-nov-for-each-group)
89      (get-headers    . nnshimbun-cnet-get-headers)
90      (index-url      . (format "%s/News/Oneweek/" nnshimbun-url))
91      (from-address   . "cnet@sphere.ad.jp")
92      (make-contents  . nnshimbun-make-html-contents)
93      (contents-start . "\n<!--KIJI-->\n")
94      (contents-end   . "\n<!--/KIJI-->\n"))
95     ("wired"
96      (url . "http://www.hotwired.co.jp/")
97      (groups "business" "culture" "technology")
98      (coding-system  . ,(static-if (boundp 'MULE) '*euc-japan* 'euc-jp))
99      (generate-nov   . nnshimbun-generate-nov-for-all-groups)
100      (get-headers    . nnshimbun-wired-get-all-headers)
101      (index-url)
102      (from-address   . "webmaster@www.hotwired.co.jp")
103      (make-contents  . nnshimbun-make-html-contents)
104      (contents-start . "\n<!-- START_OF_BODY -->\n")
105      (contents-end   . "\n<!-- END_OF_BODY -->\n"))
106     ("yomiuri"
107      (url . "http://www.yomiuri.co.jp/")
108      (groups "shakai" "sports" "seiji" "keizai" "kokusai" "fuho")
109      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
110      (generate-nov   . nnshimbun-generate-nov-for-all-groups)
111      (get-headers    . nnshimbun-yomiuri-get-all-headers)
112      (index-url      . (concat nnshimbun-url "main.htm"))
113      (from-address   . "webmaster@www.yomiuri.co.jp")
114      (make-contents  . nnshimbun-make-text-or-html-contents)
115      (contents-start . "\n<!--  honbun start  -->\n")
116      (contents-end   . "\n<!--  honbun end  -->\n"))
117     ("zdnet"
118      (url . "http://www.zdnet.co.jp/news/")
119      (groups "comp")
120      (coding-system  . ,(static-if (boundp 'MULE) '*sjis* 'shift_jis))
121      (generate-nov   . nnshimbun-generate-nov-for-each-group)
122      (get-headers    . nnshimbun-zdnet-get-headers)
123      (index-url      . nnshimbun-url)
124      (from-address   . "zdnn@softbank.co.jp")
125      (make-contents  . nnshimbun-make-html-contents)
126      (contents-start . "\\(<!--BODY-->\\|<!--DATE-->\\)")
127      (contents-end   . "\\(<!--BODYEND-->\\|<!--BYLINEEND-->\\)"))
128     ("mew"
129      (url . "http://www.mew.org/archive/")
130      (groups ,@(mapcar #'car nnshimbun-mew-groups))
131      (coding-system . ,(static-if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
132      (generate-nov  . nnshimbun-generate-nov-for-each-group)
133      (get-headers   . nnshimbun-mew-get-headers)
134      (index-url     . (nnshimbun-mew-concat-url "index.html"))
135      (make-contents . nnshimbun-make-mhonarc-contents))
136     ("xemacs"
137      (url . "http://list-archives.xemacs.org/")
138      (groups "xemacs-announce" "xemacs-beta-ja" "xemacs-beta"
139              "xemacs-build-reports" "xemacs-cvs" "xemacs-mule"
140              "xemacs-nt" "xemacs-patches" "xemacs-users-ja" "xemacs")
141      (coding-system . ,(static-if (boundp 'MULE) '*euc-japan* 'euc-jp))
142      (generate-nov  . nnshimbun-generate-nov-for-each-group)
143      (get-headers   . nnshimbun-xemacs-get-headers)
144      (index-url     . (nnshimbun-xemacs-concat-url nil))
145      (make-contents . nnshimbun-make-mhonarc-contents))
146     ("netbsd"
147      (url . "http://www.jp.netbsd.org/ja/JP/ml/")
148      (groups "announce-ja" "junk-ja" "tech-misc-ja" "tech-pkg-ja"
149              "port-arm32-ja" "port-hpcmips-ja" "port-mac68k-ja"
150              "port-mips-ja" "port-powerpc-ja" "hpcmips-changes-ja"
151              "members-ja" "admin-ja" "www-changes-ja")
152      (coding-system  . ,(static-if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
153      (generate-nov   . nnshimbun-generate-nov-for-each-group)
154      (get-headers    . nnshimbun-netbsd-get-headers)
155      (index-url      . (format "%s%s/index.html" nnshimbun-url nnshimbun-current-group))
156      (make-contents  . nnshimbun-make-mhonarc-contents))
157     ("bbdb-ml"
158      (url . "http://www.rc.tutrp.tut.ac.jp/bbdb-ml/")
159      (groups "bbdb-ml")
160      (coding-system . ,(static-if (boundp 'MULE) '*iso-2022-jp* 'iso-2022-jp))
161      (generate-nov . nnshimbun-generate-nov-for-each-group)
162      (get-headers . nnshimbun-fml-get-headers)
163      (index-url . nnshimbun-url)
164      (make-contents . nnshimbun-make-fml-contents))
165     ))
166
;; X-Face lookup table consulted by `nnshimbun-request-article-1':
;; the server entry is looked up first (falling back to "default"),
;; then the group entry within it (again falling back to "default").
167 (defvar nnshimbun-x-face-alist
168   '(("default" .
169      (("default" .
170        "X-Face: Ygq$6P.,%Xt$U)DS)cRY@k$VkW!7(X'X'?U{{osjjFG\"E]hND;SPJ-J?O?R|a?L
171         g2$0rVng=O3Lt}?~IId8Jj&vP^3*o=LKUyk(`t%0c!;t6REk=JbpsEn9MrN7gZ%"))))
172   "Alist of server vs. alist of group vs. X-Face field.  It looks like:
173
174 \((\"asahi\" . ((\"national\" . \"X-face: ***\")
175              (\"business\" . \"X-Face: ***\")
176                 ;;
177                 ;;
178              (\"default\" . \"X-face: ***\")))
179  (\"sponichi\" . ((\"baseball\" . \"X-face: ***\")
180                 (\"soccer\" . \"X-Face: ***\")
181                 ;;
182                 ;;
183                 (\"default\" . \"X-face: ***\")))
184                 ;;
185  (\"default\" . ((\"default\" . \"X-face: ***\")))")
186
;; Server variables, declared with `defvoo'.
;;
;; Root directory; `nnshimbun-open-server' creates one sub-directory
;; per server beneath it (`nnshimbun-server-directory').
187 (defvoo nnshimbun-directory (nnheader-concat gnus-directory "shimbun/")
188   "Where nnshimbun will save its files.")
189
190 (defvoo nnshimbun-nov-is-evil nil
191   "*Non-nil means that nnshimbun will never retrieve NOV headers.")
192
;; File name of the overview (NOV) file stored in each group directory.
193 (defvoo nnshimbun-nov-file-name ".overview")
194
195 (defvoo nnshimbun-pre-fetch-article nil
196   "*Non nil means that nnshimbun fetch unread articles when scanning groups.")
197
198 ;; set by nnshimbun-possibly-change-group
199 (defvoo nnshimbun-buffer nil)
200 (defvoo nnshimbun-current-directory nil)
201 (defvoo nnshimbun-current-group nil)
202
;; Apart from `nnshimbun-server-directory', the variables below take
;; their defaults from the matching keys of `nnshimbun-type-definition'.
203 ;; set by nnshimbun-open-server
204 (defvoo nnshimbun-url nil)
205 (defvoo nnshimbun-coding-system nil)
206 (defvoo nnshimbun-groups nil)
207 (defvoo nnshimbun-generate-nov nil)
208 (defvoo nnshimbun-get-headers nil)
209 (defvoo nnshimbun-index-url nil)
210 (defvoo nnshimbun-from-address nil)
211 (defvoo nnshimbun-make-contents nil)
212 (defvoo nnshimbun-contents-start nil)
213 (defvoo nnshimbun-contents-end nil)
214 (defvoo nnshimbun-server-directory nil)
215
216 (defvoo nnshimbun-status-string "")
;; Time of the last scan by `nnshimbun-generate-nov-for-all-groups'.
217 (defvoo nnshimbun-nov-last-check nil)
;; Alist of (GROUP . BUFFER) for the overview buffers that are open.
218 (defvoo nnshimbun-nov-buffer-alist nil)
;; Made buffer-local in each overview buffer by `nnshimbun-open-nov';
;; holds the file that buffer is written to.
219 (defvoo nnshimbun-nov-buffer-file-name nil)
220
;; Per-server backlog state; the `nnshimbun-backlog' macro binds the
;; corresponding `gnus-backlog-*' variables to these values.
221 (defvoo nnshimbun-keep-backlog 300)
222 (defvoo nnshimbun-backlog-articles nil)
223 (defvoo nnshimbun-backlog-hashtb nil)
224
;; Two regexps for finding the charset declared in an HTML META tag,
;; one for each attribute order.  In both, group 1 is the media type
;; and group 2 the charset name; `nnshimbun-retrieve-url' uses group 2
;; and searches with `case-fold-search' bound to t.
225 (defconst nnshimbun-meta-content-type-charset-regexp
226   (eval-when-compile
227     (concat "<meta[ \t]+http-equiv=\"?Content-type\"?[ \t]+content=\"\\([^;]+\\)"
228             ";[ \t]*charset=\"?\\([^\"]+\\)\"?"
229             ">"))
230   "Regexp used in parsing `<META HTTP-EQUIV=\"Content-Type\" content=\"...;charset=...\">
231 for a charset indication")
232
233 (defconst nnshimbun-meta-charset-content-type-regexp
234   (eval-when-compile
235     (concat "<meta[ \t]+content=\"\\([^;]+\\)"
236             ";[ \t]*charset=\"?\\([^\"]+\\)\"?"
237             "[ \t]+http-equiv=\"?Content-type\"?>"))
238   "Regexp used in parsing `<META content=\"...;charset=...\" HTTP-EQUIV=\"Content-Type\">
239 for a charset indication")
240
241
242
243 ;;; backlog
;; Evaluate FORM with the Gnus backlog variables (`gnus-keep-backlog',
;; `gnus-backlog-buffer', `gnus-backlog-articles', `gnus-backlog-hashtb')
;; bound to this server's own backlog state, so the gnus-backlog
;; functions operate on a buffer named after the current server.
;; The article list and hash table are copied back into the nnshimbun
;; variables afterwards; `unwind-protect' makes that happen even when
;; FORM exits non-locally.
244 (defmacro nnshimbun-backlog (&rest form)
245   `(let ((gnus-keep-backlog nnshimbun-keep-backlog)
246          (gnus-backlog-buffer (format " *nnshimbun backlog %s*" (nnoo-current-server 'nnshimbun)))
247          (gnus-backlog-articles nnshimbun-backlog-articles)
248          (gnus-backlog-hashtb nnshimbun-backlog-hashtb))
249      (unwind-protect
250          (progn ,@form)
251        (setq nnshimbun-backlog-articles gnus-backlog-articles
252              nnshimbun-backlog-hashtb gnus-backlog-hashtb))))
;; Indent the macro body like `progn' and tell edebug how to instrument it.
253 (put 'nnshimbun-backlog 'lisp-indent-function 0)
254 (put 'nnshimbun-backlog 'edebug-form-spec '(form body))
255
256
257
258 ;;; Interface Functions
;; Generate the standard nnoo interface functions for this backend.
;; NOTE(review): `nnshimbun-server-opened', which is called below but
;; not defined in this part of the file, presumably comes from here.
259 (nnoo-define-basics nnshimbun)
260
(deffoo nnshimbun-open-server (server &optional defs)
  "Open SERVER, using DEFS as server variable definitions.
Settings found for SERVER in `nnshimbun-type-definition' are added to
DEFS unless DEFS already defines the same variable.  The top-level
`nnshimbun-directory' and the per-server working directory are created
if necessary.  Return t on success; on failure the server is closed
again, the problem is reported and nil is returned."
  ;; Merge the per-server defaults into DEFS.
  (dolist (entry (cdr (assoc server nnshimbun-type-definition)))
    (let ((variable (intern (concat "nnshimbun-" (symbol-name (car entry))))))
      (or (assq variable defs)
          (setq defs (cons (list variable (cdr entry)) defs)))))
  ;; Working files of this server live in their own sub-directory.
  (setq defs (cons (list 'nnshimbun-server-directory
                         (file-name-as-directory
                          (expand-file-name server nnshimbun-directory)))
                   defs))
  (nnoo-change-server 'nnshimbun server defs)
  (nnshimbun-possibly-change-group nil server)
  ;; Check (and create) first the top-level directory, then the server
  ;; directory; stop at the first one that is unusable.
  (let ((failure
         (catch 'failed
           (dolist (dir (list nnshimbun-directory nnshimbun-server-directory))
             (unless (file-exists-p dir)
               (ignore-errors (make-directory dir t)))
             (cond
              ((not (file-exists-p dir))
               (throw 'failed (cons "Couldn't create directory: %s" dir)))
              ((not (file-directory-p (file-truename dir)))
               (throw 'failed (cons "Not a directory: %s" dir)))))
           nil)))
    (if failure
        (progn
          (nnshimbun-close-server)
          (nnheader-report 'nnshimbun (car failure) (cdr failure)))
      (nnheader-report 'nnshimbun "Opened server %s using directory %s"
                       server nnshimbun-server-directory)
      t)))
298
(deffoo nnshimbun-close-server (&optional server)
  "Close SERVER and always return t.
Kill the server's work buffer when the server is open and that buffer
is live, shut the backlog down, save all open overview buffers and
finally release the nnoo server state."
  (when (and (nnshimbun-server-opened server)
             (gnus-buffer-live-p nnshimbun-buffer))
    (kill-buffer nnshimbun-buffer))
  (nnshimbun-backlog
    (gnus-backlog-shutdown))
  (nnshimbun-save-nov)
  (nnoo-close-server 'nnshimbun server)
  t)
307
;; Compatibility shims that apply only when the variable `MULE' is
;; bound: make the coding system names `euc-japan' and `shift_jis' used
;; by `nnshimbun-retrieve-url' available as copies of `*euc-japan*' and
;; `*sjis*', and alias `coding-system-category' to `get-code-mnemonic'.
;; NOTE(review): `defalias-maybe' presumably defines the alias only when
;; the function is not already defined -- confirm in its library.
308 (static-when (boundp 'MULE)
309   (unless (coding-system-p 'euc-japan)
310     (copy-coding-system '*euc-japan* 'euc-japan))
311   (unless (coding-system-p 'shift_jis)
312     (copy-coding-system '*sjis* 'shift_jis))
313   (eval-and-compile
314     (defalias-maybe 'coding-system-category 'get-code-mnemonic)))
315
(defun nnshimbun-retrieve-url (url &optional no-cache)
  "Retrieve the contents of URL and insert them into the current buffer.
When NO-CACHE is non-nil, URL caching and standalone mode are switched
off for the duration of the fetch.

The text is fetched as raw bytes and then decoded.  The coding system
is taken from `url-current-mime-charset' or, failing that, from a
charset declared in an HTML META tag.  When no charset is declared, or
the declared one is unknown to `mime-charset-to-coding-system', the
coding system is guessed with `detect-coding-region', preferring a
candidate in the same category as `nnshimbun-coding-system'."
  (let ((buf (current-buffer))
        (url-working-buffer url-working-buffer))
    ;; The url package is steered through global defaults, so save them
    ;; and restore them even if the fetch fails.
    (let ((old-asynch (default-value 'url-be-asynchronous))
          (old-caching (default-value 'url-automatic-caching))
          (old-mode (default-value 'url-standalone-mode)))
      (setq-default url-be-asynchronous nil)
      (when no-cache
        (setq-default url-automatic-caching nil)
        (setq-default url-standalone-mode nil))
      (unwind-protect
          (let ((coding-system-for-read 'binary)
                (coding-system-for-write 'binary)
                (input-coding-system 'binary)
                (output-coding-system 'binary)
                (default-enable-multibyte-characters nil))
            ;; From here on the url working buffer is current.
            (set-buffer
             (setq url-working-buffer
                   (cdr (url-retrieve url no-cache))))
            (url-uncompress))
        (setq-default url-be-asynchronous old-asynch)
        (setq-default url-automatic-caching old-caching)
        (setq-default url-standalone-mode old-mode)))
    (let* ((charset
            (or url-current-mime-charset
                (let ((case-fold-search t))
                  (goto-char (point-min))
                  (if (or (re-search-forward
                           nnshimbun-meta-content-type-charset-regexp nil t)
                          (re-search-forward
                           nnshimbun-meta-charset-content-type-regexp nil t))
                      (buffer-substring-no-properties (match-beginning 2)
                                                      (match-end 2))))))
           ;; Coding system for the declared charset; nil when nothing
           ;; was declared or the declared name is not recognized.
           (declared
            (and charset
                 (let ((mime-charset-coding-system-alist
                        (append '((euc-jp . euc-japan)
                                  (shift-jis . shift_jis)
                                  (shift_jis . shift_jis)
                                  (sjis . shift_jis)
                                  (x-euc-jp . euc-japan)
                                  (x-shift-jis . shift_jis)
                                  (x-shift_jis . shift_jis)
                                  (x-sjis . shift_jis))
                                mime-charset-coding-system-alist)))
                   (mime-charset-to-coding-system charset)))))
      (decode-coding-region
       (point-min) (point-max)
       (or declared
           ;; No usable declaration: guess from the bytes.
           (let ((default (condition-case nil
                              (coding-system-category nnshimbun-coding-system)
                            (error nil)))
                 (candidate (detect-coding-region (point-min) (point-max))))
             (unless (listp candidate)
               (setq candidate (list candidate)))
             (catch 'coding
               (dolist (coding candidate)
                 (if (eq default (coding-system-category coding))
                     (throw 'coding coding)))
               (if (eq (coding-system-category 'binary)
                       (coding-system-category (car candidate)))
                   nnshimbun-coding-system
                 (car candidate)))))))
    (set-buffer-multibyte t)
    (set-buffer buf)
    ;; `insert-buffer' is kept on purpose: it leaves point in front of
    ;; the inserted text.
    (insert-buffer url-working-buffer)
    (kill-buffer url-working-buffer)))
382
(deffoo nnshimbun-request-article (article &optional group server to-buffer)
  "Fetch ARTICLE of GROUP on SERVER into TO-BUFFER.
ARTICLE is either an article number or a Message-ID string; a string
is first translated into a number through the group's overview data.
Return the value of `nnshimbun-request-article-1', or nil after
reporting the failure when ARTICLE cannot be resolved to a number.
Return nil as well when GROUP cannot be selected."
  (when (nnshimbun-possibly-change-group group server)
    (if (stringp article)
        (setq article (nnshimbun-search-id group article)))
    (if (integerp article)
        (nnshimbun-request-article-1 article group server to-buffer)
      ;; Report under this backend's own name, not nnml's.
      (nnheader-report 'nnshimbun "Couldn't retrieve article: %s"
                       (prin1-to-string article))
      nil)))
391
(defsubst nnshimbun-header-xref (x)
  "Return the Xref value of header X without a leading \"Xref: \".
Return the value unchanged when it has no such prefix, and nil when
the header carries no Xref at all."
  (let ((xref (mail-header-xref x)))
    (if (and xref (string-match "^Xref: " xref))
        (substring xref 6)
      xref)))
397
(defun nnshimbun-request-article-1 (article &optional group server to-buffer)
  "Put article number ARTICLE of GROUP into TO-BUFFER.
TO-BUFFER defaults to `nntp-server-buffer'.  The backlog is tried
first; on a hit (GROUP . ARTICLE) is returned.  Otherwise the overview
entry of ARTICLE is looked up, the page named by its Xref is fetched
into `nnshimbun-buffer' and converted by the function stored in
`nnshimbun-make-contents', which also receives the X-Face selected
from `nnshimbun-x-face-alist' for SERVER and GROUP.  The result is
entered into the backlog and (GROUP . NUMBER) is returned.  Return nil
when there is no overview entry or no contents could be produced."
  (cond
   ((nnshimbun-backlog
      (gnus-backlog-request-article
       group article (or to-buffer nntp-server-buffer)))
    (cons group article))
   (t
    (let* ((nov-header
            (save-excursion
              (set-buffer (nnshimbun-open-nov group))
              (and (nnheader-find-nov-line article)
                   (nnheader-parse-nov))))
           (body
            (when nov-header
              (let* ((page (nnshimbun-header-xref nov-header))
                     ;; Server level first, then group level, each with
                     ;; a "default" fallback.
                     (server-faces
                      (cdr (or (assoc (or server
                                          (nnoo-current-server 'nnshimbun))
                                      nnshimbun-x-face-alist)
                               (assoc "default" nnshimbun-x-face-alist))))
                     (face (cdr (or (assoc group server-faces)
                                    (assoc "default" server-faces)))))
                (save-excursion
                  (set-buffer nnshimbun-buffer)
                  (erase-buffer)
                  (nnshimbun-retrieve-url page)
                  (nnheader-message 6 "nnshimbun: Make contents...")
                  (goto-char (point-min))
                  (prog1 (funcall nnshimbun-make-contents nov-header face)
                    (nnheader-message 6 "nnshimbun: Make contents...done")))))))
      (when body
        (save-excursion
          (set-buffer (or to-buffer nntp-server-buffer))
          (erase-buffer)
          (insert body)
          (nnshimbun-backlog
            (gnus-backlog-enter-article group article (current-buffer)))
          (nnheader-report 'nnshimbun "Article %s retrieved"
                           (mail-header-id nov-header))
          (cons group (mail-header-number nov-header))))))))
432
(deffoo nnshimbun-request-group (group &optional server dont-check)
  "Select GROUP on SERVER.
When DONT-CHECK is non-nil, only report the selection and return t.
Otherwise read the first and the last article number from the group's
overview buffer and put a \"211 COUNT FIRST LAST GROUP\" line into the
server buffer with `nnheader-insert'; COUNT is the number of overview
lines.  A missing or unusable group directory is reported through
`nnheader-report'."
  (let ((pathname-coding-system 'binary))
    (cond
     ((not (nnshimbun-possibly-change-group group server))
      (nnheader-report 'nnshimbun "Invalid group (no such directory)"))
     ((not (file-exists-p nnshimbun-current-directory))
      (nnheader-report 'nnshimbun "Directory %s does not exist"
                       nnshimbun-current-directory))
     ((not (file-directory-p nnshimbun-current-directory))
      (nnheader-report 'nnshimbun "%s is not a directory"
                       nnshimbun-current-directory))
     (dont-check
      (nnheader-report 'nnshimbun "Group %s selected" group)
      t)
     (t
      (let (beg end lines)
        (save-excursion
          (set-buffer (nnshimbun-open-nov group))
          ;; Each overview line starts with the article number, so
          ;; `read' at the start of the first and of the last line
          ;; yields the range.  An empty buffer makes `read' fail,
          ;; which leaves nil (reported as 0 below).
          (goto-char (point-min))
          (setq beg (ignore-errors (read (current-buffer))))
          (goto-char (point-max))
          (forward-line -1)
          (setq end (ignore-errors (read (current-buffer)))
                lines (count-lines (point-min) (point-max))))
        ;; Report under this backend's own name (was a misspelt symbol).
        (nnheader-report 'nnshimbun "Selected group %s" group)
        (nnheader-insert "211 %d %d %d %s\n"
                         lines (or beg 0) (or end 0) group))))))
459
(deffoo nnshimbun-request-scan (&optional group server)
  "Scan GROUP on SERVER for new articles.
Make GROUP current, then regenerate its overview database and return
the value of `nnshimbun-generate-nov-database'."
  (prog2
      (nnshimbun-possibly-change-group group server)
      (nnshimbun-generate-nov-database group)))
463
(deffoo nnshimbun-close-group (group &optional server)
  "Write the overview buffer of GROUP to disk and return t.
SERVER is accepted for interface compatibility and is not used."
  (prog1 t
    (nnshimbun-write-nov group)))
467
(deffoo nnshimbun-request-list (&optional server)
  "Insert one line per group of SERVER into `nntp-server-buffer'.
Each line has the form \"GROUP LAST FIRST n\", where the two numbers
are read from the last and the first line of the group's overview
buffer (0 when nothing can be read).  Groups that cannot be selected
are skipped.  Always return t."
  (save-excursion
    (set-buffer nntp-server-buffer)
    (erase-buffer)
    (dolist (group nnshimbun-groups)
      (when (nnshimbun-possibly-change-group group server)
        ;; RANGE is (FIRST . LAST); either part is nil when unreadable.
        (let ((range
               (save-excursion
                 (set-buffer (nnshimbun-open-nov group))
                 (goto-char (point-min))
                 (let ((first (ignore-errors (read (current-buffer)))))
                   (goto-char (point-max))
                   (forward-line -1)
                   (cons first (ignore-errors (read (current-buffer))))))))
          (insert (format "%s %d %d n\n"
                          group
                          (or (cdr range) 0)
                          (or (car range) 0)))))))
  t) ; return value
484
(eval-and-compile
  (if (fboundp 'mime-entity-fetch-field)
      ;; Semi-Gnus variant: Subject and From go through the MIME entity
      ;; interface.
      (defun nnshimbun-insert-header (header)
        "Insert HEADER at point as a block of article header lines."
        (let ((subject (or (mime-entity-fetch-field header 'Subject) "(none)"))
              (from (or (mime-entity-fetch-field header 'From) "(nobody)"))
              (date (or (mail-header-date header) ""))
              (id (or (mail-header-id header) (nnmail-message-id))))
          (insert "Subject: " subject "\n"
                  "From: " from "\n"
                  "Date: " date "\n"
                  "Message-ID: " id "\n"))
        ;; References are emitted only when non-empty.
        (let ((refs (mail-header-references header)))
          (when (and refs (string< "" refs))
            (insert "References: " refs "\n")))
        (let ((lines (number-to-string (or (mail-header-lines header) 0)))
              (xref (nnshimbun-header-xref header)))
          (insert "Lines: " lines "\n"
                  "Xref: " xref "\n")))
    ;; Pure Gnus variant: let nnheader do the work, then drop the
    ;; character before point and append the Xref line.
    (defun nnshimbun-insert-header (header)
      "Insert HEADER at point as a block of article header lines."
      (nnheader-insert-header header)
      (delete-char -1)
      (insert "Xref: " (nnshimbun-header-xref header) "\n"))))
504
(deffoo nnshimbun-retrieve-headers (articles &optional group server fetch-old)
  "Retrieve the headers of ARTICLES in GROUP on SERVER.
ARTICLES may hold article numbers and Message-ID strings.  When the
overview file can be used, its contents are placed in
`nntp-server-buffer' and `nov' is returned.  Otherwise one
\"220 N Article retrieved.\" entry followed by the header lines and a
terminating \".\" line is generated per article found in the overview
buffer, and `headers' is returned.  Return nil when GROUP cannot be
selected.  FETCH-OLD is passed on to
`nnshimbun-retrieve-headers-with-nov'."
  (when (nnshimbun-possibly-change-group group server)
    (if (nnshimbun-retrieve-headers-with-nov articles fetch-old)
        'nov
      (save-excursion
        (set-buffer nntp-server-buffer)
        (erase-buffer)
        (let (header)
          (dolist (art articles)
            ;; Translate a Message-ID into an article number.
            (if (stringp art)
                (setq art (nnshimbun-search-id group art)))
            (if (integerp art)
                (when (setq header
                            (save-excursion
                              (set-buffer (nnshimbun-open-nov group))
                              (and (nnheader-find-nov-line art)
                                   (nnheader-parse-nov))))
                  (insert (format "220 %d Article retrieved.\n" art))
                  (nnshimbun-insert-header header)
                  (insert ".\n")
                  (delete-region (point) (point-max))))))
        ;; Gnus back ends announce head-style data with the symbol
        ;; `headers'; the former `header' is not what callers that test
        ;; for `headers' expect.
        'headers))))
527
(defun nnshimbun-retrieve-headers-with-nov (articles &optional fetch-old)
  "Load the overview lines for ARTICLES into `nntp-server-buffer'.
Return t on success.  Return nil when NOV use is disabled through
`gnus-nov-is-evil' or `nnshimbun-nov-is-evil', or when the current
group has no overview file.

FETCH-OLD controls trimming: a number extends the range that many
articles below the first requested one (never below 1), any other
non-nil value keeps the whole file, and nil keeps just the range
between the first and the last element of ARTICLES."
  (unless (or gnus-nov-is-evil nnshimbun-nov-is-evil)
    (let ((nov-file (expand-file-name nnshimbun-nov-file-name
                                      nnshimbun-current-directory)))
      (when (file-exists-p nov-file)
        (save-excursion
          (set-buffer nntp-server-buffer)
          (erase-buffer)
          (nnheader-insert-file-contents nov-file)
          (unless (and fetch-old (not (numberp fetch-old)))
            (nnheader-nov-delete-outside-range
             (if fetch-old
                 (max 1 (- (car articles) fetch-old))
               (car articles))
             (car (last articles))))
          t)))))
544
545
546
547 ;;; Nov Database Operations
548
(defun nnshimbun-generate-nov-database (group)
  "Update the overview data of GROUP and write it to disk.
The update itself is done by the function stored in
`nnshimbun-generate-nov'; its value is returned."
  (let ((result (funcall nnshimbun-generate-nov group)))
    (nnshimbun-write-nov group)
    result))
552
(defun nnshimbun-generate-nov-for-each-group (group)
  "Append overview entries for the new articles of GROUP.
The index page, whose URL is obtained by evaluating
`nnshimbun-index-url', is fetched uncached into `nnshimbun-buffer' and
handed to the function stored in `nnshimbun-get-headers'.  Every
returned header whose Message-ID is not yet known gets the next free
article number and is appended to the overview buffer.  With
`nnshimbun-pre-fetch-article' non-nil the article itself is fetched as
well."
  (nnshimbun-possibly-change-group group)
  (save-excursion
    (set-buffer (nnshimbun-open-nov group))
    ;; The highest number in use is at the start of the last line.
    (goto-char (point-max))
    (forward-line -1)
    (let* ((number (or (ignore-errors (read (current-buffer))) 0))
           (fetched (save-excursion
                      (set-buffer nnshimbun-buffer)
                      (erase-buffer)
                      (nnshimbun-retrieve-url (eval nnshimbun-index-url) t)
                      (goto-char (point-min))
                      (funcall nnshimbun-get-headers))))
      (dolist (header fetched)
        (unless (nnshimbun-search-id group (mail-header-id header))
          (setq number (1+ number))
          (mail-header-set-number header number)
          (goto-char (point-max))
          (nnheader-insert-nov header)
          (when nnshimbun-pre-fetch-article
            (nnshimbun-request-article-1 number group nil nnshimbun-buffer)))))))
573
(defun nnshimbun-generate-nov-for-all-groups (&rest args)
  "Update the overview data of all groups of the server in one pass.
Nothing happens while fewer than `nnshimbun-check-interval' seconds
have passed since the previous pass.  The function stored in
`nnshimbun-get-headers' must return a list of (GROUP . HEADERS); every
header whose Message-ID is not yet known is numbered and appended to
the overview buffer of its group, and pre-fetched when
`nnshimbun-pre-fetch-article' is non-nil.  Afterwards all overview
buffers are saved and the time of the pass is recorded.  ARGS is
ignored."
  (when (or (null nnshimbun-nov-last-check)
            (>= (nnshimbun-lapse-seconds nnshimbun-nov-last-check)
                nnshimbun-check-interval))
    (save-excursion
      (dolist (entry (funcall nnshimbun-get-headers))
        (let ((group (car entry))
              (headers (cdr entry)))
          (nnshimbun-possibly-change-group group)
          (when headers
            (set-buffer (nnshimbun-open-nov group))
            ;; The highest number in use is at the start of the last line.
            (goto-char (point-max))
            (forward-line -1)
            (let ((number (or (ignore-errors (read (current-buffer))) 0)))
              (dolist (header headers)
                (unless (nnshimbun-search-id group (mail-header-id header))
                  (setq number (1+ number))
                  (mail-header-set-number header number)
                  (goto-char (point-max))
                  (nnheader-insert-nov header)
                  (when nnshimbun-pre-fetch-article
                    (nnshimbun-request-article-1
                     number group nil nnshimbun-buffer))))))))
      (nnshimbun-save-nov)
      (setq nnshimbun-nov-last-check (current-time)))))
597
;; Look for the article with Message-ID ID in the overview buffer of
;; GROUP.
;;
;; The buffer is searched for the text ID.  A hit is accepted only when
;; four tab characters can be found backwards from it and no further
;; tab lies between that position and the beginning of the line;
;; otherwise the search continues from the following line.  If that
;; fails, the buffer is searched once more for "X-Nnshimbun-Id: "
;; followed by ID, which is how `nnshimbun-nov-fix-header' records an
;; ID that it has replaced.
;;
;; With point at the beginning of the matching line, return the parsed
;; header (`nnheader-parse-nov') when NOV is non-nil, else the value
;; read from the start of the line, i.e. the article number.  Return
;; nil when ID is not found.
598 (defun nnshimbun-search-id (group id &optional nov)
599   (save-excursion
600     (set-buffer (nnshimbun-open-nov group))
601     (goto-char (point-min))
602     (let (found)
603       (while (and (not found)
604                   (search-forward id nil t)) ; We find the ID.
605         ;; And the id is in the fourth field.
606         (if (not (and (search-backward "\t" nil t 4)
607                       (not (search-backward "\t" (gnus-point-at-bol) t))))
608             (forward-line 1)
609           (forward-line 0)
610           (setq found t)))
611       (unless found
612         (goto-char (point-min))
613         (when (search-forward (concat "X-Nnshimbun-Id: " id) nil t)
614           (forward-line 0)
615           (setq found t)))
616       (if found
617           (if nov
618               (nnheader-parse-nov)
619             ;; We return the article number.
620             (ignore-errors (read (current-buffer))))))))
621
(defun nnshimbun-nov-fix-header (group header args)
  "Rewrite the overview line of HEADER in GROUP according to ARGS.
ARGS is an alist of (FIELD . VALUE).  For the field `id' the Message-ID
of HEADER is replaced by VALUE, and the previous ID is kept in the
extra headers under `X-Nnshimbun-Id' unless such an entry already
exists.  For any other FIELD a non-nil VALUE is stored through
`mail-header-set-FIELD'.  Nothing happens when the overview buffer has
no line for the article number of HEADER."
  (save-excursion
    (set-buffer (nnshimbun-open-nov group))
    (when (nnheader-find-nov-line (mail-header-number header))
      (dolist (arg args)
        (if (eq (car arg) 'id)
            (let ((extra (mail-header-extra header)))
              (unless (assq 'X-Nnshimbun-Id extra)
                (mail-header-set-extra
                 header
                 (cons (cons 'X-Nnshimbun-Id (mail-header-id header))
                       extra)))
              (mail-header-set-id header (cdr arg)))
          (let ((setter (intern (concat "mail-header-set-"
                                        (symbol-name (car arg))))))
            (when (cdr arg)
              ;; The setter name is only known at run time and may name
              ;; a macro, hence `eval'.  Both arguments are quoted so
              ;; that a list or symbol VALUE is stored as data rather
              ;; than evaluated as code.
              (eval (list setter
                          (list 'quote header)
                          (list 'quote (cdr arg))))))))
      ;; Store the Xref without its "Xref: " prefix before the header
      ;; is written back.
      (mail-header-set-xref header (nnshimbun-header-xref header))
      ;; Replace the old overview line by the updated one.
      (delete-region (point) (progn (forward-line 1) (point)))
      (nnheader-insert-nov header))))
640
(defun nnshimbun-open-nov (group)
  "Return the overview buffer of GROUP, creating it when necessary.
A live buffer registered in `nnshimbun-nov-buffer-alist' is reused.
Otherwise a buffer is created, the name of the group's overview file
is stored in the buffer-local `nnshimbun-nov-buffer-file-name', the
file is loaded if it exists, the buffer is marked unmodified and it is
registered in the alist."
  (let ((cached (cdr (assoc group nnshimbun-nov-buffer-alist))))
    (cond
     ((buffer-live-p cached)
      cached)
     (t
      (let ((fresh (gnus-get-buffer-create
                    (format " *nnshimbun overview %s %s*"
                            (nnoo-current-server 'nnshimbun) group))))
        (save-excursion
          (set-buffer fresh)
          (set (make-local-variable 'nnshimbun-nov-buffer-file-name)
               (expand-file-name
                nnshimbun-nov-file-name
                (nnmail-group-pathname group nnshimbun-server-directory)))
          (erase-buffer)
          (if (file-exists-p nnshimbun-nov-buffer-file-name)
              (nnheader-insert-file-contents nnshimbun-nov-buffer-file-name))
          (set-buffer-modified-p nil))
        (setq nnshimbun-nov-buffer-alist
              (cons (cons group fresh) nnshimbun-nov-buffer-alist))
        fresh)))))
660
(defun nnshimbun-write-nov (group)
  "Write the overview buffer of GROUP to its file if it has changed.
Nothing happens when GROUP has no live overview buffer or when that
buffer is unmodified.  After a write the buffer is marked unmodified,
so the same data is not written again until it changes."
  (let ((buffer (cdr (assoc group nnshimbun-nov-buffer-alist))))
    (when (buffer-live-p buffer)
      (save-excursion
        (set-buffer buffer)
        ;; The result of `buffer-modified-p' used to be thrown away,
        ;; so the file was rewritten on every call.
        (when (buffer-modified-p)
          (nnmail-write-region 1 (point-max) nnshimbun-nov-buffer-file-name
                               nil 'nomesg)
          (set-buffer-modified-p nil))))))
669
(defun nnshimbun-save-nov ()
  "Save and kill every overview buffer of the current server.
Entries are removed from `nnshimbun-nov-buffer-alist' one at a time;
a live buffer is written to its overview file when modified and is
then killed."
  (save-excursion
    (while nnshimbun-nov-buffer-alist
      (let ((buffer (cdr (car nnshimbun-nov-buffer-alist))))
        ;; `buffer-name' is nil for a buffer that was killed already.
        (when (buffer-name buffer)
          (set-buffer buffer)
          (if (buffer-modified-p)
              (nnmail-write-region 1 (point-max)
                                   nnshimbun-nov-buffer-file-name
                                   nil 'nomesg))
          (set-buffer-modified-p nil)
          (kill-buffer buffer)))
      (setq nnshimbun-nov-buffer-alist (cdr nnshimbun-nov-buffer-alist)))))
681
682
683
684 ;;; Server Initialize
(defun nnshimbun-possibly-change-group (group &optional server)
  "Make GROUP on SERVER the current group.
Open SERVER first when it is given and not open yet, and make sure the
work buffer `nnshimbun-buffer' exists.  With GROUP nil return t.
Otherwise update `nnshimbun-current-directory' and
`nnshimbun-current-group', create the group directory when it is
missing, and return t if it exists and is a directory; failures are
reported through `nnheader-report'."
  (and server
       (not (nnshimbun-server-opened server))
       (nnshimbun-open-server server))
  (or (gnus-buffer-live-p nnshimbun-buffer)
      (setq nnshimbun-buffer
            (save-excursion
              (nnheader-set-temp-buffer
               (format " *nnshimbun %s*" (nnoo-current-server 'nnshimbun))))))
  (or (not group)
      (let ((pathname (nnmail-group-pathname group nnshimbun-server-directory))
            (pathname-coding-system 'binary))
        (unless (equal pathname nnshimbun-current-directory)
          (setq nnshimbun-current-directory pathname
                nnshimbun-current-group group))
        (unless (file-exists-p nnshimbun-current-directory)
          (ignore-errors (make-directory nnshimbun-current-directory t)))
        (cond
         ((not (file-exists-p nnshimbun-current-directory))
          (nnheader-report 'nnshimbun "Couldn't create directory: %s"
                           nnshimbun-current-directory))
         ((file-directory-p (file-truename nnshimbun-current-directory))
          t)
         (t
          (nnheader-report 'nnshimbun "Not a directory: %s"
                           nnshimbun-current-directory))))))
709
710
711
712 ;;; Misc Functions
713
(eval-and-compile
  (if (fboundp 'eword-encode-string)
      ;; For Semi-Gnus.
      (defun nnshimbun-mime-encode-string (string)
        "Decode HTML entities in STRING and MIME-encode the result.
Encoding is done by `eword-encode-string'; every newline in the encoded
text is removed so that the result is a single line."
        (let ((encoded (eword-encode-string
                        (nnweb-decode-entities-string string))))
          (apply #'concat (split-string encoded "\n"))))
    ;; For pure Gnus.
    (defun nnshimbun-mime-encode-string (string)
      "Decode HTML entities in STRING and MIME-encode the result.
Encoding is done by `rfc2047-encode-region' in a temporary buffer; every
newline in the encoded text is removed so that the result is a single
line."
      (let ((encoded (with-temp-buffer
                       (insert (nnweb-decode-entities-string string))
                       (rfc2047-encode-region (point-min) (point-max))
                       (buffer-string))))
        (apply #'concat (split-string encoded "\n"))))))
733
(defun nnshimbun-lapse-seconds (time)
  "Return the number of seconds from TIME until now.
TIME is a list whose first two elements are the high part (counted in
units of 65536 seconds) and the low part of a seconds count, laid out
like the value of `current-time'.  Further elements are ignored."
  (let* ((now (current-time))
         (high-diff (- (car now) (car time)))
         (low-diff (- (nth 1 now) (nth 1 time))))
    (+ (* high-diff 65536) low-diff)))
738
(defun nnshimbun-make-date-string (year month day &optional time)
  "Return a date string \"DD Mon YYYY TIME +0900\" for YEAR, MONTH and DAY.
MONTH is a number from 1 to 12 and is rendered as an English month
abbreviation.  A YEAR below 69 is taken as 20YY, any other YEAR below
1000 has 1900 added to it, and larger values are used unchanged.  TIME
is a string inserted as is; it defaults to \"00:00\".  The zone is
always written as +0900."
  (let ((month-name (aref [nil "Jan" "Feb" "Mar" "Apr" "May" "Jun"
                               "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"]
                          month))
        (full-year (cond ((< year 69) (+ year 2000))
                         ;; Covers both 2-digit and 3-digit years.
                         ((< year 1000) (+ year 1900))
                         (t year))))
    (format "%02d %s %04d %s +0900"
            day month-name full-year (or time "00:00"))))
753
;; Use the native `regexp-opt' when it exists; otherwise fall back to a
;; plain alternation of the quoted strings.
(if (fboundp 'regexp-opt)
    (defalias 'nnshimbun-regexp-opt 'regexp-opt)
  (defun nnshimbun-regexp-opt (strings &optional paren)
    "Return a regexp that matches any one of STRINGS.
The members of STRINGS should be distinct and are matched literally.
When PAREN is non-nil the whole regexp is wrapped in one grouping
construct."
    (let ((alternatives (mapconcat 'regexp-quote strings "\\|")))
      (if paren
          (concat "\\(" alternatives "\\)")
        (concat alternatives)))))
763
764
765 ;; Fast fill-region function
766
;; Column at which `nnshimbun-fill-line' breaks lines.  The default is
;; computed once, when this form is evaluated: 80, or the frame width
;; minus 4 when that is smaller.
(defvar nnshimbun-fill-column (min 80 (- (frame-width) 4)))
768
;; List of characters (the string below exploded by `append') that
;; `nnshimbun-fill-line' skips over before breaking, so that none of
;; them ends up at the beginning of a line.  The string mixes ASCII
;; punctuation with ISO-2022-JP encoded characters.
(defconst nnshimbun-kinsoku-bol-list
  (append "!)-_~}]:;',.?\e$B!"!#!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2!3!4!5!6!7\e(B\
\e$B!8!9!:!;!<!=!>!?!@!A!B!C!D!E!G!I!K!M!O!Q!S!U!W!Y![!k!l!m!n$!$#$%$'$)\e(B\
\e$B$C$c$e$g$n%!%#%%%'%)%C%c%e%g%n%u%v\e(B" nil))
773
;; List of characters (the string below exploded by `append') that
;; `nnshimbun-fill-line' refuses to leave at the end of a line: when the
;; break point follows one of them, the break is moved in front of it.
(defconst nnshimbun-kinsoku-eol-list
  (append "({[`\e$B!F!H!J!L!N!P!R!T!V!X!Z!k!l!m!x\e(B" nil))
776
(defun nnshimbun-fill-line ()
  "Break the line at point into pieces of about `nnshimbun-fill-column' columns.
Point is first moved to the beginning of the line.  Afterwards point is
moved to the next line and the return value is non-nil unless point
then sits at the end of the buffer, so the caller can loop on it."
  (forward-line 0)
  ;; TOP is the start of the piece currently being filled; CHR is the
  ;; character found at the tentative break point.
  (let ((top (point)) chr)
    ;; All the work happens in the loop condition.  It yields non-nil
    ;; (keep going) only when a newline was actually inserted.
    (while (if (>= (move-to-column nnshimbun-fill-column)
                   nnshimbun-fill-column)
               ;; The line reaches the fill column, so it needs a break.
               (not (progn
                      (if (memq (preceding-char) nnshimbun-kinsoku-eol-list)
                          ;; The break would follow a character that must
                          ;; not end a line: back up over every such
                          ;; character and break in front of them.
                          ;; `insert' returns nil, so the loop continues.
                          (progn
                            (backward-char)
                            (while (memq (preceding-char) nnshimbun-kinsoku-eol-list)
                              (backward-char))
                            (insert "\n"))
                        ;; Keep characters that must not begin a line on
                        ;; the current line by stepping over them.
                        (while (memq (setq chr (following-char)) nnshimbun-kinsoku-bol-list)
                          (forward-char))
                        (if (looking-at "\\s-+")
                            ;; Break at whitespace, dropping it unless
                            ;; point is already at the end of the line.
                            (or (eolp) (delete-region (point) (match-end 0)))
                          ;; Otherwise: break right here when CHR is a
                          ;; wide character, else move back to a word
                          ;; start after TOP, else give up and go to the
                          ;; end of the line.
                          (or (> (char-width chr) 1)
                              (re-search-backward "\\<" top t)
                              (end-of-line)))
                        ;; At end of line there is nothing left to
                        ;; break: `eolp' yields t and the loop stops.
                        (or (eolp) (insert "\n"))))))
      (setq top (point))))
  (forward-line 1)
  (not (eobp)))
800
(defsubst nnshimbun-shallow-rendering ()
  "Turn the HTML in the current buffer into filled plain text.
A blank line is inserted after each \"<p>\" and a newline after each
\"<br>\"; markup is then removed, entities are decoded and every line is
filled with `nnshimbun-fill-line'.  Finally leading newlines are
removed, runs of three or more newlines shrink to two, and the text is
made to end in exactly one newline."
  (dolist (break '(("<p>" . "\n\n") ("<br>" . "\n")))
    (goto-char (point-min))
    (while (search-forward (car break) nil t)
      (insert (cdr break))))
  (nnweb-remove-markup)
  (nnweb-decode-entities)
  (goto-char (point-min))
  (while (nnshimbun-fill-line))
  ;; Drop newlines at the very beginning.
  (goto-char (point-min))
  (skip-chars-forward "\n")
  (delete-region (point-min) (point))
  ;; Keep at most one blank line between paragraphs.
  (while (search-forward "\n\n" nil t)
    (delete-region (point)
                   (progn (skip-chars-forward "\n") (point))))
  ;; Normalize the end of the text to a single newline.
  (goto-char (point-max))
  (skip-chars-backward "\n")
  (delete-region (point) (point-max))
  (insert "\n"))
823
(defun nnshimbun-make-text-or-html-contents (header &optional x-face)
  "Turn the page in the current buffer into an article and return it.
When both `nnshimbun-contents-start' and `nnshimbun-contents-end' match
\(searching forward from point, ignoring case), only the text between
them is kept and rendered as plain text; otherwise the whole buffer is
kept and labelled as HTML.  HEADER is inserted at the top with
`nnshimbun-insert-header', followed by Content-Type and MIME-Version
lines and, when non-nil, the string X-FACE.  The return value is the
buffer text encoded for the ISO-2022-JP MIME charset."
  (let ((case-fold-search t)
        (html t)
        start)
    (when (re-search-forward nnshimbun-contents-start nil t)
      (setq start (point))
      (when (re-search-forward nnshimbun-contents-end nil t)
        ;; Cut the tail first so that START stays valid.
        (delete-region (match-beginning 0) (point-max))
        (delete-region (point-min) start)
        (nnshimbun-shallow-rendering)
        (setq html nil)))
    (goto-char (point-min))
    (nnshimbun-insert-header header)
    (insert "Content-Type: "
            (if html "text/html" "text/plain")
            "; charset=ISO-2022-JP\nMIME-Version: 1.0\n")
    (when x-face
      (insert x-face)
      (or (bolp) (insert "\n")))
    ;; Blank line separating the header from the body.
    (insert "\n")
    (encode-coding-string (buffer-string)
                          (mime-charset-to-coding-system "ISO-2022-JP"))))
844
(defun nnshimbun-make-html-contents (header &optional x-face)
  "Turn the page in the current buffer into an HTML article and return it.
When both `nnshimbun-contents-start' and `nnshimbun-contents-end' match
\(searching forward from point), only the text between them is kept;
otherwise the buffer is left whole.  HEADER is inserted at the top with
`nnshimbun-insert-header', followed by Content-Type and MIME-Version
lines and, when non-nil, the string X-FACE.  The return value is the
buffer text encoded for the ISO-2022-JP MIME charset."
  (let (start)
    (when (re-search-forward nnshimbun-contents-start nil t)
      (setq start (point))
      (when (re-search-forward nnshimbun-contents-end nil t)
        ;; Cut the tail first so that START stays valid.
        (delete-region (match-beginning 0) (point-max))
        (delete-region (point-min) start)))
    (goto-char (point-min))
    (nnshimbun-insert-header header)
    (insert "Content-Type: text/html; charset=ISO-2022-JP\n"
            "MIME-Version: 1.0\n")
    (if x-face
        (progn
          (insert x-face)
          (or (bolp) (insert "\n"))))
    ;; Blank line separating the header from the body.
    (insert "\n")
    (encode-coding-string (buffer-string)
                          (mime-charset-to-coding-system "ISO-2022-JP"))))
863
;; Convert a MHonArc-generated message page in the current buffer into
;; an article and return the buffer text encoded for ISO-2022-JP.
;; HEADER is the header object of the article; it is updated through
;; `nnshimbun-nov-fix-header' with the values found in the page and
;; inserted with `nnshimbun-insert-header'.  ARGS is accepted but not
;; used.  When the page has no "<!--X-Head-End-->" marker, the page is
;; kept whole and labelled as text/html instead.
(defun nnshimbun-make-mhonarc-contents (header &rest args)
  (require 'mml)
  (if (search-forward "<!--X-Head-End-->" nil t)
      (progn
        (forward-line 0)
        ;; Processing headers.
        (save-restriction
          ;; Work only on the text before the head-end marker line.
          (narrow-to-region (point-min) (point))
          (nnweb-decode-entities)
          ;; Strip the "<!--X-" prefix and " -->" suffix from the
          ;; comment lines, then turn tabs into spaces.
          (goto-char (point-min))
          (while (search-forward "\n<!--X-" nil t)
            (replace-match "\n"))
          (goto-char (point-min))
          (while (search-forward " -->\n" nil t)
            (replace-match "\n"))
          (goto-char (point-min))
          (while (search-forward "\t" nil t)
            (replace-match " "))
          (goto-char (point-min))
          ;; BUF collects (FIELD . VALUE) pairs, REFS the references.
          (let (buf refs)
            (while (not (eobp))
              (cond
               ;; Remaining comment lines are dropped.
               ((looking-at "<!--")
                (delete-region (point) (progn (forward-line 1) (point))))
               ;; Fields recorded in BUF are removed from the buffer.
               ((looking-at "Subject: +")
                (push (cons 'subject (nnheader-header-value)) buf)
                (delete-region (point) (progn (forward-line 1) (point))))
               ((looking-at "From: +")
                (push (cons 'from (nnheader-header-value)) buf)
                (delete-region (point) (progn (forward-line 1) (point))))
               ((looking-at "Date: +")
                (push (cons 'date (nnheader-header-value)) buf)
                (delete-region (point) (progn (forward-line 1) (point))))
               ;; Message ids are wrapped in angle brackets here.
               ((looking-at "Message-Id: +")
                (push (cons 'id (concat "<" (nnheader-header-value) ">")) buf)
                (delete-region (point) (progn (forward-line 1) (point))))
               ((looking-at "Reference: +")
                (push (concat "<" (nnheader-header-value) ">") refs)
                (delete-region (point) (progn (forward-line 1) (point))))
               ;; Content-Type is kept; a charset is appended when the
               ;; line does not mention one.
               ((looking-at "Content-Type: ")
                (unless (search-forward "charset" (gnus-point-at-eol) t)
                  (end-of-line)
                  (insert "; charset=ISO-2022-JP"))
                (forward-line 1))
               ;; Any other line is kept untouched.
               (t (forward-line 1))))
            (insert "MIME-Version: 1.0\n")
            ;; REFS is joined in the order left by `push', i.e. the
            ;; reverse of the order in which the lines appeared.
            (if refs (push (cons 'references (mapconcat 'identity refs " ")) buf))
            (nnshimbun-nov-fix-header nnshimbun-current-group header buf)
            (goto-char (point-min))
            (nnshimbun-insert-header header))
          (goto-char (point-max)))
        ;; Processing body.
        (save-restriction
          (narrow-to-region (point) (point-max))
          ;; Delete everything up to and including the body start
          ;; marker; nothing is deleted when the marker is missing.
          (delete-region
           (point)
           (progn
             (search-forward "\n<!--X-Body-of-Message-->\n" nil t)
             (point)))
          ;; Delete from the body end marker line to the end.
          (when (search-forward "\n<!--X-Body-of-Message-End-->\n" nil t)
            (forward-line -1)
            (delete-region (point) (point-max)))
          (nnweb-remove-markup)
          (nnweb-decode-entities)))
    ;; No MHonArc head marker: keep the page whole and present it as HTML.
    (goto-char (point-min))
    (nnshimbun-insert-header header)
    (insert "Content-Type: text/html; charset=ISO-2022-JP\nMIME-Version: 1.0\n\n"))
  (encode-coding-string (buffer-string)
                        (mime-charset-to-coding-system "ISO-2022-JP")))
933
(defun nnshimbun-make-fml-contents (header &rest args)
  "Convert an fml-generated message page in the current buffer to an article.
HEADER is the header object of the article; it is updated through
`nnshimbun-nov-fix-header' with the Date, From, Subject, Message-Id and
References values found in the page and inserted with
`nnshimbun-insert-header'.  Other fields found in the page are kept as
plain \"Field: value\" lines.  ARGS is accepted but not used.

When one of the expected markers (\"<SPAN CLASS=mailheaders>\",
\"</PRE>\", \"</SPAN>\") is missing, conversion stops at that point and
the buffer is used as it stands.  The return value is always the buffer
text encoded for the ISO-2022-JP MIME charset."
  (require 'mml)
  (catch 'stop
    ;; Drop everything up to and including the header block opener.
    (if (search-forward "<SPAN CLASS=mailheaders>" nil t)
        (delete-region (point-min) (point))
      (throw 'stop nil))
    ;; Drop everything from the line holding "</PRE>" onwards.  The
    ;; NOERROR argument makes a missing marker reach the `throw' below
    ;; rather than signal `search-failed'.
    (if (search-forward "</PRE>" nil t)
        (progn
          (beginning-of-line)
          (delete-region (point) (point-max)))
      (throw 'stop nil))
    ;; Empty the line holding the last "</SPAN>"; the text before it is
    ;; treated as the header block.
    (if (search-backward "</SPAN>" nil t)
        (progn
          (beginning-of-line)
          (kill-line))
      (throw 'stop nil))
    (save-restriction
      (narrow-to-region (point-min) (point))
      (subst-char-in-region (point-min) (point-max) ?\t ?  t)
      (nnweb-decode-entities)
      (goto-char (point-min))
      ;; BUF collects (FIELD . VALUE) pairs for `nnshimbun-nov-fix-header'.
      (let (buf field value start value-beg end)
        (while (and (setq start (point))
                    (re-search-forward "<SPAN CLASS=\\(.*\\)>\\(.*\\)</SPAN>:"
                                       nil t)
                    (setq field (match-string 2))
                    ;; The value span carries the same class name with a
                    ;; "-value" suffix; the name is matched literally.
                    (re-search-forward
                     (concat "<SPAN CLASS="
                             (regexp-quote (match-string 1))
                             "-value>")
                     nil t)
                    (setq value-beg (point))
                    (search-forward "</SPAN>" nil t)
                    (setq end (point)))
          (setq value (buffer-substring value-beg
                                        (progn (search-backward "</SPAN>")
                                               (point))))
          ;; Remove the whole field markup from the buffer.
          (delete-region start end)
          (cond ((string= field "Date")
                 (push (cons 'date value) buf))
                ((string= field "From")
                 (push (cons 'from value) buf))
                ((string= field "Subject")
                 (push (cons 'subject value) buf))
                ((string= field "Message-Id")
                 (push (cons 'id value) buf))
                ((string= field "References")
                 (push (cons 'references value) buf))
                (t
                 ;; Unknown fields stay in the article as plain lines.
                 (insert (concat field ": " value "\n")))))
        (nnshimbun-nov-fix-header nnshimbun-current-group header buf)
        (goto-char (point-min))
        (nnshimbun-insert-header header))
      (goto-char (point-max)))
    ;; Processing body.
    (save-restriction
      (narrow-to-region (point) (point-max))
      (nnweb-remove-markup)
      (nnweb-decode-entities)))
  (encode-coding-string (buffer-string)
                        (mime-charset-to-coding-system "ISO-2022-JP")))
992
993 ;;; www.asahi.com
994
;; Build article headers from an asahi.com index page held in the
;; current buffer.  The buffer is first trimmed to the text between the
;; "<!-- Start of past -->" and "<!-- End of past -->" comment lines;
;; nil is returned when either marker is missing.  Links are collected
;; in a first pass, and "[MM/DD HH:MM]" stamps found after the last
;; link are attached to them in a second pass.  The returned list is in
;; reverse page order (the last link comes first).
(defun nnshimbun-asahi-get-headers ()
  (when (search-forward "\n<!-- Start of past -->\n" nil t)
    (delete-region (point-min) (point))
    (when (search-forward "\n<!-- End of past -->\n" nil t)
      (forward-line -1)
      (delete-region (point) (point-max))
      (goto-char (point-min))
      (let (headers)
        ;; First pass: one header per article link.  The id is built
        ;; from the directory digits, the file base name and the group.
        (while (re-search-forward
                "^\e$B"#\e(B<a href=\"\\(\\([0-9][0-9][0-9][0-9]\\)/past/\\([A-z]*[0-9]*\\)\\.html\\)\"> *"
                nil t)
          (let ((id (format "<%s%s%%%s>"
                            (match-string 2)
                            (match-string 3)
                            nnshimbun-current-group))
                (url (match-string 1)))
            ;; The subject is the text from the end of the link tag up
            ;; to the next "<br>", with tags and carriage returns
            ;; removed.  The date field is left empty for now.
            (push (make-full-mail-header
                   0
                   (nnshimbun-mime-encode-string
                    (mapconcat 'identity
                               (split-string
                                (buffer-substring
                                 (match-end 0)
                                 (progn (search-forward "<br>" nil t) (point)))
                                "\\(<[^>]+>\\|\r\\)")
                               ""))
                   nnshimbun-from-address
                   "" id "" 0 0 (concat nnshimbun-url url))
                  headers)))
        ;; Put the headers in page order so that the I-th stamp found
        ;; below goes with the I-th link.
        (setq headers (nreverse headers))
        (let ((i 0))
          ;; Second pass: the search continues from where the first
          ;; pass stopped.
          (while (and (nth i headers)
                      (re-search-forward
                       "^\\[\\([0-9][0-9]\\)/\\([0-9][0-9]\\) \\([0-9][0-9]:[0-9][0-9]\\)\\]"
                       nil t))
            (let ((month (string-to-number (match-string 1)))
                  (date (decode-time (current-time))))
              (mail-header-set-date
               (nth i headers)
               (nnshimbun-make-date-string
                ;; The page gives no year: a December stamp seen in
                ;; January is assigned to the previous year.
                (if (and (eq 12 month) (eq 1 (nth 4 date)))
                    (1- (nth 5 date))
                  (nth 5 date))
                month
                (string-to-number (match-string 2))
                (match-string 3))))
            (setq i (1+ i))))
        ;; Reverse once more: the result is in reverse page order.
        (nreverse headers)))))
1043
1044
1045
1046 ;;; www.sponichi.co.jp
1047
(defun nnshimbun-sponichi-get-headers ()
  "Build article headers from a sponichi.co.jp index page in the current buffer.
The buffer is trimmed to the part between two marker strings; nil is
returned when either marker is missing.  Each article link found there
yields one header whose date and message id come from the digits in the
link path and whose subject is the text up to the next \"<br>\" with
tags removed.  The list is returned in reverse page order."
  (when (search-forward "\e$B%K%e!<%9%$%s%G%C%/%9\e(B" nil t)
    (delete-region (point-min) (point))
    (when (search-forward "\e$B%"%I%?%0\e(B" nil t)
      (forward-line 2)
      (delete-region (point) (point-max))
      (goto-char (point-min))
      (let ((case-fold-search t)
            headers)
        (while (re-search-forward
                "^<a href=\"/\\(\\([A-z]*\\)/kiji/\\([0-9][0-9][0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([0-9][0-9]\\)\\.html\\)\">"
                nil t)
          (let* ((url (match-string 1))
                 (year-str (match-string 3))
                 (month-str (match-string 4))
                 (day-str (match-string 5))
                 (serial-str (match-string 6))
                 (id (format "<%s%s%s%s%%%s>"
                             year-str month-str day-str serial-str
                             nnshimbun-current-group))
                 (date (nnshimbun-make-date-string
                        (string-to-number year-str)
                        (string-to-number month-str)
                        (string-to-number day-str)))
                 ;; The title runs from the end of the link tag up to
                 ;; the next "<br>"; it is empty when none follows.
                 (title-start (match-end 0))
                 (title-end (progn (search-forward "<br>" nil t) (point)))
                 (title (apply 'concat
                               (split-string
                                (buffer-substring title-start title-end)
                                "<[^>]+>"))))
            (push (make-full-mail-header
                   0
                   (nnshimbun-mime-encode-string title)
                   nnshimbun-from-address
                   date id "" 0 0 (concat nnshimbun-url url))
                  headers)))
        headers))))
1084
1085
1086
1087 ;;; CNET Japan
1088
(defun nnshimbun-cnet-get-headers ()
  "Build article headers from a CNET Japan index page in the current buffer.
Every headline marker comment is located by searching forward from
point; the line after it is the subject and the line two lines above
the subject is expected to hold the article link.  Markers without such
a link are skipped.  The list is returned in reverse page order."
  (let ((case-fold-search t)
        headers)
    (while (search-forward "\n<!--*****\e$B8+=P$7\e(B*****-->\n" nil t)
      (let ((subject (buffer-substring (point) (gnus-point-at-eol))))
        ;; Inspect the link line, then resume after the marker.
        (save-excursion
          (forward-line -2)
          (if (looking-at "<a href=\"/\\(News/\\([0-9][0-9][0-9][0-9]\\)/Item/\\([0-9][0-9]\\([0-9][0-9]\\)\\([0-9][0-9]\\)-[0-9]+\\).html\\)\">")
              (let* ((url (match-string 1))
                     (year-str (match-string 2))
                     (id (format "<%s%s%%%s>"
                                 year-str
                                 (match-string 3)
                                 nnshimbun-current-group))
                     (date (nnshimbun-make-date-string
                            (string-to-number year-str)
                            (string-to-number (match-string 4))
                            (string-to-number (match-string 5)))))
                (setq headers
                      (cons (make-full-mail-header
                             0
                             (nnshimbun-mime-encode-string subject)
                             nnshimbun-from-address
                             date id "" 0 0 (concat nnshimbun-url url))
                            headers)))))))
    headers))
1113
1114
1115
1116 ;;; Wired
1117
;; Fetch the two Wired index pages into `nnshimbun-buffer' and return
;; an alist of the form ((GROUP . HEADERS) ...) with one entry for each
;; member of `nnshimbun-groups'.  Headers are pushed onto their group's
;; list as they are found.
(defun nnshimbun-wired-get-all-headers ()
  (save-excursion
    (set-buffer nnshimbun-buffer)
    (let ((group-header-alist (mapcar (lambda (g) (cons g nil)) nnshimbun-groups))
          (case-fold-search t)
          ;; Matches a story link, absolute or site-relative, for any
          ;; known group.  Submatches: 2 = path, 3 = group, 4 = story
          ;; number, 5/6/7 = year/month/day digits of that number.
          (regexp (format
                   "<a href=\"\\(%s\\|/\\)\\(news/news/\\(%s\\)/story/\\(\\([0-9][0-9][0-9][0-9]\\)\\([0-9][0-9]\\)\\([0-9][0-9]\\)[0-9]+\\)\\.html\\)\"><b>"
                   (regexp-quote nnshimbun-url)
                   (nnshimbun-regexp-opt nnshimbun-groups))))
      (dolist (xover (list (concat nnshimbun-url "news/news/index.html")
                           (concat nnshimbun-url "news/news/last_seven.html")))
        (erase-buffer)
        (nnshimbun-retrieve-url xover t)
        (goto-char (point-min))
        (while (re-search-forward regexp nil t)
          ;; `let*' evaluates in order: every `match-string' call runs
          ;; before the `search-forward' inside HEADER replaces the
          ;; match data.
          (let* ((url   (concat nnshimbun-url (match-string 2)))
                 (group (downcase (match-string 3)))
                 (id    (format "<%s%%%s>" (match-string 4) group))
                 (date  (nnshimbun-make-date-string
                         (string-to-number (match-string 5))
                         (string-to-number (match-string 6))
                         (string-to-number (match-string 7))))
                 ;; The subject is the text up to "</b>", tags removed.
                 (header (make-full-mail-header
                          0
                          (nnshimbun-mime-encode-string
                           (mapconcat 'identity
                                      (split-string
                                       (buffer-substring
                                        (match-end 0)
                                        (progn (search-forward "</b>" nil t) (point)))
                                       "<[^>]+>")
                                      ""))
                          nnshimbun-from-address
                          date id "" 0 0 url))
                 ;; NOTE(review): X is nil when the downcased group
                 ;; name is not a member of `nnshimbun-groups'; this
                 ;; presumably relies on the group names being lower
                 ;; case -- confirm.
                 (x (assoc group group-header-alist)))
            (setcdr x (cons header (cdr x))))))
      group-header-alist)))
1155
1156
1157
1158 ;;; www.yomiuri.co.jp
1159
;; Fetch the yomiuri.co.jp index page (the URL is obtained by
;; evaluating `nnshimbun-index-url') into `nnshimbun-buffer' and return
;; an alist of the form ((GROUP . HEADERS) ...) with one entry for each
;; member of `nnshimbun-groups'.  Each group's headers are taken from
;; the page section delimited by its "=start" and "=end" comments.
(defun nnshimbun-yomiuri-get-all-headers ()
  (save-excursion
    (set-buffer nnshimbun-buffer)
    (erase-buffer)
    (nnshimbun-retrieve-url (eval nnshimbun-index-url) t)
    (let ((case-fold-search t)
          (group-header-alist (mapcar (lambda (g) (cons g nil)) nnshimbun-groups)))
      (dolist (group nnshimbun-groups)
        (let (start)
          (goto-char (point-min))
          ;; Groups whose section markers are missing keep an empty list.
          (when (and (search-forward (format "\n<!-- /news/%s=start -->\n" group) nil t)
                     (setq start (point))
                     (search-forward (format "\n<!-- /news/%s=end -->\n" group) nil t))
            (forward-line -1)
            (save-restriction
              (narrow-to-region start (point))
              (goto-char start)
              ;; Submatches: 1 = leading directory digits, 2 = file
              ;; name, 3 = file base name, 4/5/6 = year/month/day.
              (while (re-search-forward
                      "<a href=\"/\\([0-9]+\\)/\\(\\(\\([0-9][0-9][0-9][0-9]\\)\\([0-9][0-9]\\)\\([0-9][0-9]\\)[A-z0-9]+\\)\\.htm\\)\"[^>]*>"
                      nil t)
                ;; The bindings are evaluated in order, so every
                ;; `match-string' call runs before the `search-forward'
                ;; inside SUBJECT replaces the match data.
                (let ((url   (concat (match-string 1) "a/" (match-string 2)))
                      (id    (format "<%s%s%%%s>"
                                     (match-string 1)
                                     (match-string 3)
                                     group))
                      (year  (string-to-number (match-string 4)))
                      (month (string-to-number (match-string 5)))
                      (day   (string-to-number (match-string 6)))
                      ;; The subject is the text up to the next "<br>",
                      ;; tags removed.
                      (subject (mapconcat
                                'identity
                                (split-string
                                 (buffer-substring
                                  (match-end 0)
                                  (progn (search-forward "<br>" nil t) (point)))
                                 "<[^>]+>")
                                ""))
                      date x)
                  ;; Strip a leading marker character from the subject.
                  (when (string-match "^\e$B"!\e(B" subject)
                    (setq subject (substring subject (match-end 0))))
                  ;; A trailing "(HH:MM)" is moved out of the subject
                  ;; and into the date.
                  (if (string-match "(\\([0-9][0-9]:[0-9][0-9]\\))$" subject)
                      (setq date (nnshimbun-make-date-string
                                  year month day (match-string 1 subject))
                            subject (substring subject 0 (match-beginning 0)))
                    (setq date (nnshimbun-make-date-string year month day)))
                  ;; Push the new header onto this group's list.
                  (setcdr (setq x (assoc group group-header-alist))
                          (cons (make-full-mail-header
                                 0
                                 (nnshimbun-mime-encode-string subject)
                                 nnshimbun-from-address
                                 date id "" 0 0 (concat nnshimbun-url url))
                                (cdr x)))))))))
      group-header-alist)))
1212
1213
1214
1215 ;;; Zdnet Japan
1216
(defun nnshimbun-zdnet-get-headers ()
  "Build article headers from a ZDNet Japan index page in the current buffer.
HTML comments are deleted from the buffer first.  Every headline link
then yields one header whose date and message id come from the digits
in the link path (the two-digit year is taken as 20YY) and whose subject
is the text up to the next \"</a>\" with tags removed.  The list is
returned in page order."
  (let ((case-fold-search t)
        headers)
    ;; Remove "<!-- ... -->" comments; stop at an unterminated one.
    (goto-char (point-min))
    (let (start)
      (while (and (search-forward "<!--" nil t)
                  (setq start (match-beginning 0))
                  (search-forward "-->" nil t))
        (delete-region start (point))))
    (goto-char (point-min))
    (while (re-search-forward
            "<a href=\"\\(/news/\\)?\\(\\([0-9][0-9]\\)\\([0-9][0-9]\\)/\\([0-9][0-9]\\)/\\([^\\.]+\\).html\\)\"><font size=\"4\"><strong>"
            nil t)
      (let* ((yy (match-string 3))
             (mm (match-string 4))
             (dd (match-string 5))
             (id (format "<%s%s%s%s%%%s>"
                         yy mm dd (match-string 6)
                         nnshimbun-current-group))
             (url (match-string 2))
             (year (+ 2000 (string-to-number yy)))
             (month (string-to-number mm))
             (day (string-to-number dd))
             ;; The title runs up to the next "</a>"; it is empty when
             ;; none follows.
             (title-start (match-end 0))
             (title-end (progn (search-forward "</a>" nil t) (point)))
             (title (apply 'concat
                           (split-string
                            (buffer-substring title-start title-end)
                            "<[^>]+>"))))
        (push (make-full-mail-header
               0
               (nnshimbun-mime-encode-string title)
               nnshimbun-from-address
               (nnshimbun-make-date-string year month day)
               id "" 0 0 (concat nnshimbun-url url))
              headers)))
    (nreverse headers)))
1254
1255
1256
1257 ;;; MLs on www.mew.org
1258
(defmacro nnshimbun-mew-concat-url (url)
  "Expand to a form building the full URL of URL in the current mew group.
The result concatenates `nnshimbun-url', the second element of the
current group's entry in `nnshimbun-mew-groups', a slash and URL."
  (list 'concat
        'nnshimbun-url
        '(nth 1 (assoc nnshimbun-current-group nnshimbun-mew-groups))
        "/"
        url))
1264
(defmacro nnshimbun-mew-reverse-order-p ()
  "Expand to a form reading the third element of the current group's entry.
The entry is looked up in `nnshimbun-mew-groups' under
`nnshimbun-current-group'."
  '(nth 2 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
1267
(defmacro nnshimbun-mew-spew-p ()
  "Expand to a form reading the fourth element of the current group's entry.
The entry is looked up in `nnshimbun-mew-groups' under
`nnshimbun-current-group'."
  '(nth 3 (assoc nnshimbun-current-group nnshimbun-mew-groups)))
1270
(defsubst nnshimbun-mew-retrieve-xover (aux)
  "Replace the current buffer's contents with index page number AUX.
Page 1 is \"index.html\"; any other page N is \"mailN.html\".  The page
is fetched from the current mew group with `nnshimbun-retrieve-url'."
  (erase-buffer)
  (let ((xover-page (if (= aux 1)
                        "index.html"
                      (format "mail%d.html" aux))))
    (nnshimbun-retrieve-url (nnshimbun-mew-concat-url xover-page) t)))
1276
;; Regexp matching a message link on a mew.org index page.
;; Submatch 1 is the relative URL ("msgNNN.html"), submatch 2 the
;; message number and submatch 3 the link text up to the next "<".
(defconst nnshimbun-mew-regexp "<A[^>]*HREF=\"\\(msg\\([0-9]+\\).html\\)\">\\([^<]+\\)<")
1278
(defmacro nnshimbun-mew-extract-header-values ()
  "Expand to code turning the last `nnshimbun-mew-regexp' match into a header.
The expansion deliberately captures names from its caller: it assigns
the variables `url', `id' and `subject', pushes onto `headers', and
throws `headers' to the catch tag `stop' as soon as the message id is
already known to `nnshimbun-search-id'.  It also moves point to the
line after the match."
  `(progn
     ;; The id is the message number minus one, zero-padded to five
     ;; digits, followed by \"%\" and the group name.
     (setq url (nnshimbun-mew-concat-url (match-string 1))
           id (format "<%05d%%%s>"
                      (1- (string-to-number (match-string 2)))
                      nnshimbun-current-group)
           subject (match-string 3))
     (forward-line 1)
     (if (nnshimbun-search-id nnshimbun-current-group id)
         (throw 'stop headers)
       ;; The From field comes from an <EM> element at the start of the
       ;; line after the link, when there is one.
       (push (make-full-mail-header
              0
              (nnshimbun-mime-encode-string subject)
              (if (looking-at "<EM>\\([^<]+\\)<")
                  (nnshimbun-mime-encode-string (match-string 1))
                "")
              "" id "" 0 0 url)
             headers))))
1297
;; Accessor for the subject of a header object.  Which definition is
;; used is decided when this form is evaluated (at compile time as well
;; as at load time), depending on whether `mime-entity-fetch-field' is
;; defined.
(eval-and-compile
  (if (fboundp 'mime-entity-fetch-field)
      ;; For Semi-Gnus.
      (defmacro nnshimbun-mew-mail-header-subject (header)
        `(mime-entity-fetch-field ,header 'Subject))
    ;; For pure Gnus.
    (defalias 'nnshimbun-mew-mail-header-subject 'mail-header-subject)))
1305
(defun nnshimbun-mew-get-headers ()
  "Return the new headers of the current mew group.
The headers come from `nnshimbun-mew-get-headers-1'.  When
`nnshimbun-mew-spew-p' is non-nil for the group, the current buffer is
replaced by the group's NOV data and every header whose subject already
occurs there as the second tab-separated field of a line is dropped."
  (if (nnshimbun-mew-spew-p)
      (let ((headers (nnshimbun-mew-get-headers-1)))
        ;; From here on the current buffer is a scratch copy of the NOV
        ;; data, used only for the subject lookups below.
        (erase-buffer)
        (insert-buffer-substring (nnshimbun-open-nov nnshimbun-current-group))
        ;; The lambda returns nil for duplicates; `delq' removes them.
        (delq nil
              (mapcar
               (lambda (header)
                 (goto-char (point-min))
                 (let ((subject (nnshimbun-mew-mail-header-subject header))
                       (found))
                   (while (and (not found)
                               (search-forward subject nil t))
                     ;; The hit counts only when a tab precedes it and
                     ;; no further tab lies between that tab and the
                     ;; beginning of its line.
                     (if (not (and (search-backward "\t" nil t)
                                   (not (search-backward "\t" (gnus-point-at-bol) t))))
                         (forward-line 1)
                       (setq found t)))
                   (if found
                       nil
                     ;; Append the accepted header to the scratch copy
                     ;; so that later headers with the same subject are
                     ;; treated as duplicates too.
                     (goto-char (point-max))
                     (nnheader-insert-nov header)
                     header)))
               headers)))
    (nnshimbun-mew-get-headers-1)))
1330
(defun nnshimbun-mew-get-headers-1 ()
  "Collect headers from the mew.org index pages of the current group.
The current buffer must hold an index page with a \"Last Page\" link,
which gives the number of pages; nil is returned when that link is not
found.  Collection stops at the first message id that is already known
\(see `nnshimbun-mew-extract-header-values')."
  (let (headers)
    (when (re-search-forward
           "<A[^>]*HREF=\"mail\\([0-9]+\\)\\.html\">\\[?Last Page\\]?</A>" nil t)
      (let ((limit (string-to-number (match-string 1))))
        ;; The extraction macro throws `headers' to this tag.
        (catch 'stop
          (if (nnshimbun-mew-reverse-order-p)
              ;; Scan the page already in the buffer, then pages 2 up to
              ;; LIMIT, each from top to bottom.
              (let ((aux 1))
                (while (let (id url subject)
                         (while (re-search-forward nnshimbun-mew-regexp nil t)
                           (nnshimbun-mew-extract-header-values))
                         (< aux limit))
                  (nnshimbun-mew-retrieve-xover (setq aux (1+ aux)))))
            ;; Otherwise fetch pages LIMIT down to 1 and scan each one
            ;; from bottom to top.
            (while (> limit 0)
              (nnshimbun-mew-retrieve-xover limit)
              (setq limit (1- limit))
              (let (id url subject)
                (goto-char (point-max))
                (while (re-search-backward nnshimbun-mew-regexp nil t)
                  (nnshimbun-mew-extract-header-values)
                  ;; The macro moved one line down; step back above the
                  ;; link just handled.
                  (forward-line -2)))))
          headers)))))
1353
1354
1355
1356 ;;; MLs on www.xemacs.org
1357
(defmacro nnshimbun-xemacs-concat-url (url)
  "Expand to a form building the full URL of URL in the current group.
The result concatenates `nnshimbun-url', `nnshimbun-current-group', a
slash and URL."
  (list 'concat 'nnshimbun-url 'nnshimbun-current-group "/" url))
1360
(defun nnshimbun-xemacs-get-headers ()
  "Collect headers from the xemacs.org archive of the current group.
The current buffer must hold the page listing the monthly indexes.
Each monthly index is fetched in page order and scanned from bottom to
top; every message link yields a header, and collection stops at the
first message id already known to `nnshimbun-search-id'.  Each id is
echoed with `message' as it is processed."
  (let (headers auxs)
    (catch 'stop
      ;; Gather the YYYYMM directory names in page order.
      (let ((index-regexp
             (concat "<A HREF=\"/" nnshimbun-current-group
                     "/\\([12][0-9][0-9][0-9][0-1][0-9]\\)/\">\\[Index\\]")))
        (while (re-search-forward index-regexp nil t)
          (push (match-string 1) auxs)))
      (dolist (aux (nreverse auxs))
        (erase-buffer)
        (nnshimbun-retrieve-url
         (nnshimbun-xemacs-concat-url (concat aux "/")))
        (goto-char (point-max))
        (while (re-search-backward
                "<A[^>]*HREF=\"\\(msg\\([0-9]+\\).html\\)\">\\([^<]+\\)<"
                nil t)
          (let ((url (nnshimbun-xemacs-concat-url
                      (concat aux "/" (match-string 1))))
                (id (format "<%s%05d%%%s>"
                            aux
                            (string-to-number (match-string 2))
                            nnshimbun-current-group))
                (subject (match-string 3)))
            (forward-line 1)
            (when (nnshimbun-search-id nnshimbun-current-group id)
              (throw 'stop headers))
            ;; The sender is read from the line after the link.
            (push (make-full-mail-header
                   0
                   (nnshimbun-mime-encode-string subject)
                   (if (looking-at "<td><em>\\([^<]+\\)<")
                       (match-string 1)
                     "")
                   "" id "" 0 0 url)
                  headers)
            (message "%s" id)
            ;; Step back above the link just handled.
            (forward-line -2)))))
    headers))
1400
1401 ;;; MLs on www.jp.netbsd.org
1402
(defun nnshimbun-netbsd-get-headers ()
  "Collect headers from the jp.netbsd.org archive of the current group.
The current buffer must hold the page listing the monthly directories.
The \"maillist.html\" page of each month is fetched in page order and
scanned from top to bottom; every message link yields a header, and
collection stops at the first message id already known to
`nnshimbun-search-id'."
  (let ((case-fold-search t)
        headers months)
    (goto-char (point-min))
    (while (re-search-forward
            "<A HREF=\"\\([0-9]+\\)/\\(threads.html\\)?\">" nil t)
      (push (match-string 1) months))
    (catch 'exit
      (dolist (month (nreverse months))
        (erase-buffer)
        (nnshimbun-retrieve-url
         (format "%s%s/%s/maillist.html"
                 nnshimbun-url nnshimbun-current-group month)
         t)
        (while (re-search-forward
                "<A[^>]*HREF=\"\\(msg\\([0-9]+\\)\\.html\\)\">\\([^<]+\\)</A>"
                nil t)
          (let ((url (format "%s%s/%s/%s"
                             nnshimbun-url
                             nnshimbun-current-group
                             month
                             (match-string 1)))
                (id (format "<%s%05d%%%s>"
                            month
                            (string-to-number (match-string 2))
                            nnshimbun-current-group))
                (subject (match-string 3)))
            (when (nnshimbun-search-id nnshimbun-current-group id)
              (throw 'exit headers))
            ;; The sender is read from the text right after the link.
            (push (make-full-mail-header
                   0
                   (nnshimbun-mime-encode-string subject)
                   (if (looking-at "</STRONG> *<EM>\\([^<]+\\)<")
                       (nnshimbun-mime-encode-string (match-string 1))
                     "")
                   "" id "" 0 0 url)
                  headers))))
      headers)))
1440
1441 ;;; MLs using fml
(defun nnshimbun-fml-get-headers ()
  "Return a list of headers built from the index pages of an fml archive.
Starting at point, collect every link of the form NUMBER/index.html
\(NUMBER may carry a \".week\" or \".month\" suffix) from the current
buffer.  Each linked directory is then fetched into the buffer (erasing
what was there), tabs are replaced by spaces, and one header per article
entry is built with `make-full-mail-header' from the entry's number,
date, subject and sender.  Scanning stops as soon as
`nnshimbun-search-id' returns non-nil for a generated Message-ID.
Headers are accumulated with `push', so the article scanned last comes
first in the returned list."
  (let (headers auxs)
    (catch 'stop
      ;; Gather the directory names; they are pushed in reverse and put
      ;; back into page order by the `nreverse' below.
      (while (re-search-forward
              "<a href=\"\\([0-9]+\\(\\.week\\|\\.month\\)?\\)/index\\.html\">"
              nil t)
        (push (match-string 1) auxs))
      (dolist (aux (nreverse auxs))
        (erase-buffer)
        (nnshimbun-retrieve-url (concat nnshimbun-url aux "/"))
        ;; The entry regexp below expects single spaces between elements.
        (subst-char-in-region (point-min) (point-max) ?\t ?  t)
        (let (id url date subject from)
          (goto-char (point-min))
          (while (re-search-forward
                  "<LI><A HREF=\"\\([0-9]+\\.html\\)\">Article .*</A> <DIV><SPAN CLASS=article>Article <SPAN CLASS=article-value>\\([0-9]+\\)</SPAN></SPAN> at <SPAN CLASS=Date-value>\\([^<]*\\)</SPAN> <SPAN CLASS=Subject>Subject: <SPAN CLASS=Subject-value>\\([^<]*\\)</SPAN></SPAN></DIV><DIV><SPAN CLASS=From>From: <SPAN CLASS=From-value>\\([^<]*\\)</SPAN></SPAN></DIV>"
                  nil t)
            (setq url (concat nnshimbun-url aux "/" (match-string 1))
                  ;; Message-ID: <DIRECTORY + zero-padded number % GROUP>.
                  id (format "<%s%05d%%%s>"
                             aux
                             (string-to-number (match-string 2))
                             nnshimbun-current-group)
                  date (match-string 3)
                  subject (match-string 4)
                  from (match-string 5))
            (forward-line 1)
            (if (nnshimbun-search-id nnshimbun-current-group id)
                (throw 'stop headers)
              (push (make-full-mail-header
                     0
                     (nnshimbun-mime-encode-string subject)
                     from date id "" 0 0 url)
                    headers))))))
    headers))
1477
1478 (provide 'nnshimbun)
1479 ;;; nnshimbun.el ends here.