1 ;;; shimbun.el --- interfacing with web newspapers -*- coding: junet; -*-
3 ;; Author: TSUCHIYA Masatoshi <tsuchiya@pine.kuee.kyoto-u.ac.jp>
4 ;; Akihiro Arisawa <ari@atesoft.advantest.co.jp>
5 ;; Yuuichi Teranishi <teranisi@gohome.org>
11 ;; This program is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; This program is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with this program; if not, you can either send email to this
23 ;; program's maintainer or write to: The Free Software Foundation,
24 ;; Inc.; 59 Temple Place, Suite 330; Boston, MA 02111-1307, USA.
28 ;; Original code was nnshimbun.el written by
29 ;; TSUCHIYA Masatoshi <tsuchiya@pine.kuee.kyoto-u.ac.jp>.
36 ;; shimbun-close-group
42 ;; Shimbun Header API:
44 ;; shimbun-header-subject
45 ;; shimbun-header-set-subject
46 ;; shimbun-header-from
47 ;; shimbun-header-set-from
48 ;; shimbun-header-date
49 ;; shimbun-header-set-date
51 ;; shimbun-header-set-id
52 ;; shimbun-header-references
53 ;; shimbun-header-set-references
54 ;; shimbun-header-chars
55 ;; shimbun-header-set-chars
56 ;; shimbun-header-lines
57 ;; shimbun-header-set-lines
58 ;; shimbun-header-xref
59 ;; shimbun-header-set-xref
60 ;; shimbun-header-extra
61 ;; shimbun-header-set-extra
62 ;; shimbun-header-insert
64 (eval-when-compile (require 'cl))
65 (eval-when-compile (require 'static))
68 (require 'eword-encode)
73 (luna-define-class shimbun ()
74 (server current-group groups headers hash x-face
75 url coding-system from-address
76 content-start content-end))
77 (luna-define-internal-accessors 'shimbun))
79 (defvar shimbun-x-face-alist
82 "X-Face: +Oh!C!EFfmR$+Zw{dwWW]1e_>S0rnNCA*CX|bIy3rr^<Q#lf&~ADU:X!t5t>
83 gW5)Q]N{MmnL]suPpL|gFjV{S|]a-:)\\FR7GRf9uL:ue5_=;h{V%@()={u
84 Td@l?eXBppF%`6W%;h`#]2q+f*81n$Bh|t")))
87 "X-Face: 0p7.+XId>z%:!$ahe?x%+AEm37Abvn]n*GGh+>v=;[3`a{1l
88 qO[$,~3C3xU_ri>[JwJ!9l0~Y`b*eXAQ:*q=bBI_=ro*?]4:
89 |n>]ZiLZ2LEo^2nr('C<+`lO~/!R[lH'N'4X&%\\I}8T!wt")))
92 "X-Face: \"yhMDxMBowCFKt;5Q$s_Wx)/'L][0@c\"#n2BwH{7mg]5^w1D]\"K^R
93 ]&fZ5xtt1Ynu6V;Cv(@BcZUf9IV$($6TZ`L)$,cegh`b:Uwy`8}#D
94 b-kyCsr_UMRz=,U|>-:&`05lXB4(;h{[&~={Imb-az7&U5?|&X_8c
95 ;#'L|f.P,]|\\50pgSVw_}byL+%m{TrS[\"Ew;dbskaBL[ipk2m4V")))
98 "X-Face: 88Zbg!1nj{i#[*WdSZNrn1$Cdfat,zsG`P)OLo=U05q:RM#72\\p;3XZ
99 ~j|7T)QC7\"(A;~HrfP.D}o>Z.]=f)rOBz:A^G*M3Ea5JCB$a>BL/y!")))
102 "X-Face: Ygq$6P.,%Xt$U)DS)cRY@k$VkW!7(X'X'?U{{osjjFG\"E]hND;SPJ-J?O?R|a?L
103 g2$0rVng=O3Lt}?~IId8Jj&vP^3*o=LKUyk(`t%0c!;t6REk=JbpsEn9MrN7gZ%"))))
104 "Alist of server vs. alist of group vs. X-Face field. It looks like:
106 \((\"asahi\" . ((\"national\" . \"X-face: ***\")
107 (\"business\" . \"X-Face: ***\")
110 (\"default\" . \"X-face: ***\")))
111 (\"sponichi\" . ((\"baseball\" . \"X-face: ***\")
112 (\"soccer\" . \"X-Face: ***\")
115 (\"default\" . \"X-face: ***\")))
117 (\"default\" . ((\"default\" . \"X-face: ***\")))")
119 (defconst shimbun-meta-content-type-charset-regexp
121 (concat "<meta[ \t]+http-equiv=\"?Content-type\"?[ \t]+content=\"\\([^;]+\\)"
122 ";[ \t]*charset=\"?\\([^\"]+\\)\"?"
124 "Regexp used in parsing `<META HTTP-EQUIV=\"Content-Type\" content=\"...;charset=...\">
125 for a charset indication")
127 (defconst shimbun-meta-charset-content-type-regexp
129 (concat "<meta[ \t]+content=\"\\([^;]+\\)"
130 ";[ \t]*charset=\"?\\([^\"]+\\)\"?"
131 "[ \t]+http-equiv=\"?Content-type\"?>"))
132 "Regexp used in parsing `<META content=\"...;charset=...\" HTTP-EQUIV=\"Content-Type\">
133 for a charset indication")
135 (defvar shimbun-hash-length 997
136 "Length of header hashtable.")
138 (static-when (boundp 'MULE)
139 (unless (coding-system-p 'euc-japan)
140 (copy-coding-system '*euc-japan* 'euc-japan))
141 (unless (coding-system-p 'shift_jis)
142 (copy-coding-system '*sjis* 'shift_jis))
144 (defalias-maybe 'coding-system-category 'get-code-mnemonic)))
146 (static-if (and (ignore-errors (require 'w3m))
147 (fboundp 'w3m-retrieve))
150 (defun shimbun-retrieve-url (shimbun url &optional no-cache)
151 "Rertrieve URL contents and insert to current buffer."
152 (when (w3m-retrieve url nil no-cache)
153 (insert-buffer w3m-work-buffer-name))))
156 (defun shimbun-retrieve-url (shimbun url &optional no-cache)
157 "Rertrieve URL contents and insert to current buffer."
158 (let ((buf (current-buffer))
159 (url-working-buffer url-working-buffer))
160 (let ((old-asynch (default-value 'url-be-asynchronous))
161 (old-caching (default-value 'url-automatic-caching))
162 (old-mode (default-value 'url-standalone-mode)))
163 (setq-default url-be-asynchronous nil)
165 (setq-default url-automatic-caching nil)
166 (setq-default url-standalone-mode nil))
168 (let ((coding-system-for-read 'binary)
169 (coding-system-for-write 'binary)
170 (input-coding-system 'binary)
171 (output-coding-system 'binary)
172 (default-enable-multibyte-characters nil))
174 (setq url-working-buffer
175 (cdr (url-retrieve url no-cache))))
177 (setq-default url-be-asynchronous old-asynch)
178 (setq-default url-automatic-caching old-caching)
179 (setq-default url-standalone-mode old-mode)))
181 (or (and (boundp 'url-current-mime-charset)
182 (symbol-value 'url-current-mime-charset))
183 (let ((case-fold-search t))
184 (goto-char (point-min))
185 (if (or (re-search-forward
186 shimbun-meta-content-type-charset-regexp nil t)
188 shimbun-meta-charset-content-type-regexp nil t))
189 (buffer-substring-no-properties (match-beginning 2)
191 (decode-coding-region
192 (point-min) (point-max)
194 (let ((mime-charset-coding-system-alist
195 (append '((euc-jp . euc-japan)
196 (shift-jis . shift_jis)
197 (shift_jis . shift_jis)
199 (x-euc-jp . euc-japan)
200 (x-shift-jis . shift_jis)
201 (x-shift_jis . shift_jis)
202 (x-sjis . shift_jis))
203 mime-charset-coding-system-alist)))
204 (mime-charset-to-coding-system charset))
205 (let ((default (condition-case nil
206 (coding-system-category
207 (shimbun-coding-system-internal shimbun))
209 (candidate (detect-coding-region (point-min) (point-max))))
210 (unless (listp candidate)
211 (setq candidate (list candidate)))
213 (dolist (coding candidate)
214 (if (eq default (coding-system-category coding))
215 (throw 'coding coding)))
216 (if (eq (coding-system-category 'binary)
217 (coding-system-category (car candidate)))
218 (shimbun-coding-system-internal shimbun)
219 (car candidate)))))))
220 (set-buffer-multibyte t)
222 (insert-buffer url-working-buffer)
223 (kill-buffer url-working-buffer)))
224 ;; End of shimbun-retrieve-url definition
227 ;;; Implementation of Header API.
228 (defun shimbun-make-header (&optional number subject from date id
229 references chars lines xref
231 (vector number subject from date id references chars lines xref extra))
233 ;;(defsubst shimbun-header-number (header)
236 (defsubst shimbun-header-field-value ()
239 (buffer-substring (match-end 0) (std11-field-end))
242 (defsubst shimbun-header-subject (header)
245 (defsubst shimbun-header-set-subject (header subject)
246 (aset header 1 subject))
248 (defsubst shimbun-header-from (header)
251 (defsubst shimbun-header-set-from (header from)
252 (aset header 2 from))
254 (defsubst shimbun-header-date (header)
257 (defsubst shimbun-header-set-date (header date)
258 (aset header 3 date))
260 (defsubst shimbun-header-id (header)
263 (defsubst shimbun-header-set-id (header id)
266 (defsubst shimbun-header-references (header)
269 (defsubst shimbun-header-set-references (header references)
270 (aset header 5 references))
272 (defsubst shimbun-header-chars (header)
275 (defsubst shimbun-header-set-chars (header chars)
276 (aset header 6 chars))
278 (defsubst shimbun-header-lines (header)
281 (defsubst shimbun-header-set-lines (header lines)
282 (aset header 7 lines))
284 (defsubst shimbun-header-xref (header)
287 (defsubst shimbun-header-set-xref (header xref)
288 (aset header 8 xref))
290 (defsubst shimbun-header-extra (header)
293 (defsubst shimbun-header-set-extra (header extra)
294 (aset header 9 extra))
296 (defvar shimbun-hash-length 997
297 "Length of shimbun-hash.")
299 (defun shimbun-header-insert (header)
300 (insert "Subject: " (or (shimbun-header-subject header) "(none)") "\n"
301 "From: " (or (shimbun-header-from header) "(nobody)") "\n"
302 "Date: " (or (shimbun-header-date header) "") "\n"
303 "Message-ID: " (shimbun-header-id header) "\n")
304 (let ((refs (shimbun-header-references header)))
307 (insert "References: " refs "\n")))
308 (insert "Lines: " (number-to-string (or (shimbun-header-lines header) 0))
310 "Xref: " (or (shimbun-header-xref header) "") "\n"))
312 ;;; Implementation of Shimbun API.
314 (defvar shimbun-attributes
315 '(url groups coding-system from-address content-start content-end))
317 (defun shimbun-open (server)
318 "Open a shimbun for SERVER."
319 (require (intern (concat "sb-" server)))
320 (let (url groups coding-system from-address content-start content-end)
321 (dolist (attr shimbun-attributes)
323 (symbol-value (intern-soft
324 (concat "shimbun-" server "-" (symbol-name attr))))))
325 (luna-make-entity (intern (concat "shimbun-" server))
329 :coding-system coding-system
330 :from-address from-address
331 :content-start content-start
332 :content-end content-end)))
334 (defun shimbun-groups (shimbun)
335 "Return a list of groups which are available in the SHIMBUN."
336 (shimbun-groups-internal shimbun))
338 (defun shimbun-open-group (shimbun group)
339 "Open a SHIMBUN GROUP."
340 (unless (shimbun-current-group-internal shimbun)
341 ; (condition-case nil
342 (if (member group (shimbun-groups-internal shimbun))
344 (shimbun-set-current-group-internal shimbun group)
345 (let ((x-faces (cdr (or (assoc (shimbun-server-internal shimbun)
346 shimbun-x-face-alist)
347 (assoc "default" shimbun-x-face-alist)))))
348 (shimbun-set-x-face-internal shimbun
349 (cdr (or (assoc group x-faces)
350 (assoc "default" x-faces)))))
352 (shimbun-retrieve-url shimbun (shimbun-index-url shimbun))
353 (shimbun-set-headers-internal shimbun
354 (shimbun-get-headers shimbun)))
355 (shimbun-set-hash-internal shimbun
356 (make-vector shimbun-hash-length 0))
357 (dolist (header (shimbun-headers-internal shimbun))
358 (set (intern (shimbun-header-id header)
359 (shimbun-hash-internal shimbun))
361 (error "Cannot open group %s" group))))
362 ; (error (shimbun-set-current-group-internal shimbun nil)))))
364 (defun shimbun-close-group (shimbun)
365 "Close opened group of SHIMBUN."
366 (when (shimbun-current-group-internal shimbun)
367 (shimbun-set-current-group-internal shimbun nil)
368 (shimbun-set-headers-internal shimbun nil)
369 (shimbun-set-hash-internal shimbun nil)))
371 (defun shimbun-headers (shimbun)
372 "Return a SHIMBUN header list."
373 (shimbun-headers-internal shimbun))
375 (defun shimbun-header (shimbun id)
376 "Return a SHIMBUN header which corresponds to ID."
377 (when (shimbun-current-group-internal shimbun)
378 (let ((sym (intern-soft id (shimbun-hash-internal shimbun))))
380 (symbol-value sym)))))
382 (defun shimbun-article (shimbun id &optional outbuf)
383 "Retrieve a SHIMBUN article which corresponds to ID to the OUTBUF.
384 If OUTBUF is not specified, article is retrieved to the current buffer."
385 (when (shimbun-current-group-internal shimbun)
386 (let* ((header (shimbun-header shimbun id))
387 (xref (shimbun-header-xref header)))
388 (with-current-buffer (or outbuf (current-buffer))
390 (or (with-temp-buffer
391 (shimbun-retrieve-url shimbun xref)
392 (message "shimbun: Make contents...")
393 (goto-char (point-min))
394 (prog1 (shimbun-make-contents shimbun header)
395 (message "shimbun: Make contents...done")))
398 (defsubst shimbun-make-html-contents (shimbun header)
400 (when (and (re-search-forward (shimbun-content-start-internal shimbun)
403 (re-search-forward (shimbun-content-end-internal shimbun)
405 (delete-region (match-beginning 0) (point-max))
406 (delete-region (point-min) start))
407 (goto-char (point-min))
408 (shimbun-header-insert header)
409 (insert "Content-Type: text/html; charset=ISO-2022-JP\n"
410 "MIME-Version: 1.0\n")
411 (when (shimbun-x-face-internal shimbun)
412 (insert (shimbun-x-face-internal shimbun))
416 (encode-coding-string (buffer-string)
417 (mime-charset-to-coding-system "ISO-2022-JP"))))
419 (luna-define-generic shimbun-make-contents (shimbun header)
420 "Return a content string of SHIMBUN article using current buffer content.
421 HEADER is a header structure obtained via `shimbun-get-headers'.")
423 (luna-define-method shimbun-make-contents ((shimbun shimbun) header)
424 (shimbun-make-html-contents shimbun header))
426 (luna-define-generic shimbun-index-url (shimbun)
427 "Return a index URL of SHIMBUN.")
429 ;; Default is same as base url.
430 (luna-define-method shimbun-index-url ((shimbun shimbun))
431 (shimbun-url-internal shimbun))
433 (luna-define-generic shimbun-get-headers (shimbun)
434 "Return a shimbun header list of SHIMBUN.")
436 (luna-define-generic shimbun-close (shimbun)
439 (luna-define-method shimbun-close ((shimbun shimbun))
440 (shimbun-close-group shimbun))
443 (defun shimbun-mime-encode-string (string)
446 (split-string (eword-encode-string
447 (shimbun-decode-entities-string string)) "\n")
450 (defun shimbun-make-date-string (year month day &optional time)
451 (format "%02d %s %04d %s +0900"
453 (aref [nil "Jan" "Feb" "Mar" "Apr" "May" "Jun"
454 "Jul" "Aug" "Sep" "Oct" "Nov" "Dec"]
460 ((< year 1000) ; possible 3-digit years.
465 (if (fboundp 'regexp-opt)
466 (defalias 'shimbun-regexp-opt 'regexp-opt)
467 (defun shimbun-regexp-opt (strings &optional paren)
468 "Return a regexp to match a string in STRINGS.
469 Each string should be unique in STRINGS and should not contain any regexps,
470 quoted or not. If optional PAREN is non-nil, ensure that the returned regexp
471 is enclosed by at least one regexp grouping construct."
472 (let ((open-paren (if paren "\\(" "")) (close-paren (if paren "\\)" "")))
473 (concat open-paren (mapconcat 'regexp-quote strings "\\|") close-paren))))
474 ;; Fast fill-region function
476 (defvar shimbun-fill-column (min 80 (- (frame-width) 4)))
478 (defconst shimbun-kinsoku-bol-list
479 (append "!)-_~}]:;',.?
\e$B!"!#!$!%!&!'!(!)!*!+!,!-!.!/!0!1!2!3!4!5!6!7
\e(B\
480 \e$B!8!9!:!;!<!=!>!?!@!A!B!C!D!E!G!I!K!M!O!Q!S!U!W!Y![!k!l!m!n$!$#$%$'$)
\e(B\
481 \e$B$C$c$e$g$n%!%#%%%'%)%C%c%e%g%n%u%v
\e(B" nil))
483 (defconst shimbun-kinsoku-eol-list
484 (append "({[`
\e$B!F!H!J!L!N!P!R!T!V!X!Z!k!l!m!x
\e(B" nil))
486 (defun shimbun-fill-line ()
488 (let ((top (point)) chr)
489 (while (if (>= (move-to-column shimbun-fill-column)
492 (if (memq (preceding-char) shimbun-kinsoku-eol-list)
495 (while (memq (preceding-char) shimbun-kinsoku-eol-list)
498 (while (memq (setq chr (following-char)) shimbun-kinsoku-bol-list)
500 (if (looking-at "\\s-+")
501 (or (eolp) (delete-region (point) (match-end 0)))
502 (or (> (char-width chr) 1)
503 (re-search-backward "\\<" top t)
505 (or (eolp) (insert "\n"))))))
510 (defsubst shimbun-shallow-rendering ()
511 (goto-char (point-min))
512 (while (search-forward "<p>" nil t)
514 (goto-char (point-min))
515 (while (search-forward "<br>" nil t)
517 (shimbun-remove-markup)
518 (shimbun-decode-entities)
519 (goto-char (point-min))
520 (while (shimbun-fill-line))
521 (goto-char (point-min))
522 (when (skip-chars-forward "\n")
523 (delete-region (point-min) (point)))
524 (while (search-forward "\n\n" nil t)
526 (when (skip-chars-forward "\n")
527 (delete-region p (point)))))
528 (goto-char (point-max))
529 (when (skip-chars-backward "\n")
530 (delete-region (point) (point-max)))
533 ;;; entity decoding (stolen from w3m.el)
535 (defconst shimbun-entity-alist ; html character entities and values
537 (let ((basic-entity-alist
546 ("iexcl" . 161) ("cent" . 162) ("pound" . 163)
547 ("curren" . 164) ("yen" . 165) ("brvbar" . 166) ("sect" . 167)
548 ("uml" . 168) ("copy" . 169) ("ordf" . 170) ("laquo" . 171)
549 ("not" . 172) ("shy" . 173) ("reg" . 174) ("macr" . 175)
550 ("deg" . 176) ("plusmn" . 177) ("sup2" . 178) ("sup3" . 179)
551 ("acute" . 180) ("micro" . 181) ("para" . 182) ("middot" . 183)
552 ("cedil" . 184) ("sup1" . 185) ("ordm" . 186) ("raquo" . 187)
553 ("frac14" . 188) ("frac12" . 189) ("frac34" . 190) ("iquest" . 191)
554 ("Agrave" . 192) ("Aacute" . 193) ("Acirc" . 194) ("Atilde" . 195)
555 ("Auml" . 196) ("Aring" . 197) ("AElig" . 198) ("Ccedil" . 199)
556 ("Egrave" . 200) ("Eacute" . 201) ("Ecirc" . 202) ("Euml" . 203)
557 ("Igrave" . 204) ("Iacute" . 205) ("Icirc" . 206) ("Iuml" . 207)
558 ("ETH" . 208) ("Ntilde" . 209) ("Ograve" . 210) ("Oacute" . 211)
559 ("Ocirc" . 212) ("Otilde" . 213) ("Ouml" . 214) ("times" . 215)
560 ("Oslash" . 216) ("Ugrave" . 217) ("Uacute" . 218) ("Ucirc" . 219)
561 ("Uuml" . 220) ("Yacute" . 221) ("THORN" . 222) ("szlig" . 223)
562 ("agrave" . 224) ("aacute" . 225) ("acirc" . 226) ("atilde" . 227)
563 ("auml" . 228) ("aring" . 229) ("aelig" . 230) ("ccedil" . 231)
564 ("egrave" . 232) ("eacute" . 233) ("ecirc" . 234) ("euml" . 235)
565 ("igrave" . 236) ("iacute" . 237) ("icirc" . 238) ("iuml" . 239)
566 ("eth" . 240) ("ntilde" . 241) ("ograve" . 242) ("oacute" . 243)
567 ("ocirc" . 244) ("otilde" . 245) ("ouml" . 246) ("divide" . 247)
568 ("oslash" . 248) ("ugrave" . 249) ("uacute" . 250) ("ucirc" . 251)
569 ("uuml" . 252) ("yacute" . 253) ("thorn" . 254) ("yuml" . 255))))
570 (append basic-entity-alist
577 (static-if (boundp 'MULE) lc-ltn1 'latin-iso8859-1)
581 (defconst shimbun-entity-regexp
583 (format "&\\(%s\\|#[0-9]+\\);?"
584 (if (fboundp 'regexp-opt)
585 (let ((fn (function regexp-opt)))
586 ;; Don't funcall directly for avoiding compile warning.
587 (funcall fn (mapcar (function car)
588 shimbun-entity-alist)))
589 (mapconcat (lambda (s)
590 (regexp-quote (car s)))
594 (defvar shimbun-entity-db nil) ; nil means un-initialized
595 (defconst shimbun-entity-db-size 13) ; size of obarray
597 (defun shimbun-entity-db-setup ()
598 ;; initialise entity database (obarray)
599 (setq shimbun-entity-db (make-vector shimbun-entity-db-size 0))
600 (dolist (elem shimbun-entity-alist)
601 (set (intern (car elem) shimbun-entity-db)
604 (defsubst shimbun-entity-value (name)
605 ;; initialise if need
606 (if (null shimbun-entity-db)
607 (shimbun-entity-db-setup))
608 ;; return value of specified entity, or empty string for unknown entity.
609 (or (symbol-value (intern-soft name shimbun-entity-db))
610 (if (not (char-equal (string-to-char name) ?#))
611 (concat "&" name) ; unknown entity
612 ;; case of immediate character (accept only 0x20 .. 0x7e)
613 (let ((char (string-to-int (substring name 1)))
615 ;; make character's representation with learning
616 (set (setq sym (intern name shimbun-entity-db))
617 (if (or (< char 32) (< 127 char))
618 "~" ; un-supported character
619 (char-to-string char)))))))
621 (defun shimbun-decode-entities ()
622 "Decode entities in the current buffer."
624 (goto-char (point-min))
625 (while (re-search-forward shimbun-entity-regexp nil t)
626 (replace-match (shimbun-entity-value (match-string 1)) nil t))))
628 (defun shimbun-decode-entities-string (string)
629 "Decode entities in the STRING."
632 (shimbun-decode-entities)
635 (defun shimbun-remove-markup ()
636 "Remove all HTML markup, leaving just plain text."
638 (goto-char (point-min))
639 (while (search-forward "<!--" nil t)
640 (delete-region (match-beginning 0)
641 (or (search-forward "-->" nil t)
643 (goto-char (point-min))
644 (while (re-search-forward "<[^>]+>" nil t)
645 (replace-match "" t t))))
648 ;;; shimbun.el ends here.