Synch to Gnus 200312071540.
[elisp/gnus.git-] / lisp / spam.el
1 ;; TODO: spam scores, detection of spam in newsgroups, cross-server splitting, remote processing, training through files
2
3 ;;; spam.el --- Identifying spam
4 ;; Copyright (C) 2002, 2003 Free Software Foundation, Inc.
5
6 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7 ;; Keywords: network
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;;; This module addresses a few aspects of spam control under Gnus.  Page
29 ;;; breaks are used for grouping declarations and documentation relating to
30 ;;; each particular aspect.
31
32 ;;; The integration with Gnus is not yet complete.  See various `FIXME'
33 ;;; comments, below, for supplementary explanations or discussions.
34
35 ;;; Several TODO items are marked as such
36
37 ;;; Code:
38
39 (require 'path-util)
40
41 (eval-when-compile (require 'cl))
42
43 (require 'gnus-sum)
44
45 (require 'gnus-uu)                      ; because of key prefix issues
46 ;;; for the definitions of group content classification and spam processors
47 (require 'gnus) 
48 (require 'message)              ;for the message-fetch-field functions
49
50 ;; for nnimap-split-download-body-default
51 (eval-when-compile (require 'nnimap))
52
53 ;; autoload query-dig
54 (eval-and-compile
55   (autoload 'query-dig "dig"))
56
57 ;; autoload spam-report
58 (eval-and-compile
59   (autoload 'spam-report-gmane "spam-report"))
60
61 ;; autoload gnus-registry
62 (eval-and-compile
63   (autoload 'gnus-registry-store-extra-entry "gnus-registry")
64   (autoload 'gnus-registry-fetch-extra "gnus-registry"))
65
66 ;; autoload query-dns
67 (eval-and-compile
68   (autoload 'query-dns "dns"))
69
70 ;;; Main parameters.
71
72 (defgroup spam nil
73   "Spam configuration.")
74
75 (defcustom spam-directory "~/News/spam/"
76   "Directory for spam whitelists and blacklists."
77   :type 'directory
78   :group 'spam)
79
80 (defcustom spam-move-spam-nonspam-groups-only t
81   "Whether spam should be moved in non-spam groups only.
82 When t, only ham and unclassified groups will have their spam moved
83 to the spam-process-destination.  When nil, spam will also be moved from
84 spam groups."
85   :type 'boolean
86   :group 'spam)
87
88 (defcustom spam-process-ham-in-nonham-groups nil
89   "Whether ham should be processed in non-ham groups."
90   :type 'boolean
91   :group 'spam)
92
93 (defcustom spam-log-to-registry nil
94   "Whether spam/ham processing should be logged in the registry."
95   :type 'boolean
96   :group 'spam)
97
98 (defcustom spam-process-ham-in-spam-groups nil
99   "Whether ham should be processed in spam groups."
100   :type 'boolean
101   :group 'spam)
102
103 (defcustom spam-mark-only-unseen-as-spam t
104   "Whether only unseen articles should be marked as spam in spam
105 groups.  When nil, all unread articles in a spam group are marked as
106 spam.  Set this if you want to leave an article unread in a spam group
107 without losing it to the automatic spam-marking process."
108   :type 'boolean
109   :group 'spam)
110
111 (defcustom spam-mark-ham-unread-before-move-from-spam-group nil
112   "Whether ham should be marked unread before it's moved out of a spam
113 group according to ham-process-destination.  This variable is an
114 official entry in the international Longest Variable Name
115 Competition."
116   :type 'boolean
117   :group 'spam)
118
119 (defcustom spam-whitelist (expand-file-name "whitelist" spam-directory)
120   "The location of the whitelist.
121 The file format is one regular expression per line.
122 The regular expression is matched against the address."
123   :type 'file
124   :group 'spam)
125
126 (defcustom spam-blacklist (expand-file-name "blacklist" spam-directory)
127   "The location of the blacklist.
128 The file format is one regular expression per line.
129 The regular expression is matched against the address."
130   :type 'file
131   :group 'spam)
132
133 (defcustom spam-use-dig t
134   "Whether query-dig should be used instead of query-dns."
135   :type 'boolean
136   :group 'spam)
137
138 (defcustom spam-use-blacklist nil
139   "Whether the blacklist should be used by spam-split."
140   :type 'boolean
141   :group 'spam)
142
143 (defcustom spam-blacklist-ignored-regexes nil
144   "Regular expressions that the blacklist should ignore."
145   :type '(repeat (regexp :tag "Regular expression to ignore when blacklisting"))
146   :group 'spam)
147
148 (defcustom spam-use-whitelist nil
149   "Whether the whitelist should be used by spam-split."
150   :type 'boolean
151   :group 'spam)
152
153 (defcustom spam-use-whitelist-exclusive nil
154   "Whether whitelist-exclusive should be used by spam-split.
155 Exclusive whitelisting means that all messages from senders not in the whitelist
156 are considered spam."
157   :type 'boolean
158   :group 'spam)
159
160 (defcustom spam-use-blackholes nil
161   "Whether blackholes should be used by spam-split."
162   :type 'boolean
163   :group 'spam)
164
165 (defcustom spam-use-hashcash nil
166   "Whether hashcash payments should be detected by spam-split."
167   :type 'boolean
168   :group 'spam)
169
170 (defcustom spam-use-regex-headers nil
171   "Whether a header regular expression match should be used by spam-split.
172 Also see the variables `spam-regex-headers-spam' and `spam-regex-headers-ham'."
173   :type 'boolean
174   :group 'spam)
175
176 (defcustom spam-use-regex-body nil
177   "Whether a body regular expression match should be used by spam-split.
178 Also see the variables `spam-regex-body-spam' and `spam-regex-body-ham'."
179   :type 'boolean
180   :group 'spam)
181
182 (defcustom spam-use-bogofilter-headers nil
183   "Whether bogofilter headers should be used by spam-split.
184 Enable this if you pre-process messages with Bogofilter BEFORE Gnus sees them."
185   :type 'boolean
186   :group 'spam)
187
188 (defcustom spam-use-bogofilter nil
189   "Whether bogofilter should be invoked by spam-split.
190 Enable this if you want Gnus to invoke Bogofilter on new messages."
191   :type 'boolean
192   :group 'spam)
193
194 (defcustom spam-use-BBDB nil
195   "Whether BBDB should be used by spam-split."
196   :type 'boolean
197   :group 'spam)
198
199 (defcustom spam-use-BBDB-exclusive nil
200   "Whether BBDB-exclusive should be used by spam-split.
201 Exclusive BBDB means that all messages from senders not in the BBDB are 
202 considered spam."
203   :type 'boolean
204   :group 'spam)
205
206 (defcustom spam-use-ifile nil
207   "Whether ifile should be used by spam-split."
208   :type 'boolean
209   :group 'spam)
210
211 (defcustom spam-use-stat nil
212   "Whether spam-stat should be used by spam-split."
213   :type 'boolean
214   :group 'spam)
215
216 (defcustom spam-use-spamoracle nil
217   "Whether spamoracle should be used by spam-split."
218   :type 'boolean
219   :group 'spam)
220
221 (defcustom spam-install-hooks (or
222                                spam-use-dig
223                                spam-use-blacklist
224                                spam-use-whitelist 
225                                spam-use-whitelist-exclusive 
226                                spam-use-blackholes 
227                                spam-use-hashcash 
228                                spam-use-regex-headers 
229                                spam-use-regex-body 
230                                spam-use-bogofilter-headers 
231                                spam-use-bogofilter 
232                                spam-use-BBDB 
233                                spam-use-BBDB-exclusive 
234                                spam-use-ifile 
235                                spam-use-stat
236                                spam-use-spamoracle)
237   "Whether the spam hooks should be installed, default to t if one of
238 the spam-use-* variables is set."
239   :group 'spam
240   :type 'boolean)
241
242 (defcustom spam-split-group "spam"
243   "Group name where incoming spam should be put by spam-split."
244   :type 'string
245   :group 'spam)
246
247 ;;; TODO: deprecate this variable, it's confusing since it's a list of strings,
248 ;;; not regular expressions
249 (defcustom spam-junk-mailgroups (cons 
250                                  spam-split-group 
251                                  '("mail.junk" "poste.pourriel"))
252   "Mailgroups with spam contents.
253 All unmarked article in such group receive the spam mark on group entry."
254   :type '(repeat (string :tag "Group"))
255   :group 'spam)
256
257 (defcustom spam-blackhole-servers '("bl.spamcop.net" "relays.ordb.org" 
258                                     "dev.null.dk" "relays.visi.com")
259   "List of blackhole servers."
260   :type '(repeat (string :tag "Server"))
261   :group 'spam)
262
263 (defcustom spam-blackhole-good-server-regex nil
264   "String matching IP addresses that should not be checked in the blackholes"
265   :type '(radio (const nil)
266                 (regexp :format "%t: %v\n" :size 0))
267   :group 'spam)
268
269 (defcustom spam-face 'gnus-splash-face
270   "Face for spam-marked articles"
271   :type 'face
272   :group 'spam)
273
274 (defcustom spam-regex-headers-spam '("^X-Spam-Flag: YES")
275   "Regular expression for positive header spam matches"
276   :type '(repeat (regexp :tag "Regular expression to match spam header"))
277   :group 'spam)
278
279 (defcustom spam-regex-headers-ham '("^X-Spam-Flag: NO")
280   "Regular expression for positive header ham matches"
281   :type '(repeat (regexp :tag "Regular expression to match ham header"))
282   :group 'spam)
283
284 (defcustom spam-regex-body-spam '()
285   "Regular expression for positive body spam matches"
286   :type '(repeat (regexp :tag "Regular expression to match spam body"))
287   :group 'spam)
288
289 (defcustom spam-regex-body-ham '()
290   "Regular expression for positive body ham matches"
291   :type '(repeat (regexp :tag "Regular expression to match ham body"))
292   :group 'spam)
293
294 (defgroup spam-ifile nil
295   "Spam ifile configuration."
296   :group 'spam)
297
298 (defcustom spam-ifile-path (exec-installed-p "ifile")
299   "File path of the ifile executable program."
300   :type '(choice (file :tag "Location of ifile")
301                  (const :tag "ifile is not installed"))
302   :group 'spam-ifile)
303
304 (defcustom spam-ifile-database-path nil
305   "File path of the ifile database."
306   :type '(choice (file :tag "Location of the ifile database")
307                  (const :tag "Use the default"))
308   :group 'spam-ifile)
309
310 (defcustom spam-ifile-spam-category "spam"
311   "Name of the spam ifile category."  
312   :type 'string
313   :group 'spam-ifile)
314
315 (defcustom spam-ifile-ham-category nil
316   "Name of the ham ifile category.  If nil, the current group name will
317 be used."
318   :type '(choice (string :tag "Use a fixed category")
319                  (const :tag "Use the current group name"))
320   :group 'spam-ifile)
321
322 (defcustom spam-ifile-all-categories nil
323   "Whether the ifile check will return all categories, or just spam.
324 Set this to t if you want to use the spam-split invocation of ifile as
325 your main source of newsgroup names."
326   :type 'boolean
327   :group 'spam-ifile)
328
329 (defgroup spam-bogofilter nil
330   "Spam bogofilter configuration."
331   :group 'spam)
332
333 (defcustom spam-bogofilter-path (exec-installed-p "bogofilter")
334   "File path of the Bogofilter executable program."
335   :type '(choice (file :tag "Location of bogofilter")
336                  (const :tag "Bogofilter is not installed"))
337   :group 'spam-bogofilter)
338
339 (defcustom spam-bogofilter-header "X-Bogosity"
340   "The header that Bogofilter inserts in messages."
341   :type 'string
342   :group 'spam-bogofilter)
343
344 (defcustom spam-bogofilter-spam-switch "-s"
345   "The switch that Bogofilter uses to register spam messages."
346   :type 'string
347   :group 'spam-bogofilter)
348
349 (defcustom spam-bogofilter-ham-switch "-n"
350   "The switch that Bogofilter uses to register ham messages."
351   :type 'string
352   :group 'spam-bogofilter)
353
354 (defcustom spam-bogofilter-spam-strong-switch "-S"
355   "The switch that Bogofilter uses to unregister ham messages."
356   :type 'string
357   :group 'spam-bogofilter)
358
359 (defcustom spam-bogofilter-ham-strong-switch "-N"
360   "The switch that Bogofilter uses to unregister spam messages."
361   :type 'string
362   :group 'spam-bogofilter)
363
364 (defcustom spam-bogofilter-bogosity-positive-spam-header "^\\(Yes\\|Spam\\)"
365   "The regex on `spam-bogofilter-header' for positive spam identification."
366   :type 'regexp
367   :group 'spam-bogofilter)
368
369 (defcustom spam-bogofilter-database-directory nil
370   "Directory path of the Bogofilter databases."
371   :type '(choice (directory 
372                   :tag "Location of the Bogofilter database directory")
373                  (const :tag "Use the default"))
374   :group 'spam-ifile)
375
376 (defgroup spam-spamoracle nil
377   "Spam spamoracle configuration."
378   :group 'spam)
379
380 (defcustom spam-spamoracle-database nil 
381   "Location of spamoracle database file. When nil, use the default
382 spamoracle database."
383   :type '(choice (directory :tag "Location of spamoracle database file.")
384                  (const :tag "Use the default"))
385   :group 'spam-spamoracle)
386
387 (defcustom spam-spamoracle-binary (executable-find "spamoracle")
388   "Location of the spamoracle binary."
389   :type '(choice (directory :tag "Location of the spamoracle binary")
390                  (const :tag "Use the default"))
391   :group 'spam-spamoracle)
392
393 ;;; Key bindings for spam control.
394
395 (gnus-define-keys gnus-summary-mode-map
396   "St" spam-bogofilter-score
397   "Sx" gnus-summary-mark-as-spam
398   "Mst" spam-bogofilter-score
399   "Msx" gnus-summary-mark-as-spam
400   "\M-d" gnus-summary-mark-as-spam)
401
402 (defvar spam-old-ham-articles nil
403   "List of old ham articles, generated when a group is entered.")
404
405 (defvar spam-old-spam-articles nil
406   "List of old spam articles, generated when a group is entered.")
407
408
409 ;; convenience functions
410 (defun spam-xor (a b) ; logical exclusive or
411   (and (or a b) (not (and a b))))
412
413 (defun spam-group-ham-mark-p (group mark &optional spam)
414   (when (stringp group)
415     (let* ((marks (spam-group-ham-marks group spam))
416            (marks (if (symbolp mark) 
417                       marks 
418                     (mapcar 'symbol-value marks))))
419       (memq mark marks))))
420
421 (defun spam-group-spam-mark-p (group mark)
422   (spam-group-ham-mark-p group mark t))
423
424 (defun spam-group-ham-marks (group &optional spam)
425   (when (stringp group)
426     (let* ((marks (if spam
427                       (gnus-parameter-spam-marks group)
428                     (gnus-parameter-ham-marks group)))
429            (marks (car marks))
430            (marks (if (listp (car marks)) (car marks) marks)))
431       marks)))
432
433 (defun spam-group-spam-marks (group)
434   (spam-group-ham-marks group t))
435
436 (defun spam-group-spam-contents-p (group)
437   (if (stringp group)
438       (or (member group spam-junk-mailgroups)
439           (memq 'gnus-group-spam-classification-spam 
440                 (gnus-parameter-spam-contents group)))
441     nil))
442   
443 (defun spam-group-ham-contents-p (group)
444   (if (stringp group)
445       (memq 'gnus-group-spam-classification-ham 
446             (gnus-parameter-spam-contents group))
447     nil))
448
449 (defvar spam-list-of-processors
450   '((gnus-group-spam-exit-processor-report-gmane spam spam-use-gmane)
451     (gnus-group-spam-exit-processor-bogofilter   spam spam-use-bogofilter)
452     (gnus-group-spam-exit-processor-blacklist    spam spam-use-blacklist)
453     (gnus-group-spam-exit-processor-ifile        spam spam-use-ifile)
454     (gnus-group-spam-exit-processor-stat         spam spam-use-stat)
455     (gnus-group-spam-exit-processor-spamoracle   spam spam-use-spamoracle)
456     (gnus-group-ham-exit-processor-ifile         ham spam-use-ifile)
457     (gnus-group-ham-exit-processor-bogofilter    ham spam-use-bogofilter)
458     (gnus-group-ham-exit-processor-stat          ham spam-use-stat)
459     (gnus-group-ham-exit-processor-whitelist     ham spam-use-whitelist)
460     (gnus-group-ham-exit-processor-BBDB          ham spam-use-BBDB)
461     (gnus-group-ham-exit-processor-copy          ham spam-use-ham-copy)
462     (gnus-group-ham-exit-processor-spamoracle    ham spam-use-spamoracle))
463   "The spam-list-of-processors list contains pairs associating a
464 ham/spam exit processor variable with a classification and a
465 spam-use-* variable.")
466
467 (defun spam-group-processor-p (group processor)
468   (if (and (stringp group)
469            (symbolp processor))
470       (or (member processor (nth 0 (gnus-parameter-spam-process group)))
471           (spam-group-processor-multiple-p 
472            group 
473            (cdr-safe (assoc processor spam-list-of-processors))))
474     nil))
475
476 (defun spam-group-processor-multiple-p (group processor-info)
477   (let* ((classification (nth 0 processor-info))
478          (check (nth 1 processor-info))
479          (parameters (nth 0 (gnus-parameter-spam-process group)))
480          found)
481     (dolist (parameter parameters)
482       (when (and (null found)
483                  (listp parameter)
484                  (eq classification (nth 0 parameter))
485                  (eq check (nth 1 parameter)))
486         (setq found t)))
487     found))
488
489 (defun spam-group-spam-processor-report-gmane-p (group)
490   (spam-group-processor-p group 'gnus-group-spam-exit-processor-report-gmane))
491
492 (defun spam-group-spam-processor-bogofilter-p (group)
493   (spam-group-processor-p group 'gnus-group-spam-exit-processor-bogofilter))
494
495 (defun spam-group-spam-processor-blacklist-p (group)
496   (spam-group-processor-p group 'gnus-group-spam-exit-processor-blacklist))
497
498 (defun spam-group-spam-processor-ifile-p (group)
499   (spam-group-processor-p group 'gnus-group-spam-exit-processor-ifile))
500
501 (defun spam-group-ham-processor-ifile-p (group)
502   (spam-group-processor-p group 'gnus-group-ham-exit-processor-ifile))
503
504 (defun spam-group-spam-processor-spamoracle-p (group)
505   (spam-group-processor-p group 'gnus-group-spam-exit-processor-spamoracle))
506
507 (defun spam-group-ham-processor-bogofilter-p (group)
508   (spam-group-processor-p group 'gnus-group-ham-exit-processor-bogofilter))
509
510 (defun spam-group-spam-processor-stat-p (group)
511   (spam-group-processor-p group 'gnus-group-spam-exit-processor-stat))
512
513 (defun spam-group-ham-processor-stat-p (group)
514   (spam-group-processor-p group 'gnus-group-ham-exit-processor-stat))
515
516 (defun spam-group-ham-processor-whitelist-p (group)
517   (spam-group-processor-p group 'gnus-group-ham-exit-processor-whitelist))
518
519 (defun spam-group-ham-processor-BBDB-p (group)
520   (spam-group-processor-p group 'gnus-group-ham-exit-processor-BBDB))
521
522 (defun spam-group-ham-processor-copy-p (group)
523   (spam-group-processor-p group 'gnus-group-ham-exit-processor-copy))
524
525 (defun spam-group-ham-processor-spamoracle-p (group)
526   (spam-group-processor-p group 'gnus-group-ham-exit-processor-spamoracle))
527
528 ;;; Summary entry and exit processing.
529
530 (defun spam-summary-prepare ()
531   (setq spam-old-ham-articles 
532         (spam-list-articles gnus-newsgroup-articles 'ham))
533   (setq spam-old-spam-articles 
534         (spam-list-articles gnus-newsgroup-articles 'spam))
535   (spam-mark-junk-as-spam-routine))
536
537 ;; The spam processors are invoked for any group, spam or ham or neither
538 (defun spam-summary-prepare-exit ()
539   (unless gnus-group-is-exiting-without-update-p
540     (gnus-message 6 "Exiting summary buffer and applying spam rules")
541
542     ;; first of all, unregister any articles that are no longer ham or spam
543     ;; we have to iterate over the processors, or else we'll be too slow
544     (dolist (classification '(spam ham))
545       (let* ((old-articles (if (eq classification 'spam)
546                                spam-old-spam-articles 
547                              spam-old-ham-articles))
548              (new-articles (spam-list-articles 
549                             gnus-newsgroup-articles 
550                             classification))
551              (changed-articles (gnus-set-difference old-articles new-articles)))
552         ;; now that we have the changed articles, we go through the processors
553         (dolist (processor-param spam-list-of-processors)
554           (let ((processor (nth 0 processor-param))
555                 (processor-classification (nth 1 processor-param))
556                 (check (nth 2 processor-param))
557                 unregister-list)
558             (dolist (article changed-articles)
559               (let ((id (spam-fetch-field-message-id-fast article)))
560                 (when (spam-log-unregistration-needed-p 
561                        id 'process classification check)
562                   (push article unregister-list))))
563             ;; call spam-register-routine with specific articles to unregister,
564             ;; when there are articles to unregister and the check is enabled
565             (when (and unregister-list (symbol-value check))
566               (spam-register-routine classification check t unregister-list))))))
567       
568     ;; find all the spam processors applicable to this group
569     (dolist (processor-param spam-list-of-processors)
570       (let ((processor (nth 0 processor-param))
571             (classification (nth 1 processor-param))
572             (check (nth 2 processor-param)))
573         (when (and (eq 'spam classification)
574                    (spam-group-processor-p gnus-newsgroup-name processor))
575           (spam-register-routine classification check))))
576
577     (if spam-move-spam-nonspam-groups-only      
578         (when (not (spam-group-spam-contents-p gnus-newsgroup-name))
579           (spam-mark-spam-as-expired-and-move-routine
580            (gnus-parameter-spam-process-destination gnus-newsgroup-name)))
581       (gnus-message 5 "Marking spam as expired and moving it to %s" 
582                     gnus-newsgroup-name)
583       (spam-mark-spam-as-expired-and-move-routine 
584        (gnus-parameter-spam-process-destination gnus-newsgroup-name)))
585
586     ;; now we redo spam-mark-spam-as-expired-and-move-routine to only
587     ;; expire spam, in case the above did not expire them
588     (gnus-message 5 "Marking spam as expired without moving it")
589     (spam-mark-spam-as-expired-and-move-routine nil)
590
591     (when (or (spam-group-ham-contents-p gnus-newsgroup-name)
592               (and (spam-group-spam-contents-p gnus-newsgroup-name)
593                    spam-process-ham-in-spam-groups)
594               spam-process-ham-in-nonham-groups)
595       ;; find all the ham processors applicable to this group
596       (dolist (processor-param spam-list-of-processors)
597         (let ((processor (nth 0 processor-param))
598               (classification (nth 1 processor-param))
599               (check (nth 2 processor-param)))
600           (when (and (eq 'ham classification)
601                      (spam-group-processor-p gnus-newsgroup-name processor))
602             (spam-register-routine classification check)))))
603
604     (when (spam-group-ham-processor-copy-p gnus-newsgroup-name)
605       (gnus-message 5 "Copying ham")
606       (spam-ham-copy-routine
607        (gnus-parameter-ham-process-destination gnus-newsgroup-name)))
608
609     ;; now move all ham articles out of spam groups
610     (when (spam-group-spam-contents-p gnus-newsgroup-name)
611       (gnus-message 5 "Moving ham messages from spam group")
612       (spam-ham-move-routine
613        (gnus-parameter-ham-process-destination gnus-newsgroup-name))))
614
615   (setq spam-old-ham-articles nil)
616   (setq spam-old-spam-articles nil))
617
618 (defun spam-mark-junk-as-spam-routine ()
619   ;; check the global list of group names spam-junk-mailgroups and the
620   ;; group parameters
621   (when (spam-group-spam-contents-p gnus-newsgroup-name)
622     (gnus-message 5 "Marking %s articles as spam"
623                   (if spam-mark-only-unseen-as-spam 
624                       "unseen"
625                     "unread"))
626     (let ((articles (if spam-mark-only-unseen-as-spam 
627                         gnus-newsgroup-unseen
628                       gnus-newsgroup-unreads)))
629       (dolist (article articles)
630         (gnus-summary-mark-article article gnus-spam-mark)))))
631
632 (defun spam-mark-spam-as-expired-and-move-routine (&rest groups)
633   (if (and (car-safe groups) (listp (car-safe groups)))
634       (apply 'spam-mark-spam-as-expired-and-move-routine (car groups))
635     (gnus-summary-kill-process-mark)
636     (let ((articles gnus-newsgroup-articles)
637           (backend-supports-deletions
638            (gnus-check-backend-function
639             'request-move-article gnus-newsgroup-name))
640           article tomove deletep)
641       (dolist (article articles)
642         (when (eq (gnus-summary-article-mark article) gnus-spam-mark)
643           (gnus-summary-mark-article article gnus-expirable-mark)
644           (push article tomove)))
645     
646       ;; now do the actual copies
647       (dolist (group groups)
648         (when (and tomove
649                    (stringp group))
650           (dolist (article tomove)
651             (gnus-summary-set-process-mark article))
652           (when tomove
653             (if (or (not backend-supports-deletions)
654                     (> (length groups) 1))
655                 (progn 
656                   (gnus-summary-copy-article nil group)
657                   (setq deletep t))
658               (gnus-summary-move-article nil group)))))
659     
660       ;; now delete the articles, if there was a copy done, and the
661       ;; backend allows it
662       (when (and deletep backend-supports-deletions)
663         (dolist (article tomove)
664           (gnus-summary-set-process-mark article))
665         (when tomove
666           (let ((gnus-novice-user nil)) ; don't ask me if I'm sure
667             (gnus-summary-delete-article nil))))
668     
669       (gnus-summary-yank-process-mark))))
670  
671 (defun spam-ham-copy-or-move-routine (copy groups)
672   (gnus-summary-kill-process-mark)
673   (let ((articles gnus-newsgroup-articles)
674         (backend-supports-deletions
675          (gnus-check-backend-function
676           'request-move-article gnus-newsgroup-name))
677         (respool-method (gnus-find-method-for-group gnus-newsgroup-name))
678         article mark todo deletep respool)
679     (dolist (article articles)
680       (when (spam-group-ham-mark-p gnus-newsgroup-name
681                                    (gnus-summary-article-mark article))
682         (push article todo)))
683
684     (when (member 'respool groups)
685       (setq respool t)                  ; boolean for later
686       (setq groups '("fake"))) ; when respooling, groups are dynamic so fake it
687
688     ;; now do the actual move
689     (dolist (group groups)
690       (when (and todo (stringp group))
691         (dolist (article todo)
692           (when spam-mark-ham-unread-before-move-from-spam-group
693             (gnus-summary-mark-article article gnus-unread-mark))
694           (gnus-summary-set-process-mark article))
695
696         (if respool                ; respooling is with a "fake" group
697             (gnus-summary-respool-article nil respool-method)
698           (if (or (not backend-supports-deletions) ; else, we are not respooling
699                   (> (length groups) 1))
700               (progn                ; if copying, copy and set deletep
701                 (gnus-summary-copy-article nil group)
702                 (setq deletep t))
703             (gnus-summary-move-article nil group))))) ; else move articles
704     
705     ;; now delete the articles, unless a) copy is t, and there was a copy done
706     ;;                                 b) a move was done to a single group
707     ;;                                 c) backend-supports-deletions is nil
708     (unless copy
709       (when (and deletep backend-supports-deletions)
710         (dolist (article todo)
711           (gnus-summary-set-process-mark article))
712         (when todo
713           (let ((gnus-novice-user nil)) ; don't ask me if I'm sure
714             (gnus-summary-delete-article nil))))))
715   
716   (gnus-summary-yank-process-mark))
717  
718 (defun spam-ham-copy-routine (&rest groups)
719   (if (and (car-safe groups) (listp (car-safe groups)))
720       (apply 'spam-ham-copy-routine (car groups))
721     (spam-ham-copy-or-move-routine t groups)))
722  
723 (defun spam-ham-move-routine (&rest groups)
724   (if (and (car-safe groups) (listp (car-safe groups)))
725       (apply 'spam-ham-move-routine (car groups))
726     (spam-ham-copy-or-move-routine nil groups)))
727  
728 (eval-and-compile
729   (defalias 'spam-point-at-eol (if (fboundp 'point-at-eol)
730                                    'point-at-eol
731                                  'line-end-position)))
732
733 (defun spam-get-article-as-string (article)
734   (let ((article-buffer (spam-get-article-as-buffer article))
735         article-string)
736     (when article-buffer
737       (save-window-excursion
738         (set-buffer article-buffer)
739         (setq article-string (buffer-string))))
740     article-string))
741
742 (defun spam-get-article-as-buffer (article)
743   (let ((article-buffer))
744     (when (numberp article)
745       (save-window-excursion
746         (gnus-summary-goto-subject article)
747         (gnus-summary-show-article t)
748         (setq article-buffer (get-buffer gnus-article-buffer))))
749     article-buffer))
750
751 ;; disabled for now
752 ;; (defun spam-get-article-as-filename (article)
753 ;;   (let ((article-filename))
754 ;;     (when (numberp article)
755 ;;       (nnml-possibly-change-directory 
756 ;;        (gnus-group-real-name gnus-newsgroup-name))
757 ;;       (setq article-filename (expand-file-name 
758 ;;                              (int-to-string article) nnml-current-directory)))
759 ;;     (if (file-exists-p article-filename)
760 ;;      article-filename
761 ;;       nil)))
762
763 (defun spam-fetch-field-from-fast (article)
764   "Fetch the `from' field quickly, using the internal gnus-data-list function"
765   (if (and (numberp article)
766            (assoc article (gnus-data-list nil)))
767       (mail-header-from 
768        (gnus-data-header (assoc article (gnus-data-list nil))))
769     nil))
770
771 (defun spam-fetch-field-subject-fast (article)
772   "Fetch the `subject' field quickly, using the internal
773   gnus-data-list function"
774   (if (and (numberp article)
775            (assoc article (gnus-data-list nil)))
776       (mail-header-subject 
777        (gnus-data-header (assoc article (gnus-data-list nil))))
778     nil))
779
780 (defun spam-fetch-field-message-id-fast (article)
781   "Fetch the `Message-ID' field quickly, using the internal
782   gnus-data-list function"
783   (if (and (numberp article)
784            (assoc article (gnus-data-list nil)))
785       (mail-header-message-id 
786        (gnus-data-header (assoc article (gnus-data-list nil))))
787     nil))
788
789 \f
790 ;;;; Spam determination.
791
792 (defvar spam-list-of-checks
793   '((spam-use-blacklist          . spam-check-blacklist)
794     (spam-use-regex-headers      . spam-check-regex-headers)
795     (spam-use-regex-body         . spam-check-regex-body)
796     (spam-use-whitelist          . spam-check-whitelist)
797     (spam-use-BBDB               . spam-check-BBDB)
798     (spam-use-ifile              . spam-check-ifile)
799     (spam-use-spamoracle         . spam-check-spamoracle)
800     (spam-use-stat               . spam-check-stat)
801     (spam-use-blackholes         . spam-check-blackholes)
802     (spam-use-hashcash           . spam-check-hashcash)
803     (spam-use-bogofilter-headers . spam-check-bogofilter-headers)
804     (spam-use-bogofilter         . spam-check-bogofilter))
805   "The spam-list-of-checks list contains pairs associating a parameter
806 variable with a spam checking function.  If the parameter variable is
807 true, then the checking function is called, and its value decides what
808 happens.  Each individual check may return nil, t, or a mailgroup
809 name.  The value nil means that the check does not yield a decision,
810 and so, that further checks are needed.  The value t means that the
811 message is definitely not spam, and that further spam checks should be
812 inhibited.  Otherwise, a mailgroup name is returned where the mail
813 should go, and further checks are also inhibited.  The usual mailgroup
814 name is the value of `spam-split-group', meaning that the message is
815 definitely a spam.")
816
817 (defvar spam-list-of-statistical-checks 
818   '(spam-use-ifile
819     spam-use-regex-body 
820     spam-use-stat 
821     spam-use-bogofilter
822     spam-use-spamoracle)
823   "The spam-list-of-statistical-checks list contains all the mail
824 splitters that need to have the full message body available.")
825
826 ;;;TODO: modify to invoke self with each check if invoked without specifics
827 (defun spam-split (&rest specific-checks)
828   "Split this message into the `spam' group if it is spam.
829 This function can be used as an entry in `nnmail-split-fancy',
830 for example like this: (: spam-split).  It can take checks as
831 parameters.  A string as a parameter will set the
832 spam-split-group to that string.
833
834 See the Info node `(gnus)Fancy Mail Splitting' for more details."
835   (interactive)
836   (let ((spam-split-group-choice spam-split-group))
837     (dolist (check specific-checks)
838       (when (stringp check)
839         (setq spam-split-group-choice check)
840         (setq specific-checks (delq check specific-checks))))
841
842     (let ((spam-split-group spam-split-group-choice))
843       (save-excursion
844         (save-restriction
845           (dolist (check spam-list-of-statistical-checks)
846             (when (and (symbolp check) (symbol-value check))
847               (widen)
848               (gnus-message 8 "spam-split: widening the buffer (%s requires it)"
849                             (symbol-name check))
850               (return)))
851           ;;   (progn (widen) (debug (buffer-string)))
852           (let ((list-of-checks spam-list-of-checks)
853                 decision)
854             (while (and list-of-checks (not decision))
855               (let ((pair (pop list-of-checks)))
856                 (when (and (symbol-value (car pair))
857                            (or (null specific-checks)
858                                (memq (car pair) specific-checks)))
859                   (gnus-message 5 "spam-split: calling the %s function" 
860                                 (symbol-name (cdr pair)))
861                   (setq decision (funcall (cdr pair))))))
862             (if (eq decision t)
863                 nil
864               decision)))))))
865
866 (defvar spam-registration-functions
867   ;; first the ham register, second the spam register function
868   ;; third the ham unregister, fourth the spam unregister function
869   '((spam-use-blacklist  nil 
870                          spam-blacklist-register-routine
871                          nil
872                          spam-blacklist-unregister-routine)
873     (spam-use-whitelist  spam-whitelist-register-routine
874                          nil
875                          spam-whitelist-unregister-routine
876                          nil)
877     (spam-use-BBDB       spam-BBDB-register-routine 
878                          nil
879                          spam-BBDB-unregister-routine 
880                          nil)
881     (spam-use-ifile      spam-ifile-register-ham-routine 
882                          spam-ifile-register-spam-routine
883                          spam-ifile-unregister-ham-routine 
884                          spam-ifile-unregister-spam-routine)
885     (spam-use-spamoracle spam-spamoracle-learn-ham 
886                          spam-spamoracle-learn-spam
887                          spam-spamoracle-unlearn-ham 
888                          spam-spamoracle-unlearn-spam)
889     (spam-use-stat       spam-stat-register-ham-routine 
890                          spam-stat-register-spam-routine
891                          spam-stat-unregister-ham-routine 
892                          spam-stat-unregister-spam-routine)
893     ;; note that spam-use-gmane is not a legitimate check
894     (spam-use-gmane      nil 
895                          spam-report-gmane-register-routine
896                          ;; does Gmane support unregistration?
897                          nil
898                          nil)
899     (spam-use-bogofilter spam-bogofilter-register-ham-routine 
900                          spam-bogofilter-register-spam-routine
901                          spam-bogofilter-unregister-ham-routine 
902                          spam-bogofilter-unregister-spam-routine))
903   "The spam-registration-functions list contains pairs
904 associating a parameter variable with the ham and spam
905 registration functions, and the ham and spam unregistration
906 functions")
907
908 (defun spam-classification-valid-p (classification)
909   (or  (eq classification 'spam)
910        (eq classification 'ham)))
911
912 (defun spam-process-type-valid-p (process-type)
913   (or  (eq process-type 'incoming)
914        (eq process-type 'process)))
915
916 (defun spam-registration-check-valid-p (check)
917   (assoc check spam-registration-functions))
918
919 (defun spam-unregistration-check-valid-p (check)
920   (assoc check spam-registration-functions))
921
922 (defun spam-registration-function (classification check)
923   (let ((flist (cdr-safe (assoc check spam-registration-functions))))
924     (if (eq classification 'spam)
925         (nth 1 flist)
926       (nth 0 flist))))
927
928 (defun spam-unregistration-function (classification check)
929   (let ((flist (cdr-safe (assoc check spam-registration-functions))))
930     (if (eq classification 'spam)
931         (nth 3 flist)
932       (nth 2 flist))))
933
934 (defun spam-list-articles (articles classification)
935   (let ((mark-check (if (eq classification 'spam) 
936                         'spam-group-spam-mark-p 
937                       'spam-group-ham-mark-p))
938         mark list)
939     (dolist (article articles)
940       (when (funcall mark-check 
941                      gnus-newsgroup-name 
942                      (gnus-summary-article-mark article))
943         (push article list)))
944     list))
945
946 (defun spam-register-routine (classification 
947                               check 
948                               &optional unregister 
949                               specific-articles)
950   (when (and (spam-classification-valid-p classification)
951              (spam-registration-check-valid-p check))
952     (let* ((register-function
953             (spam-registration-function classification check))
954            (unregister-function
955             (spam-unregistration-function classification check))
956            (run-function (if unregister 
957                              unregister-function 
958                            register-function))
959            (log-function (if unregister
960                              'spam-log-undo-registration
961                            'spam-log-processing-to-registry))
962            article articles)
963
964       (when run-function
965         ;; make list of articles, using specific-articles if given
966         (setq articles (or specific-articles
967                            (spam-list-articles 
968                             gnus-newsgroup-articles 
969                             classification)))
970         ;; process them
971         (gnus-message 5 "%s %d %s articles with classification %s, check %s"
972                       (if unregister "Unregistering" "Registering")
973                       (length articles)
974                       (if specific-articles "specific" "")
975                       (symbol-name classification)
976                       (symbol-name check))
977         (funcall run-function articles)
978         ;; now log all the registrations (or undo them, depending on unregister)
979         (dolist (article articles)
980           (funcall log-function
981                    (spam-fetch-field-message-id-fast article)
982                    'process
983                    classification
984                    check
985                    gnus-newsgroup-name))))))
986
987 ;;; log a ham- or spam-processor invocation to the registry
988 (defun spam-log-processing-to-registry (id type classification check group)
989   (when spam-log-to-registry
990     (if (and (stringp id)
991              (stringp group)
992              (spam-process-type-valid-p type)
993              (spam-classification-valid-p classification)
994              (spam-registration-check-valid-p check))
995         (let ((cell-list (cdr-safe (gnus-registry-fetch-extra id type)))
996               (cell (list classification check group)))
997           (push cell cell-list)
998           (gnus-registry-store-extra-entry
999            id
1000            type
1001            cell-list))
1002
1003       (gnus-message 5 (format "%s called with bad ID, type, classification, check, or group"
1004                               "spam-log-processing-to-registry")))))
1005
1006 ;;; check if a ham- or spam-processor registration needs to be undone
1007 (defun spam-log-unregistration-needed-p (id type classification check)
1008   (when spam-log-to-registry
1009     (if (and (stringp id)
1010              (spam-process-type-valid-p type)
1011              (spam-classification-valid-p classification)
1012              (spam-registration-check-valid-p check))
1013         (let ((cell-list (cdr-safe (gnus-registry-fetch-extra id type)))
1014               found)
1015           (dolist (cell cell-list)
1016             (unless found
1017               (when (and (eq classification (nth 0 cell))
1018                          (eq check (nth 1 cell)))
1019                 (setq found t))))
1020           found)
1021       (progn 
1022         (gnus-message 5 (format "%s called with bad ID, type, classification, or check"
1023                                 "spam-log-unregistration-needed-p"))
1024         nil))))
1025
1026
1027 ;;; undo a ham- or spam-processor registration (the group is not used)
1028 (defun spam-log-undo-registration (id type classification check &optional group)
1029   (when (and spam-log-to-registry
1030              (spam-log-unregistration-needed-p id type classification check))
1031     (if (and (stringp id)
1032              (spam-process-type-valid-p type)
1033              (spam-classification-valid-p classification)
1034              (spam-registration-check-valid-p check))
1035         (let ((cell-list (cdr-safe (gnus-registry-fetch-extra id type)))
1036               new-cell-list found)
1037           (dolist (cell cell-list)
1038             (unless (and (eq classification (nth 0 cell))
1039                          (eq check (nth 1 cell)))
1040               (push cell new-cell-list)))
1041           (gnus-registry-store-extra-entry
1042            id
1043            type
1044            new-cell-list))
1045       (progn 
1046         (gnus-message 5 (format "%s called with bad ID, type, check, or group"
1047                                 "spam-log-undo-registration"))
1048         nil))))
1049
1050 ;;; set up IMAP widening if it's necessary  
1051 (defun spam-setup-widening ()
1052   (dolist (check spam-list-of-statistical-checks)
1053     (when (symbol-value check)
1054       (setq nnimap-split-download-body-default t))))
1055
1056 \f
1057 ;;;; Regex body
1058
1059 (defun spam-check-regex-body ()
1060   (let ((spam-regex-headers-ham spam-regex-body-ham)
1061         (spam-regex-headers-spam spam-regex-body-spam))
1062     (spam-check-regex-headers t)))
1063
1064 \f
1065 ;;;; Regex headers
1066
1067 (defun spam-check-regex-headers (&optional body)
1068   (let ((type (if body "body" "header"))
1069         ret found)
1070     (dolist (h-regex spam-regex-headers-ham)
1071       (unless found
1072         (goto-char (point-min))
1073         (when (re-search-forward h-regex nil t)
1074           (message "Ham regex %s search positive." type)
1075           (setq found t))))
1076     (dolist (s-regex spam-regex-headers-spam)
1077       (unless found
1078         (goto-char (point-min))
1079         (when (re-search-forward s-regex nil t)
1080           (message "Spam regex %s search positive." type)
1081           (setq found t)
1082           (setq ret spam-split-group))))
1083     ret))
1084
1085 \f
1086 ;;;; Blackholes.
1087
1088 (defun spam-reverse-ip-string (ip)
1089   (when (stringp ip)
1090     (mapconcat 'identity
1091                (nreverse (split-string ip "\\."))
1092                ".")))
1093
1094 (defun spam-check-blackholes ()
1095   "Check the Received headers for blackholed relays."
1096   (let ((headers (nnmail-fetch-field "received"))
1097         ips matches)
1098     (when headers
1099       (with-temp-buffer
1100         (insert headers)
1101         (goto-char (point-min))
1102         (gnus-message 5 "Checking headers for relay addresses")
1103         (while (re-search-forward
1104                 "\\([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\\)" nil t)
1105           (gnus-message 9 "Blackhole search found host IP %s." (match-string 1))
1106           (push (spam-reverse-ip-string (match-string 1))
1107                 ips)))
1108       (dolist (server spam-blackhole-servers)
1109         (dolist (ip ips)
1110           (unless (and spam-blackhole-good-server-regex
1111                        ;; match the good-server-regex against the reversed (again) IP string
1112                        (string-match 
1113                         spam-blackhole-good-server-regex
1114                         (spam-reverse-ip-string ip)))
1115             (unless matches
1116               (let ((query-string (concat ip "." server)))
1117                 (if spam-use-dig
1118                     (let ((query-result (query-dig query-string)))
1119                       (when query-result
1120                         (gnus-message 5 "(DIG): positive blackhole check '%s'" 
1121                                       query-result)
1122                         (push (list ip server query-result)
1123                               matches)))
1124                   ;; else, if not using dig.el
1125                   (when (query-dns query-string)
1126                     (gnus-message 5 "positive blackhole check")
1127                     (push (list ip server (query-dns query-string 'TXT))
1128                           matches)))))))))
1129     (when matches
1130       spam-split-group)))
1131 \f
1132 ;;;; Hashcash.
1133
1134 (condition-case nil
1135     (progn
1136       (require 'hashcash)
1137       
1138       (defun spam-check-hashcash ()
1139         "Check the headers for hashcash payments."
1140         (mail-check-payment)))   ;mail-check-payment returns a boolean
1141
1142   (file-error (progn
1143                 (defalias 'mail-check-payment 'ignore)
1144                 (defalias 'spam-check-hashcash 'ignore))))
1145 \f
1146 ;;;; BBDB 
1147
1148 ;;; original idea for spam-check-BBDB from Alexander Kotelnikov
1149 ;;; <sacha@giotto.sj.ru>
1150
1151 ;; all this is done inside a condition-case to trap errors
1152
1153 (condition-case nil
1154     (progn
1155       (require 'bbdb)
1156       (require 'bbdb-com)
1157       
1158       (defun spam-enter-ham-BBDB (addresses &optional remove)
1159         "Enter an address into the BBDB; implies ham (non-spam) sender"
1160         (dolist (from addresses)
1161           (when (stringp from)
1162             (let* ((parsed-address (gnus-extract-address-components from))
1163                    (name (or (nth 0 parsed-address) "Ham Sender"))
1164                    (remove-function (if remove 
1165                                         'bbdb-delete-record-internal
1166                                       'ignore))
1167                    (net-address (nth 1 parsed-address))
1168                    (record (and net-address 
1169                                 (bbdb-search-simple nil net-address))))
1170               (when net-address
1171                 (gnus-message 5 "%s address %s %s BBDB" 
1172                               (if remove "Deleting" "Adding") 
1173                               from
1174                               (if remove "from" "to"))
1175                 (if record
1176                     (funcall remove-function record)
1177                   (bbdb-create-internal name nil net-address nil nil 
1178                                         "ham sender added by spam.el")))))))
1179       
1180       (defun spam-BBDB-register-routine (articles &optional unregister)
1181         (let (addresses)
1182           (dolist (article articles)
1183             (when (stringp (spam-fetch-field-from-fast article))
1184               (push (spam-fetch-field-from-fast article) addresses)))
1185           ;; now do the register/unregister action
1186           (spam-enter-ham-BBDB addresses unregister)))
1187
1188       (defun spam-BBDB-unregister-routine (articles)
1189         (spam-BBDB-register-routine articles t))
1190
1191       (defun spam-check-BBDB ()
1192         "Mail from people in the BBDB is classified as ham or non-spam"
1193         (let ((who (nnmail-fetch-field "from")))
1194           (when who
1195             (setq who (nth 1 (gnus-extract-address-components who)))
1196             (if (bbdb-search-simple nil who)
1197                 t 
1198               (if spam-use-BBDB-exclusive
1199                   spam-split-group
1200                 nil))))))
1201
1202   (file-error (progn
1203                 (defalias 'bbdb-search-simple 'ignore)
1204                 (defalias 'spam-check-BBDB 'ignore)
1205                 (defalias 'spam-BBDB-register-routine 'ignore)
1206                 (defalias 'spam-enter-ham-BBDB 'ignore)
1207                 (defalias 'bbdb-create-internal 'ignore)
1208                 (defalias 'bbdb-delete-record-internal 'ignore)
1209                 (defalias 'bbdb-records 'ignore))))
1210
1211 \f
1212 ;;;; ifile
1213
1214 ;;; check the ifile backend; return nil if the mail was NOT classified
1215 ;;; as spam
1216
1217 (defun spam-get-ifile-database-parameter ()
1218   "Get the command-line parameter for ifile's database from
1219   spam-ifile-database-path."
1220   (if spam-ifile-database-path
1221       (format "--db-file=%s" spam-ifile-database-path)
1222     nil))
1223     
1224 (defun spam-check-ifile ()
1225   "Check the ifile backend for the classification of this message"
1226   (let ((article-buffer-name (buffer-name)) 
1227         category return)
1228     (with-temp-buffer
1229       (let ((temp-buffer-name (buffer-name))
1230             (db-param (spam-get-ifile-database-parameter)))
1231         (save-excursion
1232           (set-buffer article-buffer-name)
1233           (apply 'call-process-region
1234                  (point-min) (point-max) spam-ifile-path
1235                  nil temp-buffer-name nil "-c"
1236                  (if db-param `(,db-param "-q") `("-q"))))
1237         ;; check the return now (we're back in the temp buffer)
1238         (goto-char (point-min))
1239         (if (not (eobp))
1240             (setq category (buffer-substring (point) (spam-point-at-eol))))
1241         (when (not (zerop (length category))) ; we need a category here
1242           (if spam-ifile-all-categories
1243               (setq return category)
1244             ;; else, if spam-ifile-all-categories is not set...
1245             (when (string-equal spam-ifile-spam-category category)
1246               (setq return spam-split-group)))))) ; note return is nil otherwise
1247     return))
1248
1249 (defun spam-ifile-register-with-ifile (articles category &optional unregister)
1250   "Register an article, given as a string, with a category.
1251 Uses `gnus-newsgroup-name' if category is nil (for ham registration)."
1252   (let ((category (or category gnus-newsgroup-name))
1253         (add-or-delete-option (if unregister "-d" "-i"))
1254         (db (spam-get-ifile-database-parameter))
1255         parameters)
1256     (with-temp-buffer
1257       (dolist (article articles)
1258         (let ((article-string (spam-get-article-as-string article)))
1259           (when (stringp article-string)
1260             (insert article-string))))
1261       (apply 'call-process-region
1262              (point-min) (point-max) spam-ifile-path
1263              nil nil nil 
1264              add-or-delete-option category
1265              (if db `(,db "-h") `("-h"))))))
1266
1267 (defun spam-ifile-register-spam-routine (articles &optional unregister)
1268   (spam-ifile-register-with-ifile articles spam-ifile-spam-category unregister))
1269
1270 (defun spam-ifile-unregister-spam-routine (articles)
1271   (spam-ifile-register-spam-routine articles t))
1272
1273 (defun spam-ifile-register-ham-routine (articles &optional unregister)
1274   (spam-ifile-register-with-ifile articles spam-ifile-ham-category unregister))
1275
1276 (defun spam-ifile-unregister-ham-routine (articles)
1277   (spam-ifile-register-ham-routine articles t))
1278
1279 \f
1280 ;;;; spam-stat
1281
1282 (condition-case nil
1283     (progn
1284       (let ((spam-stat-install-hooks nil))
1285         (require 'spam-stat))
1286       
1287       (defun spam-check-stat ()
1288         "Check the spam-stat backend for the classification of this message"
1289         (let ((spam-stat-split-fancy-spam-group spam-split-group) ; override
1290               (spam-stat-buffer (buffer-name)) ; stat the current buffer
1291               category return)
1292           (spam-stat-split-fancy)))
1293
1294       (defun spam-stat-register-spam-routine (articles &optional unregister)
1295         (dolist (article articles)
1296           (let ((article-string (spam-get-article-as-string article)))
1297             (with-temp-buffer
1298               (insert article-string)
1299               (if unregister
1300                   (spam-stat-buffer-change-to-non-spam)
1301               (spam-stat-buffer-is-spam))))))
1302
1303       (defun spam-stat-unregister-spam-routine (articles)
1304         (spam-stat-register-spam-routine articles t))
1305
1306       (defun spam-stat-register-ham-routine (articles &optional unregister)
1307         (dolist (article articles)
1308           (let ((article-string (spam-get-article-as-string article)))
1309             (with-temp-buffer
1310               (insert article-string)
1311               (if unregister
1312                   (spam-stat-buffer-change-to-spam)
1313               (spam-stat-buffer-is-non-spam))))))
1314
1315       (defun spam-stat-unregister-ham-routine (articles)
1316         (spam-stat-register-ham-routine articles t))
1317
1318       (defun spam-maybe-spam-stat-load ()
1319         (when spam-use-stat (spam-stat-load)))
1320       
1321       (defun spam-maybe-spam-stat-save ()
1322         (when spam-use-stat (spam-stat-save))))
1323
1324   (file-error (progn
1325                 (defalias 'spam-stat-load 'ignore)
1326                 (defalias 'spam-stat-save 'ignore)
1327                 (defalias 'spam-maybe-spam-stat-load 'ignore)
1328                 (defalias 'spam-maybe-spam-stat-save 'ignore)
1329                 (defalias 'spam-stat-register-ham-routine 'ignore)
1330                 (defalias 'spam-stat-unregister-ham-routine 'ignore)
1331                 (defalias 'spam-stat-register-spam-routine 'ignore)
1332                 (defalias 'spam-stat-unregister-spam-routine 'ignore)
1333                 (defalias 'spam-stat-buffer-is-spam 'ignore)
1334                 (defalias 'spam-stat-buffer-change-to-spam 'ignore)
1335                 (defalias 'spam-stat-buffer-is-non-spam 'ignore)
1336                 (defalias 'spam-stat-buffer-change-to-non-spam 'ignore)
1337                 (defalias 'spam-stat-split-fancy 'ignore)
1338                 (defalias 'spam-check-stat 'ignore))))
1339
1340 \f
1341
1342 ;;;; Blacklists and whitelists.
1343
1344 (defvar spam-whitelist-cache nil)
1345 (defvar spam-blacklist-cache nil)
1346
1347 (defun spam-kill-whole-line ()
1348   (beginning-of-line)
1349   (let ((kill-whole-line t))
1350     (kill-line)))
1351
1352 ;;; address can be a list, too
1353 (defun spam-enter-whitelist (address &optional remove)
1354   "Enter ADDRESS (list or single) into the whitelist.  With a
1355   non-nil REMOVE, remove them."
1356   (interactive "sAddress: ")
1357   (spam-enter-list address spam-whitelist remove)
1358   (setq spam-whitelist-cache nil))
1359
1360 ;;; address can be a list, too
1361 (defun spam-enter-blacklist (address &optional remove)
1362   "Enter ADDRESS (list or single) into the blacklist.  With a
1363   non-nil REMOVE, remove them."
1364   (interactive "sAddress: ")
1365   (spam-enter-list address spam-blacklist remove)
1366   (setq spam-blacklist-cache nil))
1367
1368 (defun spam-enter-list (addresses file &optional remove)
1369   "Enter ADDRESSES into the given FILE.
1370 Either the whitelist or the blacklist files can be used.  With
1371 REMOVE not nil, remove the ADDRESSES."
1372   (if (stringp addresses)
1373       (spam-enter-list (list addresses) file remove)
1374     ;; else, we have a list of addresses here
1375     (unless (file-exists-p (file-name-directory file))
1376       (make-directory (file-name-directory file) t))
1377     (save-excursion
1378       (set-buffer
1379        (find-file-noselect file))
1380       (dolist (a addresses)
1381         (when (stringp a)
1382           (goto-char (point-min))
1383           (if (re-search-forward (regexp-quote a) nil t)
1384               ;; found the address
1385               (when remove
1386                 (spam-kill-whole-line))
1387             ;; else, the address was not found
1388             (unless remove
1389               (goto-char (point-max))
1390               (unless (bobp)
1391                 (insert "\n"))
1392               (insert a "\n")))))
1393       (save-buffer))))
1394
1395 ;;; returns t if the sender is in the whitelist, nil or
1396 ;;; spam-split-group otherwise
1397 (defun spam-check-whitelist ()
1398   ;; FIXME!  Should it detect when file timestamps change?
1399   (unless spam-whitelist-cache
1400     (setq spam-whitelist-cache (spam-parse-list spam-whitelist)))
1401   (if (spam-from-listed-p spam-whitelist-cache) 
1402       t
1403     (if spam-use-whitelist-exclusive
1404         spam-split-group
1405       nil)))
1406
1407 (defun spam-check-blacklist ()
1408   ;; FIXME!  Should it detect when file timestamps change?
1409   (unless spam-blacklist-cache
1410     (setq spam-blacklist-cache (spam-parse-list spam-blacklist)))
1411   (and (spam-from-listed-p spam-blacklist-cache) spam-split-group))
1412
1413 (defun spam-parse-list (file)
1414   (when (file-readable-p file)
1415     (let (contents address)
1416       (with-temp-buffer
1417         (insert-file-contents file)
1418         (while (not (eobp))
1419           (setq address (buffer-substring (point) (spam-point-at-eol)))
1420           (forward-line 1)
1421           ;; insert the e-mail address if detected, otherwise the raw data
1422           (unless (zerop (length address))
1423             (let ((pure-address (nth 1 (gnus-extract-address-components address))))
1424               (push (or pure-address address) contents)))))
1425       (nreverse contents))))
1426
1427 (defun spam-from-listed-p (cache)
1428   (let ((from (nnmail-fetch-field "from"))
1429         found)
1430     (while cache
1431       (let ((address (pop cache)))
1432         (unless (zerop (length address)) ; 0 for a nil address too
1433           (setq address (regexp-quote address))
1434           ;; fix regexp-quote's treatment of user-intended regexes
1435           (while (string-match "\\\\\\*" address)
1436             (setq address (replace-match ".*" t t address))))
1437         (when (and address (string-match address from))
1438           (setq found t
1439                 cache nil))))
1440     found))
1441
1442 (defun spam-filelist-register-routine (articles blacklist &optional unregister)
1443   (let ((de-symbol (if blacklist 'spam-use-whitelist 'spam-use-blacklist))
1444         (declassification (if blacklist 'ham 'spam))
1445         (enter-function 
1446          (if blacklist 'spam-enter-blacklist 'spam-enter-whitelist))
1447         (remove-function
1448          (if blacklist 'spam-enter-whitelist 'spam-enter-blacklist))
1449         from addresses unregister-list)
1450     (dolist (article articles)
1451       (let ((from (spam-fetch-field-from-fast article))
1452             (id (spam-fetch-field-message-id-fast article))
1453             sender-ignored)
1454         (when (stringp from)
1455           (dolist (ignore-regex spam-blacklist-ignored-regexes)
1456             (when (and (not sender-ignored)
1457                        (stringp ignore-regex)
1458                        (string-match ignore-regex from))
1459               (setq sender-ignored t)))
1460           ;; remember the messages we need to unregister, unless remove is set
1461           (when (and
1462                  (null unregister) 
1463                  (spam-log-unregistration-needed-p
1464                   id 'process declassification de-symbol))
1465             (push from unregister-list))
1466           (unless sender-ignored
1467             (push from addresses)))))
1468
1469     (if unregister
1470         (funcall enter-function addresses t) ; unregister all these addresses
1471       ;; else, register normally and unregister what we need to
1472       (funcall remove-function unregister-list t)
1473       (dolist (article unregister-list)
1474         (spam-log-undo-registration
1475          (spam-fetch-field-message-id-fast article)
1476          'process
1477          declassification
1478          de-symbol))
1479       (funcall enter-function addresses nil))))
1480
1481 (defun spam-blacklist-unregister-routine (articles)
1482   (spam-blacklist-register-routine articles t))
1483
1484 (defun spam-blacklist-register-routine (articles &optional unregister)
1485   (spam-filelist-register-routine articles t unregister))
1486
1487 (defun spam-whitelist-unregister-routine (articles)
1488   (spam-whitelist-register-routine articles t))
1489
1490 (defun spam-whitelist-register-routine (articles &optional unregister)
1491   (spam-filelist-register-routine articles nil unregister))
1492
1493 \f
1494 ;;;; Spam-report glue
1495 (defun spam-report-gmane-register-routine (articles)
1496   (when articles
1497     (apply 'spam-report-gmane articles)))
1498
1499 \f
1500 ;;;; Bogofilter
1501 (defun spam-check-bogofilter-headers (&optional score)
1502   (let ((header (nnmail-fetch-field spam-bogofilter-header)))
1503     (when header                        ; return nil when no header
1504       (if score                         ; scoring mode
1505           (if (string-match "spamicity=\\([0-9.]+\\)" header)
1506               (match-string 1 header)
1507             "0")
1508         ;; spam detection mode
1509         (when (string-match spam-bogofilter-bogosity-positive-spam-header
1510                             header)
1511           spam-split-group)))))
1512
1513 ;; return something sensible if the score can't be determined
1514 (defun spam-bogofilter-score ()
1515   "Get the Bogofilter spamicity score"
1516   (interactive)
1517   (save-window-excursion
1518     (gnus-summary-show-article t)
1519     (set-buffer gnus-article-buffer)
1520     (let ((score (or (spam-check-bogofilter-headers t)
1521                      (spam-check-bogofilter t))))
1522       (message "Spamicity score %s" score)
1523       (or score "0"))
1524     (gnus-summary-show-article)))
1525
1526 (defun spam-check-bogofilter (&optional score)
1527   "Check the Bogofilter backend for the classification of this message"
1528   (let ((article-buffer-name (buffer-name))
1529         (db spam-bogofilter-database-directory)
1530         return)
1531     (with-temp-buffer
1532       (let ((temp-buffer-name (buffer-name)))
1533         (save-excursion
1534           (set-buffer article-buffer-name)
1535           (apply 'call-process-region
1536                  (point-min) (point-max) 
1537                  spam-bogofilter-path
1538                  nil temp-buffer-name nil
1539                  (if db `("-d" ,db "-v") `("-v")))
1540           (setq return (spam-check-bogofilter-headers score)))))
1541     return))
1542
1543 (defun spam-bogofilter-register-with-bogofilter (articles 
1544                                                  spam 
1545                                                  &optional unregister)
1546   "Register an article, given as a string, as spam or non-spam."
1547   (dolist (article articles)
1548     (let ((article-string (spam-get-article-as-string article))
1549           (db spam-bogofilter-database-directory)
1550           (switch (if unregister
1551                       (if spam 
1552                           spam-bogofilter-spam-strong-switch
1553                         spam-bogofilter-ham-strong-switch)
1554                     (if spam 
1555                         spam-bogofilter-spam-switch 
1556                       spam-bogofilter-ham-switch))))
1557       (when (stringp article-string)
1558         (with-temp-buffer
1559           (insert article-string)
1560
1561           (apply 'call-process-region
1562                  (point-min) (point-max) 
1563                  spam-bogofilter-path
1564                  nil nil nil switch
1565                  (if db `("-d" ,db "-v") `("-v"))))))))
1566   
1567 (defun spam-bogofilter-register-spam-routine (articles &optional unregister)
1568   (spam-bogofilter-register-with-bogofilter articles t unregister))
1569
1570 (defun spam-bogofilter-unregister-spam-routine (articles)
1571   (spam-bogofilter-register-spam-routine articles t))
1572
1573 (defun spam-bogofilter-register-ham-routine (articles &optional unregister)
1574   (spam-bogofilter-register-with-bogofilter articles nil unregister))
1575
1576 (defun spam-bogofilter-unregister-ham-routine (articles)
1577   (spam-bogofilter-register-ham-routine articles t))
1578
1579
1580 \f
1581 ;;;; spamoracle
1582 (defun spam-check-spamoracle ()
1583   "Run spamoracle on an article to determine whether it's spam."
1584   (let ((article-buffer-name (buffer-name)))
1585     (with-temp-buffer
1586       (let ((temp-buffer-name (buffer-name)))
1587         (save-excursion
1588           (set-buffer article-buffer-name)
1589           (let ((status 
1590                  (apply 'call-process-region 
1591                         (point-min) (point-max)
1592                         spam-spamoracle-binary 
1593                         nil temp-buffer-name nil
1594                         (if spam-spamoracle-database
1595                             `("-f" ,spam-spamoracle-database "mark")
1596                           '("mark")))))
1597             (if (eq 0 status)
1598                 (progn
1599                   (set-buffer temp-buffer-name)
1600                   (goto-char (point-min))
1601                   (when (re-search-forward "^X-Spam: yes;" nil t)
1602                     spam-split-group))
1603               (error "Error running spamoracle" status))))))))
1604
1605 (defun spam-spamoracle-learn (articles article-is-spam-p &optional unregister)
1606   "Run spamoracle in training mode."
1607   (with-temp-buffer
1608     (let ((temp-buffer-name (buffer-name)))
1609       (save-excursion
1610         (goto-char (point-min))
1611         (dolist (article articles)
1612           (insert (spam-get-article-as-string article)))
1613         (let* ((arg (if (spam-xor unregister article-is-spam-p)
1614                         "-spam" 
1615                       "-good"))
1616                (status 
1617                 (apply 'call-process-region
1618                        (point-min) (point-max)
1619                        spam-spamoracle-binary
1620                        nil temp-buffer-name nil
1621                        (if spam-spamoracle-database
1622                            `("-f" ,spam-spamoracle-database 
1623                              "add" ,arg)
1624                          `("add" ,arg)))))
1625           (when (not (eq 0 status))
1626             (error "Error running spamoracle" status)))))))
1627
1628 (defun spam-spamoracle-learn-ham (articles &optional unregister)
1629   (spam-spamoracle-learn articles nil unregister))
1630
1631 (defun spam-spamoracle-unlearn-ham (articles &optional unregister)
1632   (spam-spamoracle-learn-ham articles t))
1633
1634 (defun spam-spamoracle-learn-spam (articles &optional unregister)
1635   (spam-spamoracle-learn articles t unregister))
1636
1637 (defun spam-spamoracle-unlearn-spam (articles &optional unregister)
1638   (spam-spamoracle-learn-spam articles t))
1639
1640 \f
1641 ;;;; Hooks
1642
1643 ;;;###autoload
1644 (defun spam-initialize ()
1645   "Install the spam.el hooks and do other initialization"
1646   (interactive)
1647   (setq spam-install-hooks t)
1648   ;; TODO: How do we redo this every time spam-face is customized?
1649   (push '((eq mark gnus-spam-mark) . spam-face)
1650         gnus-summary-highlight)
1651   ;; Add hooks for loading and saving the spam stats
1652   (add-hook 'gnus-save-newsrc-hook 'spam-maybe-spam-stat-save)
1653   (add-hook 'gnus-get-top-new-news-hook 'spam-maybe-spam-stat-load)
1654   (add-hook 'gnus-startup-hook 'spam-maybe-spam-stat-load)
1655   (add-hook 'gnus-summary-prepare-exit-hook 'spam-summary-prepare-exit)
1656   (add-hook 'gnus-summary-prepare-hook 'spam-summary-prepare)
1657   (add-hook 'gnus-get-new-news-hook 'spam-setup-widening))
1658
1659 (defun spam-unload-hook ()
1660   "Uninstall the spam.el hooks"
1661   (interactive)
1662   (remove-hook 'gnus-save-newsrc-hook 'spam-maybe-spam-stat-save)
1663   (remove-hook 'gnus-get-top-new-news-hook 'spam-maybe-spam-stat-load)
1664   (remove-hook 'gnus-startup-hook 'spam-maybe-spam-stat-load)
1665   (remove-hook 'gnus-summary-prepare-exit-hook 'spam-summary-prepare-exit)
1666   (remove-hook 'gnus-summary-prepare-hook 'spam-summary-prepare)
1667   (remove-hook 'gnus-get-new-news-hook 'spam-setup-widening))
1668
1669 (when spam-install-hooks
1670   (spam-initialize))
1671
1672 (provide 'spam)
1673
1674 ;;; spam.el ends here.