Synch to Gnus 200312102120.
[elisp/gnus.git-] / lisp / spam.el
1 ;; TODO: spam scores, detection of spam in newsgroups, cross-server splitting, remote processing, training through files
2
3 ;;; spam.el --- Identifying spam
4 ;; Copyright (C) 2002, 2003 Free Software Foundation, Inc.
5
6 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
7 ;; Keywords: network
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING.  If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;;; This module addresses a few aspects of spam control under Gnus.  Page
29 ;;; breaks are used for grouping declarations and documentation relating to
30 ;;; each particular aspect.
31
32 ;;; The integration with Gnus is not yet complete.  See various `FIXME'
33 ;;; comments, below, for supplementary explanations or discussions.
34
35 ;;; Several TODO items are marked as such
36
37 ;;; Code:
38
39 (require 'path-util)
40
41 (eval-when-compile (require 'cl))
42
43 (require 'gnus-sum)
44
45 (require 'gnus-uu)                      ; because of key prefix issues
46 ;;; for the definitions of group content classification and spam processors
47 (require 'gnus) 
48 (require 'message)              ;for the message-fetch-field functions
49
50 ;; for nnimap-split-download-body-default
51 (eval-when-compile (require 'nnimap))
52
53 ;; autoload query-dig
54 (eval-and-compile
55   (autoload 'query-dig "dig"))
56
57 ;; autoload spam-report
58 (eval-and-compile
59   (autoload 'spam-report-gmane "spam-report"))
60
61 ;; autoload gnus-registry
62 (eval-and-compile
63   (autoload 'gnus-registry-store-extra-entry "gnus-registry")
64   (autoload 'gnus-registry-fetch-extra "gnus-registry"))
65
66 ;; autoload query-dns
67 (eval-and-compile
68   (autoload 'query-dns "dns"))
69
70 ;;; Main parameters.
71
72 (defgroup spam nil
73   "Spam configuration.")
74
75 (defcustom spam-directory "~/News/spam/"
76   "Directory for spam whitelists and blacklists."
77   :type 'directory
78   :group 'spam)
79
80 (defcustom spam-move-spam-nonspam-groups-only t
81   "Whether spam should be moved in non-spam groups only.
82 When t, only ham and unclassified groups will have their spam moved
83 to the spam-process-destination.  When nil, spam will also be moved from
84 spam groups."
85   :type 'boolean
86   :group 'spam)
87
88 (defcustom spam-process-ham-in-nonham-groups nil
89   "Whether ham should be processed in non-ham groups."
90   :type 'boolean
91   :group 'spam)
92
93 (defcustom spam-log-to-registry nil
94   "Whether spam/ham processing should be logged in the registry."
95   :type 'boolean
96   :group 'spam)
97
98 (defcustom spam-process-ham-in-spam-groups nil
99   "Whether ham should be processed in spam groups."
100   :type 'boolean
101   :group 'spam)
102
103 (defcustom spam-mark-only-unseen-as-spam t
104   "Whether only unseen articles should be marked as spam in spam
105 groups.  When nil, all unread articles in a spam group are marked as
106 spam.  Set this if you want to leave an article unread in a spam group
107 without losing it to the automatic spam-marking process."
108   :type 'boolean
109   :group 'spam)
110
111 (defcustom spam-mark-ham-unread-before-move-from-spam-group nil
112   "Whether ham should be marked unread before it's moved out of a spam
113 group according to ham-process-destination.  This variable is an
114 official entry in the international Longest Variable Name
115 Competition."
116   :type 'boolean
117   :group 'spam)
118
119 (defcustom spam-disable-spam-split-during-ham-respool nil
120   "Whether spam-split should be ignored while resplitting ham in
121 a process destination.  This is useful to prevent ham from ending
122 up in the same spam group after the resplit.  Don't set this to t
123 if you have spam-split as the last rule in your split
124 configuration."
125   :type 'boolean
126   :group 'spam)
127
128 (defcustom spam-whitelist (expand-file-name "whitelist" spam-directory)
129   "The location of the whitelist.
130 The file format is one regular expression per line.
131 The regular expression is matched against the address."
132   :type 'file
133   :group 'spam)
134
135 (defcustom spam-blacklist (expand-file-name "blacklist" spam-directory)
136   "The location of the blacklist.
137 The file format is one regular expression per line.
138 The regular expression is matched against the address."
139   :type 'file
140   :group 'spam)
141
142 (defcustom spam-use-dig t
143   "Whether query-dig should be used instead of query-dns."
144   :type 'boolean
145   :group 'spam)
146
147 (defcustom spam-use-blacklist nil
148   "Whether the blacklist should be used by spam-split."
149   :type 'boolean
150   :group 'spam)
151
152 (defcustom spam-blacklist-ignored-regexes nil
153   "Regular expressions that the blacklist should ignore."
154   :type '(repeat (regexp :tag "Regular expression to ignore when blacklisting"))
155   :group 'spam)
156
157 (defcustom spam-use-whitelist nil
158   "Whether the whitelist should be used by spam-split."
159   :type 'boolean
160   :group 'spam)
161
162 (defcustom spam-use-whitelist-exclusive nil
163   "Whether whitelist-exclusive should be used by spam-split.
164 Exclusive whitelisting means that all messages from senders not in the whitelist
165 are considered spam."
166   :type 'boolean
167   :group 'spam)
168
169 (defcustom spam-use-blackholes nil
170   "Whether blackholes should be used by spam-split."
171   :type 'boolean
172   :group 'spam)
173
174 (defcustom spam-use-hashcash nil
175   "Whether hashcash payments should be detected by spam-split."
176   :type 'boolean
177   :group 'spam)
178
179 (defcustom spam-use-regex-headers nil
180   "Whether a header regular expression match should be used by spam-split.
181 Also see the variables `spam-regex-headers-spam' and `spam-regex-headers-ham'."
182   :type 'boolean
183   :group 'spam)
184
185 (defcustom spam-use-regex-body nil
186   "Whether a body regular expression match should be used by spam-split.
187 Also see the variables `spam-regex-body-spam' and `spam-regex-body-ham'."
188   :type 'boolean
189   :group 'spam)
190
191 (defcustom spam-use-bogofilter-headers nil
192   "Whether bogofilter headers should be used by spam-split.
193 Enable this if you pre-process messages with Bogofilter BEFORE Gnus sees them."
194   :type 'boolean
195   :group 'spam)
196
197 (defcustom spam-use-bogofilter nil
198   "Whether bogofilter should be invoked by spam-split.
199 Enable this if you want Gnus to invoke Bogofilter on new messages."
200   :type 'boolean
201   :group 'spam)
202
203 (defcustom spam-use-BBDB nil
204   "Whether BBDB should be used by spam-split."
205   :type 'boolean
206   :group 'spam)
207
208 (defcustom spam-use-BBDB-exclusive nil
209   "Whether BBDB-exclusive should be used by spam-split.
210 Exclusive BBDB means that all messages from senders not in the BBDB are 
211 considered spam."
212   :type 'boolean
213   :group 'spam)
214
215 (defcustom spam-use-ifile nil
216   "Whether ifile should be used by spam-split."
217   :type 'boolean
218   :group 'spam)
219
220 (defcustom spam-use-stat nil
221   "Whether spam-stat should be used by spam-split."
222   :type 'boolean
223   :group 'spam)
224
225 (defcustom spam-use-spamoracle nil
226   "Whether spamoracle should be used by spam-split."
227   :type 'boolean
228   :group 'spam)
229
230 (defcustom spam-install-hooks (or
231                                spam-use-dig
232                                spam-use-blacklist
233                                spam-use-whitelist 
234                                spam-use-whitelist-exclusive 
235                                spam-use-blackholes 
236                                spam-use-hashcash 
237                                spam-use-regex-headers 
238                                spam-use-regex-body 
239                                spam-use-bogofilter-headers 
240                                spam-use-bogofilter 
241                                spam-use-BBDB 
242                                spam-use-BBDB-exclusive 
243                                spam-use-ifile 
244                                spam-use-stat
245                                spam-use-spamoracle)
246   "Whether the spam hooks should be installed, default to t if one of
247 the spam-use-* variables is set."
248   :group 'spam
249   :type 'boolean)
250
251 (defcustom spam-split-group "spam"
252   "Group name where incoming spam should be put by spam-split."
253   :type 'string
254   :group 'spam)
255
256 ;;; TODO: deprecate this variable, it's confusing since it's a list of strings,
257 ;;; not regular expressions
258 (defcustom spam-junk-mailgroups (cons 
259                                  spam-split-group 
260                                  '("mail.junk" "poste.pourriel"))
261   "Mailgroups with spam contents.
262 All unmarked article in such group receive the spam mark on group entry."
263   :type '(repeat (string :tag "Group"))
264   :group 'spam)
265
266 (defcustom spam-blackhole-servers '("bl.spamcop.net" "relays.ordb.org" 
267                                     "dev.null.dk" "relays.visi.com")
268   "List of blackhole servers."
269   :type '(repeat (string :tag "Server"))
270   :group 'spam)
271
272 (defcustom spam-blackhole-good-server-regex nil
273   "String matching IP addresses that should not be checked in the blackholes"
274   :type '(radio (const nil)
275                 (regexp :format "%t: %v\n" :size 0))
276   :group 'spam)
277
278 (defcustom spam-face 'gnus-splash-face
279   "Face for spam-marked articles"
280   :type 'face
281   :group 'spam)
282
283 (defcustom spam-regex-headers-spam '("^X-Spam-Flag: YES")
284   "Regular expression for positive header spam matches"
285   :type '(repeat (regexp :tag "Regular expression to match spam header"))
286   :group 'spam)
287
288 (defcustom spam-regex-headers-ham '("^X-Spam-Flag: NO")
289   "Regular expression for positive header ham matches"
290   :type '(repeat (regexp :tag "Regular expression to match ham header"))
291   :group 'spam)
292
293 (defcustom spam-regex-body-spam '()
294   "Regular expression for positive body spam matches"
295   :type '(repeat (regexp :tag "Regular expression to match spam body"))
296   :group 'spam)
297
298 (defcustom spam-regex-body-ham '()
299   "Regular expression for positive body ham matches"
300   :type '(repeat (regexp :tag "Regular expression to match ham body"))
301   :group 'spam)
302
303 (defgroup spam-ifile nil
304   "Spam ifile configuration."
305   :group 'spam)
306
307 (defcustom spam-ifile-path (exec-installed-p "ifile")
308   "File path of the ifile executable program."
309   :type '(choice (file :tag "Location of ifile")
310                  (const :tag "ifile is not installed"))
311   :group 'spam-ifile)
312
313 (defcustom spam-ifile-database-path nil
314   "File path of the ifile database."
315   :type '(choice (file :tag "Location of the ifile database")
316                  (const :tag "Use the default"))
317   :group 'spam-ifile)
318
319 (defcustom spam-ifile-spam-category "spam"
320   "Name of the spam ifile category."  
321   :type 'string
322   :group 'spam-ifile)
323
324 (defcustom spam-ifile-ham-category nil
325   "Name of the ham ifile category.  If nil, the current group name will
326 be used."
327   :type '(choice (string :tag "Use a fixed category")
328                  (const :tag "Use the current group name"))
329   :group 'spam-ifile)
330
331 (defcustom spam-ifile-all-categories nil
332   "Whether the ifile check will return all categories, or just spam.
333 Set this to t if you want to use the spam-split invocation of ifile as
334 your main source of newsgroup names."
335   :type 'boolean
336   :group 'spam-ifile)
337
338 (defgroup spam-bogofilter nil
339   "Spam bogofilter configuration."
340   :group 'spam)
341
342 (defcustom spam-bogofilter-path (exec-installed-p "bogofilter")
343   "File path of the Bogofilter executable program."
344   :type '(choice (file :tag "Location of bogofilter")
345                  (const :tag "Bogofilter is not installed"))
346   :group 'spam-bogofilter)
347
348 (defcustom spam-bogofilter-header "X-Bogosity"
349   "The header that Bogofilter inserts in messages."
350   :type 'string
351   :group 'spam-bogofilter)
352
353 (defcustom spam-bogofilter-spam-switch "-s"
354   "The switch that Bogofilter uses to register spam messages."
355   :type 'string
356   :group 'spam-bogofilter)
357
358 (defcustom spam-bogofilter-ham-switch "-n"
359   "The switch that Bogofilter uses to register ham messages."
360   :type 'string
361   :group 'spam-bogofilter)
362
363 (defcustom spam-bogofilter-spam-strong-switch "-S"
364   "The switch that Bogofilter uses to unregister ham messages."
365   :type 'string
366   :group 'spam-bogofilter)
367
368 (defcustom spam-bogofilter-ham-strong-switch "-N"
369   "The switch that Bogofilter uses to unregister spam messages."
370   :type 'string
371   :group 'spam-bogofilter)
372
373 (defcustom spam-bogofilter-bogosity-positive-spam-header "^\\(Yes\\|Spam\\)"
374   "The regex on `spam-bogofilter-header' for positive spam identification."
375   :type 'regexp
376   :group 'spam-bogofilter)
377
378 (defcustom spam-bogofilter-database-directory nil
379   "Directory path of the Bogofilter databases."
380   :type '(choice (directory 
381                   :tag "Location of the Bogofilter database directory")
382                  (const :tag "Use the default"))
383   :group 'spam-bogofilter)
384
385 (defgroup spam-spamoracle nil
386   "Spam spamoracle configuration."
387   :group 'spam)
388
389 (defcustom spam-spamoracle-database nil 
390   "Location of spamoracle database file. When nil, use the default
391 spamoracle database."
392   :type '(choice (directory :tag "Location of spamoracle database file.")
393                  (const :tag "Use the default"))
394   :group 'spam-spamoracle)
395
396 (defcustom spam-spamoracle-binary (executable-find "spamoracle")
397   "Location of the spamoracle binary."
398   :type '(choice (directory :tag "Location of the spamoracle binary")
399                  (const :tag "Use the default"))
400   :group 'spam-spamoracle)
401
402 ;;; Key bindings for spam control.
403
404 (gnus-define-keys gnus-summary-mode-map
405   "St" spam-bogofilter-score
406   "Sx" gnus-summary-mark-as-spam
407   "Mst" spam-bogofilter-score
408   "Msx" gnus-summary-mark-as-spam
409   "\M-d" gnus-summary-mark-as-spam)
410
411 (defvar spam-old-ham-articles nil
412   "List of old ham articles, generated when a group is entered.")
413
414 (defvar spam-old-spam-articles nil
415   "List of old spam articles, generated when a group is entered.")
416
417
418 ;; convenience functions
419 (defun spam-xor (a b) ; logical exclusive or
420   (and (or a b) (not (and a b))))
421
422 (defun spam-group-ham-mark-p (group mark &optional spam)
423   (when (stringp group)
424     (let* ((marks (spam-group-ham-marks group spam))
425            (marks (if (symbolp mark) 
426                       marks 
427                     (mapcar 'symbol-value marks))))
428       (memq mark marks))))
429
430 (defun spam-group-spam-mark-p (group mark)
431   (spam-group-ham-mark-p group mark t))
432
433 (defun spam-group-ham-marks (group &optional spam)
434   (when (stringp group)
435     (let* ((marks (if spam
436                       (gnus-parameter-spam-marks group)
437                     (gnus-parameter-ham-marks group)))
438            (marks (car marks))
439            (marks (if (listp (car marks)) (car marks) marks)))
440       marks)))
441
442 (defun spam-group-spam-marks (group)
443   (spam-group-ham-marks group t))
444
445 (defun spam-group-spam-contents-p (group)
446   (if (stringp group)
447       (or (member group spam-junk-mailgroups)
448           (memq 'gnus-group-spam-classification-spam 
449                 (gnus-parameter-spam-contents group)))
450     nil))
451   
452 (defun spam-group-ham-contents-p (group)
453   (if (stringp group)
454       (memq 'gnus-group-spam-classification-ham 
455             (gnus-parameter-spam-contents group))
456     nil))
457
458 (defvar spam-list-of-processors
459   '((gnus-group-spam-exit-processor-report-gmane spam spam-use-gmane)
460     (gnus-group-spam-exit-processor-bogofilter   spam spam-use-bogofilter)
461     (gnus-group-spam-exit-processor-blacklist    spam spam-use-blacklist)
462     (gnus-group-spam-exit-processor-ifile        spam spam-use-ifile)
463     (gnus-group-spam-exit-processor-stat         spam spam-use-stat)
464     (gnus-group-spam-exit-processor-spamoracle   spam spam-use-spamoracle)
465     (gnus-group-ham-exit-processor-ifile         ham spam-use-ifile)
466     (gnus-group-ham-exit-processor-bogofilter    ham spam-use-bogofilter)
467     (gnus-group-ham-exit-processor-stat          ham spam-use-stat)
468     (gnus-group-ham-exit-processor-whitelist     ham spam-use-whitelist)
469     (gnus-group-ham-exit-processor-BBDB          ham spam-use-BBDB)
470     (gnus-group-ham-exit-processor-copy          ham spam-use-ham-copy)
471     (gnus-group-ham-exit-processor-spamoracle    ham spam-use-spamoracle))
472   "The spam-list-of-processors list contains pairs associating a
473 ham/spam exit processor variable with a classification and a
474 spam-use-* variable.")
475
476 (defun spam-group-processor-p (group processor)
477   (if (and (stringp group)
478            (symbolp processor))
479       (or (member processor (nth 0 (gnus-parameter-spam-process group)))
480           (spam-group-processor-multiple-p 
481            group 
482            (cdr-safe (assoc processor spam-list-of-processors))))
483     nil))
484
485 (defun spam-group-processor-multiple-p (group processor-info)
486   (let* ((classification (nth 0 processor-info))
487          (check (nth 1 processor-info))
488          (parameters (nth 0 (gnus-parameter-spam-process group)))
489          found)
490     (dolist (parameter parameters)
491       (when (and (null found)
492                  (listp parameter)
493                  (eq classification (nth 0 parameter))
494                  (eq check (nth 1 parameter)))
495         (setq found t)))
496     found))
497
498 (defun spam-group-spam-processor-report-gmane-p (group)
499   (spam-group-processor-p group 'gnus-group-spam-exit-processor-report-gmane))
500
501 (defun spam-group-spam-processor-bogofilter-p (group)
502   (spam-group-processor-p group 'gnus-group-spam-exit-processor-bogofilter))
503
504 (defun spam-group-spam-processor-blacklist-p (group)
505   (spam-group-processor-p group 'gnus-group-spam-exit-processor-blacklist))
506
507 (defun spam-group-spam-processor-ifile-p (group)
508   (spam-group-processor-p group 'gnus-group-spam-exit-processor-ifile))
509
510 (defun spam-group-ham-processor-ifile-p (group)
511   (spam-group-processor-p group 'gnus-group-ham-exit-processor-ifile))
512
513 (defun spam-group-spam-processor-spamoracle-p (group)
514   (spam-group-processor-p group 'gnus-group-spam-exit-processor-spamoracle))
515
516 (defun spam-group-ham-processor-bogofilter-p (group)
517   (spam-group-processor-p group 'gnus-group-ham-exit-processor-bogofilter))
518
519 (defun spam-group-spam-processor-stat-p (group)
520   (spam-group-processor-p group 'gnus-group-spam-exit-processor-stat))
521
522 (defun spam-group-ham-processor-stat-p (group)
523   (spam-group-processor-p group 'gnus-group-ham-exit-processor-stat))
524
525 (defun spam-group-ham-processor-whitelist-p (group)
526   (spam-group-processor-p group 'gnus-group-ham-exit-processor-whitelist))
527
528 (defun spam-group-ham-processor-BBDB-p (group)
529   (spam-group-processor-p group 'gnus-group-ham-exit-processor-BBDB))
530
531 (defun spam-group-ham-processor-copy-p (group)
532   (spam-group-processor-p group 'gnus-group-ham-exit-processor-copy))
533
534 (defun spam-group-ham-processor-spamoracle-p (group)
535   (spam-group-processor-p group 'gnus-group-ham-exit-processor-spamoracle))
536
537 ;;; Summary entry and exit processing.
538
539 (defun spam-summary-prepare ()
540   (setq spam-old-ham-articles 
541         (spam-list-articles gnus-newsgroup-articles 'ham))
542   (setq spam-old-spam-articles 
543         (spam-list-articles gnus-newsgroup-articles 'spam))
544   (spam-mark-junk-as-spam-routine))
545
546 ;; The spam processors are invoked for any group, spam or ham or neither
547 (defun spam-summary-prepare-exit ()
548   (unless gnus-group-is-exiting-without-update-p
549     (gnus-message 6 "Exiting summary buffer and applying spam rules")
550
551     ;; first of all, unregister any articles that are no longer ham or spam
552     ;; we have to iterate over the processors, or else we'll be too slow
553     (dolist (classification '(spam ham))
554       (let* ((old-articles (if (eq classification 'spam)
555                                spam-old-spam-articles 
556                              spam-old-ham-articles))
557              (new-articles (spam-list-articles 
558                             gnus-newsgroup-articles 
559                             classification))
560              (changed-articles (gnus-set-difference old-articles new-articles)))
561         ;; now that we have the changed articles, we go through the processors
562         (dolist (processor-param spam-list-of-processors)
563           (let ((processor (nth 0 processor-param))
564                 (processor-classification (nth 1 processor-param))
565                 (check (nth 2 processor-param))
566                 unregister-list)
567             (dolist (article changed-articles)
568               (let ((id (spam-fetch-field-message-id-fast article)))
569                 (when (spam-log-unregistration-needed-p 
570                        id 'process classification check)
571                   (push article unregister-list))))
572             ;; call spam-register-routine with specific articles to unregister,
573             ;; when there are articles to unregister and the check is enabled
574             (when (and unregister-list (symbol-value check))
575               (spam-register-routine classification check t unregister-list))))))
576       
577     ;; find all the spam processors applicable to this group
578     (dolist (processor-param spam-list-of-processors)
579       (let ((processor (nth 0 processor-param))
580             (classification (nth 1 processor-param))
581             (check (nth 2 processor-param)))
582         (when (and (eq 'spam classification)
583                    (spam-group-processor-p gnus-newsgroup-name processor))
584           (spam-register-routine classification check))))
585
586     (if spam-move-spam-nonspam-groups-only      
587         (when (not (spam-group-spam-contents-p gnus-newsgroup-name))
588           (spam-mark-spam-as-expired-and-move-routine
589            (gnus-parameter-spam-process-destination gnus-newsgroup-name)))
590       (gnus-message 5 "Marking spam as expired and moving it to %s" 
591                     gnus-newsgroup-name)
592       (spam-mark-spam-as-expired-and-move-routine 
593        (gnus-parameter-spam-process-destination gnus-newsgroup-name)))
594
595     ;; now we redo spam-mark-spam-as-expired-and-move-routine to only
596     ;; expire spam, in case the above did not expire them
597     (gnus-message 5 "Marking spam as expired without moving it")
598     (spam-mark-spam-as-expired-and-move-routine nil)
599
600     (when (or (spam-group-ham-contents-p gnus-newsgroup-name)
601               (and (spam-group-spam-contents-p gnus-newsgroup-name)
602                    spam-process-ham-in-spam-groups)
603               spam-process-ham-in-nonham-groups)
604       ;; find all the ham processors applicable to this group
605       (dolist (processor-param spam-list-of-processors)
606         (let ((processor (nth 0 processor-param))
607               (classification (nth 1 processor-param))
608               (check (nth 2 processor-param)))
609           (when (and (eq 'ham classification)
610                      (spam-group-processor-p gnus-newsgroup-name processor))
611             (spam-register-routine classification check)))))
612
613     (when (spam-group-ham-processor-copy-p gnus-newsgroup-name)
614       (gnus-message 5 "Copying ham")
615       (spam-ham-copy-routine
616        (gnus-parameter-ham-process-destination gnus-newsgroup-name)))
617
618     ;; now move all ham articles out of spam groups
619     (when (spam-group-spam-contents-p gnus-newsgroup-name)
620       (gnus-message 5 "Moving ham messages from spam group")
621       (spam-ham-move-routine
622        (gnus-parameter-ham-process-destination gnus-newsgroup-name))))
623
624   (setq spam-old-ham-articles nil)
625   (setq spam-old-spam-articles nil))
626
627 (defun spam-mark-junk-as-spam-routine ()
628   ;; check the global list of group names spam-junk-mailgroups and the
629   ;; group parameters
630   (when (spam-group-spam-contents-p gnus-newsgroup-name)
631     (gnus-message 5 "Marking %s articles as spam"
632                   (if spam-mark-only-unseen-as-spam 
633                       "unseen"
634                     "unread"))
635     (let ((articles (if spam-mark-only-unseen-as-spam 
636                         gnus-newsgroup-unseen
637                       gnus-newsgroup-unreads)))
638       (dolist (article articles)
639         (gnus-summary-mark-article article gnus-spam-mark)))))
640
641 (defun spam-mark-spam-as-expired-and-move-routine (&rest groups)
642   (if (and (car-safe groups) (listp (car-safe groups)))
643       (apply 'spam-mark-spam-as-expired-and-move-routine (car groups))
644     (gnus-summary-kill-process-mark)
645     (let ((articles gnus-newsgroup-articles)
646           (backend-supports-deletions
647            (gnus-check-backend-function
648             'request-move-article gnus-newsgroup-name))
649           article tomove deletep)
650       (dolist (article articles)
651         (when (eq (gnus-summary-article-mark article) gnus-spam-mark)
652           (gnus-summary-mark-article article gnus-expirable-mark)
653           (push article tomove)))
654     
655       ;; now do the actual copies
656       (dolist (group groups)
657         (when (and tomove
658                    (stringp group))
659           (dolist (article tomove)
660             (gnus-summary-set-process-mark article))
661           (when tomove
662             (if (or (not backend-supports-deletions)
663                     (> (length groups) 1))
664                 (progn 
665                   (gnus-summary-copy-article nil group)
666                   (setq deletep t))
667               (gnus-summary-move-article nil group)))))
668     
669       ;; now delete the articles, if there was a copy done, and the
670       ;; backend allows it
671       (when (and deletep backend-supports-deletions)
672         (dolist (article tomove)
673           (gnus-summary-set-process-mark article))
674         (when tomove
675           (let ((gnus-novice-user nil)) ; don't ask me if I'm sure
676             (gnus-summary-delete-article nil))))
677     
678       (gnus-summary-yank-process-mark))))
679  
680 (defun spam-ham-copy-or-move-routine (copy groups)
681   (gnus-summary-kill-process-mark)
682   (let ((articles gnus-newsgroup-articles)
683         (backend-supports-deletions
684          (gnus-check-backend-function
685           'request-move-article gnus-newsgroup-name))
686         (respool-method (gnus-find-method-for-group gnus-newsgroup-name))
687         article mark todo deletep respool)
688     (dolist (article articles)
689       (when (spam-group-ham-mark-p gnus-newsgroup-name
690                                    (gnus-summary-article-mark article))
691         (push article todo)))
692
693     (when (member 'respool groups)
694       (setq respool t)                  ; boolean for later
695       (setq groups '("fake"))) ; when respooling, groups are dynamic so fake it
696
697     ;; now do the actual move
698     (dolist (group groups)
699       (when (and todo (stringp group))
700         (dolist (article todo)
701           (when spam-mark-ham-unread-before-move-from-spam-group
702             (gnus-summary-mark-article article gnus-unread-mark))
703           (gnus-summary-set-process-mark article))
704
705         (if respool                        ; respooling is with a "fake" group
706             (let ((spam-split-disabled
707                    (or spam-split-disabled
708                        spam-disable-spam-split-during-ham-respool)))
709               (gnus-summary-respool-article nil respool-method))
710           (if (or (not backend-supports-deletions) ; else, we are not respooling
711                   (> (length groups) 1))
712               (progn                ; if copying, copy and set deletep
713                 (gnus-summary-copy-article nil group)
714                 (setq deletep t))
715             (gnus-summary-move-article nil group))))) ; else move articles
716     
717     ;; now delete the articles, unless a) copy is t, and there was a copy done
718     ;;                                 b) a move was done to a single group
719     ;;                                 c) backend-supports-deletions is nil
720     (unless copy
721       (when (and deletep backend-supports-deletions)
722         (dolist (article todo)
723           (gnus-summary-set-process-mark article))
724         (when todo
725           (let ((gnus-novice-user nil)) ; don't ask me if I'm sure
726             (gnus-summary-delete-article nil))))))
727   
728   (gnus-summary-yank-process-mark))
729  
730 (defun spam-ham-copy-routine (&rest groups)
731   (if (and (car-safe groups) (listp (car-safe groups)))
732       (apply 'spam-ham-copy-routine (car groups))
733     (spam-ham-copy-or-move-routine t groups)))
734  
735 (defun spam-ham-move-routine (&rest groups)
736   (if (and (car-safe groups) (listp (car-safe groups)))
737       (apply 'spam-ham-move-routine (car groups))
738     (spam-ham-copy-or-move-routine nil groups)))
739  
740 (eval-and-compile
741   (defalias 'spam-point-at-eol (if (fboundp 'point-at-eol)
742                                    'point-at-eol
743                                  'line-end-position)))
744
745 (defun spam-get-article-as-string (article)
746   (let ((article-buffer (spam-get-article-as-buffer article))
747         article-string)
748     (when article-buffer
749       (save-window-excursion
750         (set-buffer article-buffer)
751         (setq article-string (buffer-string))))
752     article-string))
753
754 (defun spam-get-article-as-buffer (article)
755   (let ((article-buffer))
756     (when (numberp article)
757       (save-window-excursion
758         (gnus-summary-goto-subject article)
759         (gnus-summary-show-article t)
760         (setq article-buffer (get-buffer gnus-article-buffer))))
761     article-buffer))
762
763 ;; disabled for now
764 ;; (defun spam-get-article-as-filename (article)
765 ;;   (let ((article-filename))
766 ;;     (when (numberp article)
767 ;;       (nnml-possibly-change-directory 
768 ;;        (gnus-group-real-name gnus-newsgroup-name))
769 ;;       (setq article-filename (expand-file-name 
770 ;;                              (int-to-string article) nnml-current-directory)))
771 ;;     (if (file-exists-p article-filename)
772 ;;      article-filename
773 ;;       nil)))
774
775 (defun spam-fetch-field-from-fast (article)
776   "Fetch the `from' field quickly, using the internal gnus-data-list function"
777   (if (and (numberp article)
778            (assoc article (gnus-data-list nil)))
779       (mail-header-from 
780        (gnus-data-header (assoc article (gnus-data-list nil))))
781     nil))
782
783 (defun spam-fetch-field-subject-fast (article)
784   "Fetch the `subject' field quickly, using the internal
785   gnus-data-list function"
786   (if (and (numberp article)
787            (assoc article (gnus-data-list nil)))
788       (mail-header-subject 
789        (gnus-data-header (assoc article (gnus-data-list nil))))
790     nil))
791
792 (defun spam-fetch-field-message-id-fast (article)
793   "Fetch the `Message-ID' field quickly, using the internal
794   gnus-data-list function"
795   (if (and (numberp article)
796            (assoc article (gnus-data-list nil)))
797       (mail-header-message-id 
798        (gnus-data-header (assoc article (gnus-data-list nil))))
799     nil))
800
801 \f
802 ;;;; Spam determination.
803
804 (defvar spam-list-of-checks
805   '((spam-use-blacklist          . spam-check-blacklist)
806     (spam-use-regex-headers      . spam-check-regex-headers)
807     (spam-use-regex-body         . spam-check-regex-body)
808     (spam-use-whitelist          . spam-check-whitelist)
809     (spam-use-BBDB               . spam-check-BBDB)
810     (spam-use-ifile              . spam-check-ifile)
811     (spam-use-spamoracle         . spam-check-spamoracle)
812     (spam-use-stat               . spam-check-stat)
813     (spam-use-blackholes         . spam-check-blackholes)
814     (spam-use-hashcash           . spam-check-hashcash)
815     (spam-use-bogofilter-headers . spam-check-bogofilter-headers)
816     (spam-use-bogofilter         . spam-check-bogofilter))
817   "The spam-list-of-checks list contains pairs associating a parameter
818 variable with a spam checking function.  If the parameter variable is
819 true, then the checking function is called, and its value decides what
820 happens.  Each individual check may return nil, t, or a mailgroup
821 name.  The value nil means that the check does not yield a decision,
822 and so, that further checks are needed.  The value t means that the
823 message is definitely not spam, and that further spam checks should be
824 inhibited.  Otherwise, a mailgroup name is returned where the mail
825 should go, and further checks are also inhibited.  The usual mailgroup
826 name is the value of `spam-split-group', meaning that the message is
827 definitely a spam.")
828
829 (defvar spam-list-of-statistical-checks 
830   '(spam-use-ifile
831     spam-use-regex-body 
832     spam-use-stat 
833     spam-use-bogofilter
834     spam-use-spamoracle)
835   "The spam-list-of-statistical-checks list contains all the mail
836 splitters that need to have the full message body available.")
837
838 (defvar spam-split-disabled nil
839   "If non-nil, spam-split is disabled, and always returns nil.")
840
841 ;;;TODO: modify to invoke self with each check if invoked without specifics
842 (defun spam-split (&rest specific-checks)
843   "Split this message into the `spam' group if it is spam.
844 This function can be used as an entry in `nnmail-split-fancy',
845 for example like this: (: spam-split).  It can take checks as
846 parameters.  A string as a parameter will set the
847 spam-split-group to that string.
848
849 See the Info node `(gnus)Fancy Mail Splitting' for more details."
850   (interactive)
851   (unless spam-split-disabled
852     (let ((spam-split-group-choice spam-split-group))
853       (dolist (check specific-checks)
854         (when (stringp check)
855           (setq spam-split-group-choice check)
856           (setq specific-checks (delq check specific-checks))))
857       
858       (let ((spam-split-group spam-split-group-choice))
859         (save-excursion
860           (save-restriction
861             (dolist (check spam-list-of-statistical-checks)
862               (when (and (symbolp check) (symbol-value check))
863                 (widen)
864                 (gnus-message 8 "spam-split: widening the buffer (%s requires it)"
865                               (symbol-name check))
866                 (return)))
867             ;;   (progn (widen) (debug (buffer-string)))
868             (let ((list-of-checks spam-list-of-checks)
869                   decision)
870               (while (and list-of-checks (not decision))
871                 (let ((pair (pop list-of-checks)))
872                   (when (and (symbol-value (car pair))
873                              (or (null specific-checks)
874                                  (memq (car pair) specific-checks)))
875                     (gnus-message 5 "spam-split: calling the %s function" 
876                                   (symbol-name (cdr pair)))
877                     (setq decision (funcall (cdr pair))))))
878               (if (eq decision t)
879                   nil
880                 decision))))))))
881
882 (defvar spam-registration-functions
883   ;; first the ham register, second the spam register function
884   ;; third the ham unregister, fourth the spam unregister function
885   '((spam-use-blacklist  nil 
886                          spam-blacklist-register-routine
887                          nil
888                          spam-blacklist-unregister-routine)
889     (spam-use-whitelist  spam-whitelist-register-routine
890                          nil
891                          spam-whitelist-unregister-routine
892                          nil)
893     (spam-use-BBDB       spam-BBDB-register-routine 
894                          nil
895                          spam-BBDB-unregister-routine 
896                          nil)
897     (spam-use-ifile      spam-ifile-register-ham-routine 
898                          spam-ifile-register-spam-routine
899                          spam-ifile-unregister-ham-routine 
900                          spam-ifile-unregister-spam-routine)
901     (spam-use-spamoracle spam-spamoracle-learn-ham 
902                          spam-spamoracle-learn-spam
903                          spam-spamoracle-unlearn-ham 
904                          spam-spamoracle-unlearn-spam)
905     (spam-use-stat       spam-stat-register-ham-routine 
906                          spam-stat-register-spam-routine
907                          spam-stat-unregister-ham-routine 
908                          spam-stat-unregister-spam-routine)
909     ;; note that spam-use-gmane is not a legitimate check
910     (spam-use-gmane      nil 
911                          spam-report-gmane-register-routine
912                          ;; does Gmane support unregistration?
913                          nil
914                          nil)
915     (spam-use-bogofilter spam-bogofilter-register-ham-routine 
916                          spam-bogofilter-register-spam-routine
917                          spam-bogofilter-unregister-ham-routine 
918                          spam-bogofilter-unregister-spam-routine))
919   "The spam-registration-functions list contains pairs
920 associating a parameter variable with the ham and spam
921 registration functions, and the ham and spam unregistration
922 functions")
923
924 (defun spam-classification-valid-p (classification)
925   (or  (eq classification 'spam)
926        (eq classification 'ham)))
927
928 (defun spam-process-type-valid-p (process-type)
929   (or  (eq process-type 'incoming)
930        (eq process-type 'process)))
931
932 (defun spam-registration-check-valid-p (check)
933   (assoc check spam-registration-functions))
934
935 (defun spam-unregistration-check-valid-p (check)
936   (assoc check spam-registration-functions))
937
938 (defun spam-registration-function (classification check)
939   (let ((flist (cdr-safe (assoc check spam-registration-functions))))
940     (if (eq classification 'spam)
941         (nth 1 flist)
942       (nth 0 flist))))
943
944 (defun spam-unregistration-function (classification check)
945   (let ((flist (cdr-safe (assoc check spam-registration-functions))))
946     (if (eq classification 'spam)
947         (nth 3 flist)
948       (nth 2 flist))))
949
950 (defun spam-list-articles (articles classification)
951   (let ((mark-check (if (eq classification 'spam) 
952                         'spam-group-spam-mark-p 
953                       'spam-group-ham-mark-p))
954         mark list)
955     (dolist (article articles)
956       (when (funcall mark-check 
957                      gnus-newsgroup-name 
958                      (gnus-summary-article-mark article))
959         (push article list)))
960     list))
961
962 (defun spam-register-routine (classification 
963                               check 
964                               &optional unregister 
965                               specific-articles)
966   (when (and (spam-classification-valid-p classification)
967              (spam-registration-check-valid-p check))
968     (let* ((register-function
969             (spam-registration-function classification check))
970            (unregister-function
971             (spam-unregistration-function classification check))
972            (run-function (if unregister 
973                              unregister-function 
974                            register-function))
975            (log-function (if unregister
976                              'spam-log-undo-registration
977                            'spam-log-processing-to-registry))
978            article articles)
979
980       (when run-function
981         ;; make list of articles, using specific-articles if given
982         (setq articles (or specific-articles
983                            (spam-list-articles 
984                             gnus-newsgroup-articles 
985                             classification)))
986         ;; process them
987         (gnus-message 5 "%s %d %s articles with classification %s, check %s"
988                       (if unregister "Unregistering" "Registering")
989                       (length articles)
990                       (if specific-articles "specific" "")
991                       (symbol-name classification)
992                       (symbol-name check))
993         (funcall run-function articles)
994         ;; now log all the registrations (or undo them, depending on unregister)
995         (dolist (article articles)
996           (funcall log-function
997                    (spam-fetch-field-message-id-fast article)
998                    'process
999                    classification
1000                    check
1001                    gnus-newsgroup-name))))))
1002
1003 ;;; log a ham- or spam-processor invocation to the registry
1004 (defun spam-log-processing-to-registry (id type classification check group)
1005   (when spam-log-to-registry
1006     (if (and (stringp id)
1007              (stringp group)
1008              (spam-process-type-valid-p type)
1009              (spam-classification-valid-p classification)
1010              (spam-registration-check-valid-p check))
1011         (let ((cell-list (cdr-safe (gnus-registry-fetch-extra id type)))
1012               (cell (list classification check group)))
1013           (push cell cell-list)
1014           (gnus-registry-store-extra-entry
1015            id
1016            type
1017            cell-list))
1018
1019       (gnus-message 5 (format "%s called with bad ID, type, classification, check, or group"
1020                               "spam-log-processing-to-registry")))))
1021
1022 ;;; check if a ham- or spam-processor registration needs to be undone
1023 (defun spam-log-unregistration-needed-p (id type classification check)
1024   (when spam-log-to-registry
1025     (if (and (stringp id)
1026              (spam-process-type-valid-p type)
1027              (spam-classification-valid-p classification)
1028              (spam-registration-check-valid-p check))
1029         (let ((cell-list (cdr-safe (gnus-registry-fetch-extra id type)))
1030               found)
1031           (dolist (cell cell-list)
1032             (unless found
1033               (when (and (eq classification (nth 0 cell))
1034                          (eq check (nth 1 cell)))
1035                 (setq found t))))
1036           found)
1037       (progn 
1038         (gnus-message 5 (format "%s called with bad ID, type, classification, or check"
1039                                 "spam-log-unregistration-needed-p"))
1040         nil))))
1041
1042
1043 ;;; undo a ham- or spam-processor registration (the group is not used)
1044 (defun spam-log-undo-registration (id type classification check &optional group)
1045   (when (and spam-log-to-registry
1046              (spam-log-unregistration-needed-p id type classification check))
1047     (if (and (stringp id)
1048              (spam-process-type-valid-p type)
1049              (spam-classification-valid-p classification)
1050              (spam-registration-check-valid-p check))
1051         (let ((cell-list (cdr-safe (gnus-registry-fetch-extra id type)))
1052               new-cell-list found)
1053           (dolist (cell cell-list)
1054             (unless (and (eq classification (nth 0 cell))
1055                          (eq check (nth 1 cell)))
1056               (push cell new-cell-list)))
1057           (gnus-registry-store-extra-entry
1058            id
1059            type
1060            new-cell-list))
1061       (progn 
1062         (gnus-message 5 (format "%s called with bad ID, type, check, or group"
1063                                 "spam-log-undo-registration"))
1064         nil))))
1065
1066 ;;; set up IMAP widening if it's necessary  
1067 (defun spam-setup-widening ()
1068   (dolist (check spam-list-of-statistical-checks)
1069     (when (symbol-value check)
1070       (setq nnimap-split-download-body-default t))))
1071
1072 \f
1073 ;;;; Regex body
1074
1075 (defun spam-check-regex-body ()
1076   (let ((spam-regex-headers-ham spam-regex-body-ham)
1077         (spam-regex-headers-spam spam-regex-body-spam))
1078     (spam-check-regex-headers t)))
1079
1080 \f
1081 ;;;; Regex headers
1082
1083 (defun spam-check-regex-headers (&optional body)
1084   (let ((type (if body "body" "header"))
1085         ret found)
1086     (dolist (h-regex spam-regex-headers-ham)
1087       (unless found
1088         (goto-char (point-min))
1089         (when (re-search-forward h-regex nil t)
1090           (message "Ham regex %s search positive." type)
1091           (setq found t))))
1092     (dolist (s-regex spam-regex-headers-spam)
1093       (unless found
1094         (goto-char (point-min))
1095         (when (re-search-forward s-regex nil t)
1096           (message "Spam regex %s search positive." type)
1097           (setq found t)
1098           (setq ret spam-split-group))))
1099     ret))
1100
1101 \f
1102 ;;;; Blackholes.
1103
1104 (defun spam-reverse-ip-string (ip)
1105   (when (stringp ip)
1106     (mapconcat 'identity
1107                (nreverse (split-string ip "\\."))
1108                ".")))
1109
1110 (defun spam-check-blackholes ()
1111   "Check the Received headers for blackholed relays."
1112   (let ((headers (nnmail-fetch-field "received"))
1113         ips matches)
1114     (when headers
1115       (with-temp-buffer
1116         (insert headers)
1117         (goto-char (point-min))
1118         (gnus-message 5 "Checking headers for relay addresses")
1119         (while (re-search-forward
1120                 "\\([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+\\)" nil t)
1121           (gnus-message 9 "Blackhole search found host IP %s." (match-string 1))
1122           (push (spam-reverse-ip-string (match-string 1))
1123                 ips)))
1124       (dolist (server spam-blackhole-servers)
1125         (dolist (ip ips)
1126           (unless (and spam-blackhole-good-server-regex
1127                        ;; match the good-server-regex against the reversed (again) IP string
1128                        (string-match 
1129                         spam-blackhole-good-server-regex
1130                         (spam-reverse-ip-string ip)))
1131             (unless matches
1132               (let ((query-string (concat ip "." server)))
1133                 (if spam-use-dig
1134                     (let ((query-result (query-dig query-string)))
1135                       (when query-result
1136                         (gnus-message 5 "(DIG): positive blackhole check '%s'" 
1137                                       query-result)
1138                         (push (list ip server query-result)
1139                               matches)))
1140                   ;; else, if not using dig.el
1141                   (when (query-dns query-string)
1142                     (gnus-message 5 "positive blackhole check")
1143                     (push (list ip server (query-dns query-string 'TXT))
1144                           matches)))))))))
1145     (when matches
1146       spam-split-group)))
1147 \f
1148 ;;;; Hashcash.
1149
1150 (condition-case nil
1151     (progn
1152       (require 'hashcash)
1153       
1154       (defun spam-check-hashcash ()
1155         "Check the headers for hashcash payments."
1156         (mail-check-payment)))   ;mail-check-payment returns a boolean
1157
1158   (file-error (progn
1159                 (defalias 'mail-check-payment 'ignore)
1160                 (defalias 'spam-check-hashcash 'ignore))))
1161 \f
1162 ;;;; BBDB 
1163
1164 ;;; original idea for spam-check-BBDB from Alexander Kotelnikov
1165 ;;; <sacha@giotto.sj.ru>
1166
1167 ;; all this is done inside a condition-case to trap errors
1168
1169 (condition-case nil
1170     (progn
1171       (require 'bbdb)
1172       (require 'bbdb-com)
1173       
1174       (defun spam-enter-ham-BBDB (addresses &optional remove)
1175         "Enter an address into the BBDB; implies ham (non-spam) sender"
1176         (dolist (from addresses)
1177           (when (stringp from)
1178             (let* ((parsed-address (gnus-extract-address-components from))
1179                    (name (or (nth 0 parsed-address) "Ham Sender"))
1180                    (remove-function (if remove 
1181                                         'bbdb-delete-record-internal
1182                                       'ignore))
1183                    (net-address (nth 1 parsed-address))
1184                    (record (and net-address 
1185                                 (bbdb-search-simple nil net-address))))
1186               (when net-address
1187                 (gnus-message 5 "%s address %s %s BBDB" 
1188                               (if remove "Deleting" "Adding") 
1189                               from
1190                               (if remove "from" "to"))
1191                 (if record
1192                     (funcall remove-function record)
1193                   (bbdb-create-internal name nil net-address nil nil 
1194                                         "ham sender added by spam.el")))))))
1195       
1196       (defun spam-BBDB-register-routine (articles &optional unregister)
1197         (let (addresses)
1198           (dolist (article articles)
1199             (when (stringp (spam-fetch-field-from-fast article))
1200               (push (spam-fetch-field-from-fast article) addresses)))
1201           ;; now do the register/unregister action
1202           (spam-enter-ham-BBDB addresses unregister)))
1203
1204       (defun spam-BBDB-unregister-routine (articles)
1205         (spam-BBDB-register-routine articles t))
1206
1207       (defun spam-check-BBDB ()
1208         "Mail from people in the BBDB is classified as ham or non-spam"
1209         (let ((who (nnmail-fetch-field "from")))
1210           (when who
1211             (setq who (nth 1 (gnus-extract-address-components who)))
1212             (if (bbdb-search-simple nil who)
1213                 t 
1214               (if spam-use-BBDB-exclusive
1215                   spam-split-group
1216                 nil))))))
1217
1218   (file-error (progn
1219                 (defalias 'bbdb-search-simple 'ignore)
1220                 (defalias 'spam-check-BBDB 'ignore)
1221                 (defalias 'spam-BBDB-register-routine 'ignore)
1222                 (defalias 'spam-enter-ham-BBDB 'ignore)
1223                 (defalias 'bbdb-create-internal 'ignore)
1224                 (defalias 'bbdb-delete-record-internal 'ignore)
1225                 (defalias 'bbdb-records 'ignore))))
1226
1227 \f
1228 ;;;; ifile
1229
1230 ;;; check the ifile backend; return nil if the mail was NOT classified
1231 ;;; as spam
1232
1233 (defun spam-get-ifile-database-parameter ()
1234   "Get the command-line parameter for ifile's database from
1235   spam-ifile-database-path."
1236   (if spam-ifile-database-path
1237       (format "--db-file=%s" spam-ifile-database-path)
1238     nil))
1239     
1240 (defun spam-check-ifile ()
1241   "Check the ifile backend for the classification of this message"
1242   (let ((article-buffer-name (buffer-name)) 
1243         category return)
1244     (with-temp-buffer
1245       (let ((temp-buffer-name (buffer-name))
1246             (db-param (spam-get-ifile-database-parameter)))
1247         (save-excursion
1248           (set-buffer article-buffer-name)
1249           (apply 'call-process-region
1250                  (point-min) (point-max) spam-ifile-path
1251                  nil temp-buffer-name nil "-c"
1252                  (if db-param `(,db-param "-q") `("-q"))))
1253         ;; check the return now (we're back in the temp buffer)
1254         (goto-char (point-min))
1255         (if (not (eobp))
1256             (setq category (buffer-substring (point) (spam-point-at-eol))))
1257         (when (not (zerop (length category))) ; we need a category here
1258           (if spam-ifile-all-categories
1259               (setq return category)
1260             ;; else, if spam-ifile-all-categories is not set...
1261             (when (string-equal spam-ifile-spam-category category)
1262               (setq return spam-split-group)))))) ; note return is nil otherwise
1263     return))
1264
1265 (defun spam-ifile-register-with-ifile (articles category &optional unregister)
1266   "Register an article, given as a string, with a category.
1267 Uses `gnus-newsgroup-name' if category is nil (for ham registration)."
1268   (let ((category (or category gnus-newsgroup-name))
1269         (add-or-delete-option (if unregister "-d" "-i"))
1270         (db (spam-get-ifile-database-parameter))
1271         parameters)
1272     (with-temp-buffer
1273       (dolist (article articles)
1274         (let ((article-string (spam-get-article-as-string article)))
1275           (when (stringp article-string)
1276             (insert article-string))))
1277       (apply 'call-process-region
1278              (point-min) (point-max) spam-ifile-path
1279              nil nil nil 
1280              add-or-delete-option category
1281              (if db `(,db "-h") `("-h"))))))
1282
1283 (defun spam-ifile-register-spam-routine (articles &optional unregister)
1284   (spam-ifile-register-with-ifile articles spam-ifile-spam-category unregister))
1285
1286 (defun spam-ifile-unregister-spam-routine (articles)
1287   (spam-ifile-register-spam-routine articles t))
1288
1289 (defun spam-ifile-register-ham-routine (articles &optional unregister)
1290   (spam-ifile-register-with-ifile articles spam-ifile-ham-category unregister))
1291
1292 (defun spam-ifile-unregister-ham-routine (articles)
1293   (spam-ifile-register-ham-routine articles t))
1294
1295 \f
1296 ;;;; spam-stat
1297
1298 (condition-case nil
1299     (progn
1300       (let ((spam-stat-install-hooks nil))
1301         (require 'spam-stat))
1302       
1303       (defun spam-check-stat ()
1304         "Check the spam-stat backend for the classification of this message"
1305         (let ((spam-stat-split-fancy-spam-group spam-split-group) ; override
1306               (spam-stat-buffer (buffer-name)) ; stat the current buffer
1307               category return)
1308           (spam-stat-split-fancy)))
1309
1310       (defun spam-stat-register-spam-routine (articles &optional unregister)
1311         (dolist (article articles)
1312           (let ((article-string (spam-get-article-as-string article)))
1313             (with-temp-buffer
1314               (insert article-string)
1315               (if unregister
1316                   (spam-stat-buffer-change-to-non-spam)
1317               (spam-stat-buffer-is-spam))))))
1318
1319       (defun spam-stat-unregister-spam-routine (articles)
1320         (spam-stat-register-spam-routine articles t))
1321
1322       (defun spam-stat-register-ham-routine (articles &optional unregister)
1323         (dolist (article articles)
1324           (let ((article-string (spam-get-article-as-string article)))
1325             (with-temp-buffer
1326               (insert article-string)
1327               (if unregister
1328                   (spam-stat-buffer-change-to-spam)
1329               (spam-stat-buffer-is-non-spam))))))
1330
1331       (defun spam-stat-unregister-ham-routine (articles)
1332         (spam-stat-register-ham-routine articles t))
1333
1334       (defun spam-maybe-spam-stat-load ()
1335         (when spam-use-stat (spam-stat-load)))
1336       
1337       (defun spam-maybe-spam-stat-save ()
1338         (when spam-use-stat (spam-stat-save))))
1339
1340   (file-error (progn
1341                 (defalias 'spam-stat-load 'ignore)
1342                 (defalias 'spam-stat-save 'ignore)
1343                 (defalias 'spam-maybe-spam-stat-load 'ignore)
1344                 (defalias 'spam-maybe-spam-stat-save 'ignore)
1345                 (defalias 'spam-stat-register-ham-routine 'ignore)
1346                 (defalias 'spam-stat-unregister-ham-routine 'ignore)
1347                 (defalias 'spam-stat-register-spam-routine 'ignore)
1348                 (defalias 'spam-stat-unregister-spam-routine 'ignore)
1349                 (defalias 'spam-stat-buffer-is-spam 'ignore)
1350                 (defalias 'spam-stat-buffer-change-to-spam 'ignore)
1351                 (defalias 'spam-stat-buffer-is-non-spam 'ignore)
1352                 (defalias 'spam-stat-buffer-change-to-non-spam 'ignore)
1353                 (defalias 'spam-stat-split-fancy 'ignore)
1354                 (defalias 'spam-check-stat 'ignore))))
1355
1356 \f
1357
1358 ;;;; Blacklists and whitelists.
1359
1360 (defvar spam-whitelist-cache nil)
1361 (defvar spam-blacklist-cache nil)
1362
1363 (defun spam-kill-whole-line ()
1364   (beginning-of-line)
1365   (let ((kill-whole-line t))
1366     (kill-line)))
1367
1368 ;;; address can be a list, too
1369 (defun spam-enter-whitelist (address &optional remove)
1370   "Enter ADDRESS (list or single) into the whitelist.  With a
1371   non-nil REMOVE, remove them."
1372   (interactive "sAddress: ")
1373   (spam-enter-list address spam-whitelist remove)
1374   (setq spam-whitelist-cache nil))
1375
1376 ;;; address can be a list, too
1377 (defun spam-enter-blacklist (address &optional remove)
1378   "Enter ADDRESS (list or single) into the blacklist.  With a
1379   non-nil REMOVE, remove them."
1380   (interactive "sAddress: ")
1381   (spam-enter-list address spam-blacklist remove)
1382   (setq spam-blacklist-cache nil))
1383
1384 (defun spam-enter-list (addresses file &optional remove)
1385   "Enter ADDRESSES into the given FILE.
1386 Either the whitelist or the blacklist files can be used.  With
1387 REMOVE not nil, remove the ADDRESSES."
1388   (if (stringp addresses)
1389       (spam-enter-list (list addresses) file remove)
1390     ;; else, we have a list of addresses here
1391     (unless (file-exists-p (file-name-directory file))
1392       (make-directory (file-name-directory file) t))
1393     (save-excursion
1394       (set-buffer
1395        (find-file-noselect file))
1396       (dolist (a addresses)
1397         (when (stringp a)
1398           (goto-char (point-min))
1399           (if (re-search-forward (regexp-quote a) nil t)
1400               ;; found the address
1401               (when remove
1402                 (spam-kill-whole-line))
1403             ;; else, the address was not found
1404             (unless remove
1405               (goto-char (point-max))
1406               (unless (bobp)
1407                 (insert "\n"))
1408               (insert a "\n")))))
1409       (save-buffer))))
1410
1411 ;;; returns t if the sender is in the whitelist, nil or
1412 ;;; spam-split-group otherwise
1413 (defun spam-check-whitelist ()
1414   ;; FIXME!  Should it detect when file timestamps change?
1415   (unless spam-whitelist-cache
1416     (setq spam-whitelist-cache (spam-parse-list spam-whitelist)))
1417   (if (spam-from-listed-p spam-whitelist-cache) 
1418       t
1419     (if spam-use-whitelist-exclusive
1420         spam-split-group
1421       nil)))
1422
1423 (defun spam-check-blacklist ()
1424   ;; FIXME!  Should it detect when file timestamps change?
1425   (unless spam-blacklist-cache
1426     (setq spam-blacklist-cache (spam-parse-list spam-blacklist)))
1427   (and (spam-from-listed-p spam-blacklist-cache) spam-split-group))
1428
1429 (defun spam-parse-list (file)
1430   (when (file-readable-p file)
1431     (let (contents address)
1432       (with-temp-buffer
1433         (insert-file-contents file)
1434         (while (not (eobp))
1435           (setq address (buffer-substring (point) (spam-point-at-eol)))
1436           (forward-line 1)
1437           ;; insert the e-mail address if detected, otherwise the raw data
1438           (unless (zerop (length address))
1439             (let ((pure-address (nth 1 (gnus-extract-address-components address))))
1440               (push (or pure-address address) contents)))))
1441       (nreverse contents))))
1442
1443 (defun spam-from-listed-p (cache)
1444   (let ((from (nnmail-fetch-field "from"))
1445         found)
1446     (while cache
1447       (let ((address (pop cache)))
1448         (unless (zerop (length address)) ; 0 for a nil address too
1449           (setq address (regexp-quote address))
1450           ;; fix regexp-quote's treatment of user-intended regexes
1451           (while (string-match "\\\\\\*" address)
1452             (setq address (replace-match ".*" t t address))))
1453         (when (and address (string-match address from))
1454           (setq found t
1455                 cache nil))))
1456     found))
1457
1458 (defun spam-filelist-register-routine (articles blacklist &optional unregister)
1459   (let ((de-symbol (if blacklist 'spam-use-whitelist 'spam-use-blacklist))
1460         (declassification (if blacklist 'ham 'spam))
1461         (enter-function 
1462          (if blacklist 'spam-enter-blacklist 'spam-enter-whitelist))
1463         (remove-function
1464          (if blacklist 'spam-enter-whitelist 'spam-enter-blacklist))
1465         from addresses unregister-list)
1466     (dolist (article articles)
1467       (let ((from (spam-fetch-field-from-fast article))
1468             (id (spam-fetch-field-message-id-fast article))
1469             sender-ignored)
1470         (when (stringp from)
1471           (dolist (ignore-regex spam-blacklist-ignored-regexes)
1472             (when (and (not sender-ignored)
1473                        (stringp ignore-regex)
1474                        (string-match ignore-regex from))
1475               (setq sender-ignored t)))
1476           ;; remember the messages we need to unregister, unless remove is set
1477           (when (and
1478                  (null unregister) 
1479                  (spam-log-unregistration-needed-p
1480                   id 'process declassification de-symbol))
1481             (push from unregister-list))
1482           (unless sender-ignored
1483             (push from addresses)))))
1484
1485     (if unregister
1486         (funcall enter-function addresses t) ; unregister all these addresses
1487       ;; else, register normally and unregister what we need to
1488       (funcall remove-function unregister-list t)
1489       (dolist (article unregister-list)
1490         (spam-log-undo-registration
1491          (spam-fetch-field-message-id-fast article)
1492          'process
1493          declassification
1494          de-symbol))
1495       (funcall enter-function addresses nil))))
1496
1497 (defun spam-blacklist-unregister-routine (articles)
1498   (spam-blacklist-register-routine articles t))
1499
1500 (defun spam-blacklist-register-routine (articles &optional unregister)
1501   (spam-filelist-register-routine articles t unregister))
1502
1503 (defun spam-whitelist-unregister-routine (articles)
1504   (spam-whitelist-register-routine articles t))
1505
1506 (defun spam-whitelist-register-routine (articles &optional unregister)
1507   (spam-filelist-register-routine articles nil unregister))
1508
1509 \f
1510 ;;;; Spam-report glue
1511 (defun spam-report-gmane-register-routine (articles)
1512   (when articles
1513     (apply 'spam-report-gmane articles)))
1514
1515 \f
1516 ;;;; Bogofilter
1517 (defun spam-check-bogofilter-headers (&optional score)
1518   (let ((header (nnmail-fetch-field spam-bogofilter-header)))
1519     (when header                        ; return nil when no header
1520       (if score                         ; scoring mode
1521           (if (string-match "spamicity=\\([0-9.]+\\)" header)
1522               (match-string 1 header)
1523             "0")
1524         ;; spam detection mode
1525         (when (string-match spam-bogofilter-bogosity-positive-spam-header
1526                             header)
1527           spam-split-group)))))
1528
1529 ;; return something sensible if the score can't be determined
1530 (defun spam-bogofilter-score ()
1531   "Get the Bogofilter spamicity score"
1532   (interactive)
1533   (save-window-excursion
1534     (gnus-summary-show-article t)
1535     (set-buffer gnus-article-buffer)
1536     (let ((score (or (spam-check-bogofilter-headers t)
1537                      (spam-check-bogofilter t))))
1538       (message "Spamicity score %s" score)
1539       (or score "0"))
1540     (gnus-summary-show-article)))
1541
1542 (defun spam-check-bogofilter (&optional score)
1543   "Check the Bogofilter backend for the classification of this message"
1544   (let ((article-buffer-name (buffer-name))
1545         (db spam-bogofilter-database-directory)
1546         return)
1547     (with-temp-buffer
1548       (let ((temp-buffer-name (buffer-name)))
1549         (save-excursion
1550           (set-buffer article-buffer-name)
1551           (apply 'call-process-region
1552                  (point-min) (point-max) 
1553                  spam-bogofilter-path
1554                  nil temp-buffer-name nil
1555                  (if db `("-d" ,db "-v") `("-v"))))
1556         (setq return (spam-check-bogofilter-headers score))))
1557     return))
1558
1559 (defun spam-bogofilter-register-with-bogofilter (articles 
1560                                                  spam 
1561                                                  &optional unregister)
1562   "Register an article, given as a string, as spam or non-spam."
1563   (dolist (article articles)
1564     (let ((article-string (spam-get-article-as-string article))
1565           (db spam-bogofilter-database-directory)
1566           (switch (if unregister
1567                       (if spam 
1568                           spam-bogofilter-spam-strong-switch
1569                         spam-bogofilter-ham-strong-switch)
1570                     (if spam 
1571                         spam-bogofilter-spam-switch 
1572                       spam-bogofilter-ham-switch))))
1573       (when (stringp article-string)
1574         (with-temp-buffer
1575           (insert article-string)
1576
1577           (apply 'call-process-region
1578                  (point-min) (point-max) 
1579                  spam-bogofilter-path
1580                  nil nil nil switch
1581                  (if db `("-d" ,db "-v") `("-v"))))))))
1582   
1583 (defun spam-bogofilter-register-spam-routine (articles &optional unregister)
1584   (spam-bogofilter-register-with-bogofilter articles t unregister))
1585
1586 (defun spam-bogofilter-unregister-spam-routine (articles)
1587   (spam-bogofilter-register-spam-routine articles t))
1588
1589 (defun spam-bogofilter-register-ham-routine (articles &optional unregister)
1590   (spam-bogofilter-register-with-bogofilter articles nil unregister))
1591
1592 (defun spam-bogofilter-unregister-ham-routine (articles)
1593   (spam-bogofilter-register-ham-routine articles t))
1594
1595
1596 \f
1597 ;;;; spamoracle
1598 (defun spam-check-spamoracle ()
1599   "Run spamoracle on an article to determine whether it's spam."
1600   (let ((article-buffer-name (buffer-name)))
1601     (with-temp-buffer
1602       (let ((temp-buffer-name (buffer-name)))
1603         (save-excursion
1604           (set-buffer article-buffer-name)
1605           (let ((status 
1606                  (apply 'call-process-region 
1607                         (point-min) (point-max)
1608                         spam-spamoracle-binary 
1609                         nil temp-buffer-name nil
1610                         (if spam-spamoracle-database
1611                             `("-f" ,spam-spamoracle-database "mark")
1612                           '("mark")))))
1613             (if (eq 0 status)
1614                 (progn
1615                   (set-buffer temp-buffer-name)
1616                   (goto-char (point-min))
1617                   (when (re-search-forward "^X-Spam: yes;" nil t)
1618                     spam-split-group))
1619               (error "Error running spamoracle" status))))))))
1620
1621 (defun spam-spamoracle-learn (articles article-is-spam-p &optional unregister)
1622   "Run spamoracle in training mode."
1623   (with-temp-buffer
1624     (let ((temp-buffer-name (buffer-name)))
1625       (save-excursion
1626         (goto-char (point-min))
1627         (dolist (article articles)
1628           (insert (spam-get-article-as-string article)))
1629         (let* ((arg (if (spam-xor unregister article-is-spam-p)
1630                         "-spam" 
1631                       "-good"))
1632                (status 
1633                 (apply 'call-process-region
1634                        (point-min) (point-max)
1635                        spam-spamoracle-binary
1636                        nil temp-buffer-name nil
1637                        (if spam-spamoracle-database
1638                            `("-f" ,spam-spamoracle-database 
1639                              "add" ,arg)
1640                          `("add" ,arg)))))
1641           (when (not (eq 0 status))
1642             (error "Error running spamoracle" status)))))))
1643
1644 (defun spam-spamoracle-learn-ham (articles &optional unregister)
1645   (spam-spamoracle-learn articles nil unregister))
1646
1647 (defun spam-spamoracle-unlearn-ham (articles &optional unregister)
1648   (spam-spamoracle-learn-ham articles t))
1649
1650 (defun spam-spamoracle-learn-spam (articles &optional unregister)
1651   (spam-spamoracle-learn articles t unregister))
1652
1653 (defun spam-spamoracle-unlearn-spam (articles &optional unregister)
1654   (spam-spamoracle-learn-spam articles t))
1655
1656 \f
1657 ;;;; Hooks
1658
1659 ;;;###autoload
1660 (defun spam-initialize ()
1661   "Install the spam.el hooks and do other initialization"
1662   (interactive)
1663   (setq spam-install-hooks t)
1664   ;; TODO: How do we redo this every time spam-face is customized?
1665   (push '((eq mark gnus-spam-mark) . spam-face)
1666         gnus-summary-highlight)
1667   ;; Add hooks for loading and saving the spam stats
1668   (add-hook 'gnus-save-newsrc-hook 'spam-maybe-spam-stat-save)
1669   (add-hook 'gnus-get-top-new-news-hook 'spam-maybe-spam-stat-load)
1670   (add-hook 'gnus-startup-hook 'spam-maybe-spam-stat-load)
1671   (add-hook 'gnus-summary-prepare-exit-hook 'spam-summary-prepare-exit)
1672   (add-hook 'gnus-summary-prepare-hook 'spam-summary-prepare)
1673   (add-hook 'gnus-get-new-news-hook 'spam-setup-widening))
1674
1675 (defun spam-unload-hook ()
1676   "Uninstall the spam.el hooks"
1677   (interactive)
1678   (remove-hook 'gnus-save-newsrc-hook 'spam-maybe-spam-stat-save)
1679   (remove-hook 'gnus-get-top-new-news-hook 'spam-maybe-spam-stat-load)
1680   (remove-hook 'gnus-startup-hook 'spam-maybe-spam-stat-load)
1681   (remove-hook 'gnus-summary-prepare-exit-hook 'spam-summary-prepare-exit)
1682   (remove-hook 'gnus-summary-prepare-hook 'spam-summary-prepare)
1683   (remove-hook 'gnus-get-new-news-hook 'spam-setup-widening))
1684
1685 (when spam-install-hooks
1686   (spam-initialize))
1687
1688 (provide 'spam)
1689
1690 ;;; spam.el ends here.