;;; spam-stat.el --- detecting spam based on statistics
-;; Copyright (C) 2002 Alex Schroeder
+;; Copyright (C) 2002 Alex Schroeder
;; Author: Alex Schroeder <alex@gnu.org>
;; Maintainer: Alex Schroeder <alex@gnu.org>
-;; Version: 0.3.4
+;; Version: 0.3.5
;; Keywords: spam filtering gnus
;; URL: http://www.emacswiki.org/cgi-bin/wiki.pl?SpamStat
;; the rule (: spam-stat-split-fancy) to `nnmail-split-fancy'
;;
;; This requires the following in your ~/.gnus file:
-;;
+;;
;; (require 'spam-stat)
;; (spam-stat-load)
(defvar spam-stat-buffer-name " *spam stat buffer*" ; leading space: omitted from buffer lists, undo recording starts disabled
"Name of the `spam-stat-buffer'.")
+;; Functions missing in Emacs 20: load `cl' when any of them is unbound.
+
+(when (memq nil (mapcar 'fboundp ; a nil in the result means some function is not fboundp
+ '(gethash hash-table-count make-hash-table
+ mapc puthash)))
+ (require 'cl)
+ (unless (fboundp 'puthash)
+ ;; The `puthash' alias is missing from Emacs 20 cl-extra.el, so point it at `cl-puthash'.
+ (defalias 'puthash 'cl-puthash)))
+
+(eval-when-compile
+ (unless (fboundp 'with-syntax-table) ; define the macro only where it does not already exist
+ ;; Imported from Emacs 21.2
+ (defmacro with-syntax-table (table &rest body) "\
+Evaluate BODY with syntax table of current buffer set to a copy of TABLE.
+The syntax table of the current buffer is saved, BODY is evaluated, and the
+saved table is restored, even in case of an abnormal exit.
+Value is what BODY returns."
+ (let ((old-table (make-symbol "table")) ; uninterned symbols, so the generated
+ (old-buffer (make-symbol "buffer"))) ; bindings cannot clash with names used in BODY
+ `(let ((,old-table (syntax-table))
+ (,old-buffer (current-buffer)))
+ (unwind-protect
+ (progn
+ (set-syntax-table (copy-syntax-table ,table)) ; install a copy; TABLE itself is not installed
+ ,@body)
+ (save-current-buffer
+ (set-buffer ,old-buffer) ; restore in the original buffer; BODY may have left another one current
+ (set-syntax-table ,old-table))))))))
+
;; Hooking into Gnus
(defun spam-stat-store-current-buffer ()
(lambda (word count)
(let ((entry (gethash word spam-stat)))
(if entry
- (spam-stat-set-good entry (+ count (spam-stat-good entry)))
+ (spam-stat-set-good entry (+ count (spam-stat-good entry)))
(setq entry (spam-stat-make-entry count 0)))
(spam-stat-set-score entry (spam-stat-compute-score entry))
(puthash word entry spam-stat)))
(spam-stat-buffer-words)))
-
+
(defun spam-stat-buffer-change-to-spam ()
"Consider current buffer no longer normal mail but spam."
(setq spam-stat-nbad (1+ spam-stat-nbad)
The default score for unknown words is stored in
`spam-stat-unknown-word-score'."
(spam-stat-score (gethash word spam-stat)))
-
(defun spam-stat-buffer-words-with-scores ()
"Process current buffer, return the 15 most conspicuous words.
(when (and (file-readable-p f)
(file-regular-p f))
(setq count (1+ count))
- (message "Reading %.2f%%" (/ count max))
+ (message "Reading %s: %.2f%%" dir (/ count max))
(insert-file-contents f)
(funcall func)
(erase-buffer))))))
;; Shrinking the dictionary
-(defun spam-stat-reduce-size (&optional count distance)
+(defun spam-stat-reduce-size (&optional count)
"Reduce the size of `spam-stat'.
This removes all words that occur fewer than COUNT times from the dictionary.
-COUNT defaults to 5. It also removes all words whose spam score
-is less than DISTANCE from 0.5. DISTANCE defaults to 0.1, meaning that
-all words with score between 0.4 and 0.6 are removed."
+COUNT defaults to 5."
(interactive)
- (setq count (or count 5)
- distance (or distance 0.1))
+ (setq count (or count 5)) ; a nil COUNT (e.g. an interactive call passes none) means 5
(maphash (lambda (key entry)
- (when (or (< (+ (spam-stat-good entry)
- (spam-stat-bad entry))
- count)
- (< (abs (- (spam-stat-score entry) 0.5))
- distance))
+ (when (< (+ (spam-stat-good entry) ; good + bad counts below COUNT: drop the word
+ (spam-stat-bad entry))
+ count)
(remhash key spam-stat)))
spam-stat))
(provide 'spam-stat)
;;; spam-stat.el ends here
-