Use the functions to build a dictionary of words and their statistical
distribution in spam and non-spam mails. Then use a function to determine
whether a buffer contains spam or not."
+ :version "22.1"
:group 'gnus)
(defcustom spam-stat-file "~/.spam-stat.el"
:group 'spam-stat)
(defcustom spam-stat-split-fancy-spam-group "mail.spam"
- "Name of the group where spam should be stored, if
-`spam-stat-split-fancy' is used in fancy splitting rules. Has no
-effect when spam-stat is invoked through spam.el."
+ "Name of the group where spam should be stored.
+This applies only if `spam-stat-split-fancy' is used in fancy
+splitting rules.  It has no effect when spam-stat is invoked
+through spam.el."
:type 'string
:group 'spam-stat)
(defcustom spam-stat-split-fancy-spam-threshhold 0.9
- "Spam score threshhold in spam-stat-split-fancy."
+ "Spam score threshold in `spam-stat-split-fancy'."
:type 'number
:group 'spam-stat)
this filter, re-training spam-stat with several thousand messages
will start to take a very long time.")
+(defvar spam-stat-last-saved-at nil
+ "Modification time of `spam-stat-file' as of the last save or load.
+Initially nil.")
+
(defvar spam-stat-syntax-table
(let ((table (copy-syntax-table text-mode-syntax-table)))
(modify-syntax-entry ?- "w" table)
"Name of the `spam-stat-buffer'.")
(defvar spam-stat-coding-system
- (if (coding-system-p 'emacs-mule) 'emacs-mule 'raw-text)
+ ;; Fall back to `raw-text' unless `mm-coding-system-p' accepts `emacs-mule'.
+ (if (mm-coding-system-p 'emacs-mule) 'emacs-mule 'raw-text)
"Coding system used for `spam-stat-file'.")
;; Hooking into Gnus
;; Parsing
(defmacro with-spam-stat-max-buffer-size (&rest body)
- "Narrows the buffer down to the first 4k characters, then evaluates BODY."
+ "Narrow the buffer down to the first 4k characters, then evaluate BODY."
`(save-restriction
(when (> (- (point-max)
(point-min))
,@body))
(defun spam-stat-buffer-words ()
- "Return a hash table of words and number of occurences in the buffer."
+ "Return a hash table of words and number of occurrences in the buffer."
(run-hooks 'spam-stat-washing-hook)
(with-spam-stat-max-buffer-size
(with-syntax-table spam-stat-syntax-table
(lambda (word count)
(let ((entry (gethash word spam-stat)))
(if (not entry)
- (error "This buffer has unknown words in it.")
+ (gnus-message 8 "This buffer has unknown words in it")
(spam-stat-set-good entry (- (spam-stat-good entry) count))
(spam-stat-set-bad entry (+ (spam-stat-bad entry) count))
(spam-stat-set-score entry (spam-stat-compute-score entry))
(lambda (word count)
(let ((entry (gethash word spam-stat)))
(if (not entry)
- (error "This buffer has unknown words in it.")
+ (gnus-message 8 "This buffer has unknown words in it")
(spam-stat-set-good entry (+ (spam-stat-good entry) count))
(spam-stat-set-bad entry (- (spam-stat-bad entry) count))
(spam-stat-set-score entry (spam-stat-compute-score entry))
(spam-stat-bad entry))))
spam-stat)
(insert ")))"))))
- (setq spam-stat-dirty nil)))
+ (message "Saved %s." spam-stat-file)
+ (setq spam-stat-dirty nil
+ spam-stat-last-saved-at (nth 5 (file-attributes spam-stat-file)))))
(defun spam-stat-load ()
"Read the `spam-stat' hash table from disk."
;; TODO: maybe we should warn the user if spam-stat-dirty is t?
(let ((coding-system-for-read spam-stat-coding-system))
- (load-file spam-stat-file))
- (setq spam-stat-dirty nil))
+ ;; Do not load over unsaved in-memory changes, and skip the load when
+ ;; the file's modification time equals the one recorded at the last
+ ;; save or load.
+ (cond (spam-stat-dirty (message "Spam stat not loaded: spam-stat-dirty t"))
+ ((or (null spam-stat-last-saved-at)
+ (not (equal spam-stat-last-saved-at
+ (nth 5 (file-attributes spam-stat-file)))))
+ (load-file spam-stat-file)
+ (setq spam-stat-dirty nil
+ spam-stat-last-saved-at
+ (nth 5 (file-attributes spam-stat-file))))
+ (t (message "Spam stat file not loaded: no change in disk.")))))
(defun spam-stat-to-hash-table (entries)
"Turn list ENTRIES into a hash table and store as `spam-stat'.
(defun spam-stat-score-buffer ()
"Return a score describing the spam-probability for this buffer."
(setq spam-stat-score-data (spam-stat-buffer-words-with-scores))
- (let* ((probs (mapcar (lambda (e) (cadr e)) spam-stat-score-data))
+ ;; Score = P / (P + Q), where P is the product of the second element of
+ ;; each `spam-stat-score-data' entry and Q the product of (1 - element).
+ (let* ((probs (mapcar #'cadr spam-stat-score-data))
(prod (apply #'* probs)))
(/ prod (+ prod (apply #'* (mapcar #'(lambda (x) (- 1 x))
probs))))))
(setq spam-stat-dirty t))
(defun spam-stat-install-hooks-function ()
- "Install the spam-stat function hooks"
+ "Install the spam-stat function hooks."
(interactive)
(add-hook 'nnmail-prepare-incoming-message-hook
'spam-stat-store-current-buffer)
(spam-stat-install-hooks-function))
(defun spam-stat-unload-hook ()
- "Uninstall the spam-stat function hooks"
+ "Uninstall the spam-stat function hooks.
+Remove `spam-stat-store-current-buffer' from
+`nnmail-prepare-incoming-message-hook' and
+`spam-stat-store-gnus-article-buffer' from `gnus-select-article-hook'."
(interactive)
(remove-hook 'nnmail-prepare-incoming-message-hook
'spam-stat-store-current-buffer)
(remove-hook 'gnus-select-article-hook
'spam-stat-store-gnus-article-buffer))
+;; `unload-feature' runs the hook variable `spam-stat-unload-hook', so put
+;; the function of the same name on it.
+(add-hook 'spam-stat-unload-hook 'spam-stat-unload-hook)
+
(provide 'spam-stat)
;;; spam-stat.el ends here