From e1765f77e3d8b01d61dd7a8838ac558c5c090563 Mon Sep 17 00:00:00 2001 From: yamaoka Date: Thu, 10 Oct 2002 22:23:37 +0000 Subject: [PATCH] Synch with Oort Gnus. --- lisp/ChangeLog | 16 ++ lisp/mml2015.el | 89 ++++----- lisp/spam-stat.el | 523 +++++++++++++++++++++++++++++++++++++++++++++++++++++ lisp/spam.el | 6 + 4 files changed, 590 insertions(+), 44 deletions(-) create mode 100644 lisp/spam-stat.el diff --git a/lisp/ChangeLog b/lisp/ChangeLog index d4f7498..275d684 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,5 +1,21 @@ 2002-10-10 Simon Josefsson + * mml2015.el (mml2015-pgg-decrypt): Set gnus details even when + decrypt failed. + (mml2015-trust-boundaries-alist): Removed. + (mml2015-gpg-extract-signature-details): Don't use it. + (mml2015-unabbrev-trust-alist): New. + (mml2015-gpg-extract-signature-details): Use it. + +2002-10-10 Ted Zlatanov + * spam.el: compilation fixes, spam-check-bbdb function is nil if no + BBDB installed + + * spam-stat.el: added code from Alex Schroeder to do + statistical analysis of spam in Lisp only + +2002-10-10 Simon Josefsson + * nnimap.el (nnimap-open-server): Re-open server if it isn't in auth, selected or examine state. diff --git a/lisp/mml2015.el b/lisp/mml2015.el index f51aaba..4272f3f 100644 --- a/lisp/mml2015.el +++ b/lisp/mml2015.el @@ -76,17 +76,15 @@ (defvar mml2015-result-buffer nil) -(defvar mml2015-trust-boundaries-alist - '((trust-undefined . nil) - (trust-none . nil) - (trust-marginal . t) - (trust-full . t) - (trust-ultimate . t)) - "Trust boundaries for a signer's GnuPG key. -This alist contains pairs of the form (trust-symbol . boolean), with -symbols that are contained in `gpg-unabbrev-trust-alist'. The boolean -specifies whether the given trust value is good enough to be trusted -by you.") +(defcustom mml2015-unabbrev-trust-alist + '(("TRUST_UNDEFINED" . nil) + ("TRUST_NEVER" . nil) + ("TRUST_MARGINAL" . t) + ("TRUST_FULLY" . t) + ("TRUST_ULTIMATE" . t)) + "Map GnuPG trust output values to a boolean saying if you trust the key." + :type '(repeat (cons (regexp :tag "GnuPG output regexp") + (boolean :tag "Trust key")))) ;;; mailcrypt wrapper @@ -428,36 +426,36 @@ by you.") (defun mml2015-gpg-extract-signature-details () (goto-char (point-min)) - (if (boundp 'gpg-unabbrev-trust-alist) - (let* ((expired (re-search-forward - "^\\[GNUPG:\\] SIGEXPIRED$" - nil t)) - (signer (and (re-search-forward - "^\\[GNUPG:\\] GOODSIG \\([0-9A-Za-z]*\\) \\(.*\\)$" - nil t) - (cons (match-string 1) (match-string 2)))) - (fprint (and (re-search-forward - "^\\[GNUPG:\\] VALIDSIG \\([0-9a-zA-Z]*\\) " + (let* ((expired (re-search-forward + "^\\[GNUPG:\\] SIGEXPIRED$" + nil t)) + (signer (and (re-search-forward + "^\\[GNUPG:\\] GOODSIG \\([0-9A-Za-z]*\\) \\(.*\\)$" nil t) - (match-string 1))) - (trust (and (re-search-forward "^\\[GNUPG:\\] \\(TRUST_.*\\)$" nil t) - (match-string 1))) - (trust-good-enough-p - (cdr (assoc (cdr (assoc trust gpg-unabbrev-trust-alist)) - mml2015-trust-boundaries-alist)))) - (cond ((and signer fprint) - (concat (cdr signer) - (unless trust-good-enough-p - (concat "\nUntrusted, Fingerprint: " - (mml2015-gpg-pretty-print-fpr fprint))) - (when expired - (format "\nWARNING: Signature from expired key (%s)" - (car signer))))) - (t - "From unknown user"))) - (if (re-search-forward "^\\(gpg: \\)?Good signature from \"\\(.*\\)\"$" nil t) - (match-string 2) - "From unknown user"))) + (cons (match-string 1) (match-string 2)))) + (fprint (and (re-search-forward + "^\\[GNUPG:\\] VALIDSIG \\([0-9a-zA-Z]*\\) " + nil t) + (match-string 1))) + (trust (and (re-search-forward + "^\\[GNUPG:\\] \\(TRUST_.*\\)$" + nil t) + (match-string 1))) + (trust-good-enough-p + (cdr (assoc trust mml2015-unabbrev-trust-alist)))) + (cond ((and signer fprint) + (concat (cdr signer) + (unless trust-good-enough-p + (concat "\nUntrusted, Fingerprint: " + (mml2015-gpg-pretty-print-fpr fprint))) + (when expired + (format "\nWARNING: Signature from expired key (%s)" + (car signer))))) + ((re-search-forward + "^\\(gpg: \\)?Good signature from \"\\(.*\\)\"$" nil t) + (match-string 2)) + (t + "From unknown user")))) (defun mml2015-gpg-verify (handle ctl) (catch 'error @@ -589,7 +587,7 @@ by you.") ;; set up a function to call the correct gpg encrypt routine ;; with the right arguments. (FIXME: this should be done ;; differently.) - (flet ((gpg-encrypt-func + (flet ((gpg-encrypt-func (sign plaintext ciphertext result recipients &optional passphrase sign-with-key armor textmode) (if sign @@ -660,9 +658,12 @@ by you.") (if (condition-case err (prog1 (pgg-decrypt-region (point-min) (point-max)) - (setq decrypt-status + (setq decrypt-status (with-current-buffer mml2015-result-buffer - (buffer-string)))) + (buffer-string))) + (mm-set-handle-multipart-parameter + mm-security-handle 'gnus-details + decrypt-status)) (error (mm-set-handle-multipart-parameter mm-security-handle 'gnus-details (mml2015-format-error err)) @@ -735,7 +736,7 @@ by you.") (mm-insert-part signature)) (if (condition-case err (prog1 - (pgg-verify-region (point-min) (point-max) + (pgg-verify-region (point-min) (point-max) signature-file t) (mm-set-handle-multipart-parameter mm-security-handle 'gnus-details diff --git a/lisp/spam-stat.el b/lisp/spam-stat.el new file mode 100644 index 0000000..70d20bc --- /dev/null +++ b/lisp/spam-stat.el @@ -0,0 +1,523 @@ +;;; spam-stat.el --- detecting spam based on statistics + +;; Copyright (C) 2002 Alex Schroeder + +;; Author: Alex Schroeder +;; Maintainer: Alex Schroeder +;; Version: 0.3.3 +;; Keywords: spam filtering gnus +;; URL: http://www.emacswiki.org/cgi-bin/wiki.pl?SpamStat + +;; This file is NOT part of GNU Emacs. + +;; This is free software; you can redistribute it and/or modify it +;; under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; This is distributed in the hope that it will be useful, but WITHOUT +;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +;; License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; This implements spam analysis according to Paul Graham in "A Plan +;; for Spam". The basis for all this is a statistical distribution of +;; words for your spam and non-spam mails. We need this information +;; in a hash-table so that the analysis can use the information when +;; looking at your mails. Therefore, before you begin, you need tons +;; of mails (Graham uses 4000 non-spam and 4000 spam mails for his +;; experiments). +;; +;; The main interface to using spam-stat, are the following functions: +;; +;; `spam-stat-buffer-is-spam' -- called in a buffer, that buffer is +;; considered to be a new spam mail; use this for new mail that has +;; not been processed before +;; +;; `spam-stat-buffer-is-no-spam' -- called in a buffer, that buffer +;; is considered to be a new non-spam mail; use this for new mail that +;; has not been processed before +;; +;; `spam-stat-buffer-change-to-spam' -- called in a buffer, that +;; buffer is no longer considered to be normal mail but spam; use this +;; to change the status of a mail that has already been processed as +;; non-spam +;; +;; `spam-stat-buffer-change-to-non-spam' -- called in a buffer, that +;; buffer is no longer considered to be spam but normal mail; use this +;; to change the status of a mail that has already been processed as +;; spam +;; +;; `spam-stat-save' -- save the hash table to the file; the filename +;; used is stored in the variable `spam-stat-file' +;; +;; `spam-stat-load' -- load the hash table from a file; the filename +;; used is stored in the variable `spam-stat-file' +;; +;; `spam-stat-score-word' -- return the spam score for a word +;; +;; `spam-stat-score-buffer' -- return the spam score for a buffer +;; +;; `spam-stat-split-fancy' -- for fancy mail splitting; add +;; the rule (: spam-stat-split-fancy) to `nnmail-split-fancy' +;; +;; This requires the following in your ~/.gnus file: +;; +;; (require 'spam-stat) +;; (spam-stat-load) + +;;; Testing: + +;; Typical test will involve calls to the following functions: +;; +;; Reset: (setq spam-stat (make-hash-table :test 'equal)) +;; Learn spam: (spam-stat-process-spam-directory "~/Mail/mail/spam") +;; Learn non-spam: (spam-stat-process-non-spam-directory "~/Mail/mail/misc") +;; Save table: (spam-stat-save) +;; File size: (nth 7 (file-attributes spam-stat-file)) +;; Number of words: (hash-table-count spam-stat) +;; Test spam: (spam-stat-test-directory "~/Mail/mail/spam") +;; Test non-spam: (spam-stat-test-directory "~/Mail/mail/misc") +;; Reduce table size: (spam-stat-reduce-size) +;; Save table: (spam-stat-save) +;; File size: (nth 7 (file-attributes spam-stat-file)) +;; Number of words: (hash-table-count spam-stat) +;; Test spam: (spam-stat-test-directory "~/Mail/mail/spam") +;; Test non-spam: (spam-stat-test-directory "~/Mail/mail/misc") + +;;; Dictionary Creation: + +;; Typically, you will filter away mailing lists etc. using specific +;; rules in `nnmail-split-fancy'. Somewhere among these rules, you +;; will filter spam. Here is how you would create your dictionary: + +;; Reset: (setq spam-stat (make-hash-table :test 'equal)) +;; Learn spam: (spam-stat-process-spam-directory "~/Mail/mail/spam") +;; Learn non-spam: (spam-stat-process-non-spam-directory "~/Mail/mail/misc") +;; Repeat for any other non-spam group you need... +;; Reduce table size: (spam-stat-reduce-size) +;; Save table: (spam-stat-save) + +;;; Todo: + +;; Speed it up. Integrate with Gnus such that it uses spam and expiry +;; marks to call the appropriate functions when leaving the summary +;; buffer and saves the hash table when leaving Gnus. More testing: +;; More mails, disabling SpamAssassin, double checking algorithm, find +;; improved algorithm. + +;;; Thanks: + +;; Ted Zlatanov +;; Jesper Harder +;; Dan Schmidt + + + +;;; Code: + +(defgroup spam-stat nil + "Statistical spam detection for Emacs. +Use the functions to build a dictionary of words and their statistical +distribution in spam and non-spam mails. Then use a function to determine +wether a buffer contains spam or not." + :group 'gnus) + +(defcustom spam-stat-file "~/.spam-stat.el" + "File used to save and load the dictionary. +See `spam-stat-to-hash-table' for the format of the file." + :type 'file + :group 'spam-stat) + +(defcustom spam-stat-unknown-word-score 0.2 + "The score to use for unknown words. +Also used for words that don't appear often enough." + :type 'number + :group 'spam-stat) + +(defcustom spam-stat-max-word-length 15 + "Only words shorter than this will be considered." + :type 'integer + :group 'spam-stat) + +(defcustom spam-stat-max-buffer-length 10240 + "Only the beginning of buffers will be analyzed. +This variable says how many characters this will be." + :type 'integer + :group 'spam-stat) + +(defcustom spam-stat-split-fancy-spam-group "mail.spam" + "Name of the group where spam should be stored, if +`spam-stat-split-fancy' is used in fancy splitting rules." + :type 'string + :group 'spam-stat) + +(defvar spam-stat-syntax-table + (let ((table (copy-syntax-table text-mode-syntax-table))) + (modify-syntax-entry ?- "w" table) + (modify-syntax-entry ?_ "w" table) + (modify-syntax-entry ?. "w" table) + (modify-syntax-entry ?! "w" table) + (modify-syntax-entry ?? "w" table) + (modify-syntax-entry ?+ "w" table) + table) + "Syntax table used when processing mails for statistical analysis. +The important part is which characters are word constituents.") + +(defvar spam-stat-buffer nil + "Buffer to use for scoring while splitting. +This is set by hooking into Gnus.") + +(defvar spam-stat-buffer-name " *spam stat buffer*" + "Name of the `spam-stat-buffer'.") + +;; Hooking into Gnus + +(defun spam-stat-store-current-buffer () + "Store a copy of the current buffer in `spam-stat-buffer'." + (save-excursion + (let ((str (buffer-string))) + (set-buffer (get-buffer-create spam-stat-buffer-name)) + (erase-buffer) + (insert str) + (setq spam-stat-buffer (current-buffer))))) + +(defun spam-stat-store-gnus-article-buffer () + "Store a copy of the current article in `spam-stat-buffer'. +This uses `gnus-article-buffer'." + (save-excursion + (set-buffer gnus-original-article-buffer) + (spam-stat-store-current-buffer))) + +(add-hook 'nnmail-prepare-incoming-message-hook + 'spam-stat-store-current-buffer) +(add-hook 'gnus-select-article-hook + 'spam-stat-store-gnus-article-buffer) + +;; Data -- not using defstruct in order to save space and time + +(defvar spam-stat (make-hash-table :test 'equal) + "Hash table used to store the statistics. +Use `spam-stat-load' to load the file. +Every word is used as a key in this table. The value is a vector. +Use `spam-stat-ngood', `spam-stat-nbad', `spam-stat-good', +`spam-stat-bad', and `spam-stat-score' to access this vector.") + +(defvar spam-stat-ngood 0 + "The number of good mails in the dictionary.") + +(defvar spam-stat-nbad 0 + "The number of bad mails in the dictionary.") + +(defsubst spam-stat-good (entry) + "Return the number of times this word belongs to good mails." + (aref entry 0)) + +(defsubst spam-stat-bad (entry) + "Return the number of times this word belongs to bad mails." + (aref entry 1)) + +(defsubst spam-stat-score (entry) + "Set the score of this word." + (if entry + (aref entry 2) + spam-stat-unknown-word-score)) + +(defsubst spam-stat-set-good (entry value) + "Set the number of times this word belongs to good mails." + (aset entry 0 value)) + +(defsubst spam-stat-set-bad (entry value) + "Set the number of times this word belongs to bad mails." + (aset entry 1 value)) + +(defsubst spam-stat-set-score (entry value) + "Set the score of this word." + (aset entry 2 value)) + +(defsubst spam-stat-make-entry (good bad) + "Return a vector with the given properties." + (let ((entry (vector good bad nil))) + (spam-stat-set-score entry (spam-stat-compute-score entry)) + entry)) + +;; Computing + +(defun spam-stat-compute-score (entry) + "Compute the score of this word. 1.0 means spam." + ;; promote all numbers to floats for the divisions + (let* ((g (* 2.0 (spam-stat-good entry))) + (b (float (spam-stat-bad entry)))) + (cond ((< (+ g b) 5) + .2) + ((= 0 spam-stat-ngood) + .99) + ((= 0 spam-stat-nbad) + .01) + (t + (max .01 + (min .99 (/ (/ b spam-stat-nbad) + (+ (/ g spam-stat-ngood) + (/ b spam-stat-nbad))))))))) + +;; Parsing + +(defmacro with-spam-stat-max-buffer-size (&rest body) + "Narrows the buffer down to the first 4k characters, then evaluates BODY." + `(save-restriction + (when (> (- (point-max) + (point-min)) + spam-stat-max-buffer-length) + (narrow-to-region (point-min) + (+ (point-min) spam-stat-max-buffer-length))) + ,@body)) + +(defun spam-stat-buffer-words () + "Return a hash table of words and number of occurences in the buffer." + (with-spam-stat-max-buffer-size + (with-syntax-table spam-stat-syntax-table + (goto-char (point-min)) + (let ((result (make-hash-table :test 'equal)) + word count) + (while (re-search-forward "\\w+" nil t) + (setq word (match-string-no-properties 0) + count (1+ (gethash word result 0))) + (when (< (length word) spam-stat-max-word-length) + (puthash word count result))) + result)))) + +(defun spam-stat-buffer-is-spam () + "Consider current buffer to be a new spam mail." + (setq spam-stat-nbad (1+ spam-stat-nbad)) + (maphash + (lambda (word count) + (let ((entry (gethash word spam-stat))) + (if entry + (spam-stat-set-bad entry (+ count (spam-stat-bad entry))) + (setq entry (spam-stat-make-entry 0 count))) + (spam-stat-set-score entry (spam-stat-compute-score entry)) + (puthash word entry spam-stat))) + (spam-stat-buffer-words))) + +(defun spam-stat-buffer-is-non-spam () + "Consider current buffer to be a new non-spam mail." + (setq spam-stat-ngood (1+ spam-stat-ngood)) + (maphash + (lambda (word count) + (let ((entry (gethash word spam-stat))) + (if entry + (spam-stat-set-good entry (+ count (spam-stat-good entry))) + (setq entry (spam-stat-make-entry count 0))) + (spam-stat-set-score entry (spam-stat-compute-score entry)) + (puthash word entry spam-stat))) + (spam-stat-buffer-words))) + +(defun spam-stat-buffer-change-to-spam () + "Consider current buffer no longer normal mail but spam." + (setq spam-stat-nbad (1+ spam-stat-nbad) + spam-stat-ngood (1- spam-stat-ngood)) + (maphash + (lambda (word count) + (let ((entry (gethash word spam-stat))) + (if (not entry) + (error "This buffer has unknown words in it.") + (spam-stat-set-good entry (- (spam-stat-good entry) count)) + (spam-stat-set-bad entry (+ (spam-stat-bad entry) count)) + (spam-stat-set-score entry (spam-stat-compute-score entry)) + (puthash word entry spam-stat)))) + (spam-stat-buffer-words))) + +(defun spam-stat-buffer-change-to-non-spam () + "Consider current buffer no longer spam but normal mail." + (setq spam-stat-nbad (1- spam-stat-nbad) + spam-stat-ngood (1+ spam-stat-ngood)) + (maphash + (lambda (word count) + (let ((entry (gethash word spam-stat))) + (if (not entry) + (error "This buffer has unknown words in it.") + (spam-stat-set-good entry (+ (spam-stat-good entry) count)) + (spam-stat-set-bad entry (- (spam-stat-bad entry) count)) + (spam-stat-set-score entry (spam-stat-compute-score entry)) + (puthash word entry spam-stat)))) + (spam-stat-buffer-words))) + +;; Saving and Loading + +(defun spam-stat-save () + "Save the `spam-stat' hash table as lisp file." + (with-temp-buffer + (let ((standard-output (current-buffer))) + (insert "(setq spam-stat (spam-stat-to-hash-table '(") + (maphash (lambda (word entry) + (prin1 (list word + (spam-stat-good entry) + (spam-stat-bad entry)))) + spam-stat) + (insert ")) spam-stat-ngood " + (number-to-string spam-stat-ngood) + " spam-stat-nbad " + (number-to-string spam-stat-nbad) + ")")) + (write-file spam-stat-file))) + +(defun spam-stat-load () + "Read the `spam-stat' hash table from disk." + (load-file spam-stat-file)) + +(defun spam-stat-to-hash-table (entries) + "Turn list ENTRIES into a hash table and store as `spam-stat'. +Every element in ENTRIES has the form \(WORD GOOD BAD) where WORD is +the word string, NGOOD is the number of good mails it has appeared in, +NBAD is the number of bad mails it has appeared in, GOOD is the number +of times it appeared in good mails, and BAD is the number of times it +has appeared in bad mails." + (let ((table (make-hash-table :test 'equal))) + (mapc (lambda (l) + (puthash (car l) + (spam-stat-make-entry (nth 1 l) (nth 2 l)) + table)) + entries) + table)) + +;; Scoring buffers + +(defvar spam-stat-score-data nil + "Raw data used in the last run of `spam-stat-score-buffer'.") + +(defsubst spam-stat-score-word (word) + "Return score for WORD. +The default score for unknown words is stored in +`spam-stat-unknown-word-score'." + (spam-stat-score (gethash word spam-stat))) + + +(defun spam-stat-buffer-words-with-scores () + "Process current buffer, return the 15 most conspicuous words. +These are the words whose spam-stat differs the most from 0.5. +The list returned contains elements of the form \(WORD SCORE DIFF), +where DIFF is the difference between SCORE and 0.5." + (with-spam-stat-max-buffer-size + (with-syntax-table spam-stat-syntax-table + (let (result word score) + (maphash (lambda (word ignore) + (setq score (spam-stat-score-word word) + result (cons (list word score (abs (- score 0.5))) + result))) + (spam-stat-buffer-words)) + (setq result (sort result (lambda (a b) (< (nth 2 b) (nth 2 a))))) + (setcdr (nthcdr 14 result) nil) + result)))) + +(defun spam-stat-score-buffer () + "Return a score describing the spam-probability for this buffer." + (setq spam-stat-score-data (spam-stat-buffer-words-with-scores)) + (let* ((probs (mapcar (lambda (e) (cadr e)) spam-stat-score-data)) + (prod (apply #'* probs))) + (/ prod (+ prod (apply #'* (mapcar #'(lambda (x) (- 1 x)) + probs)))))) + +(defun spam-stat-split-fancy () + "Return the name of the spam group if the current mail is spam. +Use this function on `nnmail-split-fancy'. If you are interested in +the raw data used for the last run of `spam-stat-score-buffer', +check the variable `spam-stat-score-data'." + (condition-case var + (progn + (set-buffer spam-stat-buffer) + (goto-char (point-min)) + (when (> (spam-stat-score-buffer) 0.9) + (when (boundp 'nnmail-split-trace) + (mapc (lambda (entry) + (push entry nnmail-split-trace)) + spam-stat-score-data)) + spam-stat-split-fancy-spam-group)) + (error (message "Error in spam-stat-split-fancy: %S" var) + nil))) + +;; Testing + +(defun spam-stat-process-directory (dir func) + "Process all the regular files in directory DIR using function FUNC." + (let* ((files (directory-files dir t "^[^.]")) + (max (/ (length files) 100.0)) + (count 0)) + (with-temp-buffer + (dolist (f files) + (when (and (file-readable-p f) + (file-regular-p f)) + (setq count (1+ count)) + (message "Reading %.2f%%" (/ count max)) + (insert-file-contents f) + (funcall func) + (erase-buffer)))))) + +(defun spam-stat-process-spam-directory (dir) + "Process all the regular files in directory DIR as spam." + (interactive "D") + (spam-stat-process-directory dir 'spam-stat-buffer-is-spam)) + +(defun spam-stat-process-non-spam-directory (dir) + "Process all the regular files in directory DIR as non-spam." + (interactive "D") + (spam-stat-process-directory dir 'spam-stat-buffer-is-non-spam)) + +(defun spam-stat-count () + "Return size of `spam-stat'." + (interactive) + (hash-table-count spam-stat)) + +(defun spam-stat-test-directory (dir) + "Test all the regular files in directory DIR for spam. +If the result is 1.0, then all files are considered spam. +If the result is 0.0, non of the files is considered spam. +You can use this to determine error rates." + (interactive "D") + (let* ((files (directory-files dir t "^[^.]")) + (total (length files)) + (score 0.0); float + (max (/ total 100.0)); float + (count 0)) + (with-temp-buffer + (dolist (f files) + (when (and (file-readable-p f) + (file-regular-p f)) + (setq count (1+ count)) + (message "Reading %.2f%%, score %.2f%%" + (/ count max) (/ score count)) + (insert-file-contents f) + (when (> (spam-stat-score-buffer) 0.9) + (setq score (1+ score))) + (erase-buffer)))) + (message "Final score: %d / %d = %f" score total (/ score total)))) + +;; Shrinking the dictionary + +(defun spam-stat-reduce-size (&optional count distance) + "Reduce the size of `spam-stat'. +This removes all words that occur less than COUNT from the dictionary. +COUNT defaults to 5. It also removes all words whose spam score +is less than DISTANCE from 0.5. DISTANCE defaults to 0.1, meaning that +all words with score between 0.4 and 0.6 are removed." + (setq count (or count 5) + distance (or distance 0.1)) + (maphash (lambda (key entry) + (when (or (< (+ (spam-stat-good entry) + (spam-stat-bad entry)) + count) + (< (abs (- (spam-stat-score entry) 0.5)) + distance)) + (remhash key spam-stat))) + spam-stat)) + +(provide 'spam-stat) + +;;; spam-stat.el ends here + diff --git a/lisp/spam.el b/lisp/spam.el index 5feacd2..8138b7f 100644 --- a/lisp/spam.el +++ b/lisp/spam.el @@ -45,6 +45,7 @@ (eval-when-compile (condition-case nil (require 'bbdb-com) + (file-error (defalias 'bbdb-search 'ignore)) (error))) ;; autoload executable-find @@ -279,6 +280,11 @@ The regular expression is matched against the address.") (setq who (regexp-quote (cadr (gnus-extract-address-components who)))) (if (bbdb-search (bbdb-records) nil nil who) nil spam-split-group)))) +;; let spam-check-bbdb be nil if the BBDB can't be loaded +(condition-case nil + (require 'bbdb) + (file-error (defalias 'spam-check-bbdb 'ignore))) + (defun spam-check-blacklist () ;; FIXME! Should it detect when file timestamps change? (unless spam-blacklist-cache -- 1.7.10.4