X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Fspam.el;h=090d8802a7c281f76310976e82bb1991f1b6093d;hb=e801641e73d4d42680e96fea8dc7e77c3aa5ed4e;hp=008f8e90274a70b350ebcad934e22a55f778c324;hpb=04ba5250e9e47ebe40860a0902d4ef6405ca143f;p=elisp%2Fgnus.git- diff --git a/lisp/spam.el b/lisp/spam.el index 008f8e9..090d880 100644 --- a/lisp/spam.el +++ b/lisp/spam.el @@ -132,9 +132,9 @@ Competition." :group 'spam) (defcustom spam-disable-spam-split-during-ham-respool nil - "Whether `spam-split' should be ignored while resplitting ham in a process -destination. This is useful to prevent ham from ending up in the same spam -group after the resplit. Don't set this to t if you have spam-split as the + "Whether `spam-split' should be ignored while resplitting ham. +This is useful to prevent ham from ending up in the same spam +group after the resplit. Don't set this to t if you have `spam-split' as the last rule in your split configuration." :type 'boolean :group 'spam) @@ -407,8 +407,8 @@ your main source of newsgroup names." :group 'spam) (defcustom spam-spamoracle-database nil - "Location of spamoracle database file. When nil, use the default -spamoracle database." + "Location of spamoracle database file. +When nil, use the default spamoracle database." :type '(choice (directory :tag "Location of spamoracle database file.") (const :tag "Use the default")) :group 'spam-spamoracle) @@ -428,6 +428,14 @@ spamoracle database." "Msx" gnus-summary-mark-as-spam "\M-d" gnus-summary-mark-as-spam) +(defvar spam-cache-lookups t + "Whether spam.el will try to cache lookups using `spam-caches'.") + +(defvar spam-caches (make-hash-table + :size 10 + :test 'equal) + "Cache of spam detection entries.") + (defvar spam-old-ham-articles nil "List of old ham articles, generated when a group is entered.") @@ -438,15 +446,21 @@ spamoracle database." 
"If non-nil, `spam-split' is disabled, and always returns nil.") (defvar spam-split-last-successful-check nil - "`spam-split' will set this to nil or a spam-use-XYZ check if it - finds ham or spam.") + "Internal variable. +`spam-split' will set this to nil or a spam-use-XYZ check if it +finds ham or spam.") ;; convenience functions +(defun spam-clear-cache (symbol) + "Clear the spam-caches entry for a check." + (remhash symbol spam-caches)) + (defun spam-xor (a b) - "Logical exclusive `or'." + "Logical A xor B." (and (or a b) (not (and a b)))) (defun spam-group-ham-mark-p (group mark &optional spam) + "Checks if MARK is considered a ham mark in GROUP." (when (stringp group) (let* ((marks (spam-group-ham-marks group spam)) (marks (if (symbolp mark) @@ -455,9 +469,11 @@ spamoracle database." (memq mark marks)))) (defun spam-group-spam-mark-p (group mark) + "Checks if MARK is considered a spam mark in GROUP." (spam-group-ham-mark-p group mark t)) (defun spam-group-ham-marks (group &optional spam) + "In GROUP, get all the ham marks." (when (stringp group) (let* ((marks (if spam (gnus-parameter-spam-marks group) @@ -467,9 +483,11 @@ spamoracle database." marks))) (defun spam-group-spam-marks (group) + "In GROUP, get all the spam marks." (spam-group-ham-marks group t)) (defun spam-group-spam-contents-p (group) + "Is GROUP a spam group?" (if (stringp group) (or (member group spam-junk-mailgroups) (memq 'gnus-group-spam-classification-spam @@ -477,6 +495,7 @@ spamoracle database." nil)) (defun spam-group-ham-contents-p (group) + "Is GROUP a ham group?" (if (stringp group) (memq 'gnus-group-spam-classification-ham (gnus-parameter-spam-contents group)) @@ -496,9 +515,9 @@ spamoracle database." 
(gnus-group-ham-exit-processor-BBDB ham spam-use-BBDB) (gnus-group-ham-exit-processor-copy ham spam-use-ham-copy) (gnus-group-ham-exit-processor-spamoracle ham spam-use-spamoracle)) - "The spam-list-of-processors list contains pairs associating a -ham/spam exit processor variable with a classification and a -spam-use-* variable.") + "The `spam-list-of-processors' list. +This list contains pairs associating a ham/spam exit processor +variable with a classification and a spam-use-* variable.") (defun spam-group-processor-p (group processor) (if (and (stringp group) @@ -561,6 +580,14 @@ spam-use-* variable.") (defun spam-group-ham-processor-spamoracle-p (group) (spam-group-processor-p group 'gnus-group-ham-exit-processor-spamoracle)) +(defun spam-report-articles-gmane (n) + "Report the current message as spam. +Respects the process/prefix convention." + (interactive "P") + (dolist (article (gnus-summary-work-articles n)) + (gnus-summary-remove-process-mark article) + (spam-report-gmane article))) + ;;; Summary entry and exit processing. 
(defun spam-summary-prepare () @@ -760,11 +787,6 @@ spam-use-* variable.") (apply 'spam-ham-move-routine (car groups)) (spam-ham-copy-or-move-routine nil groups))) -(eval-and-compile - (defalias 'spam-point-at-eol (if (fboundp 'point-at-eol) - 'point-at-eol - 'line-end-position))) - (defun spam-get-article-as-string (article) (let ((article-buffer (spam-get-article-as-buffer article)) article-string) @@ -795,31 +817,47 @@ spam-use-* variable.") ;; article-filename ;; nil))) +(defun spam-fetch-field-fast (article field) + "Fetch a field quickly, using the internal gnus-data-list function" + (when (numberp article) + (let* ((header (assoc article (gnus-data-list nil))) + (data-header (if header (gnus-data-header header) nil))) + (cond ((null data-header) nil) + ((equal field 'from) + (mail-header-from data-header)) + ((equal field 'message-id) + (mail-header-message-id data-header)) + ((equal field 'subject) + (mail-header-subject data-header)) + ((equal field 'references) + (mail-header-references data-header)) + ((equal field 'date) + (mail-header-date data-header)) + ((equal field 'xref) + (mail-header-xref data-header)) + ((equal field 'extra) + (mail-header-extra data-header)) + (t + nil))))) + (defun spam-fetch-field-from-fast (article) - "Fetch the `from' field quickly, using the internal gnus-data-list function" - (if (and (numberp article) - (assoc article (gnus-data-list nil))) - (mail-header-from - (gnus-data-header (assoc article (gnus-data-list nil)))) - nil)) + (spam-fetch-field-fast article 'from)) (defun spam-fetch-field-subject-fast (article) - "Fetch the `subject' field quickly, using the internal - gnus-data-list function" - (if (and (numberp article) - (assoc article (gnus-data-list nil))) - (mail-header-subject - (gnus-data-header (assoc article (gnus-data-list nil)))) - nil)) + (spam-fetch-field-fast article 'subject)) (defun spam-fetch-field-message-id-fast (article) - "Fetch the `Message-ID' field quickly, using the internal - gnus-data-list function" - (if (and 
(numberp article) - (assoc article (gnus-data-list nil))) - (mail-header-message-id - (gnus-data-header (assoc article (gnus-data-list nil)))) - nil)) + (spam-fetch-field-fast article 'message-id)) + +(defun spam-insert-fake-headers (article) + (insert (format "From: %s\n" (spam-fetch-field-fast article 'from))) + (insert (format "Subject: %s\n" (spam-fetch-field-fast article 'subject))) + (insert (format "Message-ID: %s\n" (spam-fetch-field-fast article 'message-id))) + (insert (format "Date: %s\n" (spam-fetch-field-fast article 'date))) + (insert (format "References: %s\n" (spam-fetch-field-fast article 'references))) + (insert (format "Xref: %s\n" (spam-fetch-field-fast article 'xref))) + (when (spam-fetch-field-fast article 'extra) + (insert (format "%s\n" (spam-fetch-field-fast article 'extra))))) ;;;; Spam determination. @@ -856,11 +894,13 @@ definitely a spam.") spam-use-regex-body spam-use-stat spam-use-bogofilter + spam-use-blackholes spam-use-spamoracle) "The spam-list-of-statistical-checks list contains all the mail -splitters that need to have the full message body available.") +splitters that need to have the full message body available. +Note that you should fetch extra headers if you don't like this, +e.g. fetch the 'Received' header for spam-use-blackholes.") -;;;TODO: modify to invoke self with each check if invoked without specifics (defun spam-split (&rest specific-checks) "Split this message into the `spam' group if it is spam. This function can be used as an entry in the variable `nnmail-split-fancy', @@ -882,7 +922,9 @@ See the Info node `(gnus)Fancy Mail Splitting' for more details." 
(save-excursion (save-restriction (dolist (check spam-list-of-statistical-checks) - (when (and (symbolp check) (symbol-value check)) + (when (and (symbolp check) + (or (symbol-value check) + (memq check specific-checks))) (widen) (gnus-message 8 "spam-split: widening the buffer (%s requires it)" (symbol-name check)) @@ -892,9 +934,11 @@ See the Info node `(gnus)Fancy Mail Splitting' for more details." decision) (while (and list-of-checks (not decision)) (let ((pair (pop list-of-checks))) - (when (and (symbol-value (car pair)) - (or (null specific-checks) - (memq (car pair) specific-checks))) + (when (or + ;; either, given specific checks, this is one of them + (and specific-checks (memq (car pair) specific-checks)) + ;; or, given no specific checks, spam-use-CHECK is set + (and (null specific-checks) (symbol-value (car pair)))) (gnus-message 5 "spam-split: calling the %s function" (symbol-name (cdr pair))) (setq decision (funcall (cdr pair))) @@ -903,8 +947,7 @@ See the Info node `(gnus)Fancy Mail Splitting' for more details." (setq spam-split-last-successful-check (car pair))) (when (eq decision 'spam) - (if spam-split-symbolic-return - (setq decision spam-split-group) + (unless spam-split-symbolic-return (gnus-error 5 (format "spam-split got %s but %s is nil" @@ -921,44 +964,71 @@ See the Info node `(gnus)Fancy Mail Splitting' for more details." 
(let* ((group gnus-newsgroup-name) (autodetect (gnus-parameter-spam-autodetect group)) (methods (gnus-parameter-spam-autodetect-methods group)) - (first-method (nth 0 methods))) - (when (and autodetect - (not (equal first-method 'none))) + (first-method (nth 0 methods)) + (articles (if spam-autodetect-recheck-messages + gnus-newsgroup-articles + gnus-newsgroup-unseen)) + article-cannot-be-faked) + + (dolist (check spam-list-of-statistical-checks) + (when (and (symbolp check) + (memq check methods)) + (setq article-cannot-be-faked t) + (return))) + + (when (memq 'default methods) + (setq article-cannot-be-faked t)) + + (when (and autodetect + (not (equal first-method 'none))) (mapcar (lambda (article) (let ((id (spam-fetch-field-message-id-fast article)) (subject (spam-fetch-field-subject-fast article)) - (sender (spam-fetch-field-from-fast article))) - (unless (and spam-log-to-registry - (spam-log-registered-p id 'incoming)) - (let* ((spam-split-symbolic-return t) - (spam-split-symbolic-return-positive t) - (split-return - (with-temp-buffer - (gnus-request-article-this-buffer - article - group) - (if (or (null first-method) - (equal first-method 'default)) - (spam-split) - (apply 'spam-split methods))))) - (if (equal split-return 'spam) - (gnus-summary-mark-article article gnus-spam-mark)) - - (when (and split-return spam-log-to-registry) - (when (zerop (gnus-registry-group-count id)) - (gnus-registry-add-group - id group subject sender)) - + (sender (spam-fetch-field-from-fast article)) + registry-lookup) + + (unless id + (gnus-error 5 "Article %d has no message ID!" 
article)) + + (when (and id spam-log-to-registry) + (setq registry-lookup (spam-log-registration-type id 'incoming)) + (when registry-lookup + (gnus-message + 9 + "spam-find-spam: message %s was already registered incoming" + id))) + + (let* ((spam-split-symbolic-return t) + (spam-split-symbolic-return-positive t) + (split-return + (or registry-lookup + (with-temp-buffer + (if article-cannot-be-faked + (gnus-request-article-this-buffer + article + group) + (spam-insert-fake-headers article)) + (if (or (null first-method) + (equal first-method 'default)) + (spam-split) + (apply 'spam-split methods)))))) + (if (equal split-return 'spam) + (gnus-summary-mark-article article gnus-spam-mark)) + + (when (and id split-return spam-log-to-registry) + (when (zerop (gnus-registry-group-count id)) + (gnus-registry-add-group + id group subject sender)) + + (unless registry-lookup (spam-log-processing-to-registry id 'incoming split-return spam-split-last-successful-check group)))))) - (if spam-autodetect-recheck-messages - gnus-newsgroup-articles - gnus-newsgroup-unseen))))) + articles)))) (defvar spam-registration-functions ;; first the ham register, second the spam register function @@ -1105,8 +1175,8 @@ functions") type cell-list)) - (gnus-message 5 (format "%s called with bad ID, type, classification, check, or group" - "spam-log-processing-to-registry"))))) + (gnus-error 5 (format "%s called with bad ID, type, classification, check, or group" + "spam-log-processing-to-registry"))))) ;;; check if a ham- or spam-processor registration has been done (defun spam-log-registered-p (id type) @@ -1115,10 +1185,26 @@ functions") (spam-process-type-valid-p type)) (cdr-safe (gnus-registry-fetch-extra id type)) (progn - (gnus-message 5 (format "%s called with bad ID, type, classification, or check" - "spam-log-registered-p")) + (gnus-error 5 (format "%s called with bad ID, type, classification, or check" + "spam-log-registered-p")) nil)))) +;;; check what a ham- or spam-processor 
registration says +;;; returns nil if conflicting registrations are found +(defun spam-log-registration-type (id type) + (let ((count 0) + decision) + (dolist (reg (spam-log-registered-p id type)) + (let ((classification (nth 0 reg))) + (when (spam-classification-valid-p classification) + (when (and decision + (not (eq classification decision))) + (setq count (+ 1 count))) + (setq decision classification)))) + (if (< 0 count) + nil + decision))) + ;;; check if a ham- or spam-processor registration needs to be undone (defun spam-log-unregistration-needed-p (id type classification check) (when spam-log-to-registry @@ -1135,8 +1221,8 @@ functions") (setq found t)))) found) (progn - (gnus-message 5 (format "%s called with bad ID, type, classification, or check" - "spam-log-unregistration-needed-p")) + (gnus-error 5 (format "%s called with bad ID, type, classification, or check" + "spam-log-unregistration-needed-p")) nil)))) @@ -1159,8 +1245,8 @@ functions") type new-cell-list)) (progn - (gnus-message 5 (format "%s called with bad ID, type, check, or group" - "spam-log-undo-registration")) + (gnus-error 5 (format "%s called with bad ID, type, check, or group" + "spam-log-undo-registration")) nil)))) ;;; set up IMAP widening if it's necessary @@ -1212,7 +1298,7 @@ functions") (defun spam-check-blackholes () "Check the Received headers for blackholed relays." 
- (let ((headers (nnmail-fetch-field "received")) + (let ((headers (message-fetch-field "received")) (spam-split-group (if spam-split-symbolic-return 'spam spam-split-group)) @@ -1277,6 +1363,12 @@ functions") (require 'bbdb) (require 'bbdb-com) + ;; when the BBDB changes, we want to clear out our cache + (defun spam-clear-cache-BBDB (&rest immaterial) + (spam-clear-cache 'spam-use-BBDB)) + + (add-hook 'bbdb-change-hook 'spam-clear-cache-BBDB) + (defun spam-enter-ham-BBDB (addresses &optional remove) "Enter an address into the BBDB; implies ham (non-spam) sender" (dolist (from addresses) @@ -1312,13 +1404,30 @@ functions") (defun spam-check-BBDB () "Mail from people in the BBDB is classified as ham or non-spam" - (let ((who (nnmail-fetch-field "from")) + (let ((who (message-fetch-field "from")) (spam-split-group (if spam-split-symbolic-return 'spam - spam-split-group))) + spam-split-group)) + bbdb-cache bbdb-hashtable) + (when spam-cache-lookups + (setq bbdb-cache (gethash 'spam-use-BBDB spam-caches)) + (unless bbdb-cache + (setq bbdb-cache + ;; this is the expanded (bbdb-hashtable) macro + ;; without the debugging support + (with-current-buffer (bbdb-buffer) + (save-excursion + (save-window-excursion + (bbdb-records nil t) + bbdb-hashtable)))) + (puthash 'spam-use-BBDB bbdb-cache spam-caches))) (when who (setq who (nth 1 (gnus-extract-address-components who))) - (if (bbdb-search-simple nil who) + (if + (if spam-cache-lookups + (symbol-value + (intern-soft who bbdb-cache)) + (bbdb-search-simple nil who)) t (if spam-use-BBDB-exclusive spam-split-group @@ -1326,6 +1435,8 @@ functions") (file-error (progn (defalias 'bbdb-search-simple 'ignore) + (defalias 'bbdb-records 'ignore) + (defalias 'bbdb-buffer 'ignore) (defalias 'spam-check-BBDB 'ignore) (defalias 'spam-BBDB-register-routine 'ignore) (defalias 'spam-enter-ham-BBDB 'ignore) @@ -1365,7 +1476,7 @@ functions") ;; check the return now (we're back in the temp buffer) (goto-char (point-min)) (if (not (eobp)) - (setq 
category (buffer-substring (point) (spam-point-at-eol)))) + (setq category (buffer-substring (point) (point-at-eol)))) (when (not (zerop (length category))) ; we need a category here (if spam-ifile-all-categories (setq return category) @@ -1486,7 +1597,8 @@ Uses `gnus-newsgroup-name' if category is nil (for ham registration)." With a non-nil REMOVE, remove them." (interactive "sAddress: ") (spam-enter-list address spam-whitelist remove) - (setq spam-whitelist-cache nil)) + (setq spam-whitelist-cache nil) + (spam-clear-cache 'spam-use-whitelist)) ;;; address can be a list, too (defun spam-enter-blacklist (address &optional remove) @@ -1494,7 +1606,8 @@ With a non-nil REMOVE, remove them." With a non-nil REMOVE, remove them." (interactive "sAddress: ") (spam-enter-list address spam-blacklist remove) - (setq spam-blacklist-cache nil)) + (setq spam-blacklist-cache nil) + (spam-clear-cache 'spam-use-blacklist)) (defun spam-enter-list (addresses file &optional remove) "Enter ADDRESSES into the given FILE. @@ -1523,6 +1636,32 @@ REMOVE not nil, remove the ADDRESSES." 
(insert a "\n"))))) (save-buffer)))) +(defun spam-filelist-build-cache (type) + (let ((cache (if (eq type 'spam-use-blacklist) + spam-blacklist-cache + spam-whitelist-cache)) + parsed-cache) + (unless (gethash type spam-caches) + (while cache + (let ((address (pop cache))) + (unless (zerop (length address)) ; 0 for a nil address too + (setq address (regexp-quote address)) + ;; fix regexp-quote's treatment of user-intended regexes + (while (string-match "\\\\\\*" address) + (setq address (replace-match ".*" t t address)))) + (push address parsed-cache))) + (puthash type parsed-cache spam-caches)))) + +(defun spam-filelist-check-cache (type from) + (when (stringp from) + (spam-filelist-build-cache type) + (let (found) + (dolist (address (gethash type spam-caches)) + (when (and address (string-match address from)) + (setq found t) + (return))) + found))) + ;;; returns t if the sender is in the whitelist, nil or ;;; spam-split-group otherwise (defun spam-check-whitelist () @@ -1532,7 +1671,7 @@ REMOVE not nil, remove the ADDRESSES." spam-split-group))) (unless spam-whitelist-cache (setq spam-whitelist-cache (spam-parse-list spam-whitelist))) - (if (spam-from-listed-p spam-whitelist-cache) + (if (spam-from-listed-p 'spam-use-whitelist) t (if spam-use-whitelist-exclusive spam-split-group @@ -1545,7 +1684,7 @@ REMOVE not nil, remove the ADDRESSES." spam-split-group))) (unless spam-blacklist-cache (setq spam-blacklist-cache (spam-parse-list spam-blacklist))) - (and (spam-from-listed-p spam-blacklist-cache) spam-split-group))) + (and (spam-from-listed-p 'spam-use-blacklist) spam-split-group))) (defun spam-parse-list (file) (when (file-readable-p file) @@ -1553,7 +1692,7 @@ REMOVE not nil, remove the ADDRESSES." 
(with-temp-buffer (insert-file-contents file) (while (not (eobp)) - (setq address (buffer-substring (point) (spam-point-at-eol))) + (setq address (buffer-substring (point) (point-at-eol))) (forward-line 1) ;; insert the e-mail address if detected, otherwise the raw data (unless (zerop (length address)) @@ -1561,20 +1700,10 @@ REMOVE not nil, remove the ADDRESSES." (push (or pure-address address) contents))))) (nreverse contents)))) -(defun spam-from-listed-p (cache) - (let ((from (nnmail-fetch-field "from")) +(defun spam-from-listed-p (type) + (let ((from (message-fetch-field "from")) found) - (while cache - (let ((address (pop cache))) - (unless (zerop (length address)) ; 0 for a nil address too - (setq address (regexp-quote address)) - ;; fix regexp-quote's treatment of user-intended regexes - (while (string-match "\\\\\\*" address) - (setq address (replace-match ".*" t t address)))) - (when (and address (string-match address from)) - (setq found t - cache nil)))) - found)) + (spam-filelist-check-cache type from))) (defun spam-filelist-register-routine (articles blacklist &optional unregister) (let ((de-symbol (if blacklist 'spam-use-whitelist 'spam-use-blacklist)) @@ -1636,7 +1765,7 @@ REMOVE not nil, remove the ADDRESSES." ;;;; Bogofilter (defun spam-check-bogofilter-headers (&optional score) - (let ((header (nnmail-fetch-field spam-bogofilter-header)) + (let ((header (message-fetch-field spam-bogofilter-header)) (spam-split-group (if spam-split-symbolic-return 'spam spam-split-group))) @@ -1816,8 +1945,4 @@ REMOVE not nil, remove the ADDRESSES." (provide 'spam) -;;; spam.el ends here. - -(provide 'spam) - ;;; spam.el ends here