+2005-04-06 D Goel <deego@gnufans.org>
+
+ * spam-stat.el (spam-stat-score-buffer): Add a call to a
+ user-function allow user modifications of the scores.
+ (spam-stat-score-buffer-user): New function, to allow
+ user-computed modifications to the score.
+ (spam-stat-score-buffer-user-functions): list of additional
+ scoring functions
+ (spam-stat-error-holder): global temporary error holder
+ (spam-stat-split-fancy): use the new `spam-stat-error-holder'
+ variable
+
+2005-04-06 Teodor Zlatanov <tzz@lifelogs.com>
+
+ * gnus-registry.el (gnus-registry-clean-empty-function)
+ (gnus-registry-trim, gnus-registry-fetch-groups)
+ (gnus-registry-delete-group): now groups that match
+ `gnus-registry-ignored-groups' will be removed from the registry
+ entries, not just ignored for splitting. This helps clean up the
+ registry. Also, `gnus-registry-fetch-groups' is a convenient way
+ to get all the groups a message ID is in.
+
+ * spam-stat.el (spam-stat-split-fancy-spam-threshold)
+ (spam-stat-split-fancy): changed "threshhold" to "threshold"
+ (spam-stat-score-buffer-user-functions): added :number custom type
+
2005-04-06 Katsumi Yamaoka <yamaoka@jpl.org>
* mm-util.el (mm-coding-system-p): Don't return binary for the nil
(defun gnus-registry-clean-empty-function ()
"Remove all empty entries from the registry. Returns count thereof."
(let ((count 0))
+
(maphash
(lambda (key value)
- (unless (or
- (gnus-registry-fetch-group key)
- ;; TODO: look for specific extra data here!
- ;; in this example, we look for 'label
- (gnus-registry-fetch-extra key 'label))
- (incf count)
- (remhash key gnus-registry-hashtb)))
+ (when (stringp key)
+ (dolist (group (gnus-registry-fetch-groups key))
+ (when (gnus-parameter-registry-ignore group)
+ (gnus-message
+ 10
+ "gnus-registry: deleted ignored group %s from key %s"
+ group key)
+ (gnus-registry-delete-group key group)))
+
+ (unless (gnus-registry-group-count key)
+ (gnus-registry-delete-id key))
+
+ (unless (or
+ (gnus-registry-fetch-group key)
+ ;; TODO: look for specific extra data here!
+ ;; in this example, we look for 'label
+ (gnus-registry-fetch-extra key 'label)
+ (stringp key))
+ (incf count)
+ (gnus-registry-delete-id key))
+
+ (unless (stringp key)
+ (gnus-message
+ 10
+ "gnus-registry key %s was not a string, removing"
+ key)
+ (gnus-registry-delete-id key))))
+
gnus-registry-hashtb)
count))
(setq gnus-registry-dirty nil))
(defun gnus-registry-trim (alist)
- "Trim alist to size, using gnus-registry-max-entries."
+ "Trim alist to size, using gnus-registry-max-entries.
+Also, drop all gnus-registry-ignored-groups matches."
(if (null gnus-registry-max-entries)
alist ; just return the alist
;; else, when given max-entries, trim the alist
(lambda (key value)
(puthash key (gnus-registry-fetch-extra key 'mtime) timehash))
gnus-registry-hashtb)
-
+
;; we use the return value of this setq, which is the trimmed alist
(setq alist
(nthcdr
trim-length
(sort alist
(lambda (a b)
- (time-less-p
- (cdr (gethash (car a) timehash))
- (cdr (gethash (car b) timehash))))))))))
+ (time-less-p
+ (or (cdr (gethash (car a) timehash)) '(0 0 0))
+ (or (cdr (gethash (car b) timehash)) '(0 0 0))))))))))
(defun alist-to-hashtable (alist)
"Build a hashtable from the values in ALIST."
crumb
(gnus-group-short-name crumb))))))))
+(defun gnus-registry-fetch-groups (id)
+ "Get the groups of a message, based on the message ID."
+ (let ((trail (gethash id gnus-registry-hashtb))
+ groups)
+ (dolist (crumb trail)
+ (when (stringp crumb)
+ ;; push the group name into the list
+ (setq
+ groups
+ (cons
+ (if (or (not (stringp crumb)) gnus-registry-use-long-group-names)
+ crumb
+ (gnus-group-short-name crumb))
+ groups))))
+ ;; return the list of groups
+ groups))
+
(defun gnus-registry-group-count (id)
"Get the number of groups of a message, based on the message ID."
(let ((trail (gethash id gnus-registry-hashtb)))
(defun gnus-registry-delete-group (id group)
"Delete a group for a message, based on the message ID."
- (when group
- (when id
+ (when (and group id)
(let ((trail (gethash id gnus-registry-hashtb))
- (group (gnus-group-short-name group)))
+ (short-group (gnus-group-short-name group)))
(puthash id (if trail
- (delete group trail)
+ (delete short-group (delete group trail))
nil)
gnus-registry-hashtb))
;; now, clear the entry if there are no more groups
(gnus-registry-delete-id id)))
;; is this ID still in the registry?
(when (gethash id gnus-registry-hashtb)
- (gnus-registry-store-extra-entry id 'mtime (current-time))))))
+ (gnus-registry-store-extra-entry id 'mtime (current-time)))))
(defun gnus-registry-delete-id (id)
"Delete a message ID from the registry."
:type 'string
:group 'spam-stat)
-(defcustom spam-stat-split-fancy-spam-threshhold 0.9
+(defcustom spam-stat-split-fancy-spam-threshold 0.9
"Spam score threshold in spam-stat-split-fancy."
:type 'number
:group 'spam-stat)
:type 'hook
:group 'spam-stat)
+(defcustom spam-stat-score-buffer-user-functions nil
+ "List of additional scoring functions.
+Called one by one on the buffer.
+
+If all of these functions return non-nil answers, these numerical
+answers are added to the computed spam stat score on the buffer. If
+you defun such functions, make sure they don't return the buffer in a
+narrowed state or such: use, for example, `save-excursion'. Each of
+your functions is also passed the initial spam-stat score which might
+aid in your scoring.
+
+Also be careful when defining such functions. If they take a long
+time, they will slow down your mail splitting. Thus, if the buffer is
+large, don't forget to use smaller regions, by wrapping your work in,
+say, `with-spam-stat-max-buffer-size'."
+ :type '(repeat sexp)
+ :group 'spam-stat)
+
(defcustom spam-stat-process-directory-age 90
"Max. age of files to be processed in directory, in days.
When using `spam-stat-process-spam-directory' or
`spam-stat-process-non-spam-directory', only files that have
been touched in this many days will be considered. Without
this filter, re-training spam-stat with several thousand messages
-will start to take a very long time.")
+will start to take a very long time."
+ :type 'number
+ :group 'spam-stat)
(defvar spam-stat-last-saved-at nil
"Time stamp of last change of spam-stat-file on this run")
(defvar spam-stat-nbad 0
"The number of bad mails in the dictionary.")
+(defvar spam-stat-error-holder nil
+ "A holder for condition-case errors while scoring buffers.")
+
(defsubst spam-stat-good (entry)
"Return the number of times this word belongs to good mails."
(aref entry 0))
result))
(defun spam-stat-score-buffer ()
- "Return a score describing the spam-probability for this buffer."
+ "Return a score describing the spam-probability for this buffer.
+Add user supplied modifications if supplied."
+ (interactive) ; helps in debugging.
(setq spam-stat-score-data (spam-stat-buffer-words-with-scores))
(let* ((probs (mapcar 'cadr spam-stat-score-data))
- (prod (apply #'* probs)))
- (/ prod (+ prod (apply #'* (mapcar #'(lambda (x) (- 1 x))
- probs))))))
+ (prod (apply #'* probs))
+ (score0
+ (/ prod (+ prod (apply #'* (mapcar #'(lambda (x) (- 1 x))
+ probs)))))
+ (score1s
+ (condition-case
+ spam-stat-error-holder
+ (spam-stat-score-buffer-user score0)
+ (error nil)))
+ (ans
+ (if score1s (+ score0 score1s) score0)))
+ (when (interactive-p)
+ (message "%S" ans))
+ ans))
+
+(defun spam-stat-score-buffer-user (&rest args)
+ (let* ((scores
+ (mapcar
+ (lambda (fn)
+ (apply fn args))
+ spam-stat-score-buffer-user-functions)))
+ (if (memq nil scores) nil
+ (apply #'+ scores))))
(defun spam-stat-split-fancy ()
"Return the name of the spam group if the current mail is spam.
Use this function on `nnmail-split-fancy'. If you are interested in
the raw data used for the last run of `spam-stat-score-buffer',
check the variable `spam-stat-score-data'."
- (condition-case var
+ (condition-case spam-stat-error-holder
(progn
(set-buffer spam-stat-buffer)
(goto-char (point-min))
- (when (> (spam-stat-score-buffer) spam-stat-split-fancy-spam-threshhold)
+ (when (> (spam-stat-score-buffer) spam-stat-split-fancy-spam-threshold)
(when (boundp 'nnmail-split-trace)
(mapc (lambda (entry)
(push entry nnmail-split-trace))
spam-stat-score-data))
spam-stat-split-fancy-spam-group))
- (error (message "Error in spam-stat-split-fancy: %S" var)
+ (error (message "Error in spam-stat-split-fancy: %S" spam-stat-error-holder)
nil)))
;; Testing