From: yamaoka Date: Wed, 6 Apr 2005 23:11:57 +0000 (+0000) Subject: Synch to No Gnus 200504061904. X-Git-Tag: t-gnus-6_17_4-quimby-~513 X-Git-Url: http://git.chise.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a08b8b8cf0ec2b1f5c1e68793e7e2c17ed15d44f;p=elisp%2Fgnus.git- Synch to No Gnus 200504061904. --- diff --git a/lisp/ChangeLog b/lisp/ChangeLog index cc94b9f..dded9b3 100644 --- a/lisp/ChangeLog +++ b/lisp/ChangeLog @@ -1,3 +1,29 @@ +2005-04-06 D Goel + + * spam-stat.el (spam-stat-score-buffer): Add a call to a + user-function allow user modifications of the scores. + (spam-stat-score-buffer-user): New function, to allow + user-computed modifications to the score. + (spam-stat-score-buffer-user-functions): list of additional + scoring functions + (spam-stat-error-holder): global temporary error holder + (spam-stat-split-fancy): use the new `spam-stat-error-holder' + variable + +2005-04-06 Teodor Zlatanov + + * gnus-registry.el (gnus-registry-clean-empty-function) + (gnus-registry-trim, gnus-registry-fetch-groups) + (gnus-registry-delete-group): now groups that match + `gnus-registry-ignored-groups' will be removed from the registry + entries, not just ignored for splitting. This helps clean up the + registry. Also, `gnus-registry-fetch-groups' is a convenient way + to get all the groups a message ID is in. + + * spam-stat.el (spam-stat-split-fancy-spam-threshold) + (spam-stat-split-fancy): changed "threshhold" to "threshold" + (spam-stat-score-buffer-user-functions): added :number custom type + 2005-04-06 Katsumi Yamaoka * mm-util.el (mm-coding-system-p): Don't return binary for the nil diff --git a/lisp/gnus-registry.el b/lisp/gnus-registry.el index 6fe20ff..d4d165a 100644 --- a/lisp/gnus-registry.el +++ b/lisp/gnus-registry.el @@ -250,15 +250,37 @@ way." (defun gnus-registry-clean-empty-function () "Remove all empty entries from the registry. Returns count thereof." (let ((count 0)) + (maphash (lambda (key value) - (unless (or - (gnus-registry-fetch-group key) - ;; TODO: look for specific extra data here! - ;; in this example, we look for 'label - (gnus-registry-fetch-extra key 'label)) - (incf count) - (remhash key gnus-registry-hashtb))) + (when (stringp key) + (dolist (group (gnus-registry-fetch-groups key)) + (when (gnus-parameter-registry-ignore group) + (gnus-message + 10 + "gnus-registry: deleted ignored group %s from key %s" + group key) + (gnus-registry-delete-group key group))) + + (unless (gnus-registry-group-count key) + (gnus-registry-delete-id key)) + + (unless (or + (gnus-registry-fetch-group key) + ;; TODO: look for specific extra data here! + ;; in this example, we look for 'label + (gnus-registry-fetch-extra key 'label) + (stringp key)) + (incf count) + (gnus-registry-delete-id key)) + + (unless (stringp key) + (gnus-message + 10 + "gnus-registry key %s was not a string, removing" + key) + (gnus-registry-delete-id key)))) + gnus-registry-hashtb) count)) @@ -268,7 +290,8 @@ way." (setq gnus-registry-dirty nil)) (defun gnus-registry-trim (alist) - "Trim alist to size, using gnus-registry-max-entries." + "Trim alist to size, using gnus-registry-max-entries. +Also, drop all gnus-registry-ignored-groups matches." (if (null gnus-registry-max-entries) alist ; just return the alist ;; else, when given max-entries, trim the alist @@ -281,16 +304,16 @@ way." (lambda (key value) (puthash key (gnus-registry-fetch-extra key 'mtime) timehash)) gnus-registry-hashtb) - + ;; we use the return value of this setq, which is the trimmed alist (setq alist (nthcdr trim-length (sort alist (lambda (a b) - (time-less-p - (cdr (gethash (car a) timehash)) - (cdr (gethash (car b) timehash)))))))))) + (time-less-p + (or (cdr (gethash (car a) timehash)) '(0 0 0)) + (or (cdr (gethash (car b) timehash)) '(0 0 0)))))))))) (defun alist-to-hashtable (alist) "Build a hashtable from the values in ALIST." @@ -600,6 +623,23 @@ Returns the first place where the trail finds a group name." crumb (gnus-group-short-name crumb)))))))) +(defun gnus-registry-fetch-groups (id) + "Get the groups of a message, based on the message ID." + (let ((trail (gethash id gnus-registry-hashtb)) + groups) + (dolist (crumb trail) + (when (stringp crumb) + ;; push the group name into the list + (setq + groups + (cons + (if (or (not (stringp crumb)) gnus-registry-use-long-group-names) + crumb + (gnus-group-short-name crumb)) + groups)))) + ;; return the list of groups + groups)) + (defun gnus-registry-group-count (id) "Get the number of groups of a message, based on the message ID." (let ((trail (gethash id gnus-registry-hashtb))) @@ -609,12 +649,11 @@ Returns the first place where the trail finds a group name." (defun gnus-registry-delete-group (id group) "Delete a group for a message, based on the message ID." - (when group - (when id + (when (and group id) (let ((trail (gethash id gnus-registry-hashtb)) - (group (gnus-group-short-name group))) + (short-group (gnus-group-short-name group))) (puthash id (if trail - (delete group trail) + (delete short-group (delete group trail)) nil) gnus-registry-hashtb)) ;; now, clear the entry if there are no more groups @@ -623,7 +662,7 @@ Returns the first place where the trail finds a group name." (gnus-registry-delete-id id))) ;; is this ID still in the registry? (when (gethash id gnus-registry-hashtb) - (gnus-registry-store-extra-entry id 'mtime (current-time)))))) + (gnus-registry-store-extra-entry id 'mtime (current-time))))) (defun gnus-registry-delete-id (id) "Delete a message ID from the registry." diff --git a/lisp/spam-stat.el b/lisp/spam-stat.el index 0c97bd9..b9265fc 100644 --- a/lisp/spam-stat.el +++ b/lisp/spam-stat.el @@ -168,7 +168,7 @@ no effect when spam-stat is invoked through spam.el." :type 'string :group 'spam-stat) -(defcustom spam-stat-split-fancy-spam-threshhold 0.9 +(defcustom spam-stat-split-fancy-spam-threshold 0.9 "Spam score threshold in spam-stat-split-fancy." :type 'number :group 'spam-stat) @@ -178,13 +178,33 @@ no effect when spam-stat is invoked through spam.el." :type 'hook :group 'spam-stat) +(defcustom spam-stat-score-buffer-user-functions nil + "List of additional scoring functions. +Called one by one on the buffer. + +If all of these functions return non-nil answers, these numerical +answers are added to the computed spam stat score on the buffer. If +you defun such functions, make sure they don't return the buffer in a +narrowed state or such: use, for example, `save-excursion'. Each of +your functions is also passed the initial spam-stat score which might +aid in your scoring. + +Also be careful when defining such functions. If they take a long +time, they will slow down your mail splitting. Thus, if the buffer is +large, don't forget to use smaller regions, by wrapping your work in, +say, `with-spam-stat-max-buffer-size'." + :type '(repeat sexp) + :group 'spam-stat) + (defcustom spam-stat-process-directory-age 90 "Max. age of files to be processed in directory, in days. When using `spam-stat-process-spam-directory' or `spam-stat-process-non-spam-directory', only files that have been touched in this many days will be considered. Without this filter, re-training spam-stat with several thousand messages -will start to take a very long time.") +will start to take a very long time." + :type 'number + :group 'spam-stat) (defvar spam-stat-last-saved-at nil "Time stamp of last change of spam-stat-file on this run") @@ -246,6 +266,9 @@ Use `spam-stat-ngood', `spam-stat-nbad', `spam-stat-good', (defvar spam-stat-nbad 0 "The number of bad mails in the dictionary.") +(defvar spam-stat-error-holder nil + "A holder for condition-case errors while scoring buffers.") + (defsubst spam-stat-good (entry) "Return the number of times this word belongs to good mails." (aref entry 0)) @@ -476,29 +499,51 @@ where DIFF is the difference between SCORE and 0.5." result)) (defun spam-stat-score-buffer () - "Return a score describing the spam-probability for this buffer." + "Return a score describing the spam-probability for this buffer. +Add user supplied modifications if supplied." + (interactive) ; helps in debugging. (setq spam-stat-score-data (spam-stat-buffer-words-with-scores)) (let* ((probs (mapcar 'cadr spam-stat-score-data)) - (prod (apply #'* probs))) - (/ prod (+ prod (apply #'* (mapcar #'(lambda (x) (- 1 x)) - probs)))))) + (prod (apply #'* probs)) + (score0 + (/ prod (+ prod (apply #'* (mapcar #'(lambda (x) (- 1 x)) + probs))))) + (score1s + (condition-case + spam-stat-error-holder + (spam-stat-score-buffer-user score0) + (error nil))) + (ans + (if score1s (+ score0 score1s) score0))) + (when (interactive-p) + (message "%S" ans)) + ans)) + +(defun spam-stat-score-buffer-user (&rest args) + (let* ((scores + (mapcar + (lambda (fn) + (apply fn args)) + spam-stat-score-buffer-user-functions))) + (if (memq nil scores) nil + (apply #'+ scores)))) (defun spam-stat-split-fancy () "Return the name of the spam group if the current mail is spam. Use this function on `nnmail-split-fancy'. If you are interested in the raw data used for the last run of `spam-stat-score-buffer', check the variable `spam-stat-score-data'." - (condition-case var + (condition-case spam-stat-error-holder (progn (set-buffer spam-stat-buffer) (goto-char (point-min)) - (when (> (spam-stat-score-buffer) spam-stat-split-fancy-spam-threshhold) + (when (> (spam-stat-score-buffer) spam-stat-split-fancy-spam-threshold) (when (boundp 'nnmail-split-trace) (mapc (lambda (entry) (push entry nnmail-split-trace)) spam-stat-score-data)) spam-stat-split-fancy-spam-group)) - (error (message "Error in spam-stat-split-fancy: %S" var) + (error (message "Error in spam-stat-split-fancy: %S" spam-stat-error-holder) nil))) ;; Testing