X-Git-Url: http://git.chise.org/gitweb/?a=blobdiff_plain;f=lisp%2Fnnir.el;h=846db89de1fe6748b1f8654f1d0eb939322da1c4;hb=9b741e050b400987d68ff761c6cc3276c932839c;hp=6230143069add72adaa18efb21dbba8ca0230ba5;hpb=509a8e7082aea415f157fae31ccac13b8c68ed4f;p=elisp%2Fgnus.git- diff --git a/lisp/nnir.el b/lisp/nnir.el index 6230143..846db89 100644 --- a/lisp/nnir.el +++ b/lisp/nnir.el @@ -1,8 +1,6 @@ ;;; nnir.el --- search mail with various search engines ;; Copyright (C) 1998 Kai Großjohann -;; $Id: nnir.el,v 1.68 2001/05/26 23:34:07 grossjoh Exp $ - ;; Author: Kai Großjohann ;; Keywords: news, mail, searching, ir, glimpse, wais @@ -160,6 +158,50 @@ ;; one to search this Glimpse index. I have indexed my whole home ;; directory with Glimpse, so I assume a default of `$HOME'. +;; 3. Namazu +;; +;; The Namazu backend requires you to have one directory containing all +;; index files, this is controlled by the `nnir-namazu-index-directory' +;; variable. To function the `nnir-namazu-remove-prefix' variable must +;; also be correct, see the documentation for `nnir-wais-remove-prefix' +;; above. +;; +;; It is particularly important not to pass any any switches to namazu +;; that will change the output format. Good switches to use include +;; `--sort', `--ascending', `--early' and `--late'. Refer to the Namazu +;; documentation for further information on valid switches. +;; +;; To index my mail with the `mknmz' program I use the following +;; configuration file: +;; +;; ,---- +;; | package conf; # Don't remove this line! +;; | +;; | # Paths which will not be indexed. Don't use `^' or `$' anchors. +;; | $EXCLUDE_PATH = "spam|sent"; +;; | +;; | # Header fields which should be searchable. case-insensitive +;; | $REMAIN_HEADER = "from|date|message-id|subject"; +;; | +;; | # Searchable fields. case-insensitive +;; | $SEARCH_FIELD = "from|date|message-id|subject"; +;; | +;; | # The max length of a word. +;; | $WORD_LENG_MAX = 128; +;; | +;; | # The max length of a field. +;; | $MAX_FIELD_LENGTH = 256; +;; `---- +;; +;; My mail is stored in the directories ~/Mail/mail/, ~/Mail/lists/ and +;; ~/Mail/archive/, so to index them I go to the directory set in +;; `nnir-namazu-index-directory' and issue the following command. +;; +;; mknmz --mailnews ~/Mail/archive/ ~/Mail/mail/ ~/Mail/lists/ +;; +;; For maximum searching efficiency I have a cron job set to run this +;; command every four hours. + ;; Developer information: ;; I have tried to make the code expandable. Basically, it is divided @@ -243,7 +285,7 @@ ;;; Setup Code: -(defconst nnir-version "$Id: nnir.el,v 1.68 2001/05/26 23:34:07 grossjoh Exp $" +(defconst nnir-version "1.72" "Version of NNIR.") (require 'cl) @@ -272,7 +314,9 @@ (swish++ nnir-run-swish++ ((group . "Group spec: "))) (swish-e nnir-run-swish-e - ((group . "Group spec: ")))) + ((group . "Group spec: "))) + (namazu nnir-run-namazu + ())) "Alist of supported search engines. Each element in the alist is a three-element list (ENGINE FUNCTION ARGS). ENGINE is a symbol designating the searching engine. FUNCTION is also @@ -341,7 +385,8 @@ settings of `nnir-search-engine'." (defcustom nnir-glimpse-remove-prefix (concat (getenv "HOME") "/Mail/") "*The prefix to remove from each file name returned by Glimpse -in order to get a group name (albeit with / instead of .). +in order to get a group name (albeit with / instead of .). This is a +regular expression. For example, suppose that Glimpse returns file names such as \"/home/john/Mail/mail/misc/42\". For this example, use the following @@ -349,7 +394,7 @@ setting: (setq nnir-glimpse-remove-prefix \"/home/john/Mail/\") Note the trailing slash. Removing this prefix gives \"mail/misc/42\". `nnir' knows to remove the \"/42\" and to replace \"/\" with \".\" to arrive at the correct group name, \"mail.misc\"." - :type '(directory) + :type '(regexp) :group 'nnir) (defcustom nnir-glimpse-additional-switches '("-i") @@ -384,11 +429,12 @@ The string given here is passed to `waissearch -d' as-is." (defcustom nnir-wais-remove-prefix (concat (getenv "HOME") "/Mail/") "*The prefix to remove from each directory name returned by waissearch -in order to get a group name (albeit with / instead of .). +in order to get a group name (albeit with / instead of .). This is a +regular expression. This variable is similar to `nnir-glimpse-remove-prefix', only for Wais, not Glimpse." - :type '(directory) + :type '(regexp) :group 'nnir) ;; EWS (Excite for Web Servers) engine. @@ -405,11 +451,17 @@ not Glimpse." (defcustom nnir-excite-remove-prefix (concat (getenv "HOME") "/Mail/") "*The prefix to remove from each file name returned by EWS -in order to get a group name (albeit with / instead of .). +in order to get a group name (albeit with / instead of .). This is a +regular expression. This variable is very similar to `nnir-glimpse-remove-prefix', except that it is for EWS, not Glimpse." - :type '(directory) + :type '(regexp) + :group 'nnir) + +(defcustom nnir-imap-default-charset nil + "*Name of the charset of the strings that appear in the search criteria." + :type '(choice (const nil) symbol) :group 'nnir) ;; Swish++. Next three variables Copyright (C) 2000, 2001 Christoph @@ -439,11 +491,12 @@ Instead, use this: (defcustom nnir-swish++-remove-prefix (concat (getenv "HOME") "/Mail/") "*The prefix to remove from each file name returned by swish++ -in order to get a group name (albeit with / instead of .). +in order to get a group name (albeit with / instead of .). This is a +regular expression. This variable is very similar to `nnir-glimpse-remove-prefix', except that it is for swish++, not Glimpse." - :type '(directory) + :type '(regexp) :group 'nnir) ;; Swish-E. Next three variables Copyright (C) 2000 Christoph Conrad @@ -474,10 +527,44 @@ Instead, use this: (defcustom nnir-swish-e-remove-prefix (concat (getenv "HOME") "/Mail/") "*The prefix to remove from each file name returned by swish-e -in order to get a group name (albeit with / instead of .). +in order to get a group name (albeit with / instead of .). This is a +regular expression. This variable is very similar to `nnir-glimpse-remove-prefix', except that it is for swish-e, not Glimpse." + :type '(regexp) + :group 'nnir) + +;; Namazu engine, see + +(defcustom nnir-namazu-program "namazu" + "*Name of Namazu search executable." + :type '(string) + :group 'nnir) + +(defcustom nnir-namazu-index-directory (expand-file-name "~/Mail/namazu/") + "*Index directory for Namazu." + :type '(directory) + :group 'nnir) + +(defcustom nnir-namazu-additional-switches '() + "*A list of strings, to be given as additional arguments to namazu. +The switches `-q', `-a', and `-s' are always used, very few other switches +make any sense in this context. + +Note that this should be a list. Ie, do NOT use the following: + (setq nnir-namazu-additional-switches \"-i -w\") ; wrong +Instead, use this: + (setq nnir-namazu-additional-switches '(\"-i\" \"-w\"))" + :type '(repeat (string)) + :group 'nnir) + +(defcustom nnir-namazu-remove-prefix (concat (getenv "HOME") "/Mail/") + "*The prefix to remove from each file name returned by Namazu +in order to get a group name (albeit with / instead of .). + +This variable is very similar to `nnir-glimpse-remove-prefix', except +that it is for Namazu, not Glimpse." :type '(directory) :group 'nnir) @@ -524,7 +611,10 @@ that it is for swish-e, not Glimpse." (kbd "G G") "GG") ; XEmacs 19 compat 'gnus-group-make-nnir-group)) -(add-hook 'gnus-group-mode-hook 'nnir-group-mode-hook) +(add-hook 'gnus-group-mode-hook + (lambda () + (unless (string-match "T-gnus" gnus-version) + (nnir-group-mode-hook)))) @@ -613,6 +703,7 @@ and show thread that contains this article." (setq art (car artlist)) (or (numberp art) (nnheader-report + 'nnir "nnir-retrieve-headers doesn't grok message ids: %s" art)) (setq artitem (nnir-artlist-article nnir-artlist art)) @@ -640,7 +731,7 @@ and show thread that contains this article." (error "nnheader-parse-head returned nil for article %s in group %s" artno artfullgroup))) - (t (nnheader-report "Don't support header type %s." foo))) + (t (nnheader-report 'nnir "Don't support header type %s." foo))) ;; replace article number in original group with article number ;; in nnir group (mail-header-set-number novitem idx) @@ -723,7 +814,7 @@ pairs (also vectors, actually)." (apply 'call-process cp-list)))) (unless (or (null exitstatus) (zerop exitstatus)) - (nnheader-report "Couldn't run glimpse: %s" exitstatus) + (nnheader-report 'nnir "Couldn't run glimpse: %s" exitstatus) ;; Glimpse failure reason is in this buffer, show it if ;; the user wants it. (when (> gnus-verbose 6) @@ -874,9 +965,13 @@ pairs (also vectors, actually)." ;; send queries as literals ;; handle errors +(eval-when-compile + (defvar nnimap-server-buffer)) + (defun nnir-run-imap (query &optional group) (require 'imap) (require 'nnimap) + (require 'mm-util) (unless group (error "Must specify groups for IMAP searching.")) (save-excursion @@ -890,13 +985,30 @@ pairs (also vectors, actually)." (setq buf nnimap-server-buffer) ;; xxx (message "Searching %s..." group) (let ((arts 0) - (mbx (gnus-group-real-name group))) + (mbx (gnus-group-real-name group)) + (multibyte-p (mm-multibyte-p)) + charset coding-system) (when (imap-mailbox-select mbx nil buf) + (with-temp-buffer + (if multibyte-p + (mm-enable-multibyte)) + (insert qstring) + (setq charset (car (mm-find-mime-charset-region + (point-min)(point-max))))) + (unless charset + (setq charset nnir-imap-default-charset)) (mapcar (lambda (artnum) (push (vector mbx artnum 1) artlist) (setq arts (1+ arts))) - (imap-search (concat "TEXT \"" qstring "\"") buf)) + (if (and (not (eq charset 'us-ascii)) + (setq coding-system (mm-charset-to-coding-system + charset))) + (imap-search + (concat "CHARSET " (symbol-name charset) " TEXT \"" + (mm-encode-coding-string qstring coding-system) + "\"") buf) + (imap-search (concat "TEXT \"" qstring "\"") buf))) (message "Searching %s... %d matches" mbx arts))) (message "Searching %s...done" group)) (quit nil)) @@ -925,6 +1037,7 @@ Windows NT 4.0." (save-excursion (let ( (qstring (cdr (assq 'query query))) + (groupspec (cdr (assq 'group query))) (artlist nil) (score nil) (artno nil) (dirnam nil) (group nil) ) @@ -934,7 +1047,10 @@ Windows NT 4.0." (set-buffer (get-buffer-create nnir-tmp-buffer)) (erase-buffer) - (message "Doing swish++ query %s..." query) + (if groupspec + (message "Doing swish++ query %s on %s..." qstring groupspec) + (message "Doing swish++ query %s..." qstring)) + (let* ((cp-list `( ,nnir-swish++-program nil ; input from /dev/null t ; output @@ -950,7 +1066,7 @@ Windows NT 4.0." (apply 'call-process cp-list)))) (unless (or (null exitstatus) (zerop exitstatus)) - (nnheader-report "Couldn't run swish++: %s" exitstatus) + (nnheader-report 'nnir "Couldn't run swish++: %s" exitstatus) ;; swish++ failure reason is in this buffer, show it if ;; the user wants it. (when (> gnus-verbose 6) @@ -961,7 +1077,7 @@ Windows NT 4.0." ;; rank relative-path-name file-size file-title ;; V 5.0b2: ;; rank relative-path-name file-size topic?? - ;; where rank is an integer from from 1 to 100. + ;; where rank is an integer from 1 to 100. (goto-char (point-min)) (while (re-search-forward "\\(^[0-9]+\\) \\([^ ]+\\) [0-9]+ \\(.*\\)$" nil t) @@ -973,23 +1089,27 @@ Windows NT 4.0." (when (string-match "^[0-9]+$" artno) (when (not (null dirnam)) - ;; remove nnir-swish++-remove-prefix from beginning of dirname - (when (string-match (concat "^" nnir-swish++-remove-prefix) - dirnam) - (setq dirnam (replace-match "" t t dirnam))) + ; maybe limit results to matching groups. + (when (or (not groupspec) + (string-match groupspec dirnam)) - (setq dirnam (substring dirnam 0 -1)) - ;; eliminate all ".", "/", "\" from beginning. Always matches. - (string-match "^[./\\]*\\(.*\\)$" dirnam) - ;; "/" -> "." - (setq group (substitute ?. ?/ (match-string 1 dirnam))) - ;; "\\" -> "." - (setq group (substitute ?. ?\\ group)) + ;; remove nnir-swish++-remove-prefix from beginning of dirname + (when (string-match (concat "^" nnir-swish++-remove-prefix) + dirnam) + (setq dirnam (replace-match "" t t dirnam))) - (push (vector group - (string-to-int artno) - (string-to-int score)) - artlist)))) + (setq dirnam (substring dirnam 0 -1)) + ;; eliminate all ".", "/", "\" from beginning. Always matches. + (string-match "^[./\\]*\\(.*\\)$" dirnam) + ;; "/" -> "." + (setq group (substitute ?. ?/ (match-string 1 dirnam))) + ;; "\\" -> "." + (setq group (substitute ?. ?\\ group)) + + (push (vector group + (string-to-int artno) + (string-to-int score)) + artlist))))) (message "Massaging swish++ output...done") @@ -1041,7 +1161,7 @@ Tested with swish-e-2.0.1 on Windows NT 4.0." (apply 'call-process cp-list)))) (unless (or (null exitstatus) (zerop exitstatus)) - (nnheader-report "Couldn't run swish-e: %s" exitstatus) + (nnheader-report 'nnir "Couldn't run swish-e: %s" exitstatus) ;; swish-e failure reason is in this buffer, show it if ;; the user wants it. (when (> gnus-verbose 6) @@ -1087,6 +1207,84 @@ Tested with swish-e-2.0.1 on Windows NT 4.0." (> (nnir-artitem-rsv x) (nnir-artitem-rsv y))))))))) +;; Namazu interface +(defun nnir-run-namazu (query &optional group) + "Run given query against Namazu. Returns a vector of (group name, file name) +pairs (also vectors, actually). + +Tested with Namazu 2.0.6 on a GNU/Linux system." + (when group + (error "The Namazu backend cannot search specific groups")) + (save-excursion + (let ( + (artlist nil) + (qstring (cdr (assq 'query query))) + (score nil) + (group nil) + (article nil) + ) + (set-buffer (get-buffer-create nnir-tmp-buffer)) + (erase-buffer) + (let* ((cp-list + `( ,nnir-namazu-program + nil ; input from /dev/null + t ; output + nil ; don't redisplay + "-q" ; don't be verbose + "-a" ; show all matches + "-s" ; use short format + ,@nnir-namazu-additional-switches + ,qstring ; the query, in namazu format + ,nnir-namazu-index-directory ; index directory + )) + (exitstatus + (let ((process-environment (copy-sequence process-environment))) + ;; Disable locale. + (setenv "LC_ALL" "C") + (message "%s args: %s" nnir-namazu-program + (mapconcat 'identity (cddddr cp-list) " ")) + (apply 'call-process cp-list)))) + (unless (or (null exitstatus) + (zerop exitstatus)) + (nnheader-report 'nnir "Couldn't run namazu: %s" exitstatus) + ;; Namazu failure reason is in this buffer, show it if + ;; the user wants it. + (when (> gnus-verbose 6) + (display-buffer nnir-tmp-buffer)))) + + ;; Namazu output looks something like this: + ;; 2. Re: Gnus agent expire broken (score: 55) + ;; /home/henrik/Mail/mail/sent/1310 (4,138 bytes) + + (goto-char (point-min)) + (while (re-search-forward + "^\\([0-9]+\\.\\).*\\((score: \\([0-9]+\\)\\))\n\\([^ ]+\\)" + nil t) + (setq score (match-string 3) + group (file-name-directory (match-string 4)) + article (file-name-nondirectory (match-string 4))) + + ;; make sure article and group is sane + (when (and (string-match "^[0-9]+$" article) + (not (null group))) + (when (string-match (concat "^" nnir-namazu-remove-prefix) group) + (setq group (replace-match "" t t group))) + + ;; remove trailing slash from groupname + (setq group (substring group 0 -1)) + + ;; stuff results into artlist vector + (push (vector (substitute ?. ?/ group) + (string-to-int article) + (string-to-int score)) artlist))) + + ;; sort artlist by score + (apply 'vector + (sort* artlist + (function (lambda (x y) + (> (nnir-artitem-rsv x) + (nnir-artitem-rsv y))))))))) + ;;; Util Code: (defun nnir-read-parms (query)