From: tsuchiya Date: Tue, 4 Sep 2001 11:14:19 +0000 (+0000) Subject: Import nnir-1.72. X-Git-Url: http://git.chise.org/gitweb/?a=commitdiff_plain;h=aace82d80e88752b4ebd0e2362d9446735daba86;p=elisp%2Fgnus.git- Import nnir-1.72. --- diff --git a/lisp/nnir.el b/lisp/nnir.el index 6230143..eb73dfe 100644 --- a/lisp/nnir.el +++ b/lisp/nnir.el @@ -1,10 +1,10 @@ ;;; nnir.el --- search mail with various search engines ;; Copyright (C) 1998 Kai Großjohann -;; $Id: nnir.el,v 1.68 2001/05/26 23:34:07 grossjoh Exp $ +;; $Id: nnir.el,v 1.72 2001/08/17 11:15:13 grossjoh Exp $ ;; Author: Kai Großjohann -;; Keywords: news, mail, searching, ir, glimpse, wais +;; Keywords: news, mail, searching, ir, glimpse, wais, Namazu ;; This file is not part of GNU Emacs. @@ -160,6 +160,54 @@ ;; one to search this Glimpse index. I have indexed my whole home ;; directory with Glimpse, so I assume a default of `$HOME'. +;; 3. Namazu +;; +;; The Namazu backend requires you to have one directory containing all +;; index files, this is controlled by the `nnir-namazu-index-directory' +;; variable. To function the `nnir-namazu-remove-prefix' variable must +;; also be correct, see the documentation for `nnir-wais-remove-prefix' +;; above. +;; +;; It is particularly important not to pass any any switches to namazu +;; that will change the output format. Good switches to use include +;; `--sort', `--ascending', `--early' and `--late'. Refer to the Namazu +;; documentation for further information on valid switches. +;; +;; To index my mail with the `mknmz' program I use the following +;; configuration file: +;; +;; ,---- +;; | package conf; # Don't remove this line! +;; | +;; | # Paths which will not be indexed. Don't use `^' or `$' anchors. +;; | $EXCLUDE_PATH = "spam|sent"; +;; | +;; | # Header fields which should be searchable. case-insensitive +;; | $REMAIN_HEADER = "from|date|message-id|subject"; +;; | +;; | # Searchable fields. case-insensitive +;; | $SEARCH_FIELD = "from|date|message-id|subject"; +;; | +;; | # The max length of a word. +;; | $WORD_LENG_MAX = 128; +;; | +;; | # The max length of a field. +;; | $MAX_FIELD_LENGTH = 256; +;; `---- +;; +;; My mail is stored in the directories ~/Mail/mail/, ~/Mail/lists/ and +;; ~/Mail/archive/, so to index them I go to the directory set in +;; `nnir-namazu-index-directory' and issue the following command. +;; +;; mknmz --mailnews ~/Mail/archive/ ~/Mail/mail/ ~/Mail/lists/ +;; +;; For maximum searching efficiency I have a cron job set to run this +;; command every four hours. + +;; 3. Namazu +;; +;; + ;; Developer information: ;; I have tried to make the code expandable. Basically, it is divided @@ -243,7 +291,7 @@ ;;; Setup Code: -(defconst nnir-version "$Id: nnir.el,v 1.68 2001/05/26 23:34:07 grossjoh Exp $" +(defconst nnir-version "$Id: nnir.el,v 1.72 2001/08/17 11:15:13 grossjoh Exp $" "Version of NNIR.") (require 'cl) @@ -263,6 +311,8 @@ (defvar nnir-engines '((glimpse nnir-run-glimpse ((group . "Group spec: "))) + (namazu nnir-run-namazu + ((group . "Group spec: "))) (wais nnir-run-waissearch ()) (excite nnir-run-excite-search @@ -272,7 +322,9 @@ (swish++ nnir-run-swish++ ((group . "Group spec: "))) (swish-e nnir-run-swish-e - ((group . "Group spec: ")))) + ((group . "Group spec: "))) + (namazu nnir-run-namazu + ())) "Alist of supported search engines. Each element in the alist is a three-element list (ENGINE FUNCTION ARGS). ENGINE is a symbol designating the searching engine. FUNCTION is also @@ -295,7 +347,8 @@ Add an entry here when adding a new search engine.") ;;; User Customizable Variables: (defgroup nnir nil - "Search nnmh and nnml groups in Gnus with Glimpse, freeWAIS-sf, or EWS.") + "Search nnmh and nnml groups in Gnus with Glimpse, freeWAIS-sf, Namazu, +or EWS.") ;; Mail backend. @@ -341,7 +394,8 @@ settings of `nnir-search-engine'." (defcustom nnir-glimpse-remove-prefix (concat (getenv "HOME") "/Mail/") "*The prefix to remove from each file name returned by Glimpse -in order to get a group name (albeit with / instead of .). +in order to get a group name (albeit with / instead of .). This is a +regular expression. For example, suppose that Glimpse returns file names such as \"/home/john/Mail/mail/misc/42\". For this example, use the following @@ -349,7 +403,7 @@ setting: (setq nnir-glimpse-remove-prefix \"/home/john/Mail/\") Note the trailing slash. Removing this prefix gives \"mail/misc/42\". `nnir' knows to remove the \"/42\" and to replace \"/\" with \".\" to arrive at the correct group name, \"mail.misc\"." - :type '(directory) + :type '(regexp) :group 'nnir) (defcustom nnir-glimpse-additional-switches '("-i") @@ -365,6 +419,24 @@ Instead, use this: :type '(repeat (string)) :group 'nnir) +;; Namazu engine. + +(defcustom nnir-namazu-program "namazu" + "*Name of Namazu executable." + :type '(string) + :group 'nnir) + +(defcustom nnir-namazu-index (expand-file-name "Namazu/" (getenv "HOME")) + "*Namazu index directory" + :type '(directory) + :group 'nnir) + +(defcustom nnir-namazu-remove-prefix (concat (getenv "HOME") "/Mail/") + "*The prefix to remove from each file name returned by Namazu +in order to get a group name (albeit with / instead of .)." + :type '(directory) + :group 'nnir) + ;; freeWAIS-sf. (defcustom nnir-wais-program "waissearch" @@ -384,11 +456,12 @@ The string given here is passed to `waissearch -d' as-is." (defcustom nnir-wais-remove-prefix (concat (getenv "HOME") "/Mail/") "*The prefix to remove from each directory name returned by waissearch -in order to get a group name (albeit with / instead of .). +in order to get a group name (albeit with / instead of .). This is a +regular expression. This variable is similar to `nnir-glimpse-remove-prefix', only for Wais, not Glimpse." - :type '(directory) + :type '(regexp) :group 'nnir) ;; EWS (Excite for Web Servers) engine. @@ -405,11 +478,12 @@ not Glimpse." (defcustom nnir-excite-remove-prefix (concat (getenv "HOME") "/Mail/") "*The prefix to remove from each file name returned by EWS -in order to get a group name (albeit with / instead of .). +in order to get a group name (albeit with / instead of .). This is a +regular expression. This variable is very similar to `nnir-glimpse-remove-prefix', except that it is for EWS, not Glimpse." - :type '(directory) + :type '(regexp) :group 'nnir) ;; Swish++. Next three variables Copyright (C) 2000, 2001 Christoph @@ -439,11 +513,12 @@ Instead, use this: (defcustom nnir-swish++-remove-prefix (concat (getenv "HOME") "/Mail/") "*The prefix to remove from each file name returned by swish++ -in order to get a group name (albeit with / instead of .). +in order to get a group name (albeit with / instead of .). This is a +regular expression. This variable is very similar to `nnir-glimpse-remove-prefix', except that it is for swish++, not Glimpse." - :type '(directory) + :type '(regexp) :group 'nnir) ;; Swish-E. Next three variables Copyright (C) 2000 Christoph Conrad @@ -474,10 +549,44 @@ Instead, use this: (defcustom nnir-swish-e-remove-prefix (concat (getenv "HOME") "/Mail/") "*The prefix to remove from each file name returned by swish-e -in order to get a group name (albeit with / instead of .). +in order to get a group name (albeit with / instead of .). This is a +regular expression. This variable is very similar to `nnir-glimpse-remove-prefix', except that it is for swish-e, not Glimpse." + :type '(regexp) + :group 'nnir) + +;; Namazu engine, see + +(defcustom nnir-namazu-program "namazu" + "*Name of Namazu search executable." + :type '(string) + :group 'nnir) + +(defcustom nnir-namazu-index-directory (expand-file-name "~/Mail/namazu/") + "*Index directory for Namazu." + :type '(directory) + :group 'nnir) + +(defcustom nnir-namazu-additional-switches '() + "*A list of strings, to be given as additional arguments to namazu. +The switches `-q', `-a', and `-s' are always used, very few other switches +make any sense in this context. + +Note that this should be a list. Ie, do NOT use the following: + (setq nnir-namazu-additional-switches \"-i -w\") ; wrong +Instead, use this: + (setq nnir-namazu-additional-switches '(\"-i\" \"-w\"))" + :type '(repeat (string)) + :group 'nnir) + +(defcustom nnir-namazu-remove-prefix (concat (getenv "HOME") "/Mail/") + "*The prefix to remove from each file name returned by Namazu +in order to get a group name (albeit with / instead of .). + +This variable is very similar to `nnir-glimpse-remove-prefix', except +that it is for Namazu, not Glimpse." :type '(directory) :group 'nnir) @@ -613,6 +722,7 @@ and show thread that contains this article." (setq art (car artlist)) (or (numberp art) (nnheader-report + 'nnir "nnir-retrieve-headers doesn't grok message ids: %s" art)) (setq artitem (nnir-artlist-article nnir-artlist art)) @@ -640,7 +750,7 @@ and show thread that contains this article." (error "nnheader-parse-head returned nil for article %s in group %s" artno artfullgroup))) - (t (nnheader-report "Don't support header type %s." foo))) + (t (nnheader-report 'nnir "Don't support header type %s." foo))) ;; replace article number in original group with article number ;; in nnir group (mail-header-set-number novitem idx) @@ -723,7 +833,7 @@ pairs (also vectors, actually)." (apply 'call-process cp-list)))) (unless (or (null exitstatus) (zerop exitstatus)) - (nnheader-report "Couldn't run glimpse: %s" exitstatus) + (nnheader-report 'nnir "Couldn't run glimpse: %s" exitstatus) ;; Glimpse failure reason is in this buffer, show it if ;; the user wants it. (when (> gnus-verbose 6) @@ -781,6 +891,94 @@ pairs (also vectors, actually)." (nnir-artitem-number y)))))) ))) +;; Namazu interface +(defun nnir-run-namazu (query &optional group) + "Run given query against namazu. Returns a vector of (group name, file name) +pairs (also vectors, actually)." + (save-excursion + (let ((artlist nil) + (groupspec (cdr (assq 'group query))) + (qstring (cdr (assq 'query query))) + (coding-system-for-read 'euc-japan) + (coding-system-for-write 'euc-japan)) + (when (and group groupspec) + (error (concat "It does not make sense to use a group spec" + " with process-marked groups."))) + (when group + (setq groupspec (gnus-group-real-name group))) + (set-buffer (get-buffer-create nnir-tmp-buffer)) + (erase-buffer) + (if groupspec + (message "Doing namazu query %s on %s..." query groupspec) + (message "Doing namazu query %s..." query)) + (let* ((cp-list + `( ,nnir-namazu-program + nil ; input from /dev/null + t ; output + nil ; don't redisplay + "--all" "--list" "--early" + ,(if groupspec + (format "+uri:%s %s" groupspec qstring) + (format "%s" qstring)) + ,nnir-namazu-index ; index + )) + (exitstatus + (progn + (message "%s args: %s" nnir-namazu-program + (mapconcat 'identity (cddddr cp-list) " ")) + (apply 'call-process cp-list)))) + (unless (or (null exitstatus) + (zerop exitstatus)) + (nnheader-report "Couldn't run namazu: %s" exitstatus) + ;; Namazu failure reason is in this buffer, show it if + ;; the user wants it. + (when (> gnus-verbose 6) + (display-buffer nnir-tmp-buffer)))) + (if groupspec + (message "Doing namazu query %s on %s..." query groupspec) + (message "Doing namazu query %s...done" query)) + (sit-for 0) + ;; CCC: The following work of extracting group name and article + ;; number from the Namazu output can probably better be done by + ;; just going through the buffer once, and plucking out the + ;; right information from each line. + ;; remove superfluous stuff from namazu output + (goto-char (point-min)) + (delete-non-matching-lines "/[0-9]+$") + ;;(delete-matching-lines "\\.overview~?$") + (goto-char (point-min)) + (while (re-search-forward (concat "^" nnir-namazu-remove-prefix) nil t) + (replace-match "")) + ;; separate group name from article number with \t + ;; XEmacs compatible version + (goto-char (point-max)) + (while (re-search-backward "/[0-9]+$" nil t) + (delete-char 1 nil) + (insert-char ?\t 1)) +; Emacs compatible version +; (goto-char (point-min)) +; (while (re-search-forward "\\(/\\)[0-9]+$" nil t) +; (replace-match "\t" t t nil 1)) + ;; replace / with . in group names + (subst-char-in-region (point-min) (point-max) ?/ ?. t) + ;; massage buffer to contain some Lisp; + ;; this depends on the artlist encoding internals + ;; maybe this dependency should be removed? + (goto-char (point-min)) + (while (not (eobp)) + (insert "(\"") + (skip-chars-forward "^\t") + (insert "\" ") + (end-of-line) + (insert " 1000 )") ; 1000 = score + (forward-line 1)) + (insert "))\n") + (goto-char (point-min)) + (insert "(setq artlist '(\n") + (eval-buffer) + artlist + ))) + ;; freeWAIS-sf interface. (defun nnir-run-waissearch (query &optional group) "Run given query agains waissearch. Returns vector of (group name, file name) @@ -925,6 +1123,7 @@ Windows NT 4.0." (save-excursion (let ( (qstring (cdr (assq 'query query))) + (groupspec (cdr (assq 'group query))) (artlist nil) (score nil) (artno nil) (dirnam nil) (group nil) ) @@ -934,7 +1133,10 @@ Windows NT 4.0." (set-buffer (get-buffer-create nnir-tmp-buffer)) (erase-buffer) - (message "Doing swish++ query %s..." query) + (if groupspec + (message "Doing swish++ query %s on %s..." qstring groupspec) + (message "Doing swish++ query %s..." qstring)) + (let* ((cp-list `( ,nnir-swish++-program nil ; input from /dev/null t ; output @@ -950,7 +1152,7 @@ Windows NT 4.0." (apply 'call-process cp-list)))) (unless (or (null exitstatus) (zerop exitstatus)) - (nnheader-report "Couldn't run swish++: %s" exitstatus) + (nnheader-report 'nnir "Couldn't run swish++: %s" exitstatus) ;; swish++ failure reason is in this buffer, show it if ;; the user wants it. (when (> gnus-verbose 6) @@ -961,7 +1163,7 @@ Windows NT 4.0." ;; rank relative-path-name file-size file-title ;; V 5.0b2: ;; rank relative-path-name file-size topic?? - ;; where rank is an integer from from 1 to 100. + ;; where rank is an integer from 1 to 100. (goto-char (point-min)) (while (re-search-forward "\\(^[0-9]+\\) \\([^ ]+\\) [0-9]+ \\(.*\\)$" nil t) @@ -973,23 +1175,27 @@ Windows NT 4.0." (when (string-match "^[0-9]+$" artno) (when (not (null dirnam)) - ;; remove nnir-swish++-remove-prefix from beginning of dirname - (when (string-match (concat "^" nnir-swish++-remove-prefix) - dirnam) - (setq dirnam (replace-match "" t t dirnam))) + ; maybe limit results to matching groups. + (when (or (not groupspec) + (string-match groupspec dirnam)) - (setq dirnam (substring dirnam 0 -1)) - ;; eliminate all ".", "/", "\" from beginning. Always matches. - (string-match "^[./\\]*\\(.*\\)$" dirnam) - ;; "/" -> "." - (setq group (substitute ?. ?/ (match-string 1 dirnam))) - ;; "\\" -> "." - (setq group (substitute ?. ?\\ group)) + ;; remove nnir-swish++-remove-prefix from beginning of dirname + (when (string-match (concat "^" nnir-swish++-remove-prefix) + dirnam) + (setq dirnam (replace-match "" t t dirnam))) - (push (vector group - (string-to-int artno) - (string-to-int score)) - artlist)))) + (setq dirnam (substring dirnam 0 -1)) + ;; eliminate all ".", "/", "\" from beginning. Always matches. + (string-match "^[./\\]*\\(.*\\)$" dirnam) + ;; "/" -> "." + (setq group (substitute ?. ?/ (match-string 1 dirnam))) + ;; "\\" -> "." + (setq group (substitute ?. ?\\ group)) + + (push (vector group + (string-to-int artno) + (string-to-int score)) + artlist))))) (message "Massaging swish++ output...done") @@ -1041,7 +1247,7 @@ Tested with swish-e-2.0.1 on Windows NT 4.0." (apply 'call-process cp-list)))) (unless (or (null exitstatus) (zerop exitstatus)) - (nnheader-report "Couldn't run swish-e: %s" exitstatus) + (nnheader-report 'nnir "Couldn't run swish-e: %s" exitstatus) ;; swish-e failure reason is in this buffer, show it if ;; the user wants it. (when (> gnus-verbose 6) @@ -1087,6 +1293,82 @@ Tested with swish-e-2.0.1 on Windows NT 4.0." (> (nnir-artitem-rsv x) (nnir-artitem-rsv y))))))))) +;; Namazu interface +(defun nnir-run-namazu (query &optional group) + "Run given query against Namazu. Returns a vector of (group name, file name) +pairs (also vectors, actually). + +Tested with Namazu 2.0.6 on a GNU/Linux system." + (when group + (error "The Namazu backend cannot search specific groups")) + (save-excursion + (let ( + (artlist nil) + (qstring (cdr (assq 'query query))) + (score nil) + (group nil) + (article nil) + ) + (set-buffer (get-buffer-create nnir-tmp-buffer)) + (erase-buffer) + (let* ((cp-list + `( ,nnir-namazu-program + nil ; input from /dev/null + t ; output + nil ; don't redisplay + "-q" ; don't be verbose + "-a" ; show all matches + "-s" ; use short format + ,@nnir-namazu-additional-switches + ,qstring ; the query, in namazu format + ,nnir-namazu-index-directory ; index directory + )) + (exitstatus + (progn + (message "%s args: %s" nnir-namazu-program + (mapconcat 'identity (cddddr cp-list) " ")) + (apply 'call-process cp-list)))) + (unless (or (null exitstatus) + (zerop exitstatus)) + (nnheader-report 'nnir "Couldn't run namazu: %s" exitstatus) + ;; Namazu failure reason is in this buffer, show it if + ;; the user wants it. + (when (> gnus-verbose 6) + (display-buffer nnir-tmp-buffer)))) + + ;; Namazu output looks something like this: + ;; 2. Re: Gnus agent expire broken (score: 55) + ;; /home/henrik/Mail/mail/sent/1310 (4,138 bytes) + + (goto-char (point-min)) + (while (re-search-forward + "^\\([0-9]+\\.\\).*\\((score: \\([0-9]+\\)\\))\n\\([^ ]+\\)" + nil t) + (setq score (match-string 3) + group (file-name-directory (match-string 4)) + article (file-name-nondirectory (match-string 4))) + + ;; make sure article and group is sane + (when (and (string-match "^[0-9]+$" article) + (not (null group))) + (when (string-match (concat "^" nnir-namazu-remove-prefix) group) + (setq group (replace-match "" t t group))) + + ;; remove trailing slash from groupname + (setq group (substring group 0 -1)) + + ;; stuff results into artlist vector + (push (vector (substitute ?. ?/ group) + (string-to-int article) + (string-to-int score)) artlist))) + + ;; sort artlist by score + (apply 'vector + (sort* artlist + (function (lambda (x y) + (> (nnir-artitem-rsv x) + (nnir-artitem-rsv y))))))))) + ;;; Util Code: (defun nnir-read-parms (query)