;;; nnir.el --- search mail with various search engines
;; Copyright (C) 1998 Kai Großjohann
-;; $Id: nnir.el,v 1.68 2001/05/26 23:34:07 grossjoh Exp $
+;; $Id: nnir.el,v 1.72 2001/08/17 11:15:13 grossjoh Exp $
;; Author: Kai Großjohann <grossjohann@ls6.cs.uni-dortmund.de>
-;; Keywords: news, mail, searching, ir, glimpse, wais
+;; Keywords: news, mail, searching, ir, glimpse, wais, Namazu
;; This file is not part of GNU Emacs.
;; one to search this Glimpse index. I have indexed my whole home
;; directory with Glimpse, so I assume a default of `$HOME'.
+;; 3. Namazu
+;;
+;; The Namazu backend requires you to have one directory containing all
+;; index files, this is controlled by the `nnir-namazu-index-directory'
+;; variable. To function the `nnir-namazu-remove-prefix' variable must
+;; also be correct, see the documentation for `nnir-wais-remove-prefix'
+;; above.
+;;
+;; It is particularly important not to pass any any switches to namazu
+;; that will change the output format. Good switches to use include
+;; `--sort', `--ascending', `--early' and `--late'. Refer to the Namazu
+;; documentation for further information on valid switches.
+;;
+;; To index my mail with the `mknmz' program I use the following
+;; configuration file:
+;;
+;; ,----
+;; | package conf; # Don't remove this line!
+;; |
+;; | # Paths which will not be indexed. Don't use `^' or `$' anchors.
+;; | $EXCLUDE_PATH = "spam|sent";
+;; |
+;; | # Header fields which should be searchable. case-insensitive
+;; | $REMAIN_HEADER = "from|date|message-id|subject";
+;; |
+;; | # Searchable fields. case-insensitive
+;; | $SEARCH_FIELD = "from|date|message-id|subject";
+;; |
+;; | # The max length of a word.
+;; | $WORD_LENG_MAX = 128;
+;; |
+;; | # The max length of a field.
+;; | $MAX_FIELD_LENGTH = 256;
+;; `----
+;;
+;; My mail is stored in the directories ~/Mail/mail/, ~/Mail/lists/ and
+;; ~/Mail/archive/, so to index them I go to the directory set in
+;; `nnir-namazu-index-directory' and issue the following command.
+;;
+;; mknmz --mailnews ~/Mail/archive/ ~/Mail/mail/ ~/Mail/lists/
+;;
+;; For maximum searching efficiency I have a cron job set to run this
+;; command every four hours.
+
+;; 3. Namazu
+;;
+;;
+
;; Developer information:
;; I have tried to make the code expandable. Basically, it is divided
;;; Setup Code:
-(defconst nnir-version "$Id: nnir.el,v 1.68 2001/05/26 23:34:07 grossjoh Exp $"
+(defconst nnir-version "$Id: nnir.el,v 1.72 2001/08/17 11:15:13 grossjoh Exp $"
"Version of NNIR.")
(require 'cl)
(defvar nnir-engines
'((glimpse nnir-run-glimpse
((group . "Group spec: ")))
+ (namazu nnir-run-namazu
+ ((group . "Group spec: ")))
(wais nnir-run-waissearch
())
(excite nnir-run-excite-search
(swish++ nnir-run-swish++
((group . "Group spec: ")))
(swish-e nnir-run-swish-e
- ((group . "Group spec: "))))
+ ((group . "Group spec: ")))
+ (namazu nnir-run-namazu
+ ()))
"Alist of supported search engines.
Each element in the alist is a three-element list (ENGINE FUNCTION ARGS).
ENGINE is a symbol designating the searching engine. FUNCTION is also
;;; User Customizable Variables:
(defgroup nnir nil
- "Search nnmh and nnml groups in Gnus with Glimpse, freeWAIS-sf, or EWS.")
+ "Search nnmh and nnml groups in Gnus with Glimpse, freeWAIS-sf, Namazu,
+or EWS.")
;; Mail backend.
(defcustom nnir-glimpse-remove-prefix (concat (getenv "HOME") "/Mail/")
"*The prefix to remove from each file name returned by Glimpse
-in order to get a group name (albeit with / instead of .).
+in order to get a group name (albeit with / instead of .). This is a
+regular expression.
For example, suppose that Glimpse returns file names such as
\"/home/john/Mail/mail/misc/42\". For this example, use the following
Note the trailing slash. Removing this prefix gives \"mail/misc/42\".
`nnir' knows to remove the \"/42\" and to replace \"/\" with \".\" to
arrive at the correct group name, \"mail.misc\"."
- :type '(directory)
+ :type '(regexp)
:group 'nnir)
(defcustom nnir-glimpse-additional-switches '("-i")
:type '(repeat (string))
:group 'nnir)
+;; Namazu engine.
+
+(defcustom nnir-namazu-program "namazu"
+ "*Name of Namazu executable."
+ :type '(string)
+ :group 'nnir)
+
+(defcustom nnir-namazu-index (expand-file-name "Namazu/" (getenv "HOME"))
+ "*Namazu index directory"
+ :type '(directory)
+ :group 'nnir)
+
+(defcustom nnir-namazu-remove-prefix (concat (getenv "HOME") "/Mail/")
+ "*The prefix to remove from each file name returned by Namazu
+in order to get a group name (albeit with / instead of .)."
+ :type '(directory)
+ :group 'nnir)
+
;; freeWAIS-sf.
(defcustom nnir-wais-program "waissearch"
(defcustom nnir-wais-remove-prefix (concat (getenv "HOME") "/Mail/")
"*The prefix to remove from each directory name returned by waissearch
-in order to get a group name (albeit with / instead of .).
+in order to get a group name (albeit with / instead of .). This is a
+regular expression.
This variable is similar to `nnir-glimpse-remove-prefix', only for Wais,
not Glimpse."
- :type '(directory)
+ :type '(regexp)
:group 'nnir)
;; EWS (Excite for Web Servers) engine.
(defcustom nnir-excite-remove-prefix (concat (getenv "HOME") "/Mail/")
"*The prefix to remove from each file name returned by EWS
-in order to get a group name (albeit with / instead of .).
+in order to get a group name (albeit with / instead of .). This is a
+regular expression.
This variable is very similar to `nnir-glimpse-remove-prefix', except
that it is for EWS, not Glimpse."
- :type '(directory)
+ :type '(regexp)
:group 'nnir)
;; Swish++. Next three variables Copyright (C) 2000, 2001 Christoph
(defcustom nnir-swish++-remove-prefix (concat (getenv "HOME") "/Mail/")
"*The prefix to remove from each file name returned by swish++
-in order to get a group name (albeit with / instead of .).
+in order to get a group name (albeit with / instead of .). This is a
+regular expression.
This variable is very similar to `nnir-glimpse-remove-prefix', except
that it is for swish++, not Glimpse."
- :type '(directory)
+ :type '(regexp)
:group 'nnir)
;; Swish-E. Next three variables Copyright (C) 2000 Christoph Conrad
(defcustom nnir-swish-e-remove-prefix (concat (getenv "HOME") "/Mail/")
"*The prefix to remove from each file name returned by swish-e
-in order to get a group name (albeit with / instead of .).
+in order to get a group name (albeit with / instead of .). This is a
+regular expression.
This variable is very similar to `nnir-glimpse-remove-prefix', except
that it is for swish-e, not Glimpse."
+ :type '(regexp)
+ :group 'nnir)
+
+;; Namazu engine, see <URL:http://ww.namazu.org/>
+
+(defcustom nnir-namazu-program "namazu"
+ "*Name of Namazu search executable."
+ :type '(string)
+ :group 'nnir)
+
+(defcustom nnir-namazu-index-directory (expand-file-name "~/Mail/namazu/")
+ "*Index directory for Namazu."
+ :type '(directory)
+ :group 'nnir)
+
+(defcustom nnir-namazu-additional-switches '()
+ "*A list of strings, to be given as additional arguments to namazu.
+The switches `-q', `-a', and `-s' are always used, very few other switches
+make any sense in this context.
+
+Note that this should be a list. Ie, do NOT use the following:
+ (setq nnir-namazu-additional-switches \"-i -w\") ; wrong
+Instead, use this:
+ (setq nnir-namazu-additional-switches '(\"-i\" \"-w\"))"
+ :type '(repeat (string))
+ :group 'nnir)
+
+(defcustom nnir-namazu-remove-prefix (concat (getenv "HOME") "/Mail/")
+ "*The prefix to remove from each file name returned by Namazu
+in order to get a group name (albeit with / instead of .).
+
+This variable is very similar to `nnir-glimpse-remove-prefix', except
+that it is for Namazu, not Glimpse."
:type '(directory)
:group 'nnir)
(setq art (car artlist))
(or (numberp art)
(nnheader-report
+ 'nnir
"nnir-retrieve-headers doesn't grok message ids: %s"
art))
(setq artitem (nnir-artlist-article nnir-artlist art))
(error
"nnheader-parse-head returned nil for article %s in group %s"
artno artfullgroup)))
- (t (nnheader-report "Don't support header type %s." foo)))
+ (t (nnheader-report 'nnir "Don't support header type %s." foo)))
;; replace article number in original group with article number
;; in nnir group
(mail-header-set-number novitem idx)
(apply 'call-process cp-list))))
(unless (or (null exitstatus)
(zerop exitstatus))
- (nnheader-report "Couldn't run glimpse: %s" exitstatus)
+ (nnheader-report 'nnir "Couldn't run glimpse: %s" exitstatus)
;; Glimpse failure reason is in this buffer, show it if
;; the user wants it.
(when (> gnus-verbose 6)
(nnir-artitem-number y))))))
)))
+;; Namazu interface
+(defun nnir-run-namazu (query &optional group)
+ "Run given query against namazu. Returns a vector of (group name, file name)
+pairs (also vectors, actually)."
+ (save-excursion
+ (let ((artlist nil)
+ (groupspec (cdr (assq 'group query)))
+ (qstring (cdr (assq 'query query)))
+ (coding-system-for-read 'euc-japan)
+ (coding-system-for-write 'euc-japan))
+ (when (and group groupspec)
+ (error (concat "It does not make sense to use a group spec"
+ " with process-marked groups.")))
+ (when group
+ (setq groupspec (gnus-group-real-name group)))
+ (set-buffer (get-buffer-create nnir-tmp-buffer))
+ (erase-buffer)
+ (if groupspec
+ (message "Doing namazu query %s on %s..." query groupspec)
+ (message "Doing namazu query %s..." query))
+ (let* ((cp-list
+ `( ,nnir-namazu-program
+ nil ; input from /dev/null
+ t ; output
+ nil ; don't redisplay
+ "--all" "--list" "--early"
+ ,(if groupspec
+ (format "+uri:%s %s" groupspec qstring)
+ (format "%s" qstring))
+ ,nnir-namazu-index ; index
+ ))
+ (exitstatus
+ (progn
+ (message "%s args: %s" nnir-namazu-program
+ (mapconcat 'identity (cddddr cp-list) " "))
+ (apply 'call-process cp-list))))
+ (unless (or (null exitstatus)
+ (zerop exitstatus))
+ (nnheader-report "Couldn't run namazu: %s" exitstatus)
+ ;; Namazu failure reason is in this buffer, show it if
+ ;; the user wants it.
+ (when (> gnus-verbose 6)
+ (display-buffer nnir-tmp-buffer))))
+ (if groupspec
+ (message "Doing namazu query %s on %s..." query groupspec)
+ (message "Doing namazu query %s...done" query))
+ (sit-for 0)
+ ;; CCC: The following work of extracting group name and article
+ ;; number from the Namazu output can probably better be done by
+ ;; just going through the buffer once, and plucking out the
+ ;; right information from each line.
+ ;; remove superfluous stuff from namazu output
+ (goto-char (point-min))
+ (delete-non-matching-lines "/[0-9]+$")
+ ;;(delete-matching-lines "\\.overview~?$")
+ (goto-char (point-min))
+ (while (re-search-forward (concat "^" nnir-namazu-remove-prefix) nil t)
+ (replace-match ""))
+ ;; separate group name from article number with \t
+ ;; XEmacs compatible version
+ (goto-char (point-max))
+ (while (re-search-backward "/[0-9]+$" nil t)
+ (delete-char 1 nil)
+ (insert-char ?\t 1))
+; Emacs compatible version
+; (goto-char (point-min))
+; (while (re-search-forward "\\(/\\)[0-9]+$" nil t)
+; (replace-match "\t" t t nil 1))
+ ;; replace / with . in group names
+ (subst-char-in-region (point-min) (point-max) ?/ ?. t)
+ ;; massage buffer to contain some Lisp;
+ ;; this depends on the artlist encoding internals
+ ;; maybe this dependency should be removed?
+ (goto-char (point-min))
+ (while (not (eobp))
+ (insert "(\"")
+ (skip-chars-forward "^\t")
+ (insert "\" ")
+ (end-of-line)
+ (insert " 1000 )") ; 1000 = score
+ (forward-line 1))
+ (insert "))\n")
+ (goto-char (point-min))
+ (insert "(setq artlist '(\n")
+ (eval-buffer)
+ artlist
+ )))
+
;; freeWAIS-sf interface.
(defun nnir-run-waissearch (query &optional group)
"Run given query agains waissearch. Returns vector of (group name, file name)
(save-excursion
(let ( (qstring (cdr (assq 'query query)))
+ (groupspec (cdr (assq 'group query)))
(artlist nil)
(score nil) (artno nil) (dirnam nil) (group nil) )
(set-buffer (get-buffer-create nnir-tmp-buffer))
(erase-buffer)
- (message "Doing swish++ query %s..." query)
+ (if groupspec
+ (message "Doing swish++ query %s on %s..." qstring groupspec)
+ (message "Doing swish++ query %s..." qstring))
+
(let* ((cp-list `( ,nnir-swish++-program
nil ; input from /dev/null
t ; output
(apply 'call-process cp-list))))
(unless (or (null exitstatus)
(zerop exitstatus))
- (nnheader-report "Couldn't run swish++: %s" exitstatus)
+ (nnheader-report 'nnir "Couldn't run swish++: %s" exitstatus)
;; swish++ failure reason is in this buffer, show it if
;; the user wants it.
(when (> gnus-verbose 6)
;; rank relative-path-name file-size file-title
;; V 5.0b2:
;; rank relative-path-name file-size topic??
- ;; where rank is an integer from from 1 to 100.
+ ;; where rank is an integer from 1 to 100.
(goto-char (point-min))
(while (re-search-forward
"\\(^[0-9]+\\) \\([^ ]+\\) [0-9]+ \\(.*\\)$" nil t)
(when (string-match "^[0-9]+$" artno)
(when (not (null dirnam))
- ;; remove nnir-swish++-remove-prefix from beginning of dirname
- (when (string-match (concat "^" nnir-swish++-remove-prefix)
- dirnam)
- (setq dirnam (replace-match "" t t dirnam)))
+ ; maybe limit results to matching groups.
+ (when (or (not groupspec)
+ (string-match groupspec dirnam))
- (setq dirnam (substring dirnam 0 -1))
- ;; eliminate all ".", "/", "\" from beginning. Always matches.
- (string-match "^[./\\]*\\(.*\\)$" dirnam)
- ;; "/" -> "."
- (setq group (substitute ?. ?/ (match-string 1 dirnam)))
- ;; "\\" -> "."
- (setq group (substitute ?. ?\\ group))
+ ;; remove nnir-swish++-remove-prefix from beginning of dirname
+ (when (string-match (concat "^" nnir-swish++-remove-prefix)
+ dirnam)
+ (setq dirnam (replace-match "" t t dirnam)))
- (push (vector group
- (string-to-int artno)
- (string-to-int score))
- artlist))))
+ (setq dirnam (substring dirnam 0 -1))
+ ;; eliminate all ".", "/", "\" from beginning. Always matches.
+ (string-match "^[./\\]*\\(.*\\)$" dirnam)
+ ;; "/" -> "."
+ (setq group (substitute ?. ?/ (match-string 1 dirnam)))
+ ;; "\\" -> "."
+ (setq group (substitute ?. ?\\ group))
+
+ (push (vector group
+ (string-to-int artno)
+ (string-to-int score))
+ artlist)))))
(message "Massaging swish++ output...done")
(apply 'call-process cp-list))))
(unless (or (null exitstatus)
(zerop exitstatus))
- (nnheader-report "Couldn't run swish-e: %s" exitstatus)
+ (nnheader-report 'nnir "Couldn't run swish-e: %s" exitstatus)
;; swish-e failure reason is in this buffer, show it if
;; the user wants it.
(when (> gnus-verbose 6)
(> (nnir-artitem-rsv x)
(nnir-artitem-rsv y)))))))))
+;; Namazu interface
+(defun nnir-run-namazu (query &optional group)
+ "Run given query against Namazu. Returns a vector of (group name, file name)
+pairs (also vectors, actually).
+
+Tested with Namazu 2.0.6 on a GNU/Linux system."
+ (when group
+ (error "The Namazu backend cannot search specific groups"))
+ (save-excursion
+ (let (
+ (artlist nil)
+ (qstring (cdr (assq 'query query)))
+ (score nil)
+ (group nil)
+ (article nil)
+ )
+ (set-buffer (get-buffer-create nnir-tmp-buffer))
+ (erase-buffer)
+ (let* ((cp-list
+ `( ,nnir-namazu-program
+ nil ; input from /dev/null
+ t ; output
+ nil ; don't redisplay
+ "-q" ; don't be verbose
+ "-a" ; show all matches
+ "-s" ; use short format
+ ,@nnir-namazu-additional-switches
+ ,qstring ; the query, in namazu format
+ ,nnir-namazu-index-directory ; index directory
+ ))
+ (exitstatus
+ (progn
+ (message "%s args: %s" nnir-namazu-program
+ (mapconcat 'identity (cddddr cp-list) " "))
+ (apply 'call-process cp-list))))
+ (unless (or (null exitstatus)
+ (zerop exitstatus))
+ (nnheader-report 'nnir "Couldn't run namazu: %s" exitstatus)
+ ;; Namazu failure reason is in this buffer, show it if
+ ;; the user wants it.
+ (when (> gnus-verbose 6)
+ (display-buffer nnir-tmp-buffer))))
+
+ ;; Namazu output looks something like this:
+ ;; 2. Re: Gnus agent expire broken (score: 55)
+ ;; /home/henrik/Mail/mail/sent/1310 (4,138 bytes)
+
+ (goto-char (point-min))
+ (while (re-search-forward
+ "^\\([0-9]+\\.\\).*\\((score: \\([0-9]+\\)\\))\n\\([^ ]+\\)"
+ nil t)
+ (setq score (match-string 3)
+ group (file-name-directory (match-string 4))
+ article (file-name-nondirectory (match-string 4)))
+
+ ;; make sure article and group is sane
+ (when (and (string-match "^[0-9]+$" article)
+ (not (null group)))
+ (when (string-match (concat "^" nnir-namazu-remove-prefix) group)
+ (setq group (replace-match "" t t group)))
+
+ ;; remove trailing slash from groupname
+ (setq group (substring group 0 -1))
+
+ ;; stuff results into artlist vector
+ (push (vector (substitute ?. ?/ group)
+ (string-to-int article)
+ (string-to-int score)) artlist)))
+
+ ;; sort artlist by score
+ (apply 'vector
+ (sort* artlist
+ (function (lambda (x y)
+ (> (nnir-artitem-rsv x)
+ (nnir-artitem-rsv y)))))))))
+
;;; Util Code:
(defun nnir-read-parms (query)