1 ;;; nnir.el --- search mail with various search engines
2 ;; Copyright (C) 1998 Kai Großjohann
4 ;; $Id: nnir.el,v 1.72 2001/08/17 11:15:13 grossjoh Exp $
6 ;; Author: Kai Großjohann <grossjohann@ls6.cs.uni-dortmund.de>
7 ;; Keywords: news, mail, searching, ir, glimpse, wais
9 ;; This file is not part of GNU Emacs.
11 ;; This is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
28 ;; The most recent version of this can always be fetched from the
29 ;; following FTP site:
30 ;; ls6-ftp.cs.uni-dortmund.de:/pub/src/emacs
32 ;; This code is still in the development stage but I'd like other
33 ;; people to have a look at it. Please do not hesitate to contact me
36 ;; What does it do? Well, it allows you to index your mail using some
37 ;; search engine (freeWAIS-sf and Glimpse are currently supported),
38 ;; then type `G G' in the Group buffer and issue a query to the search
39 ;; engine. You will then get a buffer which shows all articles
40 ;; matching the query, sorted by Retrieval Status Value (score).
42 ;; When looking at the retrieval result (in the Summary buffer) you
43 ;; can type `G T' (aka M-x gnus-summary-nnir-goto-thread RET) on an
44 ;; article. You will be teleported into the group this article came
45 ;; from, showing the thread this article is part of. (See below for
48 ;; The Lisp installation is simple: just put this file on your
49 ;; load-path, byte-compile it, and load it from ~/.gnus or something.
50 ;; This will install a new command `G G' in your Group buffer for
51 ;; searching your mail. Note that you also need to configure a number
52 ;; of variables, as described below.
56 ;; * Currently, this expects that you use nnml or another
57 ;; one-file-per-message backend.
58 ;; * It can only search one mail backend.
59 ;; * There are restrictions to the Glimpse setup.
60 ;; * There are restrictions to the Wais setup.
61 ;; * gnus-summary-nnir-goto-thread: Fetches whole group first, before
62 ;; limiting to the right articles. This is much too slow, of
63 ;; course. May issue a query for number of articles to fetch; you
64 ;; must accept the default of all articles at this point or things
67 ;; The Lisp setup involves setting a few variables and setting up the
68 ;; search engine. The first variable to set is `nnir-mail-backend'.
69 ;; For me, `gnus-secondary-select-methods' contains just one select
70 ;; method, and this is also what I put in `nnir-mail-backend'. Type
71 ;; `C-h v nnir-mail-backend RET' for more information -- the variable
72 ;; documentation includes more details and a few examples. The second
73 ;; variable to set is `nnir-search-engine'. Choose one of the engines
74 ;; listed in `nnir-engines'. (Actually `nnir-engines' is an alist,
75 ;; type `C-h v nnir-engines RET' for more information; this includes
76 ;; examples for setting `nnir-search-engine', too.)
78 ;; You must also set up a search engine. I'll tell you about the two
79 ;; search engines currently supported:
83 ;; As always with freeWAIS-sf, you need a so-called `format file'. I
84 ;; use the following file:
87 ;; | # Kai's format file for freeWAIS-sf for indexing mails.
88 ;; | # Each mail is in a file, much like the MH format.
90 ;; | # Document separator should never match -- each file is a document.
91 ;; | record-sep: /^@this regex should never match@$/
93 ;; | # Searchable fields specification.
95 ;; | region: /^[sS]ubject:/ /^[sS]ubject: */
96 ;; | subject "Subject header" stemming TEXT BOTH
99 ;; | region: /^([tT][oO]|[cC][cC]):/ /^([tT][oO]|[cC][cC]): */
100 ;; | to "To and Cc headers" SOUNDEX BOTH
103 ;; | region: /^[fF][rR][oO][mM]:/ /^[fF][rR][oO][mM]: */
104 ;; | from "From header" SOUNDEX BOTH
108 ;; | stemming TEXT GLOBAL
109 ;; | end: /^@this regex should never match@$/
112 ;; 1998-07-22: waisindex would dump core on me for large articles with
113 ;; the above settings. I used /^$/ as the end regex for the global
114 ;; field. That seemed to work okay.
116 ;; There is a Perl module called `WAIS.pm' which is available from
117 ;; CPAN as well as ls6-ftp.cs.uni-dortmund.de:/pub/wais/Perl. This
118 ;; module comes with a nifty tool called `makedb', which I use for
119 ;; indexing. Here's my `makedb.conf':
122 ;; | # Config file for makedb
124 ;; | # Global options
125 ;; | waisindex = /usr/local/bin/waisindex
126 ;; | wais_opt = -stem -t fields
127 ;; | # `-stem' option necessary when `stemming' is specified for the
128 ;; | # global field in the *.fmt file
131 ;; | homedir = /home/kai
133 ;; | # The mail database.
135 ;; | files = `find $homedir/Mail -name \*[0-9] -print`
136 ;; | dbdir = $homedir/.wais
140 ;; The Lisp setup involves the `nnir-wais-*' variables. The most
141 ;; difficult to understand variable is probably
142 ;; `nnir-wais-remove-prefix'. Here's what it does: the output of
143 ;; `waissearch' basically contains the file name and the (full)
144 ;; directory name. As Gnus works with group names rather than
145 ;; directory names, the directory name is transformed into a group
146 ;; name as follows: first, a prefix is removed from the (full)
147 ;; directory name, then all `/' are replaced with `.'. The variable
148 ;; `nnir-wais-remove-prefix' should contain a regex matching exactly
149 ;; this prefix. It defaults to `$HOME/Mail/' (note the trailing
154 ;; The code expects you to have one Glimpse index which contains all
155 ;; your mail files. The Lisp setup involves setting the
156 ;; `nnir-glimpse-*' variables. The most difficult to understand
157 ;; variable is probably `nnir-glimpse-remove-prefix', it corresponds
158 ;; to `nnir-wais-remove-prefix', see above. The `nnir-glimpse-home'
159 ;; variable should be set to the value of the `-H' option which allows
160 ;; one to search this Glimpse index. I have indexed my whole home
161 ;; directory with Glimpse, so I assume a default of `$HOME'.
165 ;; The Namazu backend requires you to have one directory containing all
166 ;; index files, this is controlled by the `nnir-namazu-index-directory'
167 ;; variable. To function, the `nnir-namazu-remove-prefix' variable must
168 ;; also be correct, see the documentation for `nnir-wais-remove-prefix'
171 ;; It is particularly important not to pass any switches to namazu
172 ;; that will change the output format. Good switches to use include
173 ;; `--sort', `--ascending', `--early' and `--late'. Refer to the Namazu
174 ;; documentation for further information on valid switches.
176 ;; To index my mail with the `mknmz' program I use the following
177 ;; configuration file:
180 ;; | package conf; # Don't remove this line!
182 ;; | # Paths which will not be indexed. Don't use `^' or `$' anchors.
183 ;; | $EXCLUDE_PATH = "spam|sent";
185 ;; | # Header fields which should be searchable. case-insensitive
186 ;; | $REMAIN_HEADER = "from|date|message-id|subject";
188 ;; | # Searchable fields. case-insensitive
189 ;; | $SEARCH_FIELD = "from|date|message-id|subject";
191 ;; | # The max length of a word.
192 ;; | $WORD_LENG_MAX = 128;
194 ;; | # The max length of a field.
195 ;; | $MAX_FIELD_LENGTH = 256;
198 ;; My mail is stored in the directories ~/Mail/mail/, ~/Mail/lists/ and
199 ;; ~/Mail/archive/, so to index them I go to the directory set in
200 ;; `nnir-namazu-index-directory' and issue the following command.
202 ;; mknmz --mailnews ~/Mail/archive/ ~/Mail/mail/ ~/Mail/lists/
204 ;; For maximum searching efficiency I have a cron job set to run this
205 ;; command every four hours.
207 ;; Developer information:
209 ;; I have tried to make the code expandable. Basically, it is divided
210 ;; into two layers. The upper layer is somewhat like the `nnvirtual'
211 ;; or `nnkiboze' backends: given a specification of what articles to
212 ;; show from another backend, it creates a group containing exactly
213 ;; those articles. The lower layer issues a query to a search engine
214 ;; and produces such a specification of what articles to show from the
217 ;; The interface between the two layers consists of the single
218 ;; function `nnir-run-query', which just selects the appropriate
219 ;; function for the search engine one is using. The input to
220 ;; `nnir-run-query' is a string, representing the query as input by
221 ;; the user. The output of `nnir-run-query' is supposed to be a
222 ;; vector, each element of which should in turn be a three-element
223 ;; vector. The first element should be the group name of the article, the
224 ;; second element should be the article number, and the third element
225 ;; should be the Retrieval Status Value (RSV) as returned from the
226 ;; search engine. An RSV is the score assigned to the document by the
227 ;; search engine. For Boolean search engines like Glimpse, the RSV is
228 ;; always 1000 (or 1 or 100, or whatever you like).
230 ;; The sorting order of the articles in the summary buffer created by
231 ;; nnir is based on the order of the articles in the above mentioned
232 ;; vector, so that's where you can do the sorting you'd like. Maybe
233 ;; it would be nice to have a way of displaying the search result
234 ;; sorted differently?
236 ;; So what do you need to do when you want to add another search
237 ;; engine? You write a function that executes the query. Temporary
238 ;; data from the search engine can be put in `nnir-tmp-buffer'. This
239 ;; function should return the list of articles as a vector, as
240 ;; described above. Then, you need to register this backend in
241 ;; `nnir-engines'. Then, users can choose the backend by setting
242 ;; `nnir-search-engine'.
244 ;; Todo, or future ideas:
246 ;; * Make it so that Glimpse can also be called without `-F'.
248 ;; * It should be possible to restrict search to certain groups.
250 ;; * There is currently no error checking.
252 ;; * The summary buffer display is currently really ugly, with all the
253 ;; added information in the subjects. How could I make this
256 ;; * A function which can be called from an nnir summary buffer which
257 ;; teleports you into the group the current article came from and
258 ;; shows you the whole thread this article is part of.
259 ;; Implementation suggestions?
260 ;; (1998-07-24: There is now a preliminary implementation, but
261 ;; it is much too slow and quite fragile.)
263 ;; * Support other mail backends. In particular, probably quite a few
264 ;; people use nnfolder. How would one go about searching nnfolders
265 ;; and producing the right data needed? The group name and the RSV
266 ;; are simple, but what about the article number?
268 ;; * Support compressed mail files. Probably, just stripping off the
269 ;; `.gz' or `.Z' file name extension is sufficient.
271 ;; * Support a find/grep combination.
273 ;; * At least for imap, the query is performed twice.
275 ;; * Support multiple mail backends. The information that is needed
276 ;; by nnir could be put in the server parameters. (Use sensible
277 ;; default values, though: include the name of the backend in the
278 ;; default value such that people do not have to mess with the
279 ;; server parameters if they don't want to.) It is not clear how to
280 ;; do the user interface, though. Hm. Maybe offer the user a
281 ;; completable list of backends to search? Or use the
282 ;; process-marked groups to find out which backends to search? Or
283 ;; always search all backends?
286 ;; Have you got other ideas?
290 (defconst nnir-version "$Id: nnir.el,v 1.72 2001/08/17 11:15:13 grossjoh Exp $"
295 (require 'gnus-group)
298 (require 'gnus-util))
301 (nnoo-define-basics nnir)
303 (gnus-declare-backend "nnir" 'mail)
305 ;;; Developer Extension Variable:
308 '((glimpse nnir-run-glimpse
309 ((group . "Group spec: ")))
310 (wais nnir-run-waissearch
312 (excite nnir-run-excite-search
316 (swish++ nnir-run-swish++
317 ((group . "Group spec: ")))
318 (swish-e nnir-run-swish-e
319 ((group . "Group spec: ")))
320 (namazu nnir-run-namazu
322 "Alist of supported search engines.
323 Each element in the alist is a three-element list (ENGINE FUNCTION ARGS).
324 ENGINE is a symbol designating the searching engine. FUNCTION is also
325 a symbol, giving the function that does the search. The third element
326 ARGS is a list of cons pairs (PARAM . PROMPT). When issuing a query,
327 the FUNCTION will issue a query for each of the PARAMs, using PROMPT.
329 The value of `nnir-search-engine' must be one of the ENGINE symbols.
330 For example, use the following line for searching using freeWAIS-sf:
331 (setq nnir-search-engine 'wais)
332 Use the following line if you read your mail via IMAP and your IMAP
333 server supports searching:
334 (setq nnir-search-engine 'imap)
335 Note that you have to set additional variables for most backends. For
336 example, the `wais' backend needs the variables `nnir-wais-program',
337 `nnir-wais-database' and `nnir-wais-remove-prefix'.
339 Add an entry here when adding a new search engine.")
341 ;;; User Customizable Variables:
344 "Search nnmh and nnml groups in Gnus with Glimpse, freeWAIS-sf, or EWS.")
349 ;; If `nil', use server parameters to find out which server to search. CCC
351 (defcustom nnir-mail-backend '(nnml "")
352 "*Specifies which backend should be searched.
353 More precisely, this is used to determine from which backend to fetch the
356 This must be equal to an existing server, so maybe it is best to use
357 something like the following:
358 (setq nnir-mail-backend (nth 0 gnus-secondary-select-methods))
359 The above line works fine if the mail backend you want to search is
360 the first element of gnus-secondary-select-methods (`nth' starts counting
365 ;; Search engine to use.
367 (defcustom nnir-search-engine 'wais
368 "*The search engine to use. Must be a symbol.
369 See `nnir-engines' for a list of supported engines, and for example
370 settings of `nnir-search-engine'."
376 (defcustom nnir-glimpse-program "glimpse"
377 "*Name of Glimpse executable."
381 (defcustom nnir-glimpse-home (getenv "HOME")
382 "*Value of `-H' glimpse option.
383 `~' and environment variables must be expanded, see the functions
384 `expand-file-name' and `substitute-in-file-name'."
388 (defcustom nnir-glimpse-remove-prefix (concat (getenv "HOME") "/Mail/")
389 "*The prefix to remove from each file name returned by Glimpse
390 in order to get a group name (albeit with / instead of .). This is a
393 For example, suppose that Glimpse returns file names such as
394 \"/home/john/Mail/mail/misc/42\". For this example, use the following
395 setting: (setq nnir-glimpse-remove-prefix \"/home/john/Mail/\")
396 Note the trailing slash. Removing this prefix gives \"mail/misc/42\".
397 `nnir' knows to remove the \"/42\" and to replace \"/\" with \".\" to
398 arrive at the correct group name, \"mail.misc\"."
402 (defcustom nnir-glimpse-additional-switches '("-i")
403 "*A list of strings, to be given as additional arguments to glimpse.
404 The switches `-H', `-W', `-l' and `-y' are always used -- calling
405 glimpse without them does not make sense in our situation.
406 Suggested elements to put here are `-i' and `-w'.
408 Note that this should be a list. Ie, do NOT use the following:
409 (setq nnir-glimpse-additional-switches \"-i -w\") ; wrong!
411 (setq nnir-glimpse-additional-switches '(\"-i\" \"-w\"))"
412 :type '(repeat (string))
417 (defcustom nnir-wais-program "waissearch"
418 "*Name of waissearch executable."
422 (defcustom nnir-wais-database (expand-file-name "~/.wais/mail")
423 "*Name of Wais database containing the mail.
425 Note that this should be a file name without extension. For example,
426 if you have a file /home/john/.wais/mail.fmt, use this:
427 (setq nnir-wais-database \"/home/john/.wais/mail\")
428 The string given here is passed to `waissearch -d' as-is."
432 (defcustom nnir-wais-remove-prefix (concat (getenv "HOME") "/Mail/")
433 "*The prefix to remove from each directory name returned by waissearch
434 in order to get a group name (albeit with / instead of .). This is a
437 This variable is similar to `nnir-glimpse-remove-prefix', only for Wais,
442 ;; EWS (Excite for Web Servers) engine.
444 (defcustom nnir-excite-aquery-program "aquery.pl"
445 "*Name of the EWS query program. Should be `aquery.pl' or a path to same."
449 (defcustom nnir-excite-collection "Mail"
450 "*Name of the EWS collection to search."
454 (defcustom nnir-excite-remove-prefix (concat (getenv "HOME") "/Mail/")
455 "*The prefix to remove from each file name returned by EWS
456 in order to get a group name (albeit with / instead of .). This is a
459 This variable is very similar to `nnir-glimpse-remove-prefix', except
460 that it is for EWS, not Glimpse."
464 (defcustom nnir-imap-default-charset nil
465 "*Name of the charset of the strings that appear in the search criteria."
466 :type '(choice (const nil) string)
469 ;; Swish++. Next three variables Copyright (C) 2000, 2001 Christoph
470 ;; Conrad <christoph.conrad@gmx.de>.
471 ;; Swish++ home page: http://homepage.mac.com/pauljlucas/software/swish/
473 (defcustom nnir-swish++-configuration-file
474 (expand-file-name "~/Mail/swish++.conf")
475 "*Configuration file for swish++."
479 (defcustom nnir-swish++-program "search"
480 "*Name of swish++ search executable."
484 (defcustom nnir-swish++-additional-switches '()
485 "*A list of strings, to be given as additional arguments to swish++.
487 Note that this should be a list. Ie, do NOT use the following:
488 (setq nnir-swish++-additional-switches \"-i -w\") ; wrong
490 (setq nnir-swish++-additional-switches '(\"-i\" \"-w\"))"
491 :type '(repeat (string))
494 (defcustom nnir-swish++-remove-prefix (concat (getenv "HOME") "/Mail/")
495 "*The prefix to remove from each file name returned by swish++
496 in order to get a group name (albeit with / instead of .). This is a
499 This variable is very similar to `nnir-glimpse-remove-prefix', except
500 that it is for swish++, not Glimpse."
504 ;; Swish-E. Next three variables Copyright (C) 2000 Christoph Conrad
505 ;; <christoph.conrad@gmx.de>.
506 ;; URL: http://sunsite.berkeley.edu/SWISH-E/
507 ;; New version: http://www.boe.es/swish-e
509 (defcustom nnir-swish-e-index-file
510 (expand-file-name "~/Mail/index.swish-e")
511 "*Index file for swish-e."
515 (defcustom nnir-swish-e-program "swish-e"
516 "*Name of swish-e search executable."
520 (defcustom nnir-swish-e-additional-switches '()
521 "*A list of strings, to be given as additional arguments to swish-e.
523 Note that this should be a list. Ie, do NOT use the following:
524 (setq nnir-swish-e-additional-switches \"-i -w\") ; wrong
526 (setq nnir-swish-e-additional-switches '(\"-i\" \"-w\"))"
527 :type '(repeat (string))
530 (defcustom nnir-swish-e-remove-prefix (concat (getenv "HOME") "/Mail/")
531 "*The prefix to remove from each file name returned by swish-e
532 in order to get a group name (albeit with / instead of .). This is a
535 This variable is very similar to `nnir-glimpse-remove-prefix', except
536 that it is for swish-e, not Glimpse."
540 ;; Namazu engine, see <URL:http://www.namazu.org/>
542 (defcustom nnir-namazu-program "namazu"
543 "*Name of Namazu search executable."
547 (defcustom nnir-namazu-index-directory (expand-file-name "~/Mail/namazu/")
548 "*Index directory for Namazu."
552 (defcustom nnir-namazu-additional-switches '()
553 "*A list of strings, to be given as additional arguments to namazu.
554 The switches `-q', `-a', and `-s' are always used, very few other switches
555 make any sense in this context.
557 Note that this should be a list. Ie, do NOT use the following:
558 (setq nnir-namazu-additional-switches \"-i -w\") ; wrong
560 (setq nnir-namazu-additional-switches '(\"-i\" \"-w\"))"
561 :type '(repeat (string))
564 (defcustom nnir-namazu-remove-prefix (concat (getenv "HOME") "/Mail/")
565 "*The prefix to remove from each file name returned by Namazu
566 in order to get a group name (albeit with / instead of .).
568 This variable is very similar to `nnir-glimpse-remove-prefix', except
569 that it is for Namazu, not Glimpse."
573 ;;; Internal Variables:
;; The three `nnir-current-*' variables below hold the cache key used by
;; `nnir-request-group': they are compared against the incoming request
;; there, recorded after a non-empty search, and reset to nil when a
;; search produces no results.
575 (defvar nnir-current-query nil
576 "Internal: stores current query (= group name).")
578 (defvar nnir-current-server nil
579 "Internal: stores current server (does it ever change?).")
581 (defvar nnir-current-group-marked nil
582 "Internal: stores current list of process-marked groups.")
;; Assigned from the result of `nnir-run-query' in `nnir-request-group'.
584 (defvar nnir-artlist nil
585 "Internal: stores search result.")
;; Buffer name; the engine functions create and select it with
;; `get-buffer-create' before running the external search program.
587 (defvar nnir-tmp-buffer " *nnir*"
588 "Internal: temporary buffer.")
594 (defun gnus-group-make-nnir-group (extra-parms query)
595 "Create an nnir group. Asks for query."
596 (interactive "P\nsQuery: ")
599 (setq parms (nnir-read-parms query))
600 (setq parms (list (cons 'query query))))
601 (gnus-group-read-ephemeral-group
602 (concat "nnir:" (prin1-to-string parms)) '(nnir "") t
603 (cons (current-buffer)
604 gnus-current-window-configuration)
;; Provide `kbd' as an alias for `read-kbd-macro' when `kbd' is not
;; already defined as a function.
607 ;; Emacs 19 compatibility?
608 (or (fboundp 'kbd) (defalias 'kbd 'read-kbd-macro))
610 (defun nnir-group-mode-hook ()
611 (define-key gnus-group-mode-map
612 (if (fboundp 'read-kbd-macro)
614 "GG") ; XEmacs 19 compat
615 'gnus-group-make-nnir-group))
616 (add-hook 'gnus-group-mode-hook
618 (unless (string-match "T-gnus" gnus-version)
619 (nnir-group-mode-hook))))
623 ;; Summary mode commands.
625 (defun gnus-summary-nnir-goto-thread ()
626 "Only applies to nnir groups. Go to group this article came from
627 and show thread that contains this article."
629 (unless (eq 'nnir (car (gnus-find-method-for-group gnus-newsgroup-name)))
630 (error "Can't execute this command unless in nnir group."))
631 (let* ((cur (gnus-summary-article-number))
632 (backend-group (nnir-artlist-artitem-group nnir-artlist cur))
633 (backend-number (nnir-artlist-artitem-number nnir-artlist cur)))
634 (gnus-group-read-ephemeral-group
638 (cons (current-buffer)
639 'summary) ; window config
641 (list backend-number))
642 (gnus-summary-limit (list backend-number))
643 (gnus-summary-refer-thread)))
645 (if (fboundp 'eval-after-load)
646 (eval-after-load "gnus-sum"
647 '(define-key gnus-summary-goto-map
648 "T" 'gnus-summary-nnir-goto-thread))
649 (add-hook 'gnus-summary-mode-hook
651 (define-key gnus-summary-goto-map
652 "T" 'gnus-summary-nnir-goto-thread)))))
656 ;; Gnus backend interface functions.
;; Backend interface function for opening an nnir server.
;; SERVER and the optional DEFINITIONS are passed straight through to
;; `nnoo-change-server' for the `nnir' backend; the return value is
;; whatever that call returns.
658 (deffoo nnir-open-server (server &optional definitions)
659 ;; Just set the server variables appropriately.
660 (nnoo-change-server 'nnir server definitions))
662 (deffoo nnir-request-group (group &optional server fast)
663 "GROUP is the query string."
664 (nnir-possibly-change-server server)
665 ;; Check for cache and return that if appropriate.
666 (if (and (equal group nnir-current-query)
667 (equal gnus-group-marked nnir-current-group-marked)
669 (equal server nnir-current-server)))
672 (setq nnir-artlist (nnir-run-query group))
674 (set-buffer nntp-server-buffer)
675 (if (zerop (length nnir-artlist))
677 (setq nnir-current-query nil
678 nnir-current-server nil
679 nnir-current-group-marked nil
681 (nnheader-report 'nnir "Search produced empty results."))
682 ;; Remember data for cache.
683 (setq nnir-current-query group)
684 (when server (setq nnir-current-server server))
685 (setq nnir-current-group-marked gnus-group-marked)
686 (nnheader-insert "211 %d %d %d %s\n"
687 (nnir-artlist-length nnir-artlist) ; total #
689 (nnir-artlist-length nnir-artlist) ; last #
690 group))))) ; group name
692 (deffoo nnir-retrieve-headers (articles &optional group server fetch-old)
694 (let ((artlist (copy-sequence articles))
698 (artgroup nil) (artno nil)
704 (while (not (null artlist))
705 (setq art (car artlist))
709 "nnir-retrieve-headers doesn't grok message ids: %s"
711 (setq artitem (nnir-artlist-article nnir-artlist art))
712 (setq artrsv (nnir-artitem-rsv artitem))
713 (setq artgroup (nnir-artitem-group artitem))
714 (setq artno (nnir-artitem-number artitem))
715 (setq artfullgroup (nnir-group-full-name artgroup))
716 ;; retrieve NOV or HEAD data for this article, transform into
717 ;; NOV data and prepend to `novdata'
718 (set-buffer nntp-server-buffer)
719 (case (setq foo (gnus-retrieve-headers (list artno) artfullgroup nil))
721 (goto-char (point-min))
722 (setq novitem (nnheader-parse-nov))
724 (pop-to-buffer nntp-server-buffer)
726 "nnheader-parse-nov returned nil for article %s in group %s"
727 artno artfullgroup)))
729 (goto-char (point-min))
730 (setq novitem (nnheader-parse-head))
732 (pop-to-buffer nntp-server-buffer)
734 "nnheader-parse-head returned nil for article %s in group %s"
735 artno artfullgroup)))
736 (t (nnheader-report 'nnir "Don't support header type %s." foo)))
737 ;; replace article number in original group with article number
739 (mail-header-set-number novitem idx)
740 (mail-header-set-from novitem
741 (mail-header-from novitem))
742 (mail-header-set-subject
744 (format "[%d: %s/%d] %s"
745 artrsv artgroup artno
746 (mail-header-subject novitem)))
747 ;;-(mail-header-set-extra novitem nil)
748 (push novitem novdata)
749 (setq artlist (cdr artlist))
751 (setq novdata (nreverse novdata))
752 (set-buffer nntp-server-buffer) (erase-buffer)
753 (mapcar 'nnheader-insert-nov novdata)
756 (deffoo nnir-request-article (article
757 &optional group server to-buffer)
759 (let* ((artitem (nnir-artlist-article nnir-artlist
761 (artgroup (nnir-artitem-group artitem))
762 (artno (nnir-artitem-number artitem))
764 ;; Why must we bind nntp-server-buffer here? It won't
765 ;; work if `buf' is used, say. (Of course, the set-buffer
766 ;; line below must then be updated, too.)
767 (nntp-server-buffer (or to-buffer nntp-server-buffer)))
768 (set-buffer nntp-server-buffer)
770 (message "Requesting article %d from group %s"
772 (nnir-group-full-name artgroup))
773 (gnus-request-article artno (nnir-group-full-name artgroup)
775 (cons artgroup artno))))
778 (nnoo-define-skeleton nnir)
780 ;;; Search Engine Interfaces:
782 ;; Glimpse interface.
783 (defun nnir-run-glimpse (query &optional group)
784 "Run given query against glimpse. Returns a vector of (group name, file name)
785 pairs (also vectors, actually)."
788 (groupspec (cdr (assq 'group query)))
789 (qstring (cdr (assq 'query query))))
790 (when (and group groupspec)
791 (error (concat "It does not make sense to use a group spec"
792 " with process-marked groups.")))
794 (setq groupspec (gnus-group-real-name group)))
795 (set-buffer (get-buffer-create nnir-tmp-buffer))
798 (message "Doing glimpse query %s on %s..." query groupspec)
799 (message "Doing glimpse query %s..." query))
801 `( ,nnir-glimpse-program
802 nil ; input from /dev/null
804 nil ; don't redisplay
805 "-H" ,nnir-glimpse-home ; search home dir
806 "-W" ; match pattern in file
807 "-l" "-y" ; misc options
808 ,@nnir-glimpse-additional-switches
809 "-F" ,nnir-glimpse-remove-prefix ; restrict output to mail
810 ,qstring ; the query, in glimpse format
814 (message "%s args: %s" nnir-glimpse-program
815 (mapconcat 'identity (cddddr cp-list) " "))
816 (apply 'call-process cp-list))))
817 (unless (or (null exitstatus)
819 (nnheader-report 'nnir "Couldn't run glimpse: %s" exitstatus)
820 ;; Glimpse failure reason is in this buffer, show it if
821 ;; the user wants it.
822 (when (> gnus-verbose 6)
823 (display-buffer nnir-tmp-buffer))))
825 (keep-lines groupspec))
827 (message "Doing glimpse query %s on %s...done" query groupspec)
828 (message "Doing glimpse query %s...done" query))
830 ;; CCC: The following work of extracting group name and article
831 ;; number from the Glimpse output can probably better be done by
832 ;; just going through the buffer once, and plucking out the
833 ;; right information from each line.
834 ;; remove superfluous stuff from glimpse output
835 (goto-char (point-min))
836 (delete-non-matching-lines "/[0-9]+$")
837 ;;(delete-matching-lines "\\.overview~?$")
838 (goto-char (point-min))
839 (while (re-search-forward (concat "^" nnir-glimpse-remove-prefix) nil t)
841 ;; separate group name from article number with \t
842 ;; XEmacs compatible version
843 (goto-char (point-max))
844 (while (re-search-backward "/[0-9]+$" nil t)
847 ; Emacs compatible version
848 ; (goto-char (point-min))
849 ; (while (re-search-forward "\\(/\\)[0-9]+$" nil t)
850 ; (replace-match "\t" t t nil 1))
851 ;; replace / with . in group names
852 (subst-char-in-region (point-min) (point-max) ?/ ?. t)
853 ;; massage buffer to contain some Lisp;
854 ;; this depends on the artlist encoding internals
855 ;; maybe this dependency should be removed?
856 (goto-char (point-min))
859 (skip-chars-forward "^\t")
862 (insert " 1000 ]") ; 1000 = score
865 (goto-char (point-min))
866 (insert "(setq artlist [\n")
869 (function (lambda (x y)
870 (if (string-lessp (nnir-artitem-group x)
871 (nnir-artitem-group y))
873 (< (nnir-artitem-number x)
874 (nnir-artitem-number y))))))
877 ;; freeWAIS-sf interface.
878 (defun nnir-run-waissearch (query &optional group)
879 "Run given query against waissearch. Returns vector of (group name, file name)
880 pairs (also vectors, actually)."
882 (error "The freeWAIS-sf backend cannot search specific groups."))
884 (let ((qstring (cdr (assq 'query query)))
886 (score nil) (artno nil) (dirnam nil) (group nil))
887 (set-buffer (get-buffer-create nnir-tmp-buffer))
889 (message "Doing WAIS query %s..." query)
890 (call-process nnir-wais-program
891 nil ; input from /dev/null
892 t ; output to current buffer
893 nil ; don't redisplay
894 "-d" nnir-wais-database ; database to search
896 (message "Massaging waissearch output...")
897 ;; remove superfluous lines
898 (keep-lines "Score:")
899 ;; extract data from result lines
900 (goto-char (point-min))
901 (while (re-search-forward
902 "Score: +\\([0-9]+\\).*'\\([0-9]+\\) +\\([^']+\\)/'" nil t)
903 (setq score (match-string 1)
904 artno (match-string 2)
905 dirnam (match-string 3))
906 (unless (string-match nnir-wais-remove-prefix dirnam)
907 (nnheader-report 'nnir "Dir name %s doesn't contain prefix %s"
908 dirnam nnir-wais-remove-prefix))
909 (setq group (substitute ?. ?/ (replace-match "" t t dirnam)))
911 (string-to-int artno)
912 (string-to-int score))
914 (message "Massaging waissearch output...done")
917 (function (lambda (x y)
918 (> (nnir-artitem-rsv x)
919 (nnir-artitem-rsv y)))))))))
921 ;; EWS (Excite for Web Servers) interface
922 (defun nnir-run-excite-search (query &optional group)
923 "Run a given query against EWS. Returns vector of (group name, file name)
924 pairs (also vectors, actually)."
926 (error "Searching specific groups not implemented for EWS."))
928 (let ((qstring (cdr (assq 'query query)))
929 artlist group article-num article)
930 (setq nnir-current-query query)
931 (set-buffer (get-buffer-create nnir-tmp-buffer))
933 (message "Doing EWS query %s..." qstring)
934 (call-process nnir-excite-aquery-program
935 nil ; input from /dev/null
936 t ; output to current buffer
937 nil ; don't redisplay
938 nnir-excite-collection
939 (if (string= (substring qstring 0 1) "(")
941 (format "(concept %s)" qstring)))
942 (message "Gathering query output...")
944 (goto-char (point-min))
945 (while (re-search-forward
946 "^[0-9]+\\s-[0-9]+\\s-[0-9]+\\s-\\(\\S-*\\)" nil t)
947 (setq article (match-string 1))
948 (unless (string-match
949 (concat "^" (regexp-quote nnir-excite-remove-prefix)
950 "\\(.*\\)/\\([0-9]+\\)") article)
951 (nnheader-report 'nnir "Dir name %s doesn't contain prefix %s"
952 article nnir-excite-remove-prefix))
953 (setq group (substitute ?. ?/ (match-string 1 article)))
954 (setq article-num (match-string 2 article))
955 (setq artlist (vconcat artlist (vector (vector group
956 (string-to-int article-num)
958 (message "Gathering query output...done")
961 ;; IMAP interface. The following function is Copyright (C) 1998 Simon
962 ;; Josefsson <jas@pdc.kth.se>.
964 ;; TODO: nnir invokes this function twice; find out why.
965 ;; We should not use nnimap at all, but open our own server connection.
966 ;; We should not "LIST *", but use nnimap-list-pattern from defs.
967 ;; Queries should be sent as literals.
971 (defvar nnimap-server-buffer))
973 (defun nnir-run-imap (query &optional group)
978 (error "Must specify groups for IMAP searching."))
980 (let ((qstring (cdr (assq 'query query)))
981 (server (cadr nnir-mail-backend))
982 (defs (caddr nnir-mail-backend))
984 (message "Opening server %s" server)
986 (when (nnimap-open-server server defs) ;; xxx
987 (setq buf nnimap-server-buffer) ;; xxx
988 (message "Searching %s..." group)
990 (mbx (gnus-group-real-name group))
991 (multibyte-p (mm-multibyte-p))
992 charset coding-system)
993 (when (imap-mailbox-select mbx nil buf)
996 (mm-enable-multibyte))
998 (setq charset (car (mm-find-mime-charset-region
999 (point-min)(point-max)))))
1001 (setq charset nnir-imap-default-charset))
1004 (push (vector mbx artnum 1) artlist)
1005 (setq arts (1+ arts)))
1006 (if (and (not (eq charset 'us-ascii))
1007 (setq coding-system (mm-charset-to-coding-system
1010 (concat "CHARSET " (symbol-name charset) " TEXT \""
1011 (mm-encode-coding-string qstring coding-system)
1013 (imap-search (concat "TEXT \"" qstring "\"") buf)))
1014 (message "Searching %s... %d matches" mbx arts)))
1015 (message "Searching %s...done" group))
1017 (reverse artlist))))
1019 ;; Swish++ interface. The following function is Copyright (C) 2000,
1020 ;; 2001 Christoph Conrad <christoph.conrad@gmx.de>.
1029 (defun nnir-run-swish++ (query &optional group)
1030 "Run given query against swish++.
1031 Returns a vector of (group name, file name) pairs (also vectors,
1034 Tested with swish++ 4.7 on GNU/Linux and with swish++ 5.0b2 on
1038 (error "The swish++ backend cannot search specific groups."))
1041 (let ( (qstring (cdr (assq 'query query)))
1042 (groupspec (cdr (assq 'group query)))
1044 (score nil) (artno nil) (dirnam nil) (group nil) )
1046 (when (equal "" qstring)
1047 (error "swish++: You didn't enter anything."))
1049 (set-buffer (get-buffer-create nnir-tmp-buffer))
1053 (message "Doing swish++ query %s on %s..." qstring groupspec)
1054 (message "Doing swish++ query %s..." qstring))
1056 (let* ((cp-list `( ,nnir-swish++-program
1057 nil ; input from /dev/null
1059 nil ; don't redisplay
1060 "--config-file" ,nnir-swish++-configuration-file
1061 ,@nnir-swish++-additional-switches
1062 ,qstring ; the query, in swish++ format
1066 (message "%s args: %s" nnir-swish++-program
1067 (mapconcat 'identity (cddddr cp-list) " "));; ???
1068 (apply 'call-process cp-list))))
1069 (unless (or (null exitstatus)
1071 (nnheader-report 'nnir "Couldn't run swish++: %s" exitstatus)
1072 ;; swish++ failure reason is in this buffer, show it if
1073 ;; the user wants it.
1074 (when (> gnus-verbose 6)
1075 (display-buffer nnir-tmp-buffer))))
1077 ;; The results are output in the format of:
1079 ;; rank relative-path-name file-size file-title
1081 ;; rank relative-path-name file-size topic??
1082 ;; where rank is an integer from 1 to 100.
1083 (goto-char (point-min))
1084 (while (re-search-forward
1085 "\\(^[0-9]+\\) \\([^ ]+\\) [0-9]+ \\(.*\\)$" nil t)
1086 (setq score (match-string 1)
1087 artno (file-name-nondirectory (match-string 2))
1088 dirnam (file-name-directory (match-string 2)))
1090 ;; don't match directories
1091 (when (string-match "^[0-9]+$" artno)
1092 (when (not (null dirnam))
1094 ; maybe limit results to matching groups.
1095 (when (or (not groupspec)
1096 (string-match groupspec dirnam))
1098 ;; remove nnir-swish++-remove-prefix from beginning of dirname
1099 (when (string-match (concat "^" nnir-swish++-remove-prefix)
1101 (setq dirnam (replace-match "" t t dirnam)))
1103 (setq dirnam (substring dirnam 0 -1))
1104 ;; eliminate all ".", "/", "\" from beginning. Always matches.
1105 (string-match "^[./\\]*\\(.*\\)$" dirnam)
1107 (setq group (substitute ?. ?/ (match-string 1 dirnam)))
1109 (setq group (substitute ?. ?\\ group))
1112 (string-to-int artno)
1113 (string-to-int score))
1116 (message "Massaging swish++ output...done")
1121 (function (lambda (x y)
1122 (> (nnir-artitem-rsv x)
1123 (nnir-artitem-rsv y)))))))))
1125 ;; Swish-E interface. The following function is Copyright (C) 2000,
1126 ;; 2001 by Christoph Conrad <christoph.conrad@gmx.de>.
1127 (defun nnir-run-swish-e (query &optional group)
1128 "Run given query against swish-e.
1129 Returns a vector of (group name, file name) pairs (also vectors,
1132 Tested with swish-e-2.0.1 on Windows NT 4.0."
1134 ;; swish-e crashes with empty parameter to "-w" on commandline...
1136 (error "The swish-e backend cannot search specific groups."))
1139 (let ( (qstring (cdr (assq 'query query)))
1141 (score nil) (artno nil) (dirnam nil) (group nil) )
1143 (when (equal "" qstring)
1144 (error "swish-e: You didn't enter anything."))
1146 (set-buffer (get-buffer-create nnir-tmp-buffer))
1149 (message "Doing swish-e query %s..." query)
1150 (let* ((cp-list `( ,nnir-swish-e-program
1151 nil ; input from /dev/null
1153 nil ; don't redisplay
1154 "-f" ,nnir-swish-e-index-file
1155 ,@nnir-swish-e-additional-switches
1157 ,qstring ; the query, in swish-e format
1161 (message "%s args: %s" nnir-swish-e-program
1162 (mapconcat 'identity (cddddr cp-list) " "))
1163 (apply 'call-process cp-list))))
1164 (unless (or (null exitstatus)
1166 (nnheader-report 'nnir "Couldn't run swish-e: %s" exitstatus)
1167 ;; swish-e failure reason is in this buffer, show it if
1168 ;; the user wants it.
1169 (when (> gnus-verbose 6)
1170 (display-buffer nnir-tmp-buffer))))
1172 ;; The results are output in the format of:
1173 ;; rank path-name file-title file-size
1174 (goto-char (point-min))
1175 (while (re-search-forward
1176 "\\(^[0-9]+\\) \\([^ ]+\\) \"\\([^\"]+\\)\" [0-9]+$" nil t)
1177 (setq score (match-string 1)
1178 artno (match-string 3)
1179 dirnam (file-name-directory (match-string 2)))
1181 ;; don't match directories
1182 (when (string-match "^[0-9]+$" artno)
1183 (when (not (null dirnam))
1185 ;; remove nnir-swish-e-remove-prefix from beginning of dirname
1186 (when (string-match (concat "^" nnir-swish-e-remove-prefix)
1188 (setq dirnam (replace-match "" t t dirnam)))
1190 (setq dirnam (substring dirnam 0 -1))
1191 ;; eliminate all ".", "/", "\" from beginning. Always matches.
1192 (string-match "^[./\\]*\\(.*\\)$" dirnam)
1194 (setq group (substitute ?. ?/ (match-string 1 dirnam)))
1195 ;; Windows "\\" -> "."
1196 (setq group (substitute ?. ?\\ group))
1199 (string-to-int artno)
1200 (string-to-int score))
1203 (message "Massaging swish-e output...done")
1208 (function (lambda (x y)
1209 (> (nnir-artitem-rsv x)
1210 (nnir-artitem-rsv y)))))))))
1213 (defun nnir-run-namazu (query &optional group)
1214 "Run given query against Namazu. Returns a vector of (group name, file name)
1215 pairs (also vectors, actually).
1217 Tested with Namazu 2.0.6 on a GNU/Linux system."
1219 (error "The Namazu backend cannot search specific groups"))
1223 (qstring (cdr (assq 'query query)))
1228 (set-buffer (get-buffer-create nnir-tmp-buffer))
1231 `( ,nnir-namazu-program
1232 nil ; input from /dev/null
1234 nil ; don't redisplay
1235 "-q" ; don't be verbose
1236 "-a" ; show all matches
1237 "-s" ; use short format
1238 ,@nnir-namazu-additional-switches
1239 ,qstring ; the query, in namazu format
1240 ,nnir-namazu-index-directory ; index directory
1243 (let ((process-environment (copy-sequence process-environment)))
1245 (dolist (env process-environment)
1246 (when (string-match "\
1247 \\`\\(L\\(ANG\\|C_\\(ALL\\|CTYPE\\|COLLATE\\|TIME\\|NUMERIC\\|MONETARY\\|MESSAGES\\)\\)\\)=" env)
1248 (setenv (match-string 1 env) nil)))
1250 (message "%s args: %s" nnir-namazu-program
1251 (mapconcat 'identity (cddddr cp-list) " "))
1252 (apply 'call-process cp-list))))
1253 (unless (or (null exitstatus)
1255 (nnheader-report 'nnir "Couldn't run namazu: %s" exitstatus)
1256 ;; Namazu failure reason is in this buffer, show it if
1257 ;; the user wants it.
1258 (when (> gnus-verbose 6)
1259 (display-buffer nnir-tmp-buffer))))
1261 ;; Namazu output looks something like this:
1262 ;; 2. Re: Gnus agent expire broken (score: 55)
1263 ;; /home/henrik/Mail/mail/sent/1310 (4,138 bytes)
1265 (goto-char (point-min))
1266 (while (re-search-forward
1267 "^\\([0-9]+\\.\\).*\\((score: \\([0-9]+\\)\\))\n\\([^ ]+\\)"
1269 (setq score (match-string 3)
1270 group (file-name-directory (match-string 4))
1271 article (file-name-nondirectory (match-string 4)))
1273 ;; make sure article and group is sane
1274 (when (and (string-match "^[0-9]+$" article)
1276 (when (string-match (concat "^" nnir-namazu-remove-prefix) group)
1277 (setq group (replace-match "" t t group)))
1279 ;; remove trailing slash from groupname
1280 (setq group (substring group 0 -1))
1282 ;; stuff results into artlist vector
1283 (push (vector (substitute ?. ?/ group)
1284 (string-to-int article)
1285 (string-to-int score)) artlist)))
1287 ;; sort artlist by score
1290 (function (lambda (x y)
1291 (> (nnir-artitem-rsv x)
1292 (nnir-artitem-rsv y)))))))))
(defun nnir-read-parms (query)
  "Read the extra search parameters of the current search engine.
The entry for `nnir-search-engine' is looked up in `nnir-engines'; its
third element is taken as a list of parameter specs, each of which is
handed to `nnir-read-parm'.  Return an alist whose first element is
\(query . QUERY), followed by the results of `nnir-read-parm' in the
order of the specs."
  (let* ((engine-entry (assoc nnir-search-engine nnir-engines))
         ;; Third element of the engine entry: the parameter specs.
         (extra-parms (mapcar 'nnir-read-parm (nth 2 engine-entry))))
    (cons (cons 'query query) extra-parms)))
(defun nnir-read-parm (parmspec)
  "Read one search parameter with `read-string'.
PARMSPEC is a cons cell (SYMBOL . PROMPT).  PROMPT is passed to
`read-string'; the return value is (SYMBOL . STRING), where STRING is
whatever `read-string' returned."
  (cons (car parmspec)
        (read-string (cdr parmspec))))
1309 (defun nnir-run-query (query)
1310 "Invoke appropriate search engine function (see `nnir-engines').
1311 If some groups were process-marked, run the query for each of the groups
1312 and concat the results."
1313 (let ((search-func (cadr (assoc nnir-search-engine nnir-engines)))
1314 (q (car (read-from-string query))))
1315 (if gnus-group-marked
1318 (funcall search-func q x))
1320 (funcall search-func q nil))))
(defun nnir-group-full-name (shortname)
  "Return the full Gnus group name for SHORTNAME.
SHORTNAME and the value of `nnir-mail-backend' are passed to
`gnus-group-prefixed-name', whose result is returned unchanged."
  (let ((backend nnir-mail-backend))
    (gnus-group-prefixed-name shortname backend)))
(defun nnir-possibly-change-server (server)
  "Open SERVER with `nnir-open-server' unless it is already open.
Nothing is done, and nil is returned, when SERVER is non-nil and
`nnir-server-opened' returns non-nil for it.  Otherwise \(including
when SERVER is nil) return the value of `nnir-open-server'."
  (when (or (null server)
            (not (nnir-server-opened server)))
    (nnir-open-server server)))
1332 ;; Data type article list.
1334 (defun nnir-artlist-length (artlist)
1335 "Returns number of articles in artlist."
(defun nnir-artlist-article (artlist n)
  "Return the Nth artitem of ARTLIST, where the first artitem is number 1.
The element is fetched with `elt', so ARTLIST may be any sequence that
`elt' accepts."
  ;; Callers count from 1, `elt' counts from 0.
  (let ((index (- n 1)))
    (elt artlist index)))
1342 (defun nnir-artitem-group (artitem)
1343 "Returns the group from the ARTITEM."
(defun nnir-artlist-artitem-group (artlist n)
  "Return the group of the Nth artitem in ARTLIST (the first one is number 1).
The artitem is fetched with `nnir-artlist-article' and its group is
extracted with `nnir-artitem-group'."
  (let ((artitem (nnir-artlist-article artlist n)))
    (nnir-artitem-group artitem)))
1350 (defun nnir-artitem-number (artitem)
1351 "Returns the number from the ARTITEM."
(defun nnir-artlist-artitem-number (artlist n)
  "Return the number of the Nth artitem in ARTLIST (the first one is number 1).
The artitem is fetched with `nnir-artlist-article' and its number is
extracted with `nnir-artitem-number'."
  (let ((artitem (nnir-artlist-article artlist n)))
    (nnir-artitem-number artitem)))
1358 (defun nnir-artitem-rsv (artitem)
1359 "Returns the Retrieval Status Value (RSV, score) from the ARTITEM."
(defun nnir-artlist-artitem-rsv (artlist n)
  "Return the Retrieval Status Value of the Nth artitem in ARTLIST.
Artitems are counted from 1.  The artitem is fetched with
`nnir-artlist-article' and its Retrieval Status Value (RSV, score) is
extracted with `nnir-artitem-rsv'."
  (let ((artitem (nnir-artlist-article artlist n)))
    (nnir-artitem-rsv artitem)))
1368 (defun nnir-artlist-groups (artlist)
1369 "Returns a list of all groups in the given ARTLIST."
1372 ;; from each artitem, extract group component
1373 (setq with-dups (mapcar 'nnir-artitem-group artlist))
1374 ;; remove duplicates from above
1375 (mapcar (function (lambda (x) (add-to-list 'res x)))