Examples

simplesearch.py

 1#!/usr/bin/env python
 2#
 3# Simple command-line search script.
 4#
 5# Copyright (C) 2003 James Aylett
 6# Copyright (C) 2004,2007,2009,2013 Olly Betts
 7#
 8# This program is free software; you can redistribute it and/or
 9# modify it under the terms of the GNU General Public License as
10# published by the Free Software Foundation; either version 2 of the
11# License, or (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, write to the Free Software
20# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
21# USA
22
23import sys
24import xapian
25
26# We require at least two command line arguments.
27if len(sys.argv) < 3:
28    print("Usage: %s PATH_TO_DATABASE QUERY" % sys.argv[0], file=sys.stderr)
29    sys.exit(1)
30
31try:
32    # Open the database for searching.
33    database = xapian.Database(sys.argv[1])
34
35    # Start an enquire session.
36    enquire = xapian.Enquire(database)
37
38    # Combine the rest of the command line arguments with spaces between
39    # them, so that simple queries don't have to be quoted at the shell
40    # level.
41    query_string = str.join(' ', sys.argv[2:])
42
43    # Parse the query string to produce a Xapian::Query object.
44    qp = xapian.QueryParser()
45    stemmer = xapian.Stem("english")
46    qp.set_stemmer(stemmer)
47    qp.set_database(database)
48    qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
49    query = qp.parse_query(query_string)
50    print("Parsed query is: %s" % str(query))
51
52    # Find the top 10 results for the query.
53    enquire.set_query(query)
54    matches = enquire.get_mset(0, 10)
55
56    # Display the results.
57    print("%i results found." % matches.get_matches_estimated())
58    print("Results 1-%i:" % matches.size())
59
60    for m in matches:
61        print("%i: %i%% docid=%i [%s]" % (m.rank + 1, m.percent, m.docid, m.document.get_data().decode('utf-8')))
62
63except Exception as e:
64    print("Exception: %s" % str(e), file=sys.stderr)
65    sys.exit(1)

simpleindex.py

 1#!/usr/bin/env python
 2#
 3# Index each paragraph of a text file as a Xapian document.
 4#
 5# Copyright (C) 2003 James Aylett
 6# Copyright (C) 2004,2007,2013,2014 Olly Betts
 7#
 8# This program is free software; you can redistribute it and/or
 9# modify it under the terms of the GNU General Public License as
10# published by the Free Software Foundation; either version 2 of the
11# License, or (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, write to the Free Software
20# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
21# USA
22
23import sys
24import xapian
25import string
26
27if len(sys.argv) != 2:
28    print("Usage: %s PATH_TO_DATABASE" % sys.argv[0], file=sys.stderr)
29    sys.exit(1)
30
31try:
32    # Open the database for update, creating a new database if necessary.
33    database = xapian.WritableDatabase(sys.argv[1], xapian.DB_CREATE_OR_OPEN)
34
35    indexer = xapian.TermGenerator()
36    stemmer = xapian.Stem("english")
37    indexer.set_stemmer(stemmer)
38
39    para = ''
40    try:
41        for line in sys.stdin:
42            line = line.strip()
43            if line == '':
44                if para != '':
45                    # We've reached the end of a paragraph, so index it.
46                    doc = xapian.Document()
47                    doc.set_data(para)
48
49                    indexer.set_document(doc)
50                    indexer.index_text(para)
51
52                    # Add the document to the database.
53                    database.add_document(doc)
54                    para = ''
55            else:
56                if para != '':
57                    para += ' '
58                para += line
59    except StopIteration:
60        pass
61
62except Exception as e:
63    print("Exception: %s" % str(e), file=sys.stderr)
64    sys.exit(1)

simpleexpand.py

 1#!/usr/bin/env python
 2#
 3# Simple example script demonstrating query expansion.
 4#
 5# Copyright (C) 2003 James Aylett
 6# Copyright (C) 2004,2006,2007,2012,2013,2014 Olly Betts
 7#
 8# This program is free software; you can redistribute it and/or
 9# modify it under the terms of the GNU General Public License as
10# published by the Free Software Foundation; either version 2 of the
11# License, or (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, write to the Free Software
20# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
21# USA
22
23import sys
24import xapian
25
26# We require at least two command line arguments.
27if len(sys.argv) < 3:
28    print("Usage: %s PATH_TO_DATABASE QUERY [-- [DOCID...]]" % sys.argv[0], file=sys.stderr)
29    sys.exit(1)
30
31try:
32    # Open the database for searching.
33    database = xapian.Database(sys.argv[1])
34
35    # Start an enquire session.
36    enquire = xapian.Enquire(database)
37
38    # Combine command line arguments up to "--" with spaces between
39    # them, so that simple queries don't have to be quoted at the shell
40    # level.
41    query_string = sys.argv[2]
42    index = 3
43    while index < len(sys.argv):
44        arg = sys.argv[index]
45        index += 1
46        if arg == '--':
47            # Passed marker, move to parsing relevant docids.
48            break
49        query_string += ' '
50        query_string += arg
51
52    # Create an RSet with the listed docids in.
53    reldocs = xapian.RSet()
54    for index in range(index, len(sys.argv)):
55        reldocs.add_document(int(sys.argv[index]))
56
57    # Parse the query string to produce a Xapian::Query object.
58    qp = xapian.QueryParser()
59    stemmer = xapian.Stem("english")
60    qp.set_stemmer(stemmer)
61    qp.set_database(database)
62    qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
63    query = qp.parse_query(query_string)
64
65    if not query.empty():
66        print("Parsed query is: %s" % str(query))
67
68        # Find the top 10 results for the query.
69        enquire.set_query(query)
70        matches = enquire.get_mset(0, 10, reldocs)
71
72        # Display the results.
73        print("%i results found." % matches.get_matches_estimated())
74        print("Results 1-%i:" % matches.size())
75
76        for m in matches:
77            print("%i: %i%% docid=%i [%s]" % (m.rank + 1, m.percent, m.docid, m.document.get_data()))
78
79    # Put the top 5 (at most) docs into the rset if rset is empty
80    if reldocs.empty():
81        rel_count = 0
82        for m in matches:
83            reldocs.add_document(m.docid)
84            rel_count += 1
85            if rel_count == 5:
86                break
87
88    # Get the suggested expand terms
89    eterms = enquire.get_eset(10, reldocs)
90    print("%i suggested additional terms" % eterms.size())
91    for k in eterms:
92        print("%s: %f" % (k.term, k.weight))
93
94except Exception as e:
95    print("Exception: %s" % str(e), file=sys.stderr)
96    sys.exit(1)

simplematchdecider.py

 1#!/usr/bin/env python
 2#
 3# Simple command-line match decider example
 4#
 5# Copyright (C) 2003 James Aylett
 6# Copyright (C) 2004,2007,2009,2013 Olly Betts
 7#
 8# This program is free software; you can redistribute it and/or
 9# modify it under the terms of the GNU General Public License as
10# published by the Free Software Foundation; either version 2 of the
11# License, or (at your option) any later version.
12#
13# This program is distributed in the hope that it will be useful,
14# but WITHOUT ANY WARRANTY; without even the implied warranty of
15# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16# GNU General Public License for more details.
17#
18# You should have received a copy of the GNU General Public License
19# along with this program; if not, write to the Free Software
20# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
21# USA
22
23import sys
24import xapian
25
26# This example runs a query like simplesearch does, but uses a MatchDecider
27# (mymatchdecider) to discard any document for which value 0 is equal to
28# the string passed as the second command line argument.
29
30if len(sys.argv) < 4:
31    print("Usage: %s PATH_TO_DATABASE AVOID_VALUE QUERY" % sys.argv[0], file=sys.stderr)
32    sys.exit(1)
33
34class mymatchdecider(xapian.MatchDecider):
35    def __init__(self, avoidvalue):
36        xapian.MatchDecider.__init__(self)
37        self.avoidvalue = avoidvalue
38
39    def __call__(self, doc):
40        return doc.get_value(0) != self.avoidvalue
41
42try:
43    # Open the database for searching.
44    database = xapian.Database(sys.argv[1])
45
46    # Start an enquire session.
47    enquire = xapian.Enquire(database)
48
49    # Combine the rest of the command line arguments with spaces between
50    # them, so that simple queries don't have to be quoted at the shell
51    # level.
52    avoid_value = sys.argv[2]
53    query_string = str.join(' ', sys.argv[3:])
54
55    # Parse the query string to produce a Xapian::Query object.
56    qp = xapian.QueryParser()
57    stemmer = xapian.Stem("english")
58    qp.set_stemmer(stemmer)
59    qp.set_database(database)
60    qp.set_stemming_strategy(xapian.QueryParser.STEM_SOME)
61    query = qp.parse_query(query_string)
62    print("Parsed query is: %s" % str(query))
63
64    # Find the top 10 results for the query.
65    enquire.set_query(query)
66    mdecider = mymatchdecider(avoid_value)
67    matches = enquire.get_mset(0, 10, None, mdecider)
68
69    # Display the results.
70    print("%i results found." % matches.get_matches_estimated())
71    print("Results 1-%i:" % matches.size())
72
73    for m in matches:
74        print("%i: %i%% docid=%i [%s]" % (m.rank + 1, m.percent, m.docid, m.document.get_data()))
75
76except Exception as e:
77    print("Exception: %s" % str(e), file=sys.stderr)
78    sys.exit(1)