dwww Home | Show directory contents | Find package

##
#       SWISH++
#       swish++.conf
#
#       This is a template configuration file.  It contains every configuration
#       variable, lists which executable(s) it's used with, and gives a brief
#       explanation.  Use this as a starting point for your own configuration
#       file.
#
#       Notes:
#       ------
#
#       1. Sets of values are separated by whitespace.
#
#       2. The same variable may be specified on more than one line.  For sets,
#          the value is the union of all the values; for all others, the value
#          is the last value specified.
#
#       3. There is a special "infinity" numeric value.
#
#       4. Boolean values are one of: true, t, false, f, yes, y, no, n.  Case
#          is irrelevant.
#
#       5. COMMENTS ARE TREATED AS SUCH ONLY IF THEY ARE ON LINES BY
#          THEMSELVES!
##

#AssociateMeta          yes
#
# used by: index; when "no", same as the -A option.
#
#       Associate words with meta names during indexing.

#ExcludeClass           no_index
#
# used by: index; same as the -C option.
#
#       For HTML and XHTML files only (as specified by IncludeFile), a set of
#       class names of HTML and XHTML elements whose content text is not to be
#       indexed.

#ExcludeFile            *.gif *.jpg *.png
#
# used by: index, extract; same as the -E option.
#
#       A set of filename patterns of files not to index or extract.  Case is
#       significant.  You normally specify either IncludeFile or ExtractFile
#       -OR- ExcludeFile (whichever is easier to specify), but not both.

# ID3v1.x tag fields
#-------------------
#IncludeMeta            album artist comments genre title

# ID3v2.x tag fields
# ---------- ID3v2.4.0 ---------------- ID3v2.2.0 ------------- ID3v2.3.0 -----
#IncludeMeta comm=comments              com=comments
#IncludeMeta talb=album                 tal=album
#IncludeMeta tcom=composer              tcm=composer
#IncludeMeta tcon=genre                 tco=genre
#IncludeMeta tcop=copyright             tcr=copyright
#IncludeMeta tenc=encoder               ten=encoder
#IncludeMeta text=lyricist              txt=lyricist
#IncludeMeta tipl=people
#IncludeMeta tit1=content               tt1=content
#IncludeMeta tit2=title                 tt2=title
#IncludeMeta tit3=subtitle              tt3=subtitle
#IncludeMeta tmcl=musicians             ipl=musicians           ipls=musicians
#IncludeMeta tmoo=mood
#IncludeMeta toal=original-title        tot=original-title
#IncludeMeta toly=original-lyricist     tol=original-lyricist
#IncludeMeta tope=original-artist       toa=original-artist
#IncludeMeta town=owner
#IncludeMeta tpe1=artist                tp1=artist
#IncludeMeta tpe2=performers            tp2=performers
#IncludeMeta tpe3=conductor             tp3=conductor
#IncludeMeta tpub=publisher             tpb=publisher
#IncludeMeta tsst=set-subtitle
#IncludeMeta txxx=user                  txx=user
#IncludeMeta user=terms
#IncludeMeta sylt=lyrics                slt=lyrics
#IncludeMeta uslt=lyrics                ult=lyrics

# Mail (RFC 822) and News
# -----------------------
#IncludeMeta            Bcc Cc Comments Content-Description From Keywords
#IncludeMeta            Newsgroups Resent-To Subject To

# vCard
# -----
#IncludeMeta            adr=address categories class label=address fn=name
#IncludeMeta            nickname note org role title

# Unix manual pages
# -----------------
#IncludeMeta            author bugs caveats description diagnostics environment
#IncludeMeta            errors examples exit-status files history name notes
#IncludeMeta            options return-value see-also synopsis warnings

#ExcludeMeta            meta1 meta2 metaN
#
# used by: index; same as the -m or -M option, respectively
#
#       For HTML and XHTML files, the values of meta NAME attributes for which
#       the words in the values of the associated CONTENT attribute should be
#       indexed or not indexed, respectively.
#
#       For MP3 files, the values of the ID3 tag field names for which the
#       words in the associated values of the fields should be indexed or not
#       indexed, respectively.
#
#       For mail and news files, the values of the header names for which
#       the words in the associated values of the headers should be indexed or
#       not indexed, respectively.
#
#       For mail files only, the values of the vCard fields for which the words
#       in the associated values of the fields should be indexed or not
#       indexed, respectively.
#
#       For manual page files, the values of the section headings for which the
#       words in the associated sections should be indexed or not indexed,
#       respectively.
#
#       Case is irrelevant.  You normally specify either IncludeMeta or
#       ExcludeMeta (whichever is easier to specify), but not both.
#       
#       By default, for IncludeMeta, words for all meta names are indexed.
#       Specifying at least one meta name via IncludeMeta changes that so that
#       only the words associated with a member of the set of meta names
#       explicitly specified are indexed.
#
#       If you're indexing MP3 files, it is recommended that you use the
#       IncludeMeta values as given above.  If you're indexing mail or news
#       files, it is recommended that you use the IncludeMeta values as given
#       above.
#
#       Additionally, meta names can be reassigned.

#ExtractExtension       txt
#
# used by: extract; same as the -x option.
#
#       The extension to append to filenames during extraction.

#ExtractFile            *.doc *.ppt *.xls
#
# used by: extract; same as the -e option.
#
#       A set of filename patterns of files to extract.  Case is significant.
#       Filename patterns specified here MUST NOT also be specified in
#       ExcludeFile above.
#
#       You should modify the set to include only those that you are actually
#       using for increased performance.

#ExtractFilter          no
#
# used by: extract; when "yes", same as the -f option.
#
#       When "yes", extract a single file to standard output.

#FilesGrow              100
#
# used by: index; same as the -g option.
#
#       The number of files to grow reserved space for when incrementally
#       indexing.  The number may be specified as either an absolute number or
#       a percentage (when a trailing % is present).

#FilesReserve           1000
#
# used by: index; same as the -F option.
#
#       The initial number of files to reserve space for.  During indexing,
#       this can be exceeded without any problem, but there will be a slight
#       performance penalty.
#
#       If you know approximately how many files you have, modify the above
#       value!

#FilterAttachment application/*word     wvText %f @%F.txt
#FilterAttachment application/pdf       pdftotext %f @%F.txt
#FilterAttachment application/postscript        pstotext %f > @%F.txt
#
# used by: index; no option equivalent
#
#       Filter e-mail attachments having certain MIME types prior to indexing.
#       MIME type patterns MUST be specified entirely in lower case.
#
#       See http://www.wvware.com/ for information about the wvText program.
#       See http://www.research.compaq.com/SRC/virtualpaper/pstotext.html for
#       information about the pstotext program.
#
#Edited for Debian
FilterFile *.bz2                bunzip2 -c %f > @%F
FilterFile *.gz         gunzip -c %f > @%F
FilterFile *.Z          uncompress -c %f > @%F
#FilterFile *.doc       antiword %f @%F.txt
#FilterFile *.doc       wvText %f @%F.txt
#FilterFile *.pdf       pdftotext %f @%F.txt
#FilterFile *.ps        pstotext %f > @%F.txt
#
# used by: index, extract; no option equivalent.
#
#       Filter files having certain extensions prior to either indexing or
#       extraction.
#
#       See http://www.wvware.com/ for information about the wvText program.
#       See http://www.research.compaq.com/SRC/virtualpaper/pstotext.html for
#       information about the pstotext program.

#FollowLinks            no
#
# used by: index, extract; same as the -l option.
#
#       Follow symbolic links during indexing or extraction.

#IncludeFile text       *.txt
#IncludeFile HTML       *.asp *.*htm* *.jsp
#IncludeFile ID3        *.mp3
#IncludeFile LaTeX      *.tex
#IncludeFile Mail       *.m
#IncludeFile Man        *.[1-9n] *.[1-9][a-z]
#IncludeFile RTF        *.rtf
#
# used by: index; same as the -e option.
#
#       A set of filename patterns of files to index and the modules they map
#       to.  Case is irrelevant for the module name but significant for the
#       patterns.  Filename patterns specified here MUST NOT also be specified
#       in ExcludeFile.
#
#       You should modify the set to include only those that you are actually
#       using for increased performance.

#Incremental            no
#
# used by: index; when "yes", same as the -I option.
#
#       When "yes", incrementally index files and add them to an existing
#       index.

#IndexFile              swish++.index
#
# used by: index, search; same as the -i option.
#
#       The name of the index file either generated or searched.

#LaunchdCooperation     no
#
# used by: search; same as the -l option
#
#       If "search" is run as a daemon, cooperate with Mac OS X's launchd(8) by
#       not "daemonizing" itself since launchd handles that.  When "yes", this
#       forces "SearchBackground no".
#
#       This option is available only under Mac OS X, should be used only for
#       version 10.4 (Tiger) or later, and only when search will be started via
#       launchd.

#PidFile                        /var/run/search.pid
#
# used by: search; same as the -P option
#
#       If "search" is run as a daemon, record its process ID in this file.

#RecurseSubdirs         yes
#
# used by: index, extract; when "no", same as the -r option.
#
#       When "no", do not recursively index the files in subdirectories, that
#       is, when a directory is encountered, all the files in that directory are
#       indexed (modulo the filename patterns specified via the IncludeFile,
#       ExcludeFile, or ExtractFile variables), but subdirectories encountered
#       are ignored and therefore the files contained in them are not indexed.
#       (This variable is most useful when specifying the directories and files
#       via standard input.)  The default is to index the files in
#       subdirectories recursively.

#ResultsMax             100
#
# used by: search; same as the -m option.
#
#       The maximum number of results to return overriding the compiled-in
#       default (which is usually 100).

#ResultSeparator        " "
#
# used by: search; same as the -R option
#
#       The string to separate the parts in a search result when ResultsFormat
#       is "classic".  Either single or double quotes can be used to preserve
#       whitespace.  Quotes are stripped only if they match.

#ResultsFormat          classic
#
# used by: search; same as the -F option
#
#       The output format of search results: either "classic" or "XML".

#SearchBackground       yes
#
# used by: search; when "no", same as the -B option.
#
#       When "yes" and SearchDaemon is not "none", automatically detach from
#       the terminal and run in the background.
#
#       This option is overridden by "LaunchdCooperation yes".

#SearchDaemon           none
#
# used by: search; same as the -b option.
#
#       When not "none", run "search" as a daemon process listening to either a
#       Unix domain ("unix") or TCP socket ("tcp") or both ("both") for
#       requests.

#SocketAddress          *:1967
#
# used by: search; same as the -a option.
#
#       Default IP address and port of the TCP socket; used only when
#       SearchDaemon is either "tcp" or "both".

#SocketFile             /tmp/search.socket
#
# used by: search; same as the -u option.
#
#       Default name of the Unix domain socket file; used only when
#       SearchDaemon is either "unix" or "both".

#SocketQueueSize                511
#
# used by: search; same as the -q option.
#
#       Maximum number of queued connections for a socket; used only when
#       SearchDaemon is not "none".  The default 511 value is taken from
#       httpd.h in Apache:
#
#               It defaults to 511 instead of 512 because some systems store it
#               as an 8-bit datatype; 512 truncated to 8-bits is 0, while 511
#               is 255 when truncated.
#
#       If it's good enough for Apache, it's good enough for us.

#SocketTimeout          10
#
# used by: search; same as the -o option.
#
#       Number of seconds a client has to complete a search request before
#       being disconnected.  This is to prevent a client from connecting, not
#       completing a request, and causing the thread servicing the request to
#       wait forever.  This is used only when SearchDaemon is not "none".

#StemWords              no
#
# used by: search; when "yes", same as the -s option.
#
#       Perform stemming (suffix stripping) on words during searches.  Words
#       that end in the wildcard character are not stemmed.

#StopWordFile           custom_stop_word_file
#
# used by: index, extract; same as the -s option.
#
#       The name of a file containing the set of stop-words to use instead of
#       the built-in set.

#StoreWordPositions     yes
#
# used by: index; when "no", same as the -P option.
#
#       Store word positions during indexing needed to do "near" searches.
#       Storing said data approximately doubles the size of the generated
#       index.

#TempDirectory          /tmp
#
# used by: index
#
#       Directory to use for temporary files during indexing.  If your OS
#       mounts swap space on /tmp, as indexing progresses and more files get
#       created in /tmp, you will have less swap space, indexing will get
#       slower, and you may run out of memory.  If this is the case, you can
#       specify a directory on a real filesystem, i.e., one on a physical
#       disk.  The directory must exist.

#ThreadsMin             5
#ThreadsMax             100
#
# used by: search; same as the -t or -T option, respectively.
#
#       The minimum/maximum number of simultaneous threads, respectively; used
#       only when SearchDaemon is not "none".

#ThreadTimeout          30
#
# used by: search; same as the -O option.
#
#       Number of seconds until an idle spare thread times out and destroys
#       itself; used only when SearchDaemon is not "none".

#TitleLines             12
#
# used by: index; same as the -t option.
#
#       For HTML and XHTML files only, the maximum number of lines into a file
#       to look at for HTML and XHTML <TITLE> tags.  The default is 12.  Larger
#       numbers slow indexing.

#Verbosity              0
#
# used by: index, extract; same as the -v option.
#
#       Print additional information to standard output during indexing or
#       extraction.  The verbosity levels are 0-4; see index(1) or extract(1)
#       for details.

#WordFilesMax           infinity
#
# used by: index; same as the -f option.
#
#       The maximum number of files a word may occur in before it is discarded
#       as being too frequent.  The default is infinity.

#WordPercentMax         100
#
# used by: index; same as the -p option.
#
#       The maximum percentage of files a word may occur in before it is
#       discarded as being too frequent.  If you want to keep all words
#       regardless, specify 101.

#WordsNear              10
#
# used by: search; same as the -n option.
#
#       The maximum number of words apart two words can be to be considered
#       "near" each other.

#WordThreshold          250000
#
# used by: index; same as the -W option.
#
#       The word count past which partial indices are generated and merged
#       since all the words are too big to fit into memory at the same time.
#       If you index and your machine begins to swap like mad, lower this
#       value.  The above works OK in a 64MB machine.  A rule of thumb is to
#       add 250000 words for each additional 64MB of RAM you have.  These
#       numbers are for a SPARC machine running Solaris.  Other machines
#       running other operating systems use memory differently.  You simply
#       have to experiment.  Only the super-user can specify a value larger
#       than the compiled-in default.

# the end

Generated by dwww version 1.15 on Sun Jun 16 13:43:49 CEST 2024.