01: package it.unimi.dsi.mg4j.index;
02:
03: /*
04: * MG4J: Managing Gigabytes for Java
05: *
06: * Copyright (C) 2005-2007 Sebastiano Vigna
07: *
08: * This library is free software; you can redistribute it and/or modify it
09: * under the terms of the GNU Lesser General Public License as published by the Free
10: * Software Foundation; either version 2.1 of the License, or (at your option)
11: * any later version.
12: *
13: * This library is distributed in the hope that it will be useful, but
14: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
16: * for more details.
17: *
18: * You should have received a copy of the GNU Lesser General Public License
19: * along with this program; if not, write to the Free Software
20: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
21: *
22: */
23:
24: import it.unimi.dsi.lang.FlyweightPrototype;
25: import it.unimi.dsi.lang.MutableString;
26:
27: import java.io.Serializable;
28:
29: /** A term processor, implementing term/prefix transformation and possibly term/prefix filtering.
30: *
31: * <p>Index contruction requires sometimes modifications of
32: * the given terms: downcasing, stemming, and so on. The same
33: * transformation must be applied to terms in a query. This
34: * interface provides a uniform way to perform arbitrary term
35: * transformations.
36: *
37: * <p>Index construction requires also term filtering:
38: * {@link #processTerm(MutableString)} may
39: * return false, indicating that the term should not
40: * be processed at all (e.g., because it is a stopword).
41: *
42: * <p>Additionally, the method {@link #processPrefix(MutableString)} may
43: * process analogously a prefix (used for prefix queries).
44: *
45: * <p>Implementation are encouraged to expose a singleton, when
46: * possible, by means of the static factory method <code>getInstance()</code>.
47: *
48: * <strong>Warning</strong>: implementations of this class are not required
49: * to be thread-safe, but they provide {@link it.unimi.dsi.lang.FlyweightPrototype flyweight copies}.
50: * The {@link #copy()} method is strengthened so to return a instance of this class.
51: *
52: * <p>This interface was originally suggested by Fabien Campagne.
53: */
54: public interface TermProcessor extends Serializable,
55: FlyweightPrototype<TermProcessor> {
56: /** Processes the given term, leaving the result in the same mutable string.
57: *
58: * @param term a mutable string containing the term to be processed,
59: * or <code>null</code>.
60: * @return true if the term is not <code>null</code> and should be indexed, false otherwise.
61: */
62: public boolean processTerm(MutableString term);
63:
64: /** Processes the given prefix, leaving the result in the same mutable string.
65: *
66: * <p>This method is not used during the indexing phase, but rather at query
67: * time. If the user wants to specify a prefix query, it is sometimes necessary
68: * to transform the prefix
69: * (e.g., {@linkplain DowncaseTermProcessor#processPrefix(MutableString)} downcasing it).
70: *
71: * <p>It is of course unlikely that this method returns false, as it is usually not
72: * possible to foresee which are the prefixes of indexable words. In case no natural
73: * transformation applies, this method should leave its argument unchanged.
74: *
75: * @param prefix a mutable string containing a prefix to be processed,
76: * or <code>null</code>.
77: * @return true if the prefix is not <code>null</code> and there might be an indexed
78: * word starting with <code>prefix</code>, false otherwise.
79: */
80: public boolean processPrefix(MutableString prefix);
81:
82: public TermProcessor copy();
83: }
|