001: package it.unimi.dsi.mg4j.search.score;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2004-2007 Paolo Boldi and Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.fastutil.ints.IntIterator;
025: import it.unimi.dsi.fastutil.objects.Reference2DoubleMap;
026: import it.unimi.dsi.mg4j.index.Index;
027: import it.unimi.dsi.mg4j.search.DocumentIterator;
028: import it.unimi.dsi.lang.FlyweightPrototype;
029:
030: import java.io.IOException;
031:
032: /** A wrapper for a {@link DocumentIterator} returning scored document pointers.
033: *
034: * <p>Typically, a scorer may have one or more constructors,
035: * but all scorers should provide a constructor that takes only strings as arguments to
036: * make the instantiation from command-line or similar interfaces easier.
037: *
038: * <p>To be (re)used, a scorer must first {@linkplain #wrap(DocumentIterator) wrap} an
039: * underlying {@link it.unimi.dsi.mg4j.search.DocumentIterator}. This
040: * phase usually involves some preprocessing around properties of the document iterator to
041: * be scored. After wrapping, calls to {@link #nextDocument()} and {@link #score()} (or possibly
042: * {@link #score(Index)}) will return the next document pointer and
043: * its score. Note that these methods are not usually idempotent, as they modify the state of the underlying iterator
044: * (e.g., they consume intervals).
045: *
046: * <p>Scores returned by a scorer might depend on some {@linkplain #setWeights(Reference2DoubleMap) weights}
047: * associated to each index.
048: *
049: * <p>Optionally, a scorer might be a {@link it.unimi.dsi.mg4j.search.score.DelegatingScorer}.
050: *
051: * <p><strong>Warning</strong>: implementations of this interface are not required
052: * to be thread-safe, but they provide {@linkplain it.unimi.dsi.lang.FlyweightPrototype flyweight copies}.
053: * The {@link #copy()} method is strengthened so to return an object implementing this interface.
054: */
055: public interface Scorer extends IntIterator, FlyweightPrototype<Scorer> {
056:
057: /** Returns a score for the current document of the last document iterator
058: * given to {@link #wrap(DocumentIterator)}.
059: *
060: * @return the score.
061: */
062: public double score() throws IOException;
063:
064: /** Returns a score for the current document of the last document iterator
065: * given to {@link #wrap(DocumentIterator)}, but
066: * considering only a given index (optional operation).
067: *
068: * @param index the only index to be considered.
069: * @return the score.
070: */
071: public double score(Index index) throws IOException;
072:
073: /** Sets the weight map for this scorer (if applicable).
074: *
075: * <p>The given map will be copied internally and can be used by
076: * the caller without affecting the scorer behaviour. Implementing classes
077: * should rescale the weights so that they have sum equal to one.
078: *
079: * <p>Indices <em>not</em> appearing in the map will have weight equal to 0.
080: *
081: * @param index2Weight a map from indices to weights.
082: * @return true if this scorer supports weights.
083: */
084: public boolean setWeights(Reference2DoubleMap<Index> index2Weight);
085:
086: /** Wraps a document iterator and prepares the internal state of this scorer to work with it.
087: *
088: * <p>Subsequent calls to {@link #score()} and {@link #score(Index)} will use
089: * <code>d</code> to compute the score.
090: *
091: * @param documentIterator the document iterator that will be used in subsequent calls to
092: * {@link #score()} and {@link #score(Index)}.
093: */
094: public void wrap(DocumentIterator documentIterator)
095: throws IOException;
096:
097: /** Whether this scorer uses intervals.
098: *
099: * <p>This method is essential when {@linkplain AbstractAggregator aggregating scorers},
100: * because if several scores need intervals, a {@link it.unimi.dsi.mg4j.search.CachingDocumentIterator}
101: * will be necessary.
102: *
103: * @return true if this scorer uses intervals.
104: */
105: public boolean usesIntervals();
106:
107: /** Returns the next document.
108: *
109: * @deprecated As of MG4J 1.2, the suggested way of iterating over scorer
110: * is {@link #nextDocument()}, which provides fully lazy
111: * iteration. After a couple of releases, however, this annotation will be removed, as it
112: * is very practical to have scorers implementing {@link IntIterator}. Its
113: * main purpose is to let people know about {@link #nextDocument()}, which solves
114: * the same issues as {@link DocumentIterator#nextDocument()}.
115: * @see #nextDocument()
116: */
117: @Deprecated
118: public int nextInt();
119:
120: /** Returns the next document provided by this scorer, or -1 if no more documents are available.
121: *
122: * @return the next document, or -1 if no more documents are available.
123: */
124: public int nextDocument() throws IOException;
125:
126: public Scorer copy();
127: }
|