001: package it.unimi.dsi.mg4j.index;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2005-2007 Paolo Boldi and Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.io.SafelyCloseable;
025: import it.unimi.dsi.util.StringMap;
026:
027: import java.io.IOException;
028:
029: /** Provides access to an inverted index.
030: *
031: * <P>An {@link it.unimi.dsi.mg4j.index.Index} contains global read-only metadata. To get actual data
032: * from an index, you need to get an index reader <i>via</i> a call to {@link Index#getReader()}. Once
033: * you have an index reader, you can ask for the {@linkplain #documents(CharSequence) documents matching a term}.
034: *
035: * <p>Alternatively, you can perform a <em>read-once scan</em> of the index calling {@link #nextIterator()},
036: * which will return in order the {@linkplain IndexIterator index iterators} of all terms of the underlying index.
037: * More generally, {@link #nextIterator()} returns an iterator positioned at the start of the inverted
038: * list of the term after the current one. When called just after the reader creation, it returns an
039: * index iterator for the first term.
040: *
041: * <p><strong>Warning:</strong> An index reader is exactly what it looks like—a <em>reader</em>. It
042: * cannot be used by many threads at the same time, and all its access methods are exclusive: if you
043: * obtain a {@linkplain #documents(int) document iterator}, the previous one is no longer valid. However,
044: * you can generate many readers, and use them concurrently.
045: *
046: * <p><strong>Warning:</strong> Invoking the {@link it.unimi.dsi.mg4j.search.DocumentIterator#dispose()} method
047: * on iterators returned by an instance of this class will invoke {@link #close()} on the instance, thus
048: * making the instance no longer accessible. This behaviour is necessary to handle cases in which a
049: * reader is created on-the-fly just to create an iterator.
050: *
051: * <P><strong>Warning:</strong> As of MG4J 1.2, direct (i.e., bit-level) access to an inverted index is no longer possible.
052: *
053: * @author Paolo Boldi
054: * @author Sebastiano Vigna
055: * @since 1.0
056: */
057:
058: public interface IndexReader extends SafelyCloseable {
059:
060: /** Returns a document iterator over the documents containing a term.
061: *
062: * <p>Note that the index iterator returned by this method will
063: * return <code>null</code> on a call to {@link IndexIterator#term() term()}.
064: *
065: * <p>Note that it is <em>always</em> possible
066: * to call this method with argument 0, even if the underlying index
067: * does not provide random access.
068: *
069: * @param termNumber the number of a term.
070: * @throws UnsupportedOperationException if this index reader is not accessible by term
071: * number.
072: */
073: public IndexIterator documents(int termNumber) throws IOException;
074:
075: /** Returns an index iterator over the documents containing a term; the term is
076: * given explicitly.
077: *
078: * <p>Unless the {@linkplain Index#termProcessor term processor} of
079: * the associated index is <code>null</code>, words coming from a query will
080: * have to be processed before being used with this method.
081: *
082: * <p>Note that the index iterator returned by this method will
083: * return <code>term</code> on a call to {@link IndexIterator#term() term()}.
084: *
085: * @param term a term (the term will be downcased if the index is case insensitive).
086: * @throws UnsupportedOperationException if the {@linkplain StringMap term map} is not available for the underlying index.
087: */
088: public IndexIterator documents(CharSequence term)
089: throws IOException;
090:
091: /** Returns an {@link IndexIterator} on the term after the current one (optional operation).
092: *
093: * <p>Note that after creation there is no current term. Thus, the first call to this
094: * method will return an {@link IndexIterator} on the first term. As a consequence, repeated
095: * calls to this method provide a way to scan sequentially an index.
096: *
097: * @return the index iterator of the next term, or <code>null</code> if there are no more terms
098: * after the current one.
099: */
100:
101: public IndexIterator nextIterator() throws IOException;
102: }
|