001: package it.unimi.dsi.mg4j.index;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2004-2007 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import java.io.IOException;
025:
026: import it.unimi.dsi.fastutil.ints.IntIterator;
027: import it.unimi.dsi.mg4j.index.payload.Payload;
028: import it.unimi.dsi.mg4j.search.DocumentIterator;
029:
030: /** An iterator over an inverted list.
031: *
032: * <P>An index iterator scans the inverted list of an indexed term. Each
033: * integer returned by {@link DocumentIterator#nextDocument() nextDocument()}
034: * is the index of a document containing the
035: * term. If the index contains counts, they can be obtained after each call to
036: * {@link #nextDocument()} using {@link #count()}. Then, if the index contains
037: * positions they can be obtained as an array using {@link #positionArray()}, as
038: * an iterator using {@link #positions()}, or stored into an array using {@link #positions(int[])}.
039: *
040: * <P>Note that this interface extends {@link it.unimi.dsi.mg4j.search.DocumentIterator}.
041: * The intervals returned for a document are exactly length-one intervals
042: * corresponding to the positions returned by {@link #positions()}. If the index
043: * to which an instance of this interface refers does not contain positions, an {@link UnsupportedOperationException}
044: * will be thrown.
045: *
046: */
047:
048: public interface IndexIterator extends DocumentIterator {
049:
050: /** Returns the index over which this iterator is built.
051: *
052: * @return the index over which this iterator is built.
053: */
054: public Index index();
055:
056: /** Returns the number of the term whose inverted list is returned by this index iterator.
057: *
058: * <p>Usually, the term number is automatically set by {@link IndexReader#documents(CharSequence)} or {@link IndexReader#documents(int)}.
059: * Instances of {@link Index.EmptyIndexIterator} can return <code>-1</code>.
060: *
061: * @return the number of the term over which this iterator is built, or -1 for an {@link Index.EmptyIndexIterator}.
062: * @throws IllegalStateException if no term was set when the iterator was created.
063: * @see #term()
064: */
065: public int termNumber();
066:
067: /** Returns the term whose inverted list is returned by this index iterator.
068: *
069: * <p>Usually, the term is automatically set by {@link IndexReader#documents(CharSequence)} or {@link IndexReader#documents(int)}, but you can
070: * supply your own term with {@link #term(CharSequence)}.
071: *
072: * <p>Instances of {@link Index.EmptyIndexIterator} can return <code>null</code>.
073: *
074: * @return the term over which this iterator is built, as a string (possibly <code>null</code> for an {@link Index.EmptyIndexIterator}).
075: * @throws IllegalStateException if no term was set when the iterator was created.
076: * @see #termNumber()
077: */
078: public String term();
079:
080: /** Sets the term whose inverted list is returned by this index iterator.
081: *
082: * <p>Usually, the term is automatically set by {@link Index#documents(CharSequence)}
083: * or by {@link IndexReader#documents(CharSequence)}, but you can
084: * use this method to ensure that {@link #term()} doesn't throw
085: * an exception.
086: *
087: * <p>Instances of {@link Index.EmptyIndexIterator} are allowed to silently
088: * ignore calls to this method.
089: *
090: * @param term a character sequence (that will be defensively copied)
091: * that will be assumed to be the term whose inverted list is returned by this index iterator.
092: */
093: public void term(CharSequence term);
094:
095: /** Returns the frequency, that is, the number of documents that will be returned by this iterator.
096: *
097: * @return the number of documents that will be returned by this iterator.
098: */
099:
100: public int frequency() throws IOException;
101:
102: /** Returns the payload, if any, associated with the current document.
103: *
104: * @return the payload associated with the current document.
105: */
106: public Payload payload() throws IOException;
107:
108: /** Returns the count, that is, the number of occurrences of the term in the current document.
109: *
110: * @return the count (number of occurrences) of the term in the current document.
111: * @throws UnsupportedOperationException if the index of this iterator does not contain counts.
112: */
113:
114: public int count() throws IOException;
115:
116: /** Returns the positions at which the term appears in the current document.
117: *
118: * @return the positions of the current document in which the current term appears.
119: * @throws UnsupportedOperationException if the index of this iterator does not contain positions.
120: */
121: public IntIterator positions() throws IOException;
122:
123: /** Stores the positions at which the term appears in the current document in a given array.
124: *
125: * <P>If the array is not large enough (i.e., it has fewer than {@link #count()} elements),
126: * this method will return a negative number (the opposite of the count).
127: *
128: * @param positions an array that will be used to store positions.
129: * @return the {@linkplain #count() count}; it will have the sign changed if <code>positions</code> cannot
130: * hold all positions.
131: * @throws UnsupportedOperationException if the index of this iterator does not contain positions.
132: */
133: public int positions(int[] positions) throws IOException;
134:
135: /** Returns the positions at which the term appears in the current document in an array.
136: *
137: * <P>Implementations are allowed to return the same array across different calls to this method.
138: *
139: * @return an array whose first {@link #count()} elements contain the document positions.
140: * @throws UnsupportedOperationException if the index of this iterator does not contain positions.
141: */
142: public int[] positionArray() throws IOException;
143:
144: /** Sets the id of this index iterator.
145: *
146: * <p>The <em>id</em> is an integer associated to each index iterator. It has
147: * no specific semantics, and can be used differently in different contexts.
148: * A typical usage pattern, for instance, is using it to assign a unique number to
149: * the index iterators contained in a composite document iterator (say,
150: * numbering consecutively the leaves of the composite).
151: *
152: * <p>Instances of {@link Index.EmptyIndexIterator} are allowed to silently
153: * ignore calls to this method.
154: *
155: * @param id the new id for this index iterator.
156: */
157: public void id(int id);
158:
159: /** Returns the id of this index iterator.
160: *
161: * <p>Instances of {@link Index.EmptyIndexIterator} are allowed to return -1.
162: *
163: * @return the id of this index iterator.
164: * @see #id(int)
165: */
166: public int id();
167: }
|