001: package it.unimi.dsi.mg4j.index.cluster;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2006-2007 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023: import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
024: import it.unimi.dsi.fastutil.objects.ReferenceArraySet;
025: import it.unimi.dsi.fastutil.objects.ReferenceSet;
026: import it.unimi.dsi.mg4j.index.Index;
027: import it.unimi.dsi.mg4j.search.AbstractDocumentIterator;
028: import it.unimi.dsi.mg4j.search.DocumentIterator;
029: import it.unimi.dsi.mg4j.search.IntervalIterator;
030: import it.unimi.dsi.mg4j.search.IntervalIterators;
031: import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;
032:
033: import java.io.IOException;
034:
035: import cern.colt.Arrays;
036:
037: /** A document iterator concatenating iterators from local indices.
038: *
039: * @author Alessandro Arrabito
040: * @author Sebastiano Vigna
041: */
042:
043: public class DocumentalConcatenatedClusterDocumentIterator extends
044: AbstractDocumentIterator implements DocumentIterator {
045: private static final boolean DEBUG = false;
046: private static final boolean ASSERTS = false;
047:
048: /** The component document iterators. */
049: final protected DocumentIterator[] documentIterator;
050: /** The number of component iterators. */
051: final protected int n;
052: /** The indices corresponding to each underlying document iterator. */
053: protected final int[] documentIteratorIndex;
054: /** The cached strategy of the index we refer to. */
055: protected final ContiguousDocumentalStrategy strategy;
056: /** The current iterator (an index into {@link #documentIterator}). If it is equal to {@link #n},
057: * it means that we hit the end of list on the last document iterator. Otherwise, {@link #last}
058: * contains the last document ever returned or reached by {@link #skipTo(int)}. */
059: protected int currentIterator;
060: /** The last iterator to ever return something (an index into {@link #documentIterator}).*/
061: protected int lastIterator = -1;
062: /** The underlying index reader. */
063: private final DocumentalClusterIndexReader indexReader;
064: /** The set of indices involved in this iterator. */
065: final private ReferenceArraySet<Index> indices = new ReferenceArraySet<Index>();
066:
067: /** Creates a new document iterator for a documental cluster.
068: *
069: * <p>This constructor uses an array of document iterators that it is not required to be full.
070: * This is very useful with rare terms.
071: *
072: * @param indexReader the underlying index reader.
073: * @param documentIterator an array of document iterators.
074: * @param usedIndex an array parallel to <code>documentIterator</code> containing the number
075: * of the indices corresponding to the iterators.
076: */
077:
078: public DocumentalConcatenatedClusterDocumentIterator(
079: final DocumentalClusterIndexReader indexReader,
080: final DocumentIterator[] documentIterator, int[] usedIndex) {
081: this .documentIterator = documentIterator;
082: this .n = documentIterator.length;
083: this .indexReader = indexReader;
084: this .documentIteratorIndex = usedIndex;
085: this .strategy = (ContiguousDocumentalStrategy) indexReader.index.strategy;
086: for (int i = n; i-- != 0;) {
087: if (!documentIterator[i].hasNext())
088: throw new IllegalArgumentException(
089: "All component document iterators must be nonempty");
090: indices.addAll(documentIterator[i].indices());
091: }
092: }
093:
094: public IntervalIterator intervalIterator() throws IOException {
095: return documentIterator[lastIterator].intervalIterator();
096: }
097:
098: public IntervalIterator intervalIterator(Index index)
099: throws IOException {
100: if (!indices.contains(index))
101: return IntervalIterators.TRUE;
102: return documentIterator[lastIterator].intervalIterator(index);
103: }
104:
105: public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators()
106: throws IOException {
107: return documentIterator[lastIterator].intervalIterators();
108: }
109:
110: public ReferenceSet<Index> indices() {
111: return indices;
112: }
113:
114: public int skipTo(final int p) throws IOException {
115: if (DEBUG)
116: System.err.println(this + ": Requested to skip to " + p
117: + "...");
118: // In this case we are already beyond p
119: if (last >= p)
120: return last;
121: // In this case, we are already beyond the last iterator
122: if (currentIterator == n)
123: return Integer.MAX_VALUE;
124:
125: next = -1;
126: // Otherwise, first we recover the local index that contains p
127: final int k = strategy.localIndex(p);
128:
129: if (DEBUG)
130: System.err.println(this + ": Moving to local index " + k);
131: if (ASSERTS)
132: assert k >= documentIteratorIndex[currentIterator];
133:
134: // Them we advance currentIterator until we get to index k.
135: while (currentIterator < n
136: && documentIteratorIndex[currentIterator] < k)
137: currentIterator++;
138:
139: // If currentIterator == n, we have been requested to skip to a cluster that does not contain pointers
140: int globalResult = Integer.MAX_VALUE;
141: if (currentIterator < n) {
142: // Now we skip to p inside the only index that might contain it.
143: globalResult = documentIterator[currentIterator]
144: .skipTo(strategy.localPointer(p));
145: if (DEBUG)
146: System.err
147: .println(this + ": Skipped to local pointer "
148: + strategy.localPointer(p)
149: + " in iterator " + currentIterator
150: + "; result: " + globalResult);
151:
152: // If we got to the end of list, the first document beyond p is the first document of the next iterator (if any).
153: if (globalResult == Integer.MAX_VALUE
154: && ++currentIterator < n)
155: globalResult = documentIterator[currentIterator]
156: .nextDocument();
157: }
158:
159: lastIterator = globalResult == Integer.MAX_VALUE ? -1
160: : currentIterator;
161: last = globalResult == Integer.MAX_VALUE ? Integer.MAX_VALUE
162: : strategy.globalPointer(
163: documentIteratorIndex[currentIterator],
164: globalResult);
165: if (DEBUG)
166: System.err.println(this + ": Will return " + last
167: + " (lastIterator=" + lastIterator + ")");
168: return last;
169: }
170:
171: public int nextDocument() throws IOException {
172: if (next >= 0) {
173: last = next;
174: next = -1;
175: return last;
176: }
177:
178: if (currentIterator == n)
179: return last = -1;
180: final int result = documentIterator[currentIterator]
181: .nextDocument();
182: if (result != -1)
183: return last = strategy
184: .globalPointer(
185: documentIteratorIndex[lastIterator = currentIterator],
186: result);
187: currentIterator++;
188: /* Note that we are heavily exploiting the fact that only nonempty
189: * iterators are present. */
190: if (currentIterator < n)
191: return last = strategy.globalPointer(
192: documentIteratorIndex[currentIterator],
193: documentIterator[lastIterator = currentIterator]
194: .nextDocument());
195: return last = -1;
196: }
197:
198: // TODO: examine carefully the state change for lastIterator
199:
200: public boolean accept(final DocumentIteratorVisitor visitor)
201: throws IOException {
202: boolean result = true;
203: for (DocumentIterator d : documentIterator)
204: if (!(result &= d.accept(visitor)))
205: break;
206: return result;
207: }
208:
209: public boolean acceptOnTruePaths(
210: final DocumentIteratorVisitor visitor) throws IOException {
211: return documentIterator[lastIterator]
212: .acceptOnTruePaths(visitor);
213: }
214:
215: public void dispose() throws IOException {
216: indexReader.close();
217: }
218:
219: public String toString() {
220: return this.getClass().getSimpleName()
221: + Arrays.toString(documentIterator);
222: }
223: }
|