001: package test.it.unimi.dsi.mg4j.index;
002:
003: import it.unimi.dsi.fastutil.ints.IntIterator;
004: import it.unimi.dsi.mg4j.index.BitStreamIndex;
005: import it.unimi.dsi.mg4j.index.DiskBasedIndex;
006: import it.unimi.dsi.mg4j.index.Index;
007: import it.unimi.dsi.mg4j.index.IndexIterator;
008: import it.unimi.dsi.mg4j.index.MultiTermIndexIterator;
009: import it.unimi.dsi.mg4j.query.nodes.Query;
010: import it.unimi.dsi.mg4j.query.nodes.QueryBuilderVisitorException;
011: import it.unimi.dsi.mg4j.query.parser.QueryParserException;
012: import it.unimi.dsi.mg4j.query.parser.SimpleParser;
013: import it.unimi.dsi.mg4j.search.DocumentIterator;
014: import it.unimi.dsi.mg4j.search.DocumentIteratorBuilderVisitor;
015: import it.unimi.dsi.util.Interval;
016: import it.unimi.dsi.mg4j.search.OrDocumentIterator;
017: import it.unimi.dsi.mg4j.search.visitor.AbstractDocumentIteratorVisitor;
018: import it.unimi.dsi.mg4j.search.visitor.DocumentIteratorVisitor;
019: import it.unimi.dsi.mg4j.tool.IndexBuilder;
020:
021: import java.io.File;
022: import java.io.IOException;
023: import java.lang.reflect.InvocationTargetException;
024: import java.net.URISyntaxException;
025:
026: import junit.framework.TestCase;
027:
028: import org.apache.commons.configuration.ConfigurationException;
029:
030: import test.it.unimi.dsi.mg4j.document.StringArrayDocumentCollection;
031: import test.it.unimi.dsi.mg4j.search.IntArrayIndexIterator;
032:
033: public class MultiTermIndexIteratorTest extends TestCase {
034: private BitStreamIndex index;
035: private SimpleParser simpleParser;
036:
037: public void setUp() throws ConfigurationException,
038: SecurityException, IOException, URISyntaxException,
039: ClassNotFoundException, InstantiationException,
040: IllegalAccessException, InvocationTargetException,
041: NoSuchMethodException {
042:
043: String basename = File.createTempFile(
044: getClass().getSimpleName(), "test").getCanonicalPath();
045: new IndexBuilder(basename, new StringArrayDocumentCollection(
046: "a", "b", "c")).run();
047: index = DiskBasedIndex.getInstance(basename + "-text", true,
048: true);
049: simpleParser = new SimpleParser(index.termProcessor);
050: }
051:
052: public void testSkipBug() throws QueryParserException,
053: QueryBuilderVisitorException, IOException {
054: Query query = simpleParser.parse("a + b + c");
055: DocumentIteratorBuilderVisitor documentIteratorBuilderVisitor = new DocumentIteratorBuilderVisitor(
056: null, index, Integer.MAX_VALUE);
057: DocumentIterator documentIterator = query
058: .accept(documentIteratorBuilderVisitor);
059: assertEquals(2, documentIterator.skipTo(2));
060: documentIterator.dispose();
061: }
062:
063: public void test() throws IOException {
064: IndexIterator i0 = new IntArrayIndexIterator(new int[] { 0, 1,
065: 2 }, new int[][] { { 0, 3 }, { 0 }, { 0 }, });
066: IndexIterator i1 = new IntArrayIndexIterator(
067: new int[] { 0, 2 }, new int[][] { { 1 }, { 1 }, });
068: IndexIterator i2 = new IntArrayIndexIterator(new int[] { 0, 1,
069: 3 }, new int[][] { { 2 }, { 2 }, { 0 }, });
070: MultiTermIndexIterator multiTermIndexIterator = (MultiTermIndexIterator) MultiTermIndexIterator
071: .getInstance(i0, i1, i2);
072: assertEquals(3, multiTermIndexIterator.frequency());
073:
074: assertTrue(multiTermIndexIterator.hasNext());
075: assertTrue(multiTermIndexIterator.hasNext()); // To increase coverage
076:
077: assertEquals(0, multiTermIndexIterator.nextDocument());
078: assertTrue(multiTermIndexIterator.intervalIterator().hasNext());
079: assertTrue(multiTermIndexIterator.intervalIterator().hasNext()); // To increase coverage
080: assertEquals(Interval.valueOf(0), multiTermIndexIterator
081: .intervalIterator().nextInterval());
082: assertEquals(Interval.valueOf(1), multiTermIndexIterator
083: .intervalIterator().nextInterval());
084: assertTrue(multiTermIndexIterator.intervalIterator().hasNext());
085:
086: assertEquals(4, multiTermIndexIterator.count());
087: int[] position = multiTermIndexIterator.positionArray();
088: assertEquals(0, position[0]);
089: assertEquals(1, position[1]);
090: assertEquals(2, position[2]);
091: assertEquals(3, position[3]);
092:
093: assertEquals(Interval.valueOf(2), multiTermIndexIterator
094: .intervalIterator().nextInterval());
095:
096: position = new int[4];
097: multiTermIndexIterator.positions(position);
098: assertEquals(0, position[0]);
099: assertEquals(1, position[1]);
100: assertEquals(2, position[2]);
101: assertEquals(3, position[3]);
102:
103: assertEquals(Interval.valueOf(3), multiTermIndexIterator
104: .intervalIterator().nextInterval());
105:
106: IntIterator positions = multiTermIndexIterator.positions();
107: assertEquals(0, positions.nextInt());
108: assertEquals(1, positions.nextInt());
109: assertEquals(2, positions.nextInt());
110: assertEquals(3, positions.nextInt());
111: assertFalse(positions.hasNext());
112:
113: assertFalse(multiTermIndexIterator.intervalIterator().hasNext());
114: assertFalse(multiTermIndexIterator.intervalIterator().hasNext()); // To increase coverage
115:
116: assertEquals(1, multiTermIndexIterator.nextDocument());
117: assertTrue(multiTermIndexIterator.intervalIterator().hasNext());
118: assertTrue(multiTermIndexIterator.intervalIterator().hasNext()); // To increase coverage
119: assertEquals(Interval.valueOf(0), multiTermIndexIterator
120: .intervalIterator().nextInterval());
121: assertEquals(Interval.valueOf(2), multiTermIndexIterator
122: .intervalIterator().nextInterval());
123:
124: assertEquals(2, multiTermIndexIterator.count());
125: position = multiTermIndexIterator.positionArray();
126: assertEquals(0, position[0]);
127: assertEquals(2, position[1]);
128: positions = multiTermIndexIterator.positions();
129: assertEquals(0, positions.nextInt());
130: assertEquals(2, positions.nextInt());
131: assertFalse(positions.hasNext());
132:
133: assertFalse(multiTermIndexIterator.intervalIterator().hasNext());
134:
135: assertEquals(2, multiTermIndexIterator.nextDocument());
136: assertTrue(multiTermIndexIterator.intervalIterator().hasNext());
137: assertTrue(multiTermIndexIterator.intervalIterator().hasNext()); // To increase coverage
138: assertEquals(Interval.valueOf(0), multiTermIndexIterator
139: .intervalIterator().nextInterval());
140: assertEquals(Interval.valueOf(1), multiTermIndexIterator
141: .intervalIterator().nextInterval());
142:
143: assertEquals(2, multiTermIndexIterator.count());
144: position = multiTermIndexIterator.positionArray();
145: assertEquals(0, position[0]);
146: assertEquals(1, position[1]);
147: positions = multiTermIndexIterator.positions();
148: assertEquals(0, positions.nextInt());
149: assertEquals(1, positions.nextInt());
150: assertFalse(positions.hasNext());
151:
152: assertFalse(multiTermIndexIterator.intervalIterator().hasNext());
153:
154: // Here we get the iterator of the underlying IndexIterator
155: assertEquals(3, multiTermIndexIterator.nextDocument());
156: assertTrue(multiTermIndexIterator.intervalIterator().hasNext());
157: assertEquals(Interval.valueOf(0), multiTermIndexIterator
158: .intervalIterator().nextInterval());
159:
160: assertEquals(1, multiTermIndexIterator.count());
161: position = multiTermIndexIterator.positionArray();
162: assertEquals(0, position[0]);
163: positions = multiTermIndexIterator.positions();
164: assertEquals(0, positions.nextInt());
165: assertFalse(positions.hasNext());
166:
167: assertFalse(multiTermIndexIterator.intervalIterator().hasNext());
168:
169: // The end
170: assertFalse(multiTermIndexIterator.hasNext());
171: assertFalse(multiTermIndexIterator.hasNext()); // To increase coverage
172: }
173:
174: // Contributed by Fabien Campagne
175: public void testMG4JMultiTermPositionIssue()
176: throws IllegalAccessException, NoSuchMethodException,
177: ConfigurationException, IOException,
178: InvocationTargetException, InstantiationException,
179: ClassNotFoundException, URISyntaxException {
180: String basename = File.createTempFile(
181: getClass().getSimpleName(), "test").getCanonicalPath();
182: new IndexBuilder(basename, new StringArrayDocumentCollection(
183: "A B C D E F F G G", "G A T H S K L J W L",
184: "E S K D L J F K L S J D L S J D", "E B")).run();
185: Index index = DiskBasedIndex.getInstance(basename + "-text",
186: true, true);
187:
188: /// String query = "A| B+C+G|W|S+J";
189: DocumentIterator iterator = OrDocumentIterator.getInstance(
190: index.documents("A"), MultiTermIndexIterator
191: .getInstance(index.documents("B"), index
192: .documents("C"), index.documents("G")),
193: index.documents("W"), MultiTermIndexIterator
194: .getInstance(index.documents("S"), index
195: .documents("J")));
196:
197: final int[] currDoc = new int[1];
198: // A visitor invoking positionArray() on IndexIterators positioned on the current document.
199: DocumentIteratorVisitor visitor = new AbstractDocumentIteratorVisitor() {
200: public boolean visit(IndexIterator indexIterator)
201: throws IOException {
202: if (indexIterator.count() > 0
203: && indexIterator.document() == currDoc[0])
204: indexIterator.positionArray();
205: return true;
206: }
207: };
208:
209: for (int document = 0; document < index.numberOfDocuments; document++) {
210: currDoc[0] = iterator.skipTo(document);
211:
212: if (document == currDoc[0]) {
213: iterator.accept(visitor); // see method visit below.
214: }
215: }
216:
217: while (iterator.hasNext()) {
218: currDoc[0] = iterator.nextDocument();
219: iterator.accept(visitor);
220: }
221: }
222:
223: }
|