001: package it.unimi.dsi.mg4j.test;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2005-2007 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.fastutil.ints.IntIterator;
025: import it.unimi.dsi.mg4j.index.BitStreamIndex;
026: import it.unimi.dsi.mg4j.index.DiskBasedIndex;
027: import it.unimi.dsi.mg4j.index.Index;
028: import it.unimi.dsi.mg4j.index.IndexIterator;
029: import it.unimi.dsi.mg4j.index.IndexReader;
030: import it.unimi.dsi.mg4j.index.remote.IndexServer;
031: import it.unimi.dsi.util.Interval;
032: import it.unimi.dsi.mg4j.search.IntervalIterator;
033: import it.unimi.dsi.Util;
034:
035: import java.io.FileNotFoundException;
036: import java.io.IOException;
037: import java.lang.reflect.InvocationTargetException;
038: import java.net.InetAddress;
039: import java.net.URISyntaxException;
040:
041: import junit.framework.Assert;
042:
043: import org.apache.log4j.Logger;
044:
045: import com.martiansoftware.jsap.JSAP;
046: import com.martiansoftware.jsap.JSAPException;
047: import com.martiansoftware.jsap.JSAPResult;
048: import com.martiansoftware.jsap.Parameter;
049: import com.martiansoftware.jsap.SimpleJSAP;
050: import com.martiansoftware.jsap.Switch;
051: import com.martiansoftware.jsap.UnflaggedOption;
052:
053: /**Compare IndexIterator of equals indexes.
054: *Given two index basename, IndexIteratorTest compare that every IndexIterator method give the same results.
055: *
056: * @author Alessandro Arrabito
057: */
058: public class RemoteIndexIteratorTest {
059: @SuppressWarnings("unused")
060: private static final Logger LOGGER = Util
061: .getLogger(RemoteIndexIteratorTest.class);
062:
063: /**for start and debug the server too.*/
064: private static final boolean _DEBUG_SERVER = false;
065: private static final boolean _DEBUG_CLUSTER = false;
066: private static BitStreamIndex firstIndex;
067: private static Index secondIndex;
068: private static String firstBaseName;
069: private static String secondBaseName;
070: private static boolean textTerm = false;
071:
072: public static void testIndexIterator() throws IOException {
073: IndexReader firstIndexReader = firstIndex.getReader();
074: IndexReader secondIndexReader = secondIndex.getReader(1000);
075: IndexIterator firstIterator = null;
076: IndexIterator secondIterator = null;
077:
078: for (int i = 0; i < firstIndex.numberOfTerms; i++) {
079: try {
080: System.out.println("term: " + i);
081: firstIterator = firstIndexReader.documents(i);
082: secondIterator = !textTerm ? secondIndexReader
083: .documents(i) : secondIndexReader
084: .documents(firstIndex.termMap.list().get(i));
085:
086: /** Compare hasNext*/
087: Assert.assertEquals(firstIterator.hasNext(),
088: secondIterator.hasNext());
089:
090: /** Compare frequency*/
091: Assert.assertEquals(firstIterator.frequency(),
092: secondIterator.frequency());
093:
094: /** Compare positions & count*/
095: while (firstIterator.hasNext()) {
096: int fr = firstIterator.nextDocument();
097: int sr = secondIterator.nextDocument();
098: Assert.assertEquals(fr, sr);
099: /** Compare count*/
100: Assert.assertEquals(firstIterator.count(),
101: secondIterator.count());
102:
103: int[] firstPos = new int[1000];
104: int[] secondPos = new int[1000];
105: int fRet = firstIterator.positions(firstPos);
106: int sRet = secondIterator.positions(secondPos);
107: System.out.println(fRet + " " + sRet);
108: Assert.assertTrue(fRet == sRet);
109:
110: for (int j = 0; j < fRet; j++)
111: Assert.assertEquals(firstPos[j], secondPos[j]);
112: }
113:
114: /** Compare positions int[] positionArray()*/
115: while (firstIterator.hasNext()) {
116: secondIterator.next();
117: int[] firstPos = firstIterator.positionArray();
118: int[] secondPos = secondIterator.positionArray();
119: Assert
120: .assertTrue(firstPos.length == secondPos.length);
121: for (int j = 0; j < firstPos.length; j++)
122: Assert.assertTrue(firstPos[j] == secondPos[j]);
123: }
124:
125: /** Compare IntIterator from positions() method */
126: firstIterator = firstIndexReader.documents(i);
127: secondIterator = !textTerm ? secondIndexReader
128: .documents(i) : secondIndexReader
129: .documents(firstIndex.termMap.list().get(i));
130: while (firstIterator.hasNext()) {
131: firstIterator.next();
132: secondIterator.next();
133:
134: IntIterator firstIntIt = firstIterator.positions();
135: IntIterator secondIntIt = secondIterator
136: .positions();
137: while (firstIntIt.hasNext()) {
138: Assert.assertEquals(firstIntIt.nextInt(),
139: secondIntIt.nextInt());
140: }
141: Assert.assertEquals(firstIntIt.skip(2), secondIntIt
142: .skip(2));
143: if (firstIntIt.hasNext()) {
144: Assert.assertEquals(firstIntIt.nextInt(),
145: secondIntIt.nextInt());
146: }
147: Assert.assertEquals(firstIntIt.skip(9999999),
148: secondIntIt.skip(9999999));
149: if (firstIntIt.hasNext()) {
150: Assert.assertEquals(firstIntIt.nextInt(),
151: secondIntIt.nextInt());
152: }
153:
154: }
155:
156: /** Compare IntervalIterator from Interval() method */
157: firstIterator = firstIndexReader.documents(i);
158: secondIterator = !textTerm ? secondIndexReader
159: .documents(i) : secondIndexReader
160: .documents(firstIndex.termMap.list().get(i));
161: while (firstIterator.hasNext()) {
162: firstIterator.next();
163: secondIterator.next();
164: /** Compare position IntIterator*/
165: IntervalIterator firstIntervalIt = firstIterator
166: .intervalIterator(firstIndex);
167: IntervalIterator secondIntervalIt = secondIterator
168: .intervalIterator(secondIndex);
169: while (firstIntervalIt.hasNext()) {
170: Interval firstIntv = firstIntervalIt
171: .nextInterval();
172: Interval secondIntv = secondIntervalIt
173: .nextInterval();
174:
175: System.out.println("left:" + firstIntv.left
176: + " " + "right:" + firstIntv.right);
177: Assert.assertEquals(firstIntv.left,
178: secondIntv.left);
179: Assert.assertEquals(firstIntv.right,
180: secondIntv.right);
181: }
182: }
183:
184: } catch (AssertionError ae) {
185: System.out.println("Error on Term:" + i);
186: ae.printStackTrace();
187: }
188: }
189: }
190:
191: public static void main(final String arg[])
192: throws FileNotFoundException, IOException,
193: ClassNotFoundException, IllegalArgumentException,
194: SecurityException, IllegalAccessException, JSAPException,
195: URISyntaxException,
196: org.apache.commons.configuration.ConfigurationException,
197: InterruptedException, InstantiationException,
198: InvocationTargetException, NoSuchMethodException {
199: String[] debugServerArg = new String(
200: "/home/alex/develop/MG4J/alex/sample/DOCS-text mg4j://localhost:9090")
201: .split(" ");
202: String[] debugClusterArg = new String(
203: "-t /home/alex/develop/MG4J/alex/sample/DOCS-text /home/alex/develop/MG4J/alex/sample/DOCS-split")
204: .split(" ");
205:
206: SimpleJSAP jsap = new SimpleJSAP(
207: "java IndexIteratorTest",
208: "Compare IndexIterator of equals indexes."
209: + "\nGiven two index basename, IndexIteratorTest compare that every IndexIterator method give the same results.",
210: new Parameter[] {
211: new UnflaggedOption("basename_1",
212: JSAP.STRING_PARSER, JSAP.REQUIRED,
213: "The basename of the first index."),
214: new Switch("text_term", 't',
215: "use text term during document method invocation on second index"),
216: new UnflaggedOption("basename_2",
217: JSAP.STRING_PARSER, JSAP.REQUIRED,
218: "The basename of the second index.") });
219: JSAPResult jsapResult = jsap
220: .parse(_DEBUG_SERVER ? debugServerArg
221: : (_DEBUG_CLUSTER ? debugClusterArg : arg));
222: if (!(jsapResult.contains("basename_1") && jsapResult
223: .contains("basename_2")))
224: return;
225: firstBaseName = jsapResult.getString("basename_1");
226: secondBaseName = jsapResult.getString("basename_2");
227: firstIndex = DiskBasedIndex.getInstance(firstBaseName, true,
228: true);
229: textTerm = jsapResult.getBoolean("text_term");
230: if (_DEBUG_SERVER) {
231: new Thread() {
232: public void run() {
233: try {
234: IndexServer
235: .start(
236: Index
237: .getInstance(firstBaseName),
238: InetAddress.getLocalHost(),
239: 9090, false);
240: } catch (Exception e) {
241: e.printStackTrace();
242: }
243: }
244: }.start();
245: Thread.sleep(3000);
246: }
247:
248: secondIndex = Index.getInstance(secondBaseName);
249: long startTime = System.currentTimeMillis();
250: testIndexIterator();
251: long endTime = System.currentTimeMillis();
252: System.out.println("Test terminate succesfully in:"
253: + (endTime - startTime) / 1000 + " sec.");
254: }
255: }
/* Test results and conclusions
 *
 * 1 (BUG)  - FastBufferedInputStream bug: during the test a java.lang.ArrayIndexOutOfBoundsException occurs, but this does not
 *            happen with FileInputStream.
 * 2 (?)    - ClientInputStream implements the Repositionable interface, and its position method also calls skip with negative values.
 *            The strange thing is that this implementation works, even though the skip method of the base class InputStream
 *            is only able to skip in the forward direction.
 * 3 (OK)   - RemoteBitStreamIndex passes the test successfully, but the test makes intensive use of the getLong method on the offset
 *            object; a buffered version of RemoteLongList can give better results.
 * 4 (Slow) - The test is very slow with a RemoteIndex.
 * 5 (!)    - The test highlights a limitation of RemoteIndex: the RemoteDocumentIterator cannot exploit buffer availability
 *            to get many values from the server in a single request; this causes problems with other methods that evaluate positions.
 *
 */
|