01: package test.it.unimi.dsi.mg4j.index.cluster;
02:
03: import it.unimi.dsi.bits.Utf16TransformationStrategy;
04: import it.unimi.dsi.fastutil.io.BinIO;
05: import it.unimi.dsi.io.FileLinesCollection;
06: import it.unimi.dsi.mg4j.index.CompressionFlags;
07: import it.unimi.dsi.mg4j.index.Index;
08: import it.unimi.dsi.mg4j.index.cluster.DocumentalStrategies;
09: import it.unimi.dsi.mg4j.tool.IndexBuilder;
10: import it.unimi.dsi.mg4j.tool.PartitionDocumentally;
11: import it.unimi.dsi.sux4j.mph.MWHCFunction;
12: import it.unimi.dsi.sux4j.util.ShiftAddXorSignedStringMap;
13: import it.unimi.dsi.logging.ProgressLogger;
14:
15: import java.io.File;
16:
17: import junit.framework.TestCase;
18: import test.it.unimi.dsi.mg4j.document.StringArrayDocumentCollection;
19:
20: public class DocumentalConcatenatedClusterDocumentIteratorTest extends
21: TestCase {
22:
23: public void testSkipToBeyondUsedClusters() throws Exception {
24: /* We test what happens when we skip to a document belonging to a local index larger
25: * than any index in which the term appears. */
26:
27: final String basename = File.createTempFile(
28: getClass().getSimpleName(), "test").getCanonicalPath();
29: new IndexBuilder(basename, new StringArrayDocumentCollection(
30: "A B", "B", "A", "A")).run();
31: BinIO.storeObject(DocumentalStrategies.uniform(2, 4), basename
32: + "-strategy");
33: new PartitionDocumentally(basename + "-text", basename
34: + "-cluster", DocumentalStrategies.uniform(2, 4),
35: basename + "-strategy", 0, 1024,
36: CompressionFlags.DEFAULT_STANDARD_INDEX, true, false,
37: 0, 0, 0, ProgressLogger.DEFAULT_LOG_INTERVAL).run();
38: FileLinesCollection flc;
39: flc = new FileLinesCollection(basename + "-cluster-0.terms",
40: "ASCII");
41: BinIO.storeObject(new ShiftAddXorSignedStringMap(
42: flc.iterator(), new MWHCFunction<CharSequence>(flc,
43: new Utf16TransformationStrategy())), basename
44: + "-cluster-0.termmap");
45: flc = new FileLinesCollection(basename + "-cluster-0.terms",
46: "ASCII");
47: BinIO.storeObject(new ShiftAddXorSignedStringMap(
48: flc.iterator(), new MWHCFunction<CharSequence>(flc,
49: new Utf16TransformationStrategy())), basename
50: + "-cluster-1.termmap");
51: Index index = Index.getInstance(basename + "-cluster");
52: assertEquals(Integer.MAX_VALUE, index.documents("b").skipTo(2));
53: }
54:
55: }
|