001: package it.unimi.dsi.mg4j.index;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2004-2007 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.fastutil.ints.IntArrayList;
025: import it.unimi.dsi.fastutil.ints.IntList;
026: import it.unimi.dsi.fastutil.io.BinIO;
027: import it.unimi.dsi.fastutil.longs.LongArrayList;
028: import it.unimi.dsi.fastutil.longs.LongList;
029: import it.unimi.dsi.fastutil.longs.LongLists;
030: import it.unimi.dsi.mg4j.index.CompressionFlags.Coding;
031: import it.unimi.dsi.mg4j.index.CompressionFlags.Component;
032: import it.unimi.dsi.mg4j.index.payload.Payload;
033: import it.unimi.dsi.mg4j.index.Index.UriKeys;
034: import it.unimi.dsi.io.ByteBufferInputStream;
035: import it.unimi.dsi.io.InputBitStream;
036: import it.unimi.dsi.Util;
037: import it.unimi.dsi.util.Properties;
038: import it.unimi.dsi.mg4j.util.SemiExternalOffsetList;
039: import it.unimi.dsi.util.StringMap;
040: import it.unimi.dsi.util.PrefixMap;
041:
042: import java.io.File;
043: import java.io.FileInputStream;
044: import java.io.FileNotFoundException;
045: import java.io.IOException;
046: import java.nio.ByteBuffer;
047: import java.nio.channels.FileChannel.MapMode;
048: import java.util.EnumMap;
049: import java.util.Map;
050:
051: import org.apache.commons.configuration.ConfigurationException;
052: import org.apache.log4j.Logger;
053:
054: /** A static container providing facilities to load an index based on data stored on disk.
055: *
056: * <P>This class contains several useful static methods
057: * such as {@link #readOffsets(InputBitStream, int)} and {@link #readSizes(InputBitStream, int)},
058: * and static factor methods such as {@link #getInstance(CharSequence, boolean, boolean, boolean, EnumMap)}
059: * that take care of reading the properties associated to the index, identify
060: * the correct {@link it.unimi.dsi.mg4j.index.Index} implementation that
061: * should be used to load the index, and load the necessary data into memory.
062: *
063: * <p>As an option, a disk-based index can be <em>loaded</em> into main memory (key: {@link Index.UriKeys#INMEMORY}), returning
064: * an {@link it.unimi.dsi.mg4j.index.InMemoryIndex}/{@link InMemoryHPIndex}, or <em>mapped</em> into main memory (key: {@link Index.UriKeys#MAPPED}),
065: * returning a {@link MemoryMappedIndex}/{@link InMemoryHPIndex} (note that the value assigned to the keys is irrelevant).
066: * In both cases some insurmountable Java problems
067: * prevents using indices whose size exceeds two gigabytes (but see {@link MemoryMappedIndex} for
068: * some elaboration on this topic).
069: *
070: * <p>Moreover, by default the
071: * term-offset list is accessed using a {@link it.unimi.dsi.mg4j.util.SemiExternalOffsetList}
072: * with a step of {@link #DEFAULT_OFFSET_STEP}. This behaviour can be changed using
073: * the URI key {@link UriKeys#OFFSETSTEP}.
074: *
075: * <p>Disk-based indices are the workhorse of MG4J. All other indices (clustered,
076: * remote, etc.) ultimately rely on disk-based indices to provide results.
077: *
078: * <p>Note that not all data produced by {@link it.unimi.dsi.mg4j.tool.Scan} and
079: * by the other indexing utilities are actually necessary to run a disk-based
080: * index. Usually the property file and the index file (plus the positions file,
081: * for {@linkplain BitStreamHPIndex high-performance indices}) are sufficient: if one
082: * needs random access, also the offsets file must be present, and if the
083: * compression method requires document sizes or if sizes are requested explicitly,
084: * also the sizes file must be present. A {@link StringMap}
085: * and possibly a {@link PrefixMap} will be fetched
086: * automatically by {@link #getInstance(CharSequence, boolean, boolean)}
087: * using standard extensions.
088: *
089: * <h2>Thread safety</h2>
090: *
091: * <p>A disk-based index is thread safe as long as the offset list, the size list and
092: * the term/prefix map are. The static factory methods provided by this class load
093: * offsets and sizes using data structures that are thread safe. If you use directly
094: * a constructor, instead, it is your responsability to pass thread-safe data structures.
095: *
096: * @author Sebastiano Vigna
097: * @since 1.1
098: */
099:
100: public class DiskBasedIndex {
101: private static final Logger LOGGER = Util
102: .getLogger(DiskBasedIndex.class);
103: private static final long serialVersionUID = 0;
104:
105: /** The default value for the query parameter {@link Index.UriKeys#OFFSETSTEP}. */
106: public final static int DEFAULT_OFFSET_STEP = 256;
107:
108: /** Standard extension for the index bitstream. */
109: public static final String INDEX_EXTENSION = ".index";
110: /** Standard extension for the positions bitstream of an {@linkplain BitStreamHPIndexWriter high-performance index}. */
111: public static final String POSITIONS_EXTENSION = ".positions";
112: /** Standard extension for the index properties. */
113: public static final String PROPERTIES_EXTENSION = ".properties";
114: /** Standard extension for the file of sizes. */
115: public static final String SIZES_EXTENSION = ".sizes";
116: /** Standard extension for the file of offsets. */
117: public static final String OFFSETS_EXTENSION = ".offsets";
118: /** Standard extension for the file of global counts. */
119: public static final String GLOBCOUNTS_EXTENSION = ".globcounts";
120: /** Standard extension for the file of frequencies. */
121: public static final String FREQUENCIES_EXTENSION = ".frequencies";
122: /** Standard extension for the file of terms. */
123: public static final String TERMS_EXTENSION = ".terms";
124: /** Standard extension for the file of terms, unsorted. */
125: public static final String UNSORTED_TERMS_EXTENSION = ".terms.unsorted";
126: /** Standard extension for the term map. */
127: public static final String TERMMAP_EXTENSION = ".termmap";
128: /** Standard extension for the prefix map. */
129: public static final String PREFIXMAP_EXTENSION = ".prefixmap";
130: /** Standard extension for the stats file. */
131: public static final String STATS_EXTENSION = ".stats";
132:
133: private DiskBasedIndex() {
134: }
135:
136: /** Utility method to load a compressed offset file into a list.
137: *
138: * @param in the input bit stream providing the offsets (see {@link BitStreamIndexWriter}).
139: * @param T the number of terms indexed.
140: * @return a list of longs backed by an array; the list has
141: * an additional final element of index <code>T</code> that gives the number
142: * of bytes of the index file.
143: */
144:
145: public static LongList readOffsets(final InputBitStream in,
146: final int T) throws IOException {
147: final long[] offset = new long[T + 1];
148: LOGGER.debug("Loading offsets...");
149: offset[0] = in.readLongGamma();
150: for (int i = 0; i < T; i++)
151: offset[i + 1] = in.readLongGamma() + offset[i];
152: LOGGER.debug("Completed.");
153: return LongArrayList.wrap(offset);
154: }
155:
156: /** Utility method to load a compressed size file into a list.
157: *
158: * @param in the input bit stream providing the offsets (see {@link BitStreamIndexWriter}).
159: * @param N the number of documents indexed.
160: * @return a list of integers backed by an array.
161: */
162:
163: public static IntList readSizes(final InputBitStream in, final int N)
164: throws IOException {
165: final int[] size = new int[N];
166: LOGGER.debug("Loading sizes...");
167: for (int i = 0; i < N; i++)
168: size[i] = in.readGamma();
169: LOGGER.debug("Completed.");
170: return IntArrayList.wrap(size);
171: }
172:
173: /** Utility static method that loads a term map.
174: *
175: * @param filename the name of the file containing the term map.
176: * @return the map, or <code>null</code> if the file did not exist.
177: * @throws IOException if some IOException (other than {@link FileNotFoundException}) occurred.
178: */
179: @SuppressWarnings("unchecked")
180: public static StringMap<? extends CharSequence> loadStringMap(
181: final String filename) throws IOException {
182: try {
183: return (StringMap<? extends CharSequence>) BinIO
184: .loadObject(filename);
185: } catch (FileNotFoundException e) {
186: return null;
187: } catch (ClassNotFoundException e) {
188: throw new RuntimeException(e);
189: }
190: }
191:
192: /** Utility static method that loads a prefix map.
193: *
194: * @param filename the name of the file containing the prefix map.
195: * @return the map, or <code>null</code> if the file did not exist.
196: * @throws IOException if some IOException (other than {@link FileNotFoundException}) occurred.
197: */
198: @SuppressWarnings("unchecked")
199: public static PrefixMap<? extends CharSequence> loadPrefixMap(
200: final String filename) throws IOException {
201: try {
202: return (PrefixMap<? extends CharSequence>) BinIO
203: .loadObject(filename);
204: } catch (FileNotFoundException e) {
205: return null;
206: } catch (ClassNotFoundException e) {
207: throw new RuntimeException(e);
208: }
209: }
210:
211: /** Returns a new disk-based index, loading exactly the specified parts and using preloaded {@link Properties}.
212: *
213: * @param basename the basename of the index.
214: * @param properties the properties obtained from the given basename.
215: * @param termMap the term map for this index, or <code>null</code> for no term map.
216: * @param prefixMap the prefix map for this index, or <code>null</code> for no prefix map.
217: * @param randomAccess whether the index should be accessible randomly (e.g., if it will
218: * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
219: * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
220: * might be loaded anyway because the compression method for positions requires it).
221: * @param queryProperties a map containing associations between {@link Index.UriKeys} and values, or <code>null</code>.
222: */
223: public static BitStreamIndex getInstance(
224: final CharSequence basename, Properties properties,
225: final StringMap<? extends CharSequence> termMap,
226: final PrefixMap<? extends CharSequence> prefixMap,
227: final boolean randomAccess, final boolean documentSizes,
228: final EnumMap<UriKeys, String> queryProperties)
229: throws ClassNotFoundException, IOException,
230: InstantiationException, IllegalAccessException {
231:
232: // This could be null if old indices contain SkipIndex
233: Class<?> indexClass = null;
234: try {
235: indexClass = Class.forName(properties.getString(
236: Index.PropertyKeys.INDEXCLASS,
237: "(missing index class)"));
238: } catch (Exception ignore) {
239: }
240:
241: File indexFile = new File(basename + INDEX_EXTENSION);
242: if (!indexFile.exists())
243: throw new FileNotFoundException("Cannot find index file "
244: + indexFile.getName());
245:
246: final Map<Component, Coding> flags = CompressionFlags.valueOf(
247: properties.getStringArray(Index.PropertyKeys.CODING),
248: null);
249:
250: final int numberOfDocuments = properties
251: .getInt(Index.PropertyKeys.DOCUMENTS);
252: final int numberOfTerms = properties
253: .getInt(Index.PropertyKeys.TERMS);
254: final long numberOfPostings = properties
255: .getLong(Index.PropertyKeys.POSTINGS);
256: final long numberOfOccurrences = properties
257: .getLong(Index.PropertyKeys.OCCURRENCES);
258: final int maxCount = properties.getInt(
259: Index.PropertyKeys.MAXCOUNT, -1);
260: final String field = properties
261: .getString(Index.PropertyKeys.FIELD);
262:
263: if (termMap != null && termMap.size() != numberOfTerms)
264: throw new IllegalArgumentException(
265: "The size of the term map (" + termMap.size()
266: + ") is not equal to the number of terms ("
267: + numberOfTerms + ")");
268: if (prefixMap != null && prefixMap.size() != numberOfTerms)
269: throw new IllegalArgumentException(
270: "The size of the prefix map (" + prefixMap.size()
271: + ") is not equal to the number of terms ("
272: + numberOfTerms + ")");
273:
274: final Payload payload = (Payload) (properties
275: .containsKey(Index.PropertyKeys.PAYLOADCLASS) ? Class
276: .forName(
277: properties
278: .getString(Index.PropertyKeys.PAYLOADCLASS))
279: .newInstance()
280: : null);
281: final Coding frequencyCoding = flags.get(Component.FREQUENCIES);
282: final Coding pointerCoding = flags.get(Component.POINTERS);
283: final Coding countCoding = flags.get(Component.COUNTS);
284: final Coding positionCoding = flags.get(Component.POSITIONS);
285:
286: if (countCoding == null && positionCoding != null)
287: throw new IllegalArgumentException(
288: "Index "
289: + basename
290: + " has positions but no counts (this can't happen)");
291:
292: // Load document sizes if forced to do so, or if the pointer/position compression methods make it necessary.
293: IntList sizes = null;
294: // TODO: quick patch to avoid loading sizes in case of payloads.
295: if (payload == null
296: && (documentSizes || positionCoding == Coding.GOLOMB || positionCoding == Coding.INTERPOLATIVE)) {
297: sizes = DiskBasedIndex.readSizes(new InputBitStream(
298: basename + DiskBasedIndex.SIZES_EXTENSION),
299: numberOfDocuments);
300: if (sizes.size() != numberOfDocuments)
301: throw new IllegalStateException(
302: "The length of the size list ("
303: + sizes.size()
304: + ") is not equal to the number of documents ("
305: + numberOfDocuments + ")");
306: }
307:
308: // Load offsets if forced to do so. Depending on a property, we use the core-memory or the semi-external version.
309: final LongList offsets;
310: // TODO: quick patch to avoid loading sizes in case of payloads.
311: if (payload == null && randomAccess) {
312: int offsetStep = queryProperties != null
313: && queryProperties.get(UriKeys.OFFSETSTEP) != null ? Integer
314: .parseInt(queryProperties.get(UriKeys.OFFSETSTEP))
315: : DEFAULT_OFFSET_STEP;
316:
317: if (offsetStep < 0) { // Memory-mapped
318: offsetStep = -offsetStep;
319: final long length = new File(basename
320: + DiskBasedIndex.OFFSETS_EXTENSION).length();
321: offsets = LongLists
322: .synchronize(new SemiExternalOffsetList(
323: new InputBitStream(
324: new ByteBufferInputStream(
325: new FileInputStream(
326: basename
327: + DiskBasedIndex.OFFSETS_EXTENSION)
328: .getChannel()
329: .map(
330: MapMode.READ_ONLY,
331: 0,
332: length))),
333: offsetStep, numberOfTerms + 1));
334: } else {
335: offsets = offsetStep == 0 ? DiskBasedIndex.readOffsets(
336: new InputBitStream(basename
337: + DiskBasedIndex.OFFSETS_EXTENSION),
338: numberOfTerms)
339: : LongLists
340: .synchronize(new SemiExternalOffsetList(
341: new InputBitStream(
342: basename
343: + DiskBasedIndex.OFFSETS_EXTENSION,
344: 1024), offsetStep,
345: numberOfTerms + 1));
346: }
347: if (offsets.size() != numberOfTerms + 1)
348: throw new IllegalStateException(
349: "The length of the offset list ("
350: + offsets.size()
351: + ") is not equal to the number of terms plus one ("
352: + numberOfTerms + " + 1)");
353: } else
354: offsets = null;
355:
356: final int quantum = properties.getInt(
357: BitStreamIndex.PropertyKeys.SKIPQUANTUM, -1);
358: final int height = properties.getInt(
359: BitStreamIndex.PropertyKeys.SKIPHEIGHT, -1);
360: final int bufferSize = properties.getInt(
361: BitStreamIndex.PropertyKeys.BUFFERSIZE,
362: BitStreamIndex.DEFAULT_BUFFER_SIZE);
363:
364: final TermProcessor termProcessor = Index
365: .getTermProcessor(properties);
366: final boolean highPerformance = indexClass != null
367: && FileHPIndex.class.isAssignableFrom(indexClass);
368:
369: if (queryProperties != null
370: && queryProperties.containsKey(UriKeys.INMEMORY)) {
371: /*if ( SqrtSkipIndex.class.isAssignableFrom( indexClass ) )
372: return new SqrtSkipInMemoryIndex( BinIO.loadBytes( indexFile.toString() ),
373: numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
374: frequencyCoding, pointerCoding, countCoding, positionCoding,
375: termProcessor,
376: field, properties, termMap, prefixMap, sizes, offsets );*/
377: return highPerformance ? new InMemoryHPIndex(BinIO
378: .loadBytes(indexFile.toString()), BinIO
379: .loadBytes(basename + POSITIONS_EXTENSION),
380: numberOfDocuments, numberOfTerms, numberOfPostings,
381: numberOfOccurrences, maxCount, payload,
382: frequencyCoding, pointerCoding, countCoding,
383: positionCoding, quantum, height, termProcessor,
384: field, properties, termMap, prefixMap, sizes,
385: offsets) : new InMemoryIndex(BinIO
386: .loadBytes(indexFile.toString()),
387: numberOfDocuments, numberOfTerms, numberOfPostings,
388: numberOfOccurrences, maxCount, payload,
389: frequencyCoding, pointerCoding, countCoding,
390: positionCoding, quantum, height, termProcessor,
391: field, properties, termMap, prefixMap, sizes,
392: offsets);
393: } else if (queryProperties != null
394: && queryProperties.containsKey(UriKeys.MAPPED)) {
395: final File positionsFile = new File(basename
396: + POSITIONS_EXTENSION);
397: final ByteBuffer index = new FileInputStream(indexFile)
398: .getChannel().map(MapMode.READ_ONLY, 0,
399: indexFile.length());
400: return highPerformance ? new MemoryMappedHPIndex(index,
401: new FileInputStream(positionsFile).getChannel()
402: .map(MapMode.READ_ONLY, 0,
403: positionsFile.length()),
404: numberOfDocuments, numberOfTerms, numberOfPostings,
405: numberOfOccurrences, maxCount, payload,
406: frequencyCoding, pointerCoding, countCoding,
407: positionCoding, quantum, height, termProcessor,
408: field, properties, termMap, prefixMap, sizes,
409: offsets) : new MemoryMappedIndex(index,
410: numberOfDocuments, numberOfTerms, numberOfPostings,
411: numberOfOccurrences, maxCount, payload,
412: frequencyCoding, pointerCoding, countCoding,
413: positionCoding, quantum, height, termProcessor,
414: field, properties, termMap, prefixMap, sizes,
415: offsets);
416:
417: }
418: /*if ( SqrtSkipIndex.class.isAssignableFrom( indexClass ) )
419: return new SqrtSkipFileIndex( basename.toString(),
420: numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, maxCount,
421: frequencyCoding, pointerCoding, countCoding, positionCoding,
422: termProcessor,
423: field, properties, termMap, prefixMap, sizes, offsets, indexFile );*/
424:
425: return highPerformance ? new FileHPIndex(basename.toString(),
426: numberOfDocuments, numberOfTerms, numberOfPostings,
427: numberOfOccurrences, maxCount, payload,
428: frequencyCoding, pointerCoding, countCoding,
429: positionCoding, quantum, height, bufferSize,
430: termProcessor, field, properties, termMap, prefixMap,
431: sizes, offsets) : new FileIndex(basename.toString(),
432: numberOfDocuments, numberOfTerms, numberOfPostings,
433: numberOfOccurrences, maxCount, payload,
434: frequencyCoding, pointerCoding, countCoding,
435: positionCoding, quantum, height, bufferSize,
436: termProcessor, field, properties, termMap, prefixMap,
437: sizes, offsets);
438:
439: }
440:
441: /** Returns a new disk-based index, using preloaded {@link Properties} and possibly guessing reasonable term and prefix maps from the basename.
442: *
443: * @param basename the basename of the index.
444: * @param properties the properties obtained by stemming <code>basename</code>.
445: * @param randomAccess whether the index should be accessible randomly.
446: * @param documentSizes if true, document sizes will be loaded.
447: * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded.
448: * @param queryProperties a map containing associations between {@link Index.UriKeys} and values, or <code>null</code>.
449: * @throws IllegalAccessException
450: * @throws InstantiationException
451: *
452: * @see #getInstance(CharSequence, Properties, StringMap, PrefixMap, boolean, boolean, EnumMap)
453: */
454: public static BitStreamIndex getInstance(
455: final CharSequence basename, final Properties properties,
456: final boolean randomAccess, final boolean documentSizes,
457: final boolean maps,
458: final EnumMap<UriKeys, String> queryProperties)
459: throws ClassNotFoundException, IOException,
460: InstantiationException, IllegalAccessException {
461: StringMap<? extends CharSequence> termMap = null;
462: PrefixMap<? extends CharSequence> prefixMap = null;
463: if (maps) {
464: // TODO: check this logic
465: termMap = DiskBasedIndex.loadStringMap(basename
466: + DiskBasedIndex.TERMMAP_EXTENSION);
467: if (termMap != null && termMap instanceof PrefixMap)
468: return getInstance(basename, properties, termMap,
469: (PrefixMap<?>) termMap, randomAccess,
470: documentSizes, queryProperties);
471: prefixMap = DiskBasedIndex.loadPrefixMap(basename
472: + DiskBasedIndex.PREFIXMAP_EXTENSION);
473: if (termMap != null)
474: return getInstance(basename, properties, termMap,
475: prefixMap, randomAccess, documentSizes,
476: queryProperties);
477: if (prefixMap != null)
478: return getInstance(basename, properties, prefixMap,
479: prefixMap, randomAccess, documentSizes,
480: queryProperties);
481: }
482: return getInstance(basename, properties, null, prefixMap,
483: randomAccess, documentSizes, queryProperties);
484: }
485:
486: /** Returns a new disk-based index, possibly guessing reasonable term and prefix maps from the basename.
487: *
488: * <p>If there is a term map file (basename stemmed with <samp>.termmap</samp>), it is used as term map and,
489: * in case it implements {@link PrefixMap}. Otherwise, we search for a prefix map (basename stemmed with <samp>.prefixmap</samp>)
490: * and, if it implements {@link StringMap} and no term map has been found, we use it as prefix map.
491: *
492: * @param basename the basename of the index.
493: * @param randomAccess whether the index should be accessible randomly (e.g., if it will
494: * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
495: * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
496: * might be loaded anyway because the compression method for positions requires it).
497: * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded (this
498: * feature might not be available with some kind of index).
499: * @param queryProperties a map containing associations between {@link Index.UriKeys} and values, or <code>null</code>.
500: */
501: public static BitStreamIndex getInstance(
502: final CharSequence basename, final boolean randomAccess,
503: final boolean documentSizes, final boolean maps,
504: final EnumMap<UriKeys, String> queryProperties)
505: throws ConfigurationException, ClassNotFoundException,
506: IOException, InstantiationException, IllegalAccessException {
507: return getInstance(basename, new Properties(basename
508: + DiskBasedIndex.PROPERTIES_EXTENSION), randomAccess,
509: documentSizes, maps, queryProperties);
510: }
511:
512: /** Returns a new disk-based index, using preloaded {@link Properties} and possibly guessing reasonable term and prefix maps from the basename.
513: *
514: * <p>If there is a term map file (basename stemmed with <samp>.termmap</samp>), it is used as term map and,
515: * in case it implements {@link PrefixMap}. Otherwise, we search for a prefix map (basename stemmed with <samp>.prefixmap</samp>)
516: * and, if it implements {@link StringMap} and no term map has been found, we use it as prefix map.
517: *
518: * @param basename the basename of the index.
519: * @param randomAccess whether the index should be accessible randomly (e.g., if it will
520: * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
521: * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
522: * might be loaded anyway because the compression method for positions requires it).
523: * @param maps if true, {@linkplain StringMap term} and {@linkplain PrefixMap prefix} maps will be guessed and loaded (this
524: * feature might not be available with some kind of index).
525: * @see #getInstance(CharSequence, boolean, boolean, boolean, EnumMap)
526: */
527: public static BitStreamIndex getInstance(
528: final CharSequence basename, final boolean randomAccess,
529: final boolean documentSizes, final boolean maps)
530: throws ConfigurationException, ClassNotFoundException,
531: IOException, InstantiationException, IllegalAccessException {
532: return getInstance(basename, new Properties(basename
533: + DiskBasedIndex.PROPERTIES_EXTENSION), randomAccess,
534: documentSizes, maps, null);
535: }
536:
537: /** Returns a new disk-based index, guessing reasonable term and prefix maps from the basename.
538: *
539: * @param basename the basename of the index.
540: * @param randomAccess whether the index should be accessible randomly (e.g., if it will
541: * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
542: * @param documentSizes if true, document sizes will be loaded (note that sometimes document sizes
543: * might be loaded anyway because the compression method for positions requires it).
544: */
545: public static BitStreamIndex getInstance(
546: final CharSequence basename, final boolean randomAccess,
547: final boolean documentSizes) throws ConfigurationException,
548: ClassNotFoundException, IOException,
549: InstantiationException, IllegalAccessException {
550: return getInstance(basename, randomAccess, documentSizes, true);
551: }
552:
553: /** Returns a new local index, trying to guess reasonable term and prefix maps from the basename,
554: * and loading document sizes only if it is necessary.
555: *
556: * @param basename the basename of the index.
557: * @param randomAccess whether the index should be accessible randomly (e.g., if it will
558: * be possible to call {@link IndexReader#documents(int)} on the index readers returned by the index).
559: */
560: public static BitStreamIndex getInstance(
561: final CharSequence basename, final boolean randomAccess)
562: throws ConfigurationException, ClassNotFoundException,
563: IOException, InstantiationException, IllegalAccessException {
564: return getInstance(basename, randomAccess, false);
565: }
566:
567: /** Returns a new local index, trying to guess reasonable term and prefix maps from the basename,
568: * loading offsets but loading document sizes only if it is necessary.
569: *
570: * @param basename the basename of the index.
571: */
572: public static BitStreamIndex getInstance(final CharSequence basename)
573: throws ConfigurationException, ClassNotFoundException,
574: IOException, InstantiationException, IllegalAccessException {
575: return getInstance(basename, true);
576: }
577: }
|