001: package it.unimi.dsi.mg4j.index.cluster;
002:
003: /*
004: * MG4J: Managing Gigabytes for Java
005: *
006: * Copyright (C) 2006-2007 Sebastiano Vigna
007: *
008: * This library is free software; you can redistribute it and/or modify it
009: * under the terms of the GNU Lesser General Public License as published by the Free
010: * Software Foundation; either version 2.1 of the License, or (at your option)
011: * any later version.
012: *
013: * This library is distributed in the hope that it will be useful, but
014: * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
015: * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
016: * for more details.
017: *
018: * You should have received a copy of the GNU Lesser General Public License
019: * along with this program; if not, write to the Free Software
020: * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
021: *
022: */
023:
024: import it.unimi.dsi.fastutil.ints.IntArrayList;
025: import it.unimi.dsi.fastutil.ints.IntList;
026: import it.unimi.dsi.mg4j.index.Index;
027: import it.unimi.dsi.mg4j.index.IndexIterator;
028: import it.unimi.dsi.mg4j.index.IndexIterators;
029: import it.unimi.dsi.mg4j.index.IndexReader;
030: import it.unimi.dsi.mg4j.index.TermProcessor;
031: import it.unimi.dsi.mg4j.index.TooManyTermsException;
032: import it.unimi.dsi.mg4j.index.payload.Payload;
033: import it.unimi.dsi.mg4j.search.DocumentIterator;
034: import it.unimi.dsi.util.BloomFilter;
035: import it.unimi.dsi.util.Properties;
036:
037: import java.io.IOException;
038: import java.util.ArrayList;
039: import java.util.Arrays;
040:
041: import org.apache.commons.lang.ClassUtils;
042:
043: /** A abstract class representing a cluster of local indices containing separate
044: * set of documents from the same collection.
045: *
046: * <p>This class stores the strategy and possibly the {@linkplain BloomFilter Bloom filters}
047: * associated to this documental cluster.
048: *
049: * @author Alessandro Arrabito
050: * @author Sebastiano Vigna
051: */
052:
053: public abstract class DocumentalCluster extends IndexCluster {
054: private static final long serialVersionUID = 1L;
055:
056: public static final int DEFAULT_BUFFER_SIZE = 8 * 1024;
057:
058: /** Whether this documental cluster is concatenated. */
059: public final boolean concatenated;
060: /** Whether this documental cluster is flat; in this case, all local indices have the same term list. */
061: public final boolean flat;
062: /** An Array containing the numbers from 0 to the number of local indices (excluded). Used to implement {@link IndexReader#documents(int)} more
063: * efficiently in flat indices. */
064: public final int[] allIndices;
065:
066: /** The clustering strategy. */
067: protected final DocumentalClusteringStrategy strategy;
068:
069: /** Creates a new documental index cluster. */
070:
071: public DocumentalCluster(final Index[] localIndex,
072: final DocumentalClusteringStrategy strategy,
073: final boolean flat, final BloomFilter[] termFilter,
074: final int numberOfDocuments, final int numberOfTerms,
075: final long numberOfPostings, final long numberOfOccurences,
076: final int maxCount, final Payload payload,
077: final boolean hasCounts, final boolean hasPositions,
078: final TermProcessor termProcessor, final String field,
079: final IntList sizes, final Properties properties) {
080: super (localIndex, termFilter, numberOfDocuments, numberOfTerms,
081: numberOfPostings, numberOfOccurences, maxCount,
082: payload, hasCounts, hasPositions, termProcessor, field,
083: sizes, properties);
084: this .strategy = strategy;
085: this .flat = flat;
086: this .concatenated = getClass().isAssignableFrom(
087: DocumentalConcatenatedCluster.class);
088: this .allIndices = new int[localIndex.length];
089: for (int i = allIndices.length; i-- != 0;)
090: allIndices[i] = i;
091: }
092:
093: @Override
094: public DocumentalClusterIndexReader getReader(final int bufferSize)
095: throws IOException {
096: return new DocumentalClusterIndexReader(this ,
097: bufferSize == -1 ? DEFAULT_BUFFER_SIZE : bufferSize);
098: }
099:
100: @Override
101: public IndexIterator documents(final CharSequence prefix,
102: final int limit) throws IOException, TooManyTermsException {
103: final ArrayList<DocumentIterator> iterators = new ArrayList<DocumentIterator>(
104: localIndex.length);
105: final IntArrayList usedIndices = new IntArrayList();
106:
107: IndexIterator documentIterator;
108: for (int i = 0; i < localIndex.length; i++) {
109: // TODO: check for limit globally
110: documentIterator = localIndex[i].documents(prefix, limit);
111: if (documentIterator.hasNext()) {
112: iterators.add(documentIterator);
113: usedIndices.add(i);
114: }
115: }
116: // TODO: test that this clustered multiterm does work
117: final IndexIterator result = concatenated ? new DocumentalConcatenatedClusterIndexIterator(
118: (DocumentalClusterIndexReader) getReader(), iterators
119: .toArray(IndexIterators.EMPTY_ARRAY),
120: usedIndices.toIntArray())
121: : new DocumentalMergedClusterIndexIterator(
122: (DocumentalClusterIndexReader) getReader(),
123: iterators.toArray(IndexIterators.EMPTY_ARRAY),
124: usedIndices.toIntArray());
125: result.term(prefix);
126: return result;
127:
128: }
129:
130: public String toString() {
131: return ClassUtils.getShortClassName(this, null)
132: + Arrays.toString(localIndex);
133: }
134: }
|