Source Code Cross Referenced for BitStreamIndexReader.java in » Search-Engine » mg4j » it » unimi » dsi » mg4j » index » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Sevlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Search Engine » mg4j » it.unimi.dsi.mg4j.index
Source Cross Referenced Class Diagram Java Document (Java Doc)
0001:        package it.unimi.dsi.mg4j.index;
0002:
0003:        /*		 
0004:         * MG4J: Managing Gigabytes for Java
0005:         *
0006:         * Copyright (C) 2003-2006 Paolo Boldi and Sebastiano Vigna 
0007:         *
0008:         *  This library is free software; you can redistribute it and/or modify it
0009:         *  under the terms of the GNU Lesser General Public License as published by the Free
0010:         *  Software Foundation; either version 2.1 of the License, or (at your option)
0011:         *  any later version.
0012:         *
0013:         *  This library is distributed in the hope that it will be useful, but
0014:         *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
0015:         *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
0016:         *  for more details.
0017:         *
0018:         *  You should have received a copy of the GNU Lesser General Public License
0019:         *  along with this program; if not, write to the Free Software
0020:         *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
0021:         *
0022:         */
0023:
0024:        import it.unimi.dsi.fastutil.ints.IntIterator;
0025:        import it.unimi.dsi.fastutil.ints.IntIterators;
0026:        import it.unimi.dsi.fastutil.ints.IntSet;
0027:        import it.unimi.dsi.fastutil.objects.AbstractObjectIterator;
0028:        import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
0029:        import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
0030:        import it.unimi.dsi.fastutil.objects.ReferenceSet;
0031:        import it.unimi.dsi.mg4j.index.AbstractIndexIterator;
0032:        import it.unimi.dsi.mg4j.index.AbstractIndexReader;
0033:        import it.unimi.dsi.mg4j.index.BitStreamIndex;
0034:        import it.unimi.dsi.mg4j.index.Index;
0035:        import it.unimi.dsi.mg4j.index.IndexIterator;
0036:        import it.unimi.dsi.mg4j.index.CompressionFlags.Coding;
0037:        import it.unimi.dsi.mg4j.index.payload.Payload;
0038:        import it.unimi.dsi.io.InputBitStream;
0039:        import it.unimi.dsi.util.Interval;
0040:        import it.unimi.dsi.mg4j.search.IntervalIterator;
0041:        import it.unimi.dsi.mg4j.search.IntervalIterators;
0042:        import it.unimi.dsi.bits.Fast;
0043:        import it.unimi.dsi.Util;
0044:
0045:        import java.io.IOException;
0046:        import java.util.NoSuchElementException;
0047:
0048:        import org.apache.log4j.Logger;
0049:
0050:        /** A bitstream-based {@linkplain IndexReader index reader}. */
0051:
0052:        public class BitStreamIndexReader extends AbstractIndexReader {
0053:            @SuppressWarnings("unused")
0054:            private static final Logger LOGGER = Util
0055:                    .getLogger(BitStreamIndexReader.class);
0056:
0057:            /** The reference index. */
0058:            protected final BitStreamIndex index;
0059:
0060:            private final static boolean ASSERTS = false;
0061:            private final static boolean DEBUG = false;
0062:
0063:            /** The {@link IndexIterator} view of this reader (returned by {@link #documents(CharSequence)}). */
0064:            protected final BitStreamIndexReaderIndexIterator indexIterator;
0065:
0066:            /** Creates a new skip index reader, with the specified underlying {@link Index} and input bit stream.
0067:             *
0068:             * @param index the index.
0069:             * @param ibs the underlying bit stream.
0070:             */
0071:            public BitStreamIndexReader(final BitStreamIndex index,
0072:                    final InputBitStream ibs) {
0073:                this .index = index;
0074:                this .indexIterator = new BitStreamIndexReaderIndexIterator(
0075:                        this , ibs);
0076:            }
0077:
0078:            protected static final class BitStreamIndexReaderIndexIterator
0079:                    extends AbstractIndexIterator implements  IndexIterator {
0080:                /** The enclosing instance. */
0081:                private final BitStreamIndexReader parent;
0082:                /** The reference index. */
0083:                protected final BitStreamIndex index;
0084:                /** The underlying input bit stream. */
0085:                protected final InputBitStream ibs;
0086:                /** The enclosed interval iterator. */
0087:                private final IndexIntervalIterator intervalIterator;
0088:                /** A singleton set containing the enclosed interval iterator. */
0089:                private final Reference2ReferenceMap<Index, IntervalIterator> singletonIntervalIterator;
0090:                /** The key index. */
0091:                private final Index keyIndex;
0092:
0093:                /** The cached copy of {@link #index index.hasPositions}. */
0094:                protected final boolean hasPositions;
0095:                /** The cached copy of {@link #index index.hasCounts}. */
0096:                protected final boolean hasCounts;
0097:                /** The cached copy of {@link #index index.hasPayloads}. */
0098:                protected final boolean hasPayloads;
0099:                /** Whether the underlying index has skips. */
0100:                protected final boolean hasSkips;
0101:
0102:                /** The cached copy of {@link #index index.pointerCoding}. */
0103:                protected final Coding pointerCoding;
0104:
0105:                /** The cached copy of {@link #index index.countCoding}. */
0106:                protected final Coding countCoding;
0107:
0108:                /** The cached copy of {@link #index index.positionCoding}. */
0109:                protected final Coding positionCoding;
0110:
0111:                /** The payload, in case the index of this reader has payloads, or <code>null</code>. */
0112:                protected final Payload payload;
0113:
0114:                /** The parameter <code>b</code> for Golomb coding of pointers. */
0115:                protected int b;
0116:                /** The parameter <code>log2b</code> for Golomb coding of pointers; it is the most significant bit of {@link #b}. */
0117:                protected int log2b;
0118:
0119:                /** The current term. */
0120:                protected int currentTerm = -1;
0121:                /** The current frequency. */
0122:                protected int frequency;
0123:                /** Whether the current terms has pointers at all (this happens when the {@link #frequency} is smaller than the number of documents). */
0124:                protected boolean hasPointers;
0125:                /** The current count (if this index contains counts). */
0126:                protected int count;
0127:                /** The last document pointer we read from current list, -1 if we just read the frequency,
0128:                 * {@link Integer#MAX_VALUE} if we are beyond the end of list. */
0129:                protected int currentDocument;
0130:                /** The number of the document record we are going to read inside the current inverted list. */
0131:                protected int numberOfDocumentRecord;
0132:                /** This variable tracks the current state of the reader. */
0133:                protected int state;
0134:
0135:                /** The parameter <code>h</code> (the maximum height of a skip tower). */
0136:                public final int height;
0137:                /** The quantum. */
0138:                public final int quantum;
0139:                /** The bit mask giving the remainder of the division by {@link #quantum}. */
0140:                public final int quantumModuloMask;
0141:                /** The shift giving result of the division by {@link #quantum}. */
0142:                public final int quantumDivisionShift;
0143:                /** The maximum height of a skip tower in the current block. May be less than {@link #height} if the block is defective,
0144:                 * and will be -1 on defective quanta (no tower at all). */
0145:                private int maxh;
0146:                /** The maximum valid index of the current skip tower, if any. */
0147:                private int s;
0148:                /** The minimum valid index of the current skip tower, or {@link Integer#MAX_VALUE}. If {@link #maxh} is negative, the value is undefined. */
0149:                private int lowest;
0150:                /** We have <var>w</var> = <var>Hq</var>. */
0151:                private final int w;
0152:                /** The bit mask giving the remainder of the division by {@link #w}. */
0153:                private final int wModuloMask;
0154:                /** The shift giving result of the division by {@link #w}. */
0155:                private final int wDivisionShift;
0156:                /** The Golomb modulus for a top pointer skip, for each level. */
0157:                private int[] towerTopB;
0158:                /** The most significant bit of the Golomb modulus for a top point[]er skip, for each level. */
0159:                private int[] towerTopLog2B;
0160:                /** The Golomb modulus for a lower pointer skip, for each level. */
0161:                private int[] towerLowerB;
0162:                /** The most significant bit of the Golomb modulus for a lower pointer skip, for each level. */
0163:                private int[] towerLowerLog2B;
0164:                /** The prediction for a pointer skip, for each level. */
0165:                private int[] pointerPrediction;
0166:                /** An array to decode bit skips. */
0167:                private long[] bitSkip;
0168:                /** An array to decode the pointer skips. */
0169:                private int[] pointerSkip;
0170:                /** The number of bits read just after reading the last skip tower. */
0171:                private long readBitsAtLastSkipTower;
0172:                /** The document pointer corresponding to the last skip tower. */
0173:                private int pointerAtLastSkipTower;
0174:                /** The current quantum bit length, as provided by the index. */
0175:                private int quantumBitLength;
0176:                /** The current entry bit length, as provided by the index. */
0177:                private int entryBitLength;
0178:                /** This value of {@link #state} means that we are positioned just before a tower. */
0179:                private static final int BEFORE_TOWER = 0;
0180:
0181:                /** This value of {@link #state} can be assumed only in indices that contain a payload; it
0182:                 * means that we are positioned just before the payload for the current document record. */
0183:                private static final int BEFORE_PAYLOAD = 1;
0184:
0185:                /** This value of {@link #state} can be assumed only in indices that contain counts; it
0186:                 * means that we are positioned just before the count for the current document record. */
0187:                private static final int BEFORE_COUNT = 2;
0188:
0189:                /** This value of {@link #state} can be assumed only in indices that contain document positions; 
0190:                 * it means that we are positioned just before the position list of the current document record. */
0191:                private static final int BEFORE_POSITIONS = 3;
0192:
0193:                /** This value of {@link #state} means that we are at the start of a new document record, 
0194:                 * unless we already read all documents (i.e., {@link #numberOfDocumentRecord} == {@link #frequency}),
0195:                 * in which case we are at the end of the inverted list, and {@link #endOfList()} is true. */
0196:                private static final int BEFORE_POINTER = 4;
0197:
0198:                /** The cached position array. */
0199:                protected int[] positionCache = new int[16];
0200:
0201:                public BitStreamIndexReaderIndexIterator(
0202:                        final BitStreamIndexReader parent,
0203:                        final InputBitStream ibs) {
0204:                    this .parent = parent;
0205:                    this .ibs = ibs;
0206:                    index = parent.index;
0207:                    keyIndex = index.keyIndex;
0208:                    pointerCoding = index.pointerCoding;
0209:
0210:                    hasPayloads = index.hasPayloads;
0211:                    payload = hasPayloads ? index.payload.copy() : null;
0212:                    hasCounts = index.hasCounts;
0213:                    countCoding = index.countCoding;
0214:                    hasPositions = index.hasPositions;
0215:                    positionCoding = index.positionCoding;
0216:                    intervalIterator = index.hasPositions ? new IndexIntervalIterator()
0217:                            : null;
0218:                    singletonIntervalIterator = index.hasPositions ? Reference2ReferenceMaps
0219:                            .singleton(keyIndex,
0220:                                    (IntervalIterator) intervalIterator)
0221:                            : null;
0222:                    quantum = index.quantum;
0223:                    height = index.height;
0224:                    if ((quantum == -1) != (height == -1))
0225:                        throw new IllegalArgumentException();
0226:                    hasSkips = quantum != -1 && height != -1;
0227:                    if (hasSkips) {
0228:                        w = (1 << height) * quantum;
0229:                        quantumModuloMask = quantum - 1;
0230:                        wModuloMask = w - 1;
0231:                        quantumDivisionShift = Fast.mostSignificantBit(quantum);
0232:                        wDivisionShift = Fast.mostSignificantBit(w);
0233:                        bitSkip = new long[height + 1];
0234:                        pointerSkip = new int[height + 1];
0235:                        towerTopB = new int[height + 1];
0236:                        towerTopLog2B = new int[height + 1];
0237:                        towerLowerB = new int[height + 1];
0238:                        towerLowerLog2B = new int[height + 1];
0239:                        pointerPrediction = new int[height + 1];
0240:                    } else {
0241:                        w = quantumModuloMask = wModuloMask = quantumDivisionShift = wDivisionShift = 0;
0242:                        bitSkip = null;
0243:                        pointerSkip = towerTopB = towerTopLog2B = towerLowerB = towerLowerLog2B = pointerPrediction = null;
0244:                    }
0245:                }
0246:
0247:                /** Positions the index on the inverted list of a given term.
0248:                 *
0249:                 * <p>This method can be called at any time. Note that it is <em>always</em> possible
0250:                 * to call this method with argument 0, even if offsets have not been loaded.
0251:                 *
0252:                 * @param term a term.
0253:                 */
0254:                protected void position(final int term) throws IOException {
0255:                    if (term == 0) {
0256:                        ibs.position(0);
0257:                        ibs.readBits(0);
0258:                    } else {
0259:                        if (index.offsets == null)
0260:                            throw new IllegalStateException(
0261:                                    "You cannot position an index without offsets");
0262:                        final long offset = index.offsets.getLong(term);
0263:                        ibs.position(offset);
0264:                        // TODO: Can't we set this to 0?
0265:                        ibs.readBits(offset);
0266:                    }
0267:                    currentTerm = term;
0268:                    readFrequency();
0269:                }
0270:
0271:                public int termNumber() {
0272:                    return currentTerm;
0273:                }
0274:
0275:                protected IndexIterator advance() throws IOException {
0276:                    if (currentTerm == index.numberOfTerms - 1)
0277:                        return null;
0278:                    if (currentTerm != -1) {
0279:                        skipTo(Integer.MAX_VALUE);
0280:                        nextDocument(); // This guarantees we have no garbage before the frequency
0281:                    }
0282:                    currentTerm++;
0283:                    readFrequency();
0284:                    return this ;
0285:                }
0286:
0287:                private void readFrequency() throws IOException {
0288:                    // Read the frequency
0289:                    switch (index.frequencyCoding) {
0290:                    case GAMMA:
0291:                        frequency = ibs.readGamma() + 1;
0292:                        break;
0293:                    case SHIFTED_GAMMA:
0294:                        frequency = ibs.readShiftedGamma() + 1;
0295:                        break;
0296:                    case DELTA:
0297:                        frequency = ibs.readDelta() + 1;
0298:                        break;
0299:                    default:
0300:                        throw new IllegalStateException(
0301:                                "The required frequency coding ("
0302:                                        + index.frequencyCoding
0303:                                        + ") is not supported.");
0304:                    }
0305:                    hasPointers = frequency < index.numberOfDocuments;
0306:                    // We compute the modulus used for pointer Golomb coding 
0307:                    if (pointerCoding == Coding.GOLOMB) {
0308:                        if (hasPointers) {
0309:                            b = BitStreamIndex.golombModulus(frequency,
0310:                                    index.numberOfDocuments);
0311:                            log2b = Fast.mostSignificantBit(b);
0312:                        }
0313:                    }
0314:                    if (hasSkips) {
0315:                        quantumBitLength = entryBitLength = -1;
0316:                        lowest = Integer.MAX_VALUE;
0317:                        if (ASSERTS)
0318:                            for (int i = height; i > Math
0319:                                    .min(
0320:                                            height,
0321:                                            Fast
0322:                                                    .mostSignificantBit(frequency >> quantumDivisionShift)); i--)
0323:                                towerTopB[i] = towerLowerB[i] = pointerPrediction[i] = -1;
0324:                        final long pointerQuantumSigma = BitStreamIndex
0325:                                .quantumSigma(frequency,
0326:                                        index.numberOfDocuments, quantum);
0327:                        for (int i = Math
0328:                                .min(
0329:                                        height,
0330:                                        Fast
0331:                                                .mostSignificantBit(frequency >> quantumDivisionShift)); i >= 0; i--) {
0332:                            towerTopB[i] = BitStreamIndex
0333:                                    .gaussianGolombModulus(pointerQuantumSigma,
0334:                                            i + 1);
0335:                            towerTopLog2B[i] = Fast
0336:                                    .mostSignificantBit(towerTopB[i]);
0337:                            towerLowerB[i] = BitStreamIndex
0338:                                    .gaussianGolombModulus(pointerQuantumSigma,
0339:                                            i);
0340:                            towerLowerLog2B[i] = Fast
0341:                                    .mostSignificantBit(towerLowerB[i]);
0342:                            pointerPrediction[i] = (int) ((quantum * (1L << i)
0343:                                    * index.numberOfDocuments + frequency / 2) / frequency);
0344:                        }
0345:                    }
0346:                    count = -1;
0347:                    currentDocument = -1;
0348:                    numberOfDocumentRecord = -1;
0349:                    state = BEFORE_POINTER;
0350:                }
0351:
0352:                public Index index() {
0353:                    return keyIndex;
0354:                }
0355:
0356:                public int frequency() {
0357:                    return frequency;
0358:                }
0359:
0360:                private void ensureCurrentDocument() {
0361:                    if (currentDocument < 0)
0362:                        throw new IllegalStateException(
0363:                                "nextDocument() has never been called for (term="
0364:                                        + currentTerm + ")");
0365:                    if (currentDocument == Integer.MAX_VALUE)
0366:                        throw new IllegalStateException(
0367:                                "This reader is positioned beyond the end of list of (term="
0368:                                        + currentTerm + ")");
0369:                }
0370:
0371:                /** Returns whether there are no more document records in the current inverted list.
0372:                 * 
0373:                 * <p>This method returns true if the last document pointer of the current inverted
0374:                 * list has been read. It makes no distinction as to where (inside the last document
0375:                 * record) this reader is currently positioned. In particular, this method will
0376:                 * return true independently of whether count and positions have been read or not (we
0377:                 * note by passing that this is the only sensible behaviour, as you can build indices
0378:                 * with or without counts/positions).
0379:                 * 
0380:                 * <p>This method will return true also when this reader is positioned <em>beyond</em>
0381:                 * the last document pointer. In this case, {@link #currentDocumentPointer()} will
0382:                 * return {@link Integer#MAX_VALUE}.
0383:                 *
0384:                 * @return true whether there are no more document records in the current inverted list.
0385:                 */
0386:                private boolean endOfList() {
0387:                    if (ASSERTS)
0388:                        assert numberOfDocumentRecord <= frequency;
0389:                    return numberOfDocumentRecord >= frequency - 1;
0390:                }
0391:
0392:                public int document() {
0393:                    if (ASSERTS)
0394:                        ensureCurrentDocument();
0395:                    return currentDocument;
0396:                }
0397:
0398:                public Payload payload() throws IOException {
0399:                    if (DEBUG)
0400:                        System.err.println(this  + ".payload()");
0401:                    if (ASSERTS)
0402:                        ensureCurrentDocument();
0403:                    if (!hasPayloads)
0404:                        throw new UnsupportedOperationException("This index ("
0405:                                + index + ") does not contain payloads");
0406:                    if (state <= BEFORE_PAYLOAD) {
0407:                        if (state == BEFORE_TOWER)
0408:                            readTower();
0409:                        payload.read(ibs);
0410:                        state = hasCounts ? BEFORE_COUNT : BEFORE_POINTER;
0411:                    }
0412:                    return payload;
0413:                }
0414:
0415:                public int count() throws IOException {
0416:                    if (DEBUG)
0417:                        System.err.println(this  + ".count()");
0418:                    if (count != -1)
0419:                        return count;
0420:                    if (ASSERTS)
0421:                        ensureCurrentDocument();
0422:                    if (!hasCounts)
0423:                        throw new UnsupportedOperationException("This index ("
0424:                                + index + ") does not contain counts");
0425:                    if (state == BEFORE_TOWER)
0426:                        readTower();
0427:                    if (state == BEFORE_PAYLOAD)
0428:                        payload.read(ibs);
0429:                    {
0430:                        if (ASSERTS && state != BEFORE_COUNT)
0431:                            throw new IllegalStateException();
0432:                        state = hasPositions ? BEFORE_POSITIONS
0433:                                : BEFORE_POINTER;
0434:                        switch (countCoding) {
0435:                        case UNARY:
0436:                            count = ibs.readUnary() + 1;
0437:                            break;
0438:                        case SHIFTED_GAMMA:
0439:                            count = ibs.readShiftedGamma() + 1;
0440:                            break;
0441:                        case GAMMA:
0442:                            count = ibs.readGamma() + 1;
0443:                            break;
0444:                        case DELTA:
0445:                            count = ibs.readDelta() + 1;
0446:                            break;
0447:                        default:
0448:                            throw new IllegalStateException(
0449:                                    "The required count coding (" + countCoding
0450:                                            + ") is not supported.");
0451:                        }
0452:                    }
0453:                    return count;
0454:                }
0455:
0456:                /** We read positions, assuming state <= BEFORE_POSITIONS */
0457:                @SuppressWarnings("unused")
0458:                protected void updatePositionCache() throws IOException {
0459:                    if (ASSERTS)
0460:                        assert state <= BEFORE_POSITIONS;
0461:                    if (!hasPositions)
0462:                        throw new UnsupportedOperationException("This index("
0463:                                + index + ") does not contain positions");
0464:                    if (state < BEFORE_POSITIONS) {
0465:                        if (state == BEFORE_TOWER)
0466:                            readTower();
0467:                        if (state == BEFORE_PAYLOAD)
0468:                            payload.read(ibs);
0469:                        if (state == BEFORE_COUNT) {
0470:                            if (ASSERTS && state != BEFORE_COUNT)
0471:                                throw new IllegalStateException();
0472:                            switch (countCoding) {
0473:                            case UNARY:
0474:                                count = ibs.readUnary() + 1;
0475:                                break;
0476:                            case SHIFTED_GAMMA:
0477:                                count = ibs.readShiftedGamma() + 1;
0478:                                break;
0479:                            case GAMMA:
0480:                                count = ibs.readGamma() + 1;
0481:                                break;
0482:                            case DELTA:
0483:                                count = ibs.readDelta() + 1;
0484:                                break;
0485:                            default:
0486:                                throw new IllegalStateException(
0487:                                        "The required count coding ("
0488:                                                + countCoding
0489:                                                + ") is not supported.");
0490:                            }
0491:                        }
0492:                    }
0493:                    if (count > positionCache.length)
0494:                        positionCache = new int[Math.max(
0495:                                positionCache.length * 2, count)];
0496:                    final int[] occ = positionCache;
0497:                    state = BEFORE_POINTER;
0498:                    switch (positionCoding) {
0499:                    case SHIFTED_GAMMA:
0500:                        ibs.readShiftedGammas(occ, count);
0501:                        for (int i = 1; i < count; i++)
0502:                            occ[i] += occ[i - 1] + 1;
0503:                        return;
0504:                    case GAMMA:
0505:                        ibs.readGammas(occ, count);
0506:                        for (int i = 1; i < count; i++)
0507:                            occ[i] += occ[i - 1] + 1;
0508:                        return;
0509:                    case DELTA:
0510:                        ibs.readDeltas(occ, count);
0511:                        for (int i = 1; i < count; i++)
0512:                            occ[i] += occ[i - 1] + 1;
0513:                        return;
0514:                    case GOLOMB:
0515:                        if (ASSERTS)
0516:                            assert index.sizes != null;
0517:                        int docSize = index.sizes.getInt(currentDocument);
0518:                        if (count < 3)
0519:                            for (int i = 0; i < count; i++)
0520:                                occ[i] = ibs.readMinimalBinary(docSize);
0521:                        else {
0522:                            final int bb = BitStreamIndex.golombModulus(count,
0523:                                    docSize);
0524:                            int prev = -1;
0525:                            if (bb != 0) {
0526:                                final int log2bb = Fast.mostSignificantBit(bb);
0527:                                for (int i = 0; i < count; i++)
0528:                                    occ[i] = prev = ibs.readGolomb(bb, log2bb)
0529:                                            + prev + 1;
0530:                            } else
0531:                                for (int i = 0; i < count; i++)
0532:                                    occ[i] = i;
0533:                        }
0534:                        return;
0535:                    case SKEWED_GOLOMB:
0536:                        if (ASSERTS)
0537:                            assert index.sizes != null;
0538:                        int docSize2 = index.sizes.getInt(currentDocument);
0539:                        if (count < 3)
0540:                            for (int i = 0; i < count; i++)
0541:                                occ[i] = ibs.readMinimalBinary(docSize2);
0542:                        else {
0543:                            final int sb = ibs.readMinimalBinary(docSize2) + 1;
0544:                            int prev2 = -1;
0545:                            for (int i = 0; i < count; i++)
0546:                                occ[i] = prev2 = ibs.readSkewedGolomb(sb)
0547:                                        + prev2 + 1;
0548:                        }
0549:                        return;
0550:                    case INTERPOLATIVE:
0551:                        it.unimi.dsi.mg4j.io.InterpolativeCoding.read(ibs, occ,
0552:                                0, count, 0, index.sizes
0553:                                        .getInt(currentDocument) - 1);
0554:                        return;
0555:                    default:
0556:                        throw new IllegalStateException(
0557:                                "The required position coding ("
0558:                                        + index.positionCoding
0559:                                        + ") is not supported.");
0560:                    }
0561:                }
0562:
0563:                public IntIterator positions() throws IOException {
0564:                    if (ASSERTS)
0565:                        ensureCurrentDocument();
0566:                    if (state <= BEFORE_POSITIONS)
0567:                        updatePositionCache();
0568:                    return IntIterators.wrap(positionCache, 0, count);
0569:                }
0570:
0571:                public int[] positionArray() throws IOException {
0572:                    if (ASSERTS)
0573:                        ensureCurrentDocument();
0574:                    if (state <= BEFORE_POSITIONS)
0575:                        updatePositionCache();
0576:                    return positionCache;
0577:                }
0578:
0579:                // TODO: check who's using this (positionArray() is actually faster now)
0580:                public int positions(final int[] position) throws IOException {
0581:                    if (ASSERTS)
0582:                        ensureCurrentDocument();
0583:                    if (state <= BEFORE_POSITIONS)
0584:                        updatePositionCache(); // And also that positions have been read
0585:                    if (position.length < count)
0586:                        return -count;
0587:                    for (int i = count; i-- != 0;)
0588:                        position[i] = this .positionCache[i];
0589:                    return count;
0590:                }
0591:
0592:                public int nextDocument() throws IOException {
0593:                    if (DEBUG)
0594:                        System.err.println("{" + this  + "} nextDocument()");
0595:                    if (state != BEFORE_POINTER) {
0596:                        if (state == BEFORE_TOWER)
0597:                            readTower();
0598:                        if (state == BEFORE_PAYLOAD)
0599:                            payload.read(ibs);
0600:                        if (state == BEFORE_COUNT) {
0601:                            if (ASSERTS && state != BEFORE_COUNT)
0602:                                throw new IllegalStateException();
0603:                            state = hasPositions ? BEFORE_POSITIONS
0604:                                    : BEFORE_POINTER;
0605:                            switch (countCoding) {
0606:                            case UNARY:
0607:                                count = ibs.readUnary() + 1;
0608:                                break;
0609:                            case SHIFTED_GAMMA:
0610:                                count = ibs.readShiftedGamma() + 1;
0611:                                break;
0612:                            case GAMMA:
0613:                                count = ibs.readGamma() + 1;
0614:                                break;
0615:                            case DELTA:
0616:                                count = ibs.readDelta() + 1;
0617:                                break;
0618:                            default:
0619:                                throw new IllegalStateException(
0620:                                        "The required count coding ("
0621:                                                + countCoding
0622:                                                + ") is not supported.");
0623:                            }
0624:                        }
0625:                        if (state == BEFORE_POSITIONS) {
0626:                            // Here we just skip; note that the state change is necessary if endOfList() is true
0627:                            state = BEFORE_POINTER;
0628:                            switch (positionCoding) {
0629:                            case SHIFTED_GAMMA:
0630:                                ibs.skipShiftedGammas(count);
0631:                                break;
0632:                            case GAMMA:
0633:                                ibs.skipGammas(count);
0634:                                break;
0635:                            case DELTA:
0636:                                ibs.skipDeltas(count);
0637:                                break;
0638:                            case GOLOMB:
0639:                                if (ASSERTS)
0640:                                    assert index.sizes != null;
0641:                                int docSize = index.sizes
0642:                                        .getInt(currentDocument);
0643:                                if (count < 3)
0644:                                    for (int i = 0; i < count; i++)
0645:                                        ibs.readMinimalBinary(docSize);
0646:                                else {
0647:                                    final int bb = BitStreamIndex
0648:                                            .golombModulus(count, docSize);
0649:                                    if (bb != 0) {
0650:                                        final int log2bb = Fast
0651:                                                .mostSignificantBit(bb);
0652:                                        for (int i = 0; i < count; i++)
0653:                                            ibs.readGolomb(bb, log2bb);
0654:                                    }
0655:                                }
0656:                                break;
0657:                            case SKEWED_GOLOMB:
0658:                                if (ASSERTS)
0659:                                    assert index.sizes != null;
0660:                                docSize = index.sizes.getInt(currentDocument);
0661:                                if (count < 3)
0662:                                    for (int i = 0; i < count; i++)
0663:                                        ibs.readMinimalBinary(docSize);
0664:                                else {
0665:                                    final int sb = ibs
0666:                                            .readMinimalBinary(docSize) + 1;
0667:                                    for (int i = 0; i < count; i++)
0668:                                        ibs.readSkewedGolomb(sb);
0669:                                }
0670:                                break;
0671:                            case INTERPOLATIVE:
0672:                                it.unimi.dsi.mg4j.io.InterpolativeCoding.read(
0673:                                        ibs, null, 0, count, 0, index.sizes
0674:                                                .getInt(currentDocument) - 1);
0675:                                break;
0676:                            default:
0677:                                throw new IllegalStateException(
0678:                                        "The required position coding ("
0679:                                                + positionCoding
0680:                                                + ") is not supported.");
0681:                            }
0682:                        }
0683:                    }
0684:                    if (endOfList())
0685:                        return -1;
0686:                    if (hasPointers) {// We do not write pointers for everywhere occurring terms.
0687:                        switch (pointerCoding) {
0688:                        case SHIFTED_GAMMA:
0689:                            currentDocument += ibs.readShiftedGamma() + 1;
0690:                            break;
0691:                        case GAMMA:
0692:                            currentDocument += ibs.readGamma() + 1;
0693:                            break;
0694:                        case DELTA:
0695:                            currentDocument += ibs.readDelta() + 1;
0696:                            break;
0697:                        case GOLOMB:
0698:                            currentDocument += ibs.readGolomb(b, log2b) + 1;
0699:                            break;
0700:                        default:
0701:                            throw new IllegalStateException(
0702:                                    "The required pointer coding ("
0703:                                            + pointerCoding
0704:                                            + ") is not supported.");
0705:                        }
0706:                    } else
0707:                        currentDocument++;
0708:                    numberOfDocumentRecord++;
0709:                    if (hasPayloads)
0710:                        state = BEFORE_PAYLOAD;
0711:                    else if (hasCounts)
0712:                        state = BEFORE_COUNT;
0713:                    count = -1;
0714:                    if (hasSkips
0715:                            && (numberOfDocumentRecord & quantumModuloMask) == 0)
0716:                        state = BEFORE_TOWER;
0717:                    return currentDocument;
0718:                }
0719:
0720:                /** Reads the entire skip tower for the current position. 
0721:                 */
0722:                private void readTower() throws IOException {
0723:                    readTower(-1);
0724:                }
0725:
0726:                /** Reads the skip tower for the current position, possibly skipping part of the tower. 
0727:                 * 
0728:                 * <P>Note that this method will update {@link #state} only if it reads the entire tower,
0729:                 * otherwise the state remains {@link #BEFORE_TOWER}.
0730:                 * 
0731:                 * @param pointer the tower will be read up to the first entry smaller than or equal to this pointer; use
0732:                 * -1 to guarantee that the entire tower will be read.
0733:                 */
0734:                private void readTower(final int pointer) throws IOException {
0735:                    int i, j, k, cacheOffset, cache, towerLength = 0;
0736:                    long bitsAtTowerStart = 0;
0737:                    boolean truncated = false;
0738:                    if (ASSERTS)
0739:                        assert numberOfDocumentRecord % quantum == 0;
0740:                    if (ASSERTS && state != BEFORE_TOWER)
0741:                        throw new IllegalStateException(
0742:                                "readTower() called in state " + state);
0743:                    cacheOffset = (numberOfDocumentRecord & wModuloMask);
0744:                    k = cacheOffset >> quantumDivisionShift;
0745:                    if (ASSERTS && k == 0) { // Invalidate current tower data
0746:                        it.unimi.dsi.fastutil.ints.IntArrays.fill(pointerSkip,
0747:                                Integer.MAX_VALUE);
0748:                        it.unimi.dsi.fastutil.longs.LongArrays.fill(bitSkip,
0749:                                Integer.MAX_VALUE);
0750:                    }
0751:                    // Compute the height of the current skip tower.
0752:                    s = (k == 0) ? height : Fast.leastSignificantBit(k);
0753:                    cache = frequency - w
0754:                            * (numberOfDocumentRecord >> wDivisionShift);
0755:                    if (cache < w) {
0756:                        maxh = Fast
0757:                                .mostSignificantBit((cache >> quantumDivisionShift)
0758:                                        - k);
0759:                        if (maxh < s) {
0760:                            s = maxh;
0761:                            truncated = true;
0762:                        } else
0763:                            truncated = false;
0764:                    } else {
0765:                        cache = w;
0766:                        maxh = height;
0767:                        truncated = k == 0;
0768:                    }
0769:                    //assert w == cache || k == 0 || lastMaxh == Fast.mostSignificantBit( k ^ ( cache/quantum ) )  : lastMaxh +","+ (Fast.mostSignificantBit( k ^ ( cache/quantum ) ));
0770:                    i = s;
0771:                    if (s >= 0) {
0772:                        if (k == 0) {
0773:                            if (quantumBitLength < 0) {
0774:                                quantumBitLength = ibs.readDelta();
0775:                                entryBitLength = ibs.readDelta();
0776:                            } else {
0777:                                quantumBitLength += Fast.nat2int(ibs
0778:                                        .readDelta());
0779:                                entryBitLength += Fast.nat2int(ibs.readDelta());
0780:                            }
0781:                            if (DEBUG)
0782:                                System.err
0783:                                        .println("{" + this 
0784:                                                + "} quantum bit length="
0785:                                                + quantumBitLength
0786:                                                + " entry bit length="
0787:                                                + entryBitLength);
0788:                        }
0789:                        if (DEBUG)
0790:                            System.err.println("{" + this 
0791:                                    + "} Reading tower; pointer=" + pointer
0792:                                    + " maxh=" + maxh + " s=" + s);
0793:                        if (s > 0) {
0794:                            towerLength = entryBitLength * (s + 1)
0795:                                    + Fast.nat2int(ibs.readDelta());
0796:                            if (DEBUG)
0797:                                System.err.println("{" + this 
0798:                                        + "} Tower length=" + towerLength);
0799:                        }
0800:                        // We store the number of bits read at the start of the tower (just after the length).
0801:                        bitsAtTowerStart = ibs.readBits();
0802:                        if (truncated) {
0803:                            if (DEBUG)
0804:                                System.err.println("{" + this 
0805:                                        + "} Truncated--reading tops");
0806:                            // We read the tower top.
0807:                            pointerSkip[s] = Fast.nat2int(ibs.readGolomb(
0808:                                    towerTopB[s], towerTopLog2B[s]))
0809:                                    + pointerPrediction[s];
0810:                            bitSkip[s] = quantumBitLength * (1 << s)
0811:                                    + entryBitLength * ((1 << s + 1) - s - 2)
0812:                                    + Fast.nat2int(ibs.readLongDelta());
0813:                        } else {
0814:                            // We copy the tower top from the lowest inherited entry suitably updated.
0815:                            pointerSkip[s] = pointerSkip[s + 1]
0816:                                    - (currentDocument - pointerAtLastSkipTower);
0817:                            bitSkip[s] = bitSkip[s + 1]
0818:                                    - (bitsAtTowerStart - readBitsAtLastSkipTower)
0819:                                    - towerLength;
0820:                        }
0821:                        // We read the remaining part of the tower, at least until we point after pointer.
0822:                        if (currentDocument + pointerSkip[i] > pointer) {
0823:                            for (i = s - 1; i >= 0; i--) {
0824:                                pointerSkip[i] = Fast.nat2int(ibs.readGolomb(
0825:                                        towerLowerB[i], towerLowerLog2B[i]))
0826:                                        + pointerSkip[i + 1] / 2;
0827:                                bitSkip[i] = (bitSkip[i + 1] - entryBitLength
0828:                                        * (i + 1))
0829:                                        / 2 - Fast.nat2int(ibs.readLongDelta());
0830:                                if (DEBUG
0831:                                        && currentDocument + pointerSkip[i] <= pointer)
0832:                                    System.err
0833:                                            .println("{"
0834:                                                    + this 
0835:                                                    + "} stopping reading at i="
0836:                                                    + i
0837:                                                    + " as currentDocument ("
0838:                                                    + currentDocument
0839:                                                    + ") plus pointer skip ("
0840:                                                    + pointerSkip[i]
0841:                                                    + ") is smaller than or equal target ("
0842:                                                    + pointer + ")");
0843:                                if (currentDocument + pointerSkip[i] <= pointer)
0844:                                    break;
0845:                            }
0846:                        }
0847:                    }
0848:                    /* If we did not read the entire tower, we need to fix the skips we read (as they
0849:                     * are offsets from the *end* of the tower) and moreover we must make unusable the
0850:                     * rest of the tower (for asserts). */
0851:                    if (i > 0) {
0852:                        final long fix = ibs.readBits() - bitsAtTowerStart;
0853:                        for (j = s; j >= i; j--)
0854:                            bitSkip[j] += towerLength - fix;
0855:                        if (ASSERTS)
0856:                            for (; j >= 0; j--)
0857:                                pointerSkip[j] = Integer.MAX_VALUE;
0858:                    } else
0859:                        state = hasPayloads ? BEFORE_PAYLOAD
0860:                                : hasCounts ? BEFORE_COUNT : BEFORE_POINTER;
0861:                    // We update the inherited tower.
0862:                    final long deltaBits = ibs.readBits()
0863:                            - readBitsAtLastSkipTower;
0864:                    final int deltaPointers = currentDocument
0865:                            - pointerAtLastSkipTower;
0866:                    for (j = Fast.mostSignificantBit(k
0867:                            ^ (cache >> quantumDivisionShift)); j >= s + 1; j--) {
0868:                        bitSkip[j] -= deltaBits;
0869:                        pointerSkip[j] -= deltaPointers;
0870:                    }
0871:                    readBitsAtLastSkipTower = ibs.readBits();
0872:                    pointerAtLastSkipTower = currentDocument;
0873:                    lowest = i < 0 ? 0 : i;
0874:                    if (DEBUG) {
0875:                        System.err.println("{" + this  + "} "
0876:                                + "Computed skip tower (lowest: " + lowest
0877:                                + ") for document record number "
0878:                                + numberOfDocumentRecord + " (pointer "
0879:                                + currentDocument + ") from " + Math.max(i, 0)
0880:                                + ": ");
0881:                        System.err.print("% ");
0882:                        for (j = 0; j <= s; j++)
0883:                            System.err.print(pointerSkip[j] + ":" + bitSkip[j]
0884:                                    + " ");
0885:                        System.err.print(" [");
0886:                        for (; j <= height; j++)
0887:                            System.err.print(pointerSkip[j] + ":" + bitSkip[j]
0888:                                    + " ");
0889:                        System.err.print("]");
0890:                        System.err.println();
0891:                    }
0892:                }
0893:
0894:                /*
0895:                	public int skip( final int n ) throws IOException {
0896:                		int i, k, cacheOffset, start = numberOfDocumentRecord, skip = 0;
0897:
0898:                		if ( DEBUG ) System.err.println( "{" + this + "} " + "Going to enter linear skip code with lastDoc=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", n=" + n + ", endOfList()=" + endOfList() );
0899:                		if ( n < 0 ) throw new IllegalArgumentException();
0900:                		if ( n == 0 ) return 0;
0901:                		
0902:                		// If we are just at the start of a list, let us read the first pointer.
0903:                		if ( numberOfDocumentRecord == -1 ) readDocumentPointer();
0904:                		if ( state == BEFORE_TOWER ) readTower( -1 ); 
0905:
0906:                		if ( DEBUG ) System.err.println( "{" + this + "} " + "Entering skip code with lastDoc=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", n=" + n + ", endOfList()=" + endOfList() );
0907:
0908:                		for(;;) {
0909:                			if ( DEBUG ) System.err.println( "{" + this + "} " + "In for loop, lastDoc=" + currentDocument + ", maxh=" + maxh + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", n=" + n );
0910:
0911:                			cacheOffset = ( numberOfDocumentRecord & wModuloMask );
0912:                			k = cacheOffset >> quantumDivisionShift;
0913:
0914:                			if ( maxh < 0 ) break; // Defective quantum--no tower.
0915:
0916:                			for( i = Fast.mostSignificantBit( k ^ ( Math.min( w, frequency - numberOfDocumentRecord + cacheOffset ) >> quantumDivisionShift ) ); i >= 0; i-- )
0917:                				if ( ( skip = ( ( k & - ( 1 << i ) ) + ( 1 << i ) ) * quantum - cacheOffset ) <= n ) break;
0918:                			
0919:                			if ( i >= 0 ) {
0920:                				ibs.skip( bitSkip[ i ] - ( ibs.readBits() - readBitsAtLastSkipTower ) );
0921:                				state = BEFORE_TOWER;
0922:                				currentDocument = pointerSkip[ i ] + pointerAtLastSkipTower;
0923:                				numberOfDocumentRecord += skip;
0924:                				// If we skipped beyond the end of the list, we invalidate the current document.
0925:                				if ( numberOfDocumentRecord == frequency ) currentDocument = -1;
0926:                				readTower( -1 );
0927:                				count = -1; // We must invalidate count as readDocumentPointer() would do.
0928:                				if ( endOfList() ) return numberOfDocumentRecord - start;
0929:                			}
0930:                			else break;
0931:                		}
0932:
0933:                		if ( DEBUG ) System.err.println( "{" + this + "} " + "Completing sequentially, lastDoc=" + currentDocument + ", numberOfDocumentRecord=" + numberOfDocumentRecord + ", n=" + n );
0934:
0935:                		while( numberOfDocumentRecord - start < n ) {
0936:                			if ( endOfList() ) break;
0937:                			readDocumentPointer();
0938:                		}
0939:                		return numberOfDocumentRecord - start;
0940:                	}
0941:                 */
0942:                public int skipTo(final int p) throws IOException {
0943:                    if (DEBUG)
0944:                        System.err.println(this  + ".skipTo(" + p
0945:                                + ") [currentDocument=" + currentDocument
0946:                                + ", numberOfDocumentRecord="
0947:                                + numberOfDocumentRecord + ", endOfList()="
0948:                                + endOfList());
0949:                    // If we are just at the start of a list, let us read the first pointer.
0950:                    if (numberOfDocumentRecord == -1)
0951:                        nextDocument(); // TODO: shouldn't we just read the tower?
0952:                    if (currentDocument >= p) {
0953:                        if (DEBUG)
0954:                            System.err.println(this 
0955:                                    + ": No skip necessary, returning "
0956:                                    + currentDocument);
0957:                        return currentDocument;
0958:                    }
0959:                    if (hasSkips) {
0960:                        if (state == BEFORE_TOWER)
0961:                            readTower(p);
0962:                        final int[] pointerSkip = this .pointerSkip;
0963:                        for (;;) {
0964:                            if (ASSERTS)
0965:                                assert maxh < 0 || lowest > 0
0966:                                        || pointerSkip[0] != Integer.MAX_VALUE;
0967:                            // If on a defective quantum (no tower) or p is inside the current quantum (no need to scan the tower) we bail out. 
0968:                            if (maxh < 0
0969:                                    || lowest == 0
0970:                                    && pointerAtLastSkipTower + pointerSkip[0] > p)
0971:                                break;
0972:                            if (DEBUG)
0973:                                System.err.println(this 
0974:                                        + ": In for loop, currentDocument="
0975:                                        + currentDocument + ", maxh=" + maxh
0976:                                        + ", numberOfDocumentRecord="
0977:                                        + numberOfDocumentRecord + ", p=" + p);
0978:                            final int cacheOffset = (numberOfDocumentRecord & wModuloMask);
0979:                            final int k = cacheOffset >> quantumDivisionShift;
0980:                            final int top = Fast
0981:                                    .mostSignificantBit(k
0982:                                            ^ (Math.min(w, frequency
0983:                                                    - numberOfDocumentRecord
0984:                                                    + cacheOffset) >> quantumDivisionShift));
0985:                            int i;
0986:                            for (i = lowest; i <= top; i++) {
0987:                                if (ASSERTS && (k & 1 << i) != 0)
0988:                                    assert pointerSkip[i] == pointerSkip[i + 1];
0989:                                if (ASSERTS)
0990:                                    assert pointerSkip[i] != Integer.MAX_VALUE : "Invalid pointer skip "
0991:                                            + i
0992:                                            + " (lowest="
0993:                                            + lowest
0994:                                            + ", top=" + top + ")";
0995:                                if (pointerAtLastSkipTower + pointerSkip[i] > p)
0996:                                    break;
0997:                            }
0998:                            if (--i < 0)
0999:                                break;
1000:                            if (ASSERTS)
1001:                                assert i >= lowest : i + " < " + lowest;
1002:                            if (DEBUG)
1003:                                System.err.println(this 
1004:                                        + ": Safely after for with i=" + i
1005:                                        + ", P[i]=" + pointerSkip[i]
1006:                                        + ", A[i]=" + bitSkip[i]);
1007:                            if (DEBUG)
1008:                                System.err
1009:                                        .println(this 
1010:                                                + ": ["
1011:                                                + ibs.readBits()
1012:                                                + "] Skipping "
1013:                                                + (bitSkip[i] - (ibs.readBits() - readBitsAtLastSkipTower))
1014:                                                + " bits ("
1015:                                                + (((k & -(1 << i)) + (1 << i))
1016:                                                        * quantum - cacheOffset)
1017:                                                + " records) to get to document pointer "
1018:                                                + (currentDocument + pointerSkip[i]));
1019:                            ibs
1020:                                    .skip(bitSkip[i]
1021:                                            - (ibs.readBits() - readBitsAtLastSkipTower));
1022:                            state = BEFORE_TOWER;
1023:                            currentDocument = pointerSkip[i]
1024:                                    + pointerAtLastSkipTower;
1025:                            numberOfDocumentRecord += ((k & -(1 << i)) + (1 << i))
1026:                                    * quantum - cacheOffset;
1027:                            // If we skipped beyond the end of the list, we invalidate the current document.
1028:                            if (numberOfDocumentRecord == frequency) {
1029:                                currentDocument = Integer.MAX_VALUE;
1030:                                state = BEFORE_POINTER; // We are actually before a frequency, but we must avoid that calls to nextDocument() read anything
1031:                            } else
1032:                                readTower(p); // Note that if we are exactly on the destination pointer, we will read the entire tower.
1033:                            count = -1; // We must invalidate count as readDocumentPointer() would do.
1034:                            if (endOfList()) {
1035:                                if (DEBUG)
1036:                                    System.err
1037:                                            .println(this 
1038:                                                    + ".toSkip(): end-of-list, returning "
1039:                                                    + currentDocument);
1040:                                // Note that if p == Integer.MAX_VALUE, we are certainly beyond end-of-list
1041:                                return p == Integer.MAX_VALUE ? Integer.MAX_VALUE
1042:                                        : currentDocument;
1043:                            }
1044:                        }
1045:                        if (DEBUG)
1046:                            System.err
1047:                                    .println(this 
1048:                                            + ": Completing sequentially, currentDocument="
1049:                                            + currentDocument
1050:                                            + ", numberOfDocumentRecord="
1051:                                            + numberOfDocumentRecord + ", p="
1052:                                            + p);
1053:                    }
1054:                    while (currentDocument < p) {
1055:                        if (DEBUG)
1056:                            System.err
1057:                                    .println(this 
1058:                                            + ": Skipping sequentially (second), currentDocument="
1059:                                            + currentDocument
1060:                                            + ", numberOfDocumentRecord="
1061:                                            + numberOfDocumentRecord + ", p="
1062:                                            + p);
1063:                        if (nextDocument() == -1) {
1064:                            if (DEBUG)
1065:                                System.err.println(this 
1066:                                        + ": end-of-list, returning MAX_VALUE");
1067:                            return Integer.MAX_VALUE;
1068:                        }
1069:                    }
1070:                    if (DEBUG)
1071:                        System.err.println(this  + ".toSkip(): Returning "
1072:                                + currentDocument);
1073:                    return currentDocument;
1074:                }
1075:
1076:                public void dispose() throws IOException {
1077:                    parent.close();
1078:                }
1079:
1080:                public boolean hasNext() {
1081:                    return !endOfList();
1082:                }
1083:
1084:                public int nextInt() {
1085:                    if (!hasNext())
1086:                        throw new NoSuchElementException();
1087:                    try {
1088:                        return nextDocument();
1089:                    } catch (IOException e) {
1090:                        throw new RuntimeException(e);
1091:                    }
1092:                }
1093:
1094:                public String toString() {
1095:                    return index + " [" + currentTerm + "]";
1096:                }
1097:
1098:                /** An interval iterator returning the positions of the current document as singleton intervals. */
1099:                private final class IndexIntervalIterator extends
1100:                        AbstractObjectIterator<Interval> implements 
1101:                        IntervalIterator {
1102:                    int pos = -1;
1103:
1104:                    public void reset() throws IOException {
1105:                        pos = -1;
1106:                        if (state <= BEFORE_POSITIONS)
1107:                            updatePositionCache(); // This guarantees the position cache is ok 
1108:                    }
1109:
1110:                    public void intervalTerms(final IntSet terms) {
1111:                        terms
1112:                                .add(BitStreamIndexReaderIndexIterator.this .currentTerm);
1113:                    }
1114:
1115:                    public boolean hasNext() {
1116:                        return pos < count - 1;
1117:                    }
1118:
1119:                    public Interval next() {
1120:                        if (!hasNext())
1121:                            throw new NoSuchElementException();
1122:                        return Interval.valueOf(positionCache[++pos]);
1123:                    }
1124:
1125:                    public Interval nextInterval() {
1126:                        return pos < count - 1 ? Interval
1127:                                .valueOf(positionCache[++pos]) : null;
1128:                    }
1129:
1130:                    public int extent() {
1131:                        return 1;
1132:                    }
1133:
1134:                    public String toString() {
1135:                        return index + ": " + term + "[doc=" + currentDocument
1136:                                + ", count=" + count + ", pos=" + pos + "]";
1137:                    }
1138:                };
1139:
1140:                public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators()
1141:                        throws IOException {
1142:                    intervalIterator();
1143:                    return singletonIntervalIterator;
1144:                }
1145:
1146:                public IntervalIterator intervalIterator() throws IOException {
1147:                    return intervalIterator(keyIndex);
1148:                }
1149:
1150:                public IntervalIterator intervalIterator(final Index index)
1151:                        throws IOException {
1152:                    if (ASSERTS)
1153:                        ensureCurrentDocument();
1154:                    // TODO: this was if ( index != keyIndex || hasPayloads ) 
1155:                    if (index != keyIndex)
1156:                        return IntervalIterators.TRUE;
1157:                    if (ASSERTS)
1158:                        assert intervalIterator != null;
1159:                    intervalIterator.reset();
1160:                    return intervalIterator;
1161:                }
1162:
1163:                public ReferenceSet<Index> indices() {
1164:                    return index.singletonSet;
1165:                }
1166:            }
1167:
1168:            private IndexIterator documents(final CharSequence term,
1169:                    final int termNumber) throws IOException {
1170:                indexIterator.term(term);
1171:                indexIterator.position(termNumber);
1172:                return indexIterator;
1173:            }
1174:
1175:            public IndexIterator documents(final int term) throws IOException {
1176:                return documents(null, term);
1177:            }
1178:
1179:            public IndexIterator documents(final CharSequence term)
1180:                    throws IOException {
1181:                if (closed)
1182:                    throw new IllegalStateException("This "
1183:                            + getClass().getSimpleName() + " has been closed");
1184:                if (index.termMap != null) {
1185:                    final int termIndex = (int) index.termMap.getLong(term);
1186:                    if (termIndex == -1)
1187:                        return index.emptyIndexIterator;
1188:                    return documents(term, termIndex);
1189:                }
1190:                throw new UnsupportedOperationException("Index " + index
1191:                        + " has no term map");
1192:            }
1193:
1194:            @Override
1195:            public IndexIterator nextIterator() throws IOException {
1196:                return indexIterator.advance();
1197:            }
1198:
1199:            public String toString() {
1200:                return getClass().getSimpleName() + "[" + index + "]";
1201:            }
1202:
1203:            public void close() throws IOException {
1204:                super.close();
1205:                indexIterator.ibs.close();
1206:            }
1207:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.