Source Code Cross Referenced for BitStreamHPIndexReader.java in  » Search-Engine » mg4j » it » unimi » dsi » mg4j » index » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Search Engine » mg4j » it.unimi.dsi.mg4j.index 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        package it.unimi.dsi.mg4j.index;
0002:
0003:        /*		 
0004:         * MG4J: Managing Gigabytes for Java
0005:         *
0006:         * Copyright (C) 2007 Paolo Boldi and Sebastiano Vigna 
0007:         *
0008:         *  This library is free software; you can redistribute it and/or modify it
0009:         *  under the terms of the GNU Lesser General Public License as published by the Free
0010:         *  Software Foundation; either version 2.1 of the License, or (at your option)
0011:         *  any later version.
0012:         *
0013:         *  This library is distributed in the hope that it will be useful, but
0014:         *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
0015:         *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
0016:         *  for more details.
0017:         *
0018:         *  You should have received a copy of the GNU Lesser General Public License
0019:         *  along with this program; if not, write to the Free Software
0020:         *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
0021:         *
0022:         */
0023:
0024:        import it.unimi.dsi.fastutil.ints.IntIterator;
0025:        import it.unimi.dsi.fastutil.ints.IntIterators;
0026:        import it.unimi.dsi.fastutil.ints.IntSet;
0027:        import it.unimi.dsi.fastutil.objects.AbstractObjectIterator;
0028:        import it.unimi.dsi.fastutil.objects.Reference2ReferenceMap;
0029:        import it.unimi.dsi.fastutil.objects.Reference2ReferenceMaps;
0030:        import it.unimi.dsi.fastutil.objects.ReferenceSet;
0031:        import it.unimi.dsi.mg4j.index.AbstractIndexIterator;
0032:        import it.unimi.dsi.mg4j.index.AbstractIndexReader;
0033:        import it.unimi.dsi.mg4j.index.BitStreamHPIndex;
0034:        import it.unimi.dsi.mg4j.index.Index;
0035:        import it.unimi.dsi.mg4j.index.IndexIterator;
0036:        import it.unimi.dsi.mg4j.index.CompressionFlags.Coding;
0037:        import it.unimi.dsi.mg4j.index.payload.Payload;
0038:        import it.unimi.dsi.io.InputBitStream;
0039:        import it.unimi.dsi.util.Interval;
0040:        import it.unimi.dsi.mg4j.search.IntervalIterator;
0041:        import it.unimi.dsi.mg4j.search.IntervalIterators;
0042:        import it.unimi.dsi.bits.Fast;
0043:        import it.unimi.dsi.Util;
0044:
0045:        import java.io.IOException;
0046:        import java.util.NoSuchElementException;
0047:
0048:        import org.apache.log4j.Logger;
0049:
0050:        /** A bitstream-based {@linkplain IndexReader index reader} for {@linkplain BitStreamHPIndex high-performance indices}. */
0051:
0052:        public class BitStreamHPIndexReader extends AbstractIndexReader {
0053:            @SuppressWarnings("unused")
0054:            private static final Logger LOGGER = Util
0055:                    .getLogger(BitStreamHPIndexReader.class);
0056:
0057:            private final static boolean ASSERTS = false;
0058:            private final static boolean DEBUG = false;
0059:            private final static boolean COOKIES = false;
0060:
0061:            /** The reference index. */
0062:            protected final BitStreamHPIndex index;
0063:            /** The {@link IndexIterator} view of this reader (returned by {@link #documents(CharSequence)}). */
0064:            protected final BitStreamHPIndexReaderIndexIterator indexIterator;
0065:
0066:            /**
0067:             * Creates a new skip index reader, with the specified underlying {@link Index} and input bit
0068:             * stream.
0069:             * 
0070:             * @param index the index.
0071:             * @param ibs the underlying bit stream.
0072:             */
0073:            public BitStreamHPIndexReader(final BitStreamHPIndex index,
0074:                    final InputBitStream ibs, final InputBitStream positions) {
0075:                this .index = index;
0076:                this .indexIterator = new BitStreamHPIndexReaderIndexIterator(
0077:                        this , ibs, positions);
0078:            }
0079:
0080:            protected static final class BitStreamHPIndexReaderIndexIterator
0081:                    extends AbstractIndexIterator implements  IndexIterator {
0082:                /** The enclosing instance. */
0083:                private final BitStreamHPIndexReader parent;
0084:
0085:                /** The reference index. */
0086:                protected final BitStreamHPIndex index;
0087:
0088:                /** The underlying input bit stream. */
0089:                protected final InputBitStream ibs;
0090:
0091:                /** The underlying positions input bit stream. */
0092:                private final InputBitStream positions;
0093:
0094:                /** The enclosed interval iterator. */
0095:                private final IndexIntervalIterator intervalIterator;
0096:
0097:                /** A singleton set containing the enclosed interval iterator. */
0098:                private final Reference2ReferenceMap<Index, IntervalIterator> singletonIntervalIterator;
0099:
0100:                /** The key index. */
0101:                private final Index keyIndex;
0102:
0103:                /** The cached copy of {@link #index index.pointerCoding}. */
0104:                protected final Coding pointerCoding;
0105:
0106:                /** The cached copy of {@link #index index.countCoding}. */
0107:                protected final Coding countCoding;
0108:
0109:                /** The cached copy of {@link #index index.positionCoding}. */
0110:                protected final Coding positionCoding;
0111:
0112:                /** The parameter <code>b</code> for Golomb coding of pointers. */
0113:                protected int b;
0114:
0115:                /**
0116:                 * The parameter <code>log2b</code> for Golomb coding of pointers; it is the most
0117:                 * significant bit of {@link #b}.
0118:                 */
0119:                protected int log2b;
0120:
0121:                /** The current term. */
0122:                @SuppressWarnings("hiding")
0123:                protected int term = -1;
0124:
0125:                /** The current frequency. */
0126:                protected int frequency;
0127:
0128:                /**
0129:                 * Whether the current terms has pointers at all (this happens when the {@link #frequency}
0130:                 * is smaller than the number of documents).
0131:                 */
0132:                protected boolean hasPointers;
0133:
0134:                /** The current count (if this index contains counts). */
0135:                protected int count;
0136:
0137:                /**
0138:                 * The last document pointer we read from current list, -1 if we just read the frequency,
0139:                 * {@link Integer#MAX_VALUE} if we are beyond the end of list.
0140:                 */
0141:                protected int currentDocument;
0142:
0143:                /** The number of the document record we are going to read inside the current inverted list. */
0144:                protected int numberOfDocumentRecord;
0145:
0146:                /** This variable tracks the current state of the reader. */
0147:                protected int state;
0148:
0149:                /** The parameter <code>h</code> (the maximum height of a skip tower). */
0150:                public final int height;
0151:
0152:                /** The quantum. */
0153:                public final int quantum;
0154:
0155:                /** The bit mask giving the remainder of the division by {@link #quantum}. */
0156:                public final int quantumModuloMask;
0157:
0158:                /** The shift giving result of the division by {@link #quantum}. */
0159:                public final int quantumDivisionShift;
0160:
0161:                /**
0162:                 * The maximum height of a skip tower in the current block. May be less than {@link #height}
0163:                 * if the block is defective, and will be -1 on defective quanta (no tower at all).
0164:                 */
0165:                private int maxh;
0166:
0167:                /** The maximum valid index of the current skip tower, if any. */
0168:                private int s;
0169:
0170:                /**
0171:                 * The minimum valid index of the current skip tower, or {@link Integer#MAX_VALUE}. If
0172:                 * {@link #maxh} is negative, the value is undefined.
0173:                 */
0174:                private int lowest;
0175:
0176:                /** We have <var>w</var> = <var>Hq</var>. */
0177:                private int w;
0178:
0179:                /** The bit mask giving the remainder of the division by {@link #w}. */
0180:                private final int wModuloMask;
0181:
0182:                /** The shift giving result of the division by {@link #w}. */
0183:                private final int wDivisionShift;
0184:
0185:                /** The Golomb modulus for a top pointer skip, for each level. */
0186:                private int[] towerTopB;
0187:
0188:                /** The most significant bit of the Golomb modulus for a top point[]er skip, for each level. */
0189:                private int[] towerTopLog2B;
0190:
0191:                /** The Golomb modulus for a lower pointer skip, for each level. */
0192:                private int[] towerLowerB;
0193:
0194:                /** The most significant bit of the Golomb modulus for a lower pointer skip, for each level. */
0195:                private int[] towerLowerLog2B;
0196:
0197:                /** The prediction for a pointer skip, for each level. */
0198:                private int[] pointerPrediction;
0199:
0200:                /** An array to decode bit skips. */
0201:                private long[] bitSkip;
0202:
0203:                /** An array to decode positions bit skips. */
0204:                private long[] positionsBitSkip;
0205:
0206:                /** An array to decode the pointer skips. */
0207:                private int[] pointerSkip;
0208:
0209:                /** The number of bits read just after reading the last skip tower. */
0210:                private long readBitsAtLastSkipTower;
0211:
0212:                /** The document pointer corresponding to the last skip tower. */
0213:                private int pointerAtLastSkipTower;
0214:
0215:                /** The number of positions that should be skipped manually
0216:                 * once the last read tower has been reached by skipping {@link #positionsToReadToReachCurrentPosition}. 
0217:                 */
0218:                private int positionsToReadToReachCurrentPosition;
0219:
0220:                /** The offset in the positions stream from the start of the current quantum
0221:                 * to the start of the last tower.
0222:                 */
0223:                private long positionsBitsOffset;
0224:
0225:                /** The last increment added to {@link #positionsBitSkip}. */
0226:                private long lastPositionsIncrement;
0227:
0228:                /** The current quantum bit length, as provided by the index. */
0229:                private int quantumBitLength;
0230:
0231:                /** The current positions quantum bit length, as provided by the index. */
0232:                private int positionsQuantumBitLength;
0233:
0234:                /** The current entry bit length, as provided by the index. */
0235:                private int entryBitLength;
0236:
0237:                /** This value of {@link #state} means that we are positioned just before a tower. */
0238:                private static final int BEFORE_TOWER = 0;
0239:
0240:                /**
0241:                 * This value of {@link #state} can be assumed only in indices that contain counts; it means
0242:                 * that we are positioned just before the count for the current document record.
0243:                 */
0244:                private static final int BEFORE_COUNT = 1;
0245:
0246:                /**
0247:                 * This value of {@link #state} means that we are at the start of a new document record,
0248:                 * unless we already read all documents (i.e., {@link #numberOfDocumentRecord} ==
0249:                 * {@link #frequency}), in which case we are at the end of the inverted list, and
0250:                 * {@link #endOfList()} is true.
0251:                 */
0252:                private static final int BEFORE_POINTER = 2;
0253:
0254:                /** Whether the positions for the current document pointer have not been fetched yet. */
0255:                private boolean positionsUnread;
0256:
0257:                /** The cached position array. */
0258:                protected int[] positionCache = new int[16];
0259:
0260:                /** The offset of the positions of the current {@link #term}. */
0261:                protected long lastPositionsOffset;
0262:
0263:                public BitStreamHPIndexReaderIndexIterator(
0264:                        final BitStreamHPIndexReader parent,
0265:                        final InputBitStream ibs, final InputBitStream positions) {
0266:                    this .parent = parent;
0267:                    this .ibs = ibs;
0268:                    this .positions = positions;
0269:                    index = parent.index;
0270:                    keyIndex = index.keyIndex;
0271:                    pointerCoding = index.pointerCoding;
0272:                    countCoding = index.countCoding;
0273:                    positionCoding = index.positionCoding;
0274:                    intervalIterator = index.hasPositions ? new IndexIntervalIterator()
0275:                            : null;
0276:                    singletonIntervalIterator = index.hasPositions ? Reference2ReferenceMaps
0277:                            .singleton(keyIndex,
0278:                                    (IntervalIterator) intervalIterator)
0279:                            : null;
0280:                    quantum = index.quantum;
0281:                    quantumModuloMask = quantum - 1;
0282:                    quantumDivisionShift = Fast.mostSignificantBit(quantum);
0283:                    height = index.height;
0284:                    w = (1 << height) * quantum;
0285:                    wModuloMask = w - 1;
0286:                    wDivisionShift = Fast.mostSignificantBit(w);
0287:                    bitSkip = new long[height + 1];
0288:                    positionsBitSkip = new long[height + 1];
0289:                    pointerSkip = new int[height + 1];
0290:                    towerTopB = new int[height + 1];
0291:                    towerTopLog2B = new int[height + 1];
0292:                    towerLowerB = new int[height + 1];
0293:                    towerLowerLog2B = new int[height + 1];
0294:                    pointerPrediction = new int[height + 1];
0295:                }
0296:
0297:                /** Debug variable, usually unused, that keeps track of the end of the positions stream for the current term (but not for term 0!). */
0298:                private long positionsLength;
0299:
0300:                /**
0301:                 * Positions the index on the inverted list of a given term.
0302:                 * 
0303:                 * <p>This method can be called at any time. Note that it is <em>always</em> possible to
0304:                 * call this method with argument 0, even if offsets have not been loaded.
0305:                 * 
0306:                 * @param term a term.
0307:                 */
0308:                protected void position(final int term) throws IOException {
0309:                    if (term == 0) {
0310:                        if (ASSERTS) {
0311:                            // Get end of positions
0312:                            ibs.position(index.offsets.getLong(1));
0313:                            positionsLength = ibs.readLongDelta();
0314:                        }
0315:
0316:                        ibs.position(0);
0317:                        ibs.readBits(0);
0318:                        lastPositionsOffset = ibs.readLongDelta(); // This is 0 for sure 
0319:                        positions.position(0);
0320:                    } else {
0321:                        if (index.offsets == null)
0322:                            throw new IllegalStateException(
0323:                                    "You cannot position an index without offsets");
0324:
0325:                        if (ASSERTS) {
0326:                            // Get end of positions
0327:                            if (term < index.numberOfTerms - 1) {
0328:                                ibs.position(index.offsets.getLong(term + 1));
0329:                                positionsLength = ibs.readLongDelta();
0330:                            } else
0331:                                positionsLength = Long.MAX_VALUE; // Presently, no way to check
0332:                        }
0333:
0334:                        ibs.position(index.offsets.getLong(term));
0335:                        ibs.readBits(index.offsets.getLong(term));
0336:                        // Let us position the positions bitstream
0337:                        lastPositionsOffset = ibs.readLongDelta();
0338:                        positions.position(lastPositionsOffset);
0339:                        if (ASSERTS && positionsLength != Long.MAX_VALUE)
0340:                            positionsLength -= lastPositionsOffset;
0341:                        if (DEBUG && ASSERTS)
0342:                            System.err.println(this  + ": positions for term "
0343:                                    + term + " start at bit "
0344:                                    + lastPositionsOffset + " ("
0345:                                    + positionsLength + " bits)");
0346:                    }
0347:                    positions.readBits(0);
0348:                    this .term = term;
0349:                    readFrequency();
0350:                }
0351:
0352:                public int termNumber() {
0353:                    return term;
0354:                }
0355:
0356:                protected IndexIterator advance() throws IOException {
0357:                    if (term == index.numberOfTerms - 1)
0358:                        return null;
0359:                    if (term != -1) {
0360:                        skipTo(Integer.MAX_VALUE);
0361:                        // This guarantees we have no garbage before the frequency
0362:                        nextDocument();
0363:                        positionsUnread = false;
0364:                    }
0365:                    // We must skip the offset into the positions bitstream
0366:                    final long nextPositionsOffset = ibs.readLongDelta();
0367:                    positions.skip(nextPositionsOffset - lastPositionsOffset
0368:                            - positions.readBits());
0369:                    lastPositionsOffset = nextPositionsOffset;
0370:                    positions.readBits(0);
0371:                    term++;
0372:                    if (ASSERTS)
0373:                        positionsLength = -1; // Invalidate
0374:                    readFrequency();
0375:                    return this ;
0376:                }
0377:
0378:                private void readFrequency() throws IOException {
0379:                    // Read the frequency
0380:
0381:                    switch (index.frequencyCoding) {
0382:                    case GAMMA:
0383:
0384:                        frequency = ibs.readGamma() + 1;
0385:
0386:                        break;
0387:                    case SHIFTED_GAMMA:
0388:
0389:                        frequency = ibs.readShiftedGamma() + 1;
0390:
0391:                        break;
0392:                    case DELTA:
0393:
0394:                        frequency = ibs.readDelta() + 1;
0395:
0396:                        break;
0397:                    default:
0398:                        throw new IllegalStateException(
0399:                                "The required frequency coding ("
0400:                                        + index.frequencyCoding
0401:                                        + ") is not supported.");
0402:                    }
0403:
0404:                    if (DEBUG)
0405:                        System.err.println(this  + ": Frequency for term "
0406:                                + term + " is " + frequency);
0407:
0408:                    hasPointers = frequency < index.numberOfDocuments;
0409:
0410:                    // We compute the modulus used for pointer Golomb coding
0411:                    if (pointerCoding == Coding.GOLOMB) {
0412:
0413:                        if (hasPointers) {
0414:                            b = BitStreamIndex.golombModulus(frequency,
0415:                                    index.numberOfDocuments);
0416:                            log2b = Fast.mostSignificantBit(b);
0417:                        }
0418:
0419:                    }
0420:
0421:                    quantumBitLength = positionsQuantumBitLength = entryBitLength = -1;
0422:                    lowest = Integer.MAX_VALUE;
0423:                    if (ASSERTS)
0424:                        for (int i = height; i > Math
0425:                                .min(
0426:                                        height,
0427:                                        Fast
0428:                                                .mostSignificantBit(frequency >> quantumDivisionShift)); i--)
0429:                            towerTopB[i] = towerLowerB[i] = pointerPrediction[i] = -1;
0430:                    final long pointerQuantumSigma = BitStreamIndex
0431:                            .quantumSigma(frequency, index.numberOfDocuments,
0432:                                    quantum);
0433:                    for (int i = Math
0434:                            .min(
0435:                                    height,
0436:                                    Fast
0437:                                            .mostSignificantBit(frequency >> quantumDivisionShift)); i >= 0; i--) {
0438:                        towerTopB[i] = BitStreamIndex.gaussianGolombModulus(
0439:                                pointerQuantumSigma, i + 1);
0440:                        towerTopLog2B[i] = Fast
0441:                                .mostSignificantBit(towerTopB[i]);
0442:                        towerLowerB[i] = BitStreamIndex.gaussianGolombModulus(
0443:                                pointerQuantumSigma, i);
0444:                        towerLowerLog2B[i] = Fast
0445:                                .mostSignificantBit(towerLowerB[i]);
0446:                        pointerPrediction[i] = (int) ((quantum * (1L << i)
0447:                                * index.numberOfDocuments + frequency / 2) / frequency);
0448:                    }
0449:                    count = -1;
0450:                    currentDocument = -1;
0451:                    numberOfDocumentRecord = -1;
0452:                    positionsBitsOffset = 0;
0453:                    positionsBitSkip[0] = 0; // To avoid spurious tower updates on the first tower
0454:                    positionsToReadToReachCurrentPosition = 0;
0455:                    lastPositionsIncrement = 0;
0456:                    state = BEFORE_POINTER;
0457:                }
0458:
0459:                public Index index() {
0460:                    return keyIndex;
0461:                }
0462:
0463:                public int frequency() {
0464:                    return frequency;
0465:                }
0466:
0467:                private void ensureCurrentDocument() {
0468:                    if (currentDocument < 0)
0469:                        throw new IllegalStateException(
0470:                                "nextDocument() has never been called for (term="
0471:                                        + term + ")");
0472:                    if (currentDocument == Integer.MAX_VALUE)
0473:                        throw new IllegalStateException(
0474:                                "This reader is positioned beyond the end of list of (term="
0475:                                        + term + ")");
0476:                }
0477:
0478:                /**
0479:                 * Returns whether there are no more document records in the current inverted list.
0480:                 * 
0481:                 * <p>This method returns true if the last document pointer of the current inverted list
0482:                 * has been read. It makes no distinction as to where (inside the last document record) this
0483:                 * reader is currently positioned. In particular, this method will return true independently
0484:                 * of whether count and positions have been read or not (we note by passing that this is the
0485:                 * only sensible behaviour, as you can build indices with or without counts/positions).
0486:                 * 
0487:                 * <p>This method will return true also when this reader is positioned <em>beyond</em>
0488:                 * the last document pointer. In this case, {@link #currentDocumentPointer()} will return
0489:                 * {@link Integer#MAX_VALUE}.
0490:                 * 
0491:                 * @return true whether there are no more document records in the current inverted list.
0492:                 */
0493:                private boolean endOfList() {
0494:                    if (ASSERTS)
0495:                        assert numberOfDocumentRecord <= frequency;
0496:                    return numberOfDocumentRecord >= frequency - 1;
0497:                }
0498:
0499:                public int document() {
0500:                    if (ASSERTS)
0501:                        ensureCurrentDocument();
0502:                    return currentDocument;
0503:                }
0504:
0505:                public Payload payload() throws IOException {
0506:                    throw new UnsupportedOperationException("This index ("
0507:                            + index + ") does not contain payloads");
0508:                }
0509:
0510:                public int count() throws IOException {
0511:                    if (DEBUG)
0512:                        System.err.println(this  + ".count()");
0513:                    if (count != -1)
0514:                        return count;
0515:                    if (ASSERTS)
0516:                        ensureCurrentDocument();
0517:                    if (state == BEFORE_TOWER)
0518:                        readTower();
0519:                    if (ASSERTS && state != BEFORE_COUNT)
0520:                        throw new IllegalStateException();
0521:                    state = BEFORE_POINTER;
0522:
0523:                    switch (countCoding) {
0524:                    case UNARY:
0525:
0526:                        count = ibs.readUnary() + 1;
0527:
0528:                        break;
0529:                    case SHIFTED_GAMMA:
0530:
0531:                        count = ibs.readShiftedGamma() + 1;
0532:
0533:                        break;
0534:                    case GAMMA:
0535:
0536:                        count = ibs.readGamma() + 1;
0537:
0538:                        break;
0539:                    case DELTA:
0540:
0541:                        count = ibs.readDelta() + 1;
0542:
0543:                        break;
0544:                    default:
0545:                        throw new IllegalStateException(
0546:                                "The required count coding (" + countCoding
0547:                                        + ") is not supported.");
0548:                    }
0549:
0550:                    return count;
0551:                }
0552:
0553:                protected void updatePositionCache() throws IOException {
0554:                    if (DEBUG)
0555:                        System.err.println(this  + ".updatePositionCache()");
0556:                    positionsUnread = false;
0557:                    count(); // This will force reading the tower and updating positionsBitsOffset, if necessary
0558:                    if (positionsBitsOffset > positions.readBits()) {
0559:                        if (DEBUG)
0560:                            System.err.println(this 
0561:                                    + ": positionsBitsOffset="
0562:                                    + positionsBitsOffset
0563:                                    + ", positions.readBits()="
0564:                                    + positions.readBits()
0565:                                    + ", skipping by "
0566:                                    + (positionsBitsOffset - positions
0567:                                            .readBits()));
0568:                        positions.skip(positionsBitsOffset
0569:                                - positions.readBits());
0570:                    }
0571:
0572:                    if (ASSERTS)
0573:                        assert positionsToReadToReachCurrentPosition >= 0 : positionsToReadToReachCurrentPosition
0574:                                + " < 0";
0575:
0576:                    if (positionsToReadToReachCurrentPosition > 0) {
0577:                        if (DEBUG)
0578:                            System.err.println(this  + ":Skipping sequentially "
0579:                                    + positionsToReadToReachCurrentPosition
0580:                                    + " positions...");
0581:                        // We skip, inside the current quantum, the positions we haven't read
0582:
0583:                        switch (positionCoding) {
0584:                        case SHIFTED_GAMMA:
0585:
0586:                            if (COOKIES) {
0587:                                positionsToReadToReachCurrentPosition--;
0588:                                if (positions.readShiftedGamma() != Integer.MAX_VALUE)
0589:                                    throw new AssertionError();
0590:                            }
0591:                            positions
0592:                                    .skipShiftedGammas(positionsToReadToReachCurrentPosition);
0593:
0594:                            break;
0595:                        case GAMMA:
0596:
0597:                            if (COOKIES) {
0598:                                positionsToReadToReachCurrentPosition--;
0599:                                if (positions.readGamma() != Integer.MAX_VALUE)
0600:                                    throw new AssertionError();
0601:                            }
0602:                            positions
0603:                                    .skipGammas(positionsToReadToReachCurrentPosition);
0604:
0605:                            break;
0606:                        case DELTA:
0607:
0608:                            if (COOKIES) {
0609:                                positionsToReadToReachCurrentPosition--;
0610:                                if (positions.readDelta() != Integer.MAX_VALUE)
0611:                                    throw new AssertionError();
0612:                            }
0613:                            positions
0614:                                    .skipDeltas(positionsToReadToReachCurrentPosition);
0615:
0616:                            break;
0617:                        default:
0618:                            throw new IllegalStateException(
0619:                                    "The required position coding ("
0620:                                            + positionCoding
0621:                                            + ") is not supported.");
0622:                        }
0623:
0624:                    }
0625:
0626:                    // We must fix it so that nextDocument() will restore it to 0
0627:                    positionsToReadToReachCurrentPosition = -count;
0628:                    if (COOKIES)
0629:                        positionsToReadToReachCurrentPosition--;
0630:                    if (count > positionCache.length)
0631:                        positionCache = new int[Math.max(
0632:                                positionCache.length * 2, count)];
0633:                    final int[] occ = positionCache;
0634:
0635:                    switch (positionCoding) {
0636:                    case SHIFTED_GAMMA:
0637:
0638:                        if (COOKIES
0639:                                && positions.readShiftedGamma() != Integer.MAX_VALUE)
0640:                            throw new AssertionError();
0641:                        positions.readShiftedGammas(occ, count);
0642:                        for (int i = 1; i < count; i++)
0643:                            occ[i] += occ[i - 1] + 1;
0644:
0645:                        return;
0646:                    case GAMMA:
0647:
0648:                        if (COOKIES
0649:                                && positions.readGamma() != Integer.MAX_VALUE)
0650:                            throw new AssertionError();
0651:                        positions.readGammas(occ, count);
0652:                        for (int i = 1; i < count; i++)
0653:                            occ[i] += occ[i - 1] + 1;
0654:
0655:                        return;
0656:                    case DELTA:
0657:
0658:                        if (COOKIES
0659:                                && positions.readDelta() != Integer.MAX_VALUE)
0660:                            throw new AssertionError();
0661:                        positions.readDeltas(occ, count);
0662:                        for (int i = 1; i < count; i++)
0663:                            occ[i] += occ[i - 1] + 1;
0664:
0665:                        return;
0666:                    default:
0667:                        throw new IllegalStateException(
0668:                                "The required position coding ("
0669:                                        + index.positionCoding
0670:                                        + ") is not supported.");
0671:                    }
0672:
0673:                }
0674:
0675:                public IntIterator positions() throws IOException {
0676:                    if (ASSERTS)
0677:                        ensureCurrentDocument();
0678:                    if (positionsUnread)
0679:                        updatePositionCache();
0680:                    return IntIterators.wrap(positionCache, 0, count);
0681:                }
0682:
0683:                public int[] positionArray() throws IOException {
0684:                    if (ASSERTS)
0685:                        ensureCurrentDocument();
0686:                    if (positionsUnread)
0687:                        updatePositionCache();
0688:                    return positionCache;
0689:                }
0690:
0691:                // TODO: check who's using this (positionArray() is actually faster now)
0692:                public int positions(final int[] position) throws IOException {
0693:                    if (ASSERTS)
0694:                        ensureCurrentDocument();
0695:                    if (positionsUnread)
0696:                        updatePositionCache(); // And also that positions have
0697:                    // been read
0698:                    if (position.length < count)
0699:                        return -count;
0700:                    for (int i = count; i-- != 0;)
0701:                        position[i] = this .positionCache[i];
0702:                    return count;
0703:                }
0704:
0705:                public int nextDocument() throws IOException {
0706:                    if (DEBUG)
0707:                        System.err.println("{" + this  + "} nextDocument()");
0708:                    if (state != BEFORE_POINTER) {
0709:                        if (state == BEFORE_TOWER)
0710:                            readTower();
0711:                        if (state == BEFORE_COUNT) {
0712:
0713:                            switch (countCoding) {
0714:                            case UNARY:
0715:
0716:                                count = ibs.readUnary() + 1;
0717:
0718:                                break;
0719:                            case SHIFTED_GAMMA:
0720:
0721:                                count = ibs.readShiftedGamma() + 1;
0722:
0723:                                break;
0724:                            case GAMMA:
0725:
0726:                                count = ibs.readGamma() + 1;
0727:
0728:                                break;
0729:                            case DELTA:
0730:
0731:                                count = ibs.readDelta() + 1;
0732:
0733:                                break;
0734:                            default:
0735:                                throw new IllegalStateException(
0736:                                        "The required count coding ("
0737:                                                + countCoding
0738:                                                + ") is not supported.");
0739:                            }
0740:
0741:                            state = BEFORE_POINTER;
0742:                        }
0743:                    }
0744:                    if (endOfList())
0745:                        return -1;
0746:                    if (hasPointers) {// We do not write pointers for everywhere occurring terms.
0747:
0748:                        switch (pointerCoding) {
0749:                        case SHIFTED_GAMMA:
0750:
0751:                            currentDocument += ibs.readShiftedGamma() + 1;
0752:
0753:                            break;
0754:                        case GAMMA:
0755:
0756:                            currentDocument += ibs.readGamma() + 1;
0757:
0758:                            break;
0759:                        case DELTA:
0760:
0761:                            currentDocument += ibs.readDelta() + 1;
0762:
0763:                            break;
0764:                        case GOLOMB:
0765:
0766:                            currentDocument += ibs.readGolomb(b, log2b) + 1;
0767:
0768:                            break;
0769:                        default:
0770:                            throw new IllegalStateException(
0771:                                    "The required pointer coding ("
0772:                                            + pointerCoding
0773:                                            + ") is not supported.");
0774:                        }
0775:
0776:                    } else
0777:                        currentDocument++;
0778:                    numberOfDocumentRecord++;
0779:
0780:                    if (ASSERTS && numberOfDocumentRecord > quantum)
0781:                        assert positionsBitsOffset > 0;
0782:
0783:                    if ((numberOfDocumentRecord & quantumModuloMask) == 0) {
0784:                        state = BEFORE_TOWER;
0785:                        positionsToReadToReachCurrentPosition = 0;
0786:                    } else {
0787:                        state = BEFORE_COUNT;
0788:                        if (ASSERTS)
0789:                            assert count > 0 : count + " <= " + 0;
0790:                        positionsToReadToReachCurrentPosition += count;
0791:                        if (COOKIES)
0792:                            positionsToReadToReachCurrentPosition++;
0793:                    }
0794:                    count = -1;
0795:                    positionsUnread = true;
0796:                    return currentDocument;
0797:                }
0798:
0799:                /**
0800:                 * Reads the entire skip tower for the current position. This method assumes that the tower
0801:                 * has been passed over sequentially, and correpondingly sets {@link #lastPositionsIncrement}
0802:                 * to the number of positions bits of the last quantum.
0803:                 */
0804:                private void readTower() throws IOException {
0805:                    lastPositionsIncrement = maxh >= 0 ? positionsBitSkip[0]
0806:                            : 0;
0807:                    if (DEBUG)
0808:                        System.err.println(this 
0809:                                + ": Setting lastPositionsIncrement to "
0810:                                + lastPositionsIncrement + " in readTower()");
0811:                    readTower(-1);
0812:                    if (DEBUG)
0813:                        System.err.println(this 
0814:                                + ": Incrementing positionsBitsOffset by "
0815:                                + lastPositionsIncrement + " in readTower()");
0816:                    // TODO: this should be moved into readTower(int)
0817:                    positionsBitsOffset += lastPositionsIncrement;
0818:                }
0819:
0820:                /**
0821:                 * Reads the skip tower for the current position, possibly skipping part of the tower.
0822:                 * 
0823:                 * <P>Note that this method will update {@link #state} only if it reads the entire tower,
0824:                 * otherwise the state remains {@link #BEFORE_TOWER}.
0825:                 * 
0826:                 * @param pointer the tower will be read up to the first entry smaller than or equal to this
0827:                 * pointer; use -1 to guarantee that the entire tower will be read.
0828:                 */
0829:                private void readTower(final int pointer) throws IOException {
0830:                    int i, j, k, cacheOffset, cache, towerLength = 0;
0831:                    long bitsAtTowerStart = 0;
0832:                    boolean truncated = false;
0833:                    if (ASSERTS)
0834:                        assert numberOfDocumentRecord % quantum == 0;
0835:                    if (ASSERTS && state != BEFORE_TOWER)
0836:                        throw new IllegalStateException(
0837:                                "readTower() called in state " + state);
0838:                    cacheOffset = (numberOfDocumentRecord & wModuloMask);
0839:                    k = cacheOffset >> quantumDivisionShift;
0840:                    if (ASSERTS && k == 0) { // Invalidate current tower data
0841:                        it.unimi.dsi.fastutil.ints.IntArrays.fill(pointerSkip,
0842:                                Integer.MAX_VALUE);
0843:                        it.unimi.dsi.fastutil.longs.LongArrays.fill(bitSkip,
0844:                                Integer.MAX_VALUE);
0845:                        it.unimi.dsi.fastutil.longs.LongArrays.fill(
0846:                                positionsBitSkip, Integer.MAX_VALUE);
0847:                    }
0848:                    // Compute the height of the current skip tower.
0849:                    s = (k == 0) ? height : Fast.leastSignificantBit(k);
0850:                    cache = frequency - w
0851:                            * (numberOfDocumentRecord >> wDivisionShift);
0852:                    if (cache < w) {
0853:                        maxh = Fast
0854:                                .mostSignificantBit((cache >> quantumDivisionShift)
0855:                                        - k);
0856:                        if (maxh < s) {
0857:                            s = maxh;
0858:                            truncated = true;
0859:                        } else
0860:                            truncated = false;
0861:                    } else {
0862:                        cache = w;
0863:                        maxh = height;
0864:                        truncated = k == 0;
0865:                    }
0866:                    // assert w == cache || k == 0 || lastMaxh == Fast.mostSignificantBit( k ^ (
0867:                    // cache/quantum ) ) : lastMaxh +","+ (Fast.mostSignificantBit( k ^ ( cache/quantum )
0868:                    // ));
0869:                    i = s;
0870:                    if (s >= 0) {
0871:                        if (k == 0) {
0872:                            if (quantumBitLength < 0) {
0873:                                quantumBitLength = ibs.readDelta();
0874:                                positionsQuantumBitLength = ibs.readDelta();
0875:                                entryBitLength = ibs.readDelta();
0876:                            } else {
0877:                                quantumBitLength += Fast.nat2int(ibs
0878:                                        .readDelta());
0879:                                positionsQuantumBitLength += Fast.nat2int(ibs
0880:                                        .readDelta());
0881:                                entryBitLength += Fast.nat2int(ibs.readDelta());
0882:                            }
0883:                            if (DEBUG)
0884:                                System.err
0885:                                        .println("{"
0886:                                                + this 
0887:                                                + "} quantum bit length="
0888:                                                + quantumBitLength
0889:                                                + " positions quantum bit length="
0890:                                                + positionsQuantumBitLength
0891:                                                + " entry bit length="
0892:                                                + entryBitLength);
0893:                        }
0894:                        if (DEBUG)
0895:                            System.err.println("{" + this 
0896:                                    + "} Reading tower; pointer=" + pointer
0897:                                    + " maxh=" + maxh + " s=" + s);
0898:                        if (s > 0) {
0899:                            towerLength = entryBitLength * (s + 1)
0900:                                    + Fast.nat2int(ibs.readDelta());
0901:                            if (DEBUG)
0902:                                System.err.println("{" + this 
0903:                                        + "} Tower length=" + towerLength);
0904:                        }
0905:                        // We store the number of bits read at the start of the tower (just after the
0906:                        // length).
0907:                        bitsAtTowerStart = ibs.readBits();
0908:                        if (truncated) {
0909:                            if (DEBUG)
0910:                                System.err.println("{" + this 
0911:                                        + "} Truncated--reading tops");
0912:                            // We read the tower top.
0913:                            pointerSkip[s] = Fast.nat2int(ibs.readGolomb(
0914:                                    towerTopB[s], towerTopLog2B[s]))
0915:                                    + pointerPrediction[s];
0916:                            bitSkip[s] = quantumBitLength * (1 << s)
0917:                                    + entryBitLength * ((1 << s + 1) - s - 2)
0918:                                    + Fast.nat2int(ibs.readLongDelta());
0919:                            positionsBitSkip[s] = positionsQuantumBitLength
0920:                                    * (1 << s)
0921:                                    + Fast.nat2int(ibs.readLongDelta());
0922:                        } else {
0923:                            // We copy the tower top from the lowest inherited entry suitably updated.
0924:                            pointerSkip[s] = pointerSkip[s + 1]
0925:                                    - (currentDocument - pointerAtLastSkipTower);
0926:                            bitSkip[s] = bitSkip[s + 1]
0927:                                    - (bitsAtTowerStart - readBitsAtLastSkipTower)
0928:                                    - towerLength;
0929:                            // TODO: note that we could use the same method for pointers and bit skips!
0930:                            positionsBitSkip[s] = positionsBitSkip[s + 1]
0931:                                    - positionsBitSkip[s];
0932:                            if (ASSERTS)
0933:                                assert positionsBitSkip[s] > 0 : positionsBitSkip[s]
0934:                                        + " <= " + 0;
0935:                        }
0936:                        // We read the remaining part of the tower, at least until we point after pointer.
0937:                        if (currentDocument + pointerSkip[i] > pointer) {
0938:                            for (i = s - 1; i >= 0; i--) {
0939:                                pointerSkip[i] = Fast.nat2int(ibs.readGolomb(
0940:                                        towerLowerB[i], towerLowerLog2B[i]))
0941:                                        + pointerSkip[i + 1] / 2;
0942:                                bitSkip[i] = (bitSkip[i + 1] - entryBitLength
0943:                                        * (i + 1))
0944:                                        / 2 - Fast.nat2int(ibs.readLongDelta());
0945:                                positionsBitSkip[i] = positionsBitSkip[i + 1]
0946:                                        / 2 - Fast.nat2int(ibs.readLongDelta());
0947:                                if (DEBUG
0948:                                        && currentDocument + pointerSkip[i] <= pointer)
0949:                                    System.err
0950:                                            .println("{"
0951:                                                    + this 
0952:                                                    + "} stopping reading at i="
0953:                                                    + i
0954:                                                    + " as currentDocument ("
0955:                                                    + currentDocument
0956:                                                    + ") plus pointer skip ("
0957:                                                    + pointerSkip[i]
0958:                                                    + ") is smaller than or equal target ("
0959:                                                    + pointer + ")");
0960:                                if (currentDocument + pointerSkip[i] <= pointer)
0961:                                    break;
0962:                            }
0963:                        }
0964:                    }
0965:                    /*
0966:                     * If we did not read the entire tower, we need to fix the skips we read (as they are
0967:                     * offsets from the *end* of the tower) and moreover we must make unusable the rest of
0968:                     * the tower (for asserts).
0969:                     */
0970:                    if (i > 0) {
0971:                        final long fix = ibs.readBits() - bitsAtTowerStart;
0972:                        for (j = s; j >= i; j--)
0973:                            bitSkip[j] += towerLength - fix;
0974:                        if (ASSERTS)
0975:                            for (; j >= 0; j--)
0976:                                pointerSkip[j] = Integer.MAX_VALUE;
0977:                    } else
0978:                        state = BEFORE_COUNT;
0979:                    // We update the inherited tower.
0980:                    final long deltaBits = ibs.readBits()
0981:                            - readBitsAtLastSkipTower;
0982:                    final int deltaPointers = currentDocument
0983:                            - pointerAtLastSkipTower;
0984:
0985:                    if (ASSERTS)
0986:                        assert lastPositionsIncrement >= 0 : lastPositionsIncrement
0987:                                + " < " + 0;
0988:
0989:                    for (j = Fast.mostSignificantBit(k
0990:                            ^ (cache >> quantumDivisionShift)); j >= s + 1; j--) {
0991:                        bitSkip[j] -= deltaBits;
0992:                        positionsBitSkip[j] -= lastPositionsIncrement;
0993:                        pointerSkip[j] -= deltaPointers;
0994:                    }
0995:
0996:                    readBitsAtLastSkipTower = ibs.readBits();
0997:                    pointerAtLastSkipTower = currentDocument;
0998:                    lowest = i < 0 ? 0 : i;
0999:
1000:                    if (DEBUG) {
1001:                        System.err.println("{" + this  + "} "
1002:                                + "Computed skip tower (lowest: " + lowest
1003:                                + ") for document record number "
1004:                                + numberOfDocumentRecord + " (pointer "
1005:                                + currentDocument + ") from " + Math.max(i, 0)
1006:                                + ": ");
1007:                        System.err.print("% ");
1008:                        for (j = 0; j <= s; j++)
1009:                            System.err.print(pointerSkip[j] + ":" + bitSkip[j]
1010:                                    + ":" + positionsBitSkip[j] + " ");
1011:                        System.err.print(" [");
1012:                        for (; j <= height; j++)
1013:                            System.err.print(pointerSkip[j] + ":" + bitSkip[j]
1014:                                    + ":" + positionsBitSkip[j] + " ");
1015:                        System.err.print("]");
1016:                        System.err.println();
1017:                    }
1018:
1019:                    if (ASSERTS) {
1020:                        for (j = Fast.mostSignificantBit(k
1021:                                ^ (cache >> quantumDivisionShift)); j >= s + 1; j--) {
1022:                            assert positionsBitSkip[j] >= 0 : "positionsBitSkip["
1023:                                    + j
1024:                                    + "] = "
1025:                                    + positionsBitSkip[j]
1026:                                    + " < "
1027:                                    + 0;
1028:                            assert positionsBitSkip[j] >= positionsBitSkip[j - 1] : "positionsBitSkip["
1029:                                    + j
1030:                                    + "] = "
1031:                                    + positionsBitSkip[j]
1032:                                    + " < "
1033:                                    + positionsBitSkip[j - 1]
1034:                                    + " = positionsBitSkip[" + (j - 1) + "]";
1035:                        }
1036:                    }
1037:
1038:                }
1039:
1040:                public int skipTo(final int p) throws IOException {
1041:                    if (DEBUG)
1042:                        System.err.println(this  + ".skipTo(" + p
1043:                                + ") [currentDocument=" + currentDocument
1044:                                + ", numberOfDocumentRecord="
1045:                                + numberOfDocumentRecord
1046:                                + ", positionsBitsOffset="
1047:                                + positionsBitsOffset + "]");
1048:                    // If we are just at the start of a list, let us read the first pointer.
1049:                    if (numberOfDocumentRecord == -1)
1050:                        nextDocument(); // TODO: shouldn't we just read the
1051:                    // tower?
1052:                    if (currentDocument >= p) {
1053:                        if (DEBUG)
1054:                            System.err.println(this 
1055:                                    + ": No skip necessary, returning "
1056:                                    + currentDocument);
1057:                        return currentDocument;
1058:                    }
1059:                    if (state == BEFORE_TOWER) {
1060:                        lastPositionsIncrement = maxh >= 0 ? positionsBitSkip[0]
1061:                                : 0;
1062:                        readTower(p);
1063:                        if (DEBUG)
1064:                            System.err.println(this 
1065:                                    + ": Incrementing positionsBitsOffset by "
1066:                                    + lastPositionsIncrement
1067:                                    + " in skipTo() initial phase");
1068:                        positionsBitsOffset += lastPositionsIncrement;
1069:                    }
1070:
1071:                    final int[] pointerSkip = this .pointerSkip;
1072:                    for (;;) {
1073:                        if (ASSERTS)
1074:                            assert maxh < 0 || lowest > 0
1075:                                    || pointerSkip[0] != Integer.MAX_VALUE;
1076:                        // If on a defective quantum (no tower) or p is inside the current quantum (no
1077:                        // need to scan the tower) we bail out.
1078:                        if (maxh < 0 || lowest == 0
1079:                                && pointerAtLastSkipTower + pointerSkip[0] > p)
1080:                            break;
1081:                        if (DEBUG)
1082:                            System.err.println(this 
1083:                                    + ": In for loop, currentDocument="
1084:                                    + currentDocument + ", maxh=" + maxh
1085:                                    + ", numberOfDocumentRecord="
1086:                                    + numberOfDocumentRecord + ", p=" + p);
1087:                        final int cacheOffset = (numberOfDocumentRecord & wModuloMask);
1088:                        final int k = cacheOffset >> quantumDivisionShift;
1089:                        final int top = Fast
1090:                                .mostSignificantBit(k
1091:                                        ^ (Math.min(w, frequency
1092:                                                - numberOfDocumentRecord
1093:                                                + cacheOffset) >> quantumDivisionShift));
1094:                        int i;
1095:                        for (i = lowest; i <= top; i++) {
1096:                            if (ASSERTS && (k & 1 << i) != 0)
1097:                                assert pointerSkip[i] == pointerSkip[i + 1];
1098:                            if (ASSERTS)
1099:                                assert pointerSkip[i] != Integer.MAX_VALUE : "Invalid pointer skip "
1100:                                        + i
1101:                                        + " (lowest="
1102:                                        + lowest
1103:                                        + ", top="
1104:                                        + top + ")";
1105:                            if (pointerAtLastSkipTower + pointerSkip[i] > p)
1106:                                break;
1107:                        }
1108:                        if (--i < 0)
1109:                            break;
1110:                        if (ASSERTS)
1111:                            assert i >= lowest : i + " < " + lowest;
1112:                        if (DEBUG)
1113:                            System.err.println(this 
1114:                                    + ": Safely after for with i=" + i
1115:                                    + ", P[i]=" + pointerSkip[i] + ", A[i]="
1116:                                    + bitSkip[i] + ", B[i]="
1117:                                    + positionsBitSkip[i]);
1118:                        if (DEBUG)
1119:                            System.err
1120:                                    .println(this 
1121:                                            + ": ["
1122:                                            + ibs.readBits()
1123:                                            + "] Skipping "
1124:                                            + (bitSkip[i] - (ibs.readBits() - readBitsAtLastSkipTower))
1125:                                            + " bits ("
1126:                                            + (((k & -(1 << i)) + (1 << i))
1127:                                                    * quantum - cacheOffset)
1128:                                            + " records) to get to document pointer "
1129:                                            + (currentDocument + pointerSkip[i]));
1130:                        ibs.skip(bitSkip[i]
1131:                                - (ibs.readBits() - readBitsAtLastSkipTower));
1132:                        /* We accumulate the number of bits that must be skipped in the positions stream to reach the position
1133:                         * corresponding to the current tower. */
1134:                        if (DEBUG)
1135:                            System.err.println(this 
1136:                                    + ": Incrementing positionsBitsOffset by "
1137:                                    + positionsBitSkip[i] + " (height " + i
1138:                                    + ")");
1139:                        lastPositionsIncrement = positionsBitSkip[i];
1140:                        positionsBitsOffset += lastPositionsIncrement;
1141:                        if (ASSERTS && positionsLength != -1)
1142:                            assert positionsBitsOffset <= positionsLength : positionsBitsOffset
1143:                                    + " > " + positionsLength;
1144:                        state = BEFORE_TOWER;
1145:                        currentDocument = pointerSkip[i]
1146:                                + pointerAtLastSkipTower;
1147:                        numberOfDocumentRecord += ((k & -(1 << i)) + (1 << i))
1148:                                * quantum - cacheOffset;
1149:                        // If we skipped beyond the end of the list, we invalidate the current document.
1150:                        if (numberOfDocumentRecord == frequency) {
1151:                            currentDocument = Integer.MAX_VALUE;
1152:                            positionsUnread = false;
1153:                            state = BEFORE_POINTER; // We are actually before a frequency, but we must
1154:                            // avoid that calls to nextDocument() read anything
1155:                        } else {
1156:                            positionsUnread = true;
1157:                            readTower(p); // Note that if we are exactly on the destination pointer, we will read the entire tower.
1158:                        }
1159:
1160:                        count = -1; // We must invalidate count as readDocumentPointer() would do.
1161:                        positionsToReadToReachCurrentPosition = 0;
1162:                        if (endOfList()) {
1163:                            if (DEBUG)
1164:                                System.err.println(this 
1165:                                        + ".toSkip(): end-of-list, returning "
1166:                                        + currentDocument + ", state=" + state);
1167:                            // Note that if p == Integer.MAX_VALUE, we are certainly beyond end-of-list
1168:                            return p == Integer.MAX_VALUE ? Integer.MAX_VALUE
1169:                                    : currentDocument;
1170:                        }
1171:                    }
1172:                    if (DEBUG)
1173:                        System.err.println(this 
1174:                                + ": Completing sequentially, currentDocument="
1175:                                + currentDocument + ", numberOfDocumentRecord="
1176:                                + numberOfDocumentRecord + ", p=" + p);
1177:                    while (currentDocument < p) {
1178:                        if (DEBUG)
1179:                            System.err
1180:                                    .println(this 
1181:                                            + ": Skipping sequentially (second), currentDocument="
1182:                                            + currentDocument
1183:                                            + ", numberOfDocumentRecord="
1184:                                            + numberOfDocumentRecord + ", p="
1185:                                            + p);
1186:                        if (nextDocument() == -1) {
1187:                            if (DEBUG)
1188:                                System.err.println(this 
1189:                                        + ": end-of-list, returning MAX_VALUE");
1190:                            return Integer.MAX_VALUE;
1191:                        }
1192:                    }
1193:                    if (DEBUG)
1194:                        System.err.println(this  + ".toSkip(): Returning "
1195:                                + currentDocument);
1196:                    return currentDocument;
1197:                }
1198:
1199:                public void dispose() throws IOException {
1200:                    parent.close();
1201:                }
1202:
1203:                public boolean hasNext() {
1204:                    return !endOfList();
1205:                }
1206:
1207:                public int nextInt() {
1208:                    if (!hasNext())
1209:                        throw new NoSuchElementException();
1210:                    try {
1211:                        return nextDocument();
1212:                    } catch (IOException e) {
1213:                        throw new RuntimeException(e);
1214:                    }
1215:                }
1216:
1217:                public String toString() {
1218:                    return index + " [" + term + "]";
1219:                }
1220:
1221:                /**
1222:                 * An interval iterator returning the positions of the current document as singleton
1223:                 * intervals.
1224:                 */
1225:                private final class IndexIntervalIterator extends
1226:                        AbstractObjectIterator<Interval> implements 
1227:                        IntervalIterator {
1228:                    int pos = -1;
1229:
1230:                    public void reset() throws IOException {
1231:                        pos = -1;
1232:                        if (positionsUnread)
1233:                            updatePositionCache(); // This guarantees the position cache is ok
1234:                    }
1235:
1236:                    public void intervalTerms(final IntSet terms) {
1237:                        terms
1238:                                .add(BitStreamHPIndexReaderIndexIterator.this .term);
1239:                    }
1240:
1241:                    public boolean hasNext() {
1242:                        return pos < count - 1;
1243:                    }
1244:
1245:                    public Interval next() {
1246:                        if (!hasNext())
1247:                            throw new NoSuchElementException();
1248:                        return Interval.valueOf(positionCache[++pos]);
1249:                    }
1250:
1251:                    public Interval nextInterval() {
1252:                        return pos < count - 1 ? Interval
1253:                                .valueOf(positionCache[++pos]) : null;
1254:                    }
1255:
1256:                    public int extent() {
1257:                        return 1;
1258:                    }
1259:
1260:                    public String toString() {
1261:                        return index + ": " + term + "[doc=" + currentDocument
1262:                                + ", count=" + count + ", pos=" + pos + "]";
1263:                    }
1264:                };
1265:
1266:                public Reference2ReferenceMap<Index, IntervalIterator> intervalIterators()
1267:                        throws IOException {
1268:                    intervalIterator();
1269:                    return singletonIntervalIterator;
1270:                }
1271:
1272:                public IntervalIterator intervalIterator() throws IOException {
1273:                    return intervalIterator(keyIndex);
1274:                }
1275:
1276:                public IntervalIterator intervalIterator(final Index index)
1277:                        throws IOException {
1278:                    if (ASSERTS)
1279:                        ensureCurrentDocument();
1280:                    // TODO: this was if ( index != keyIndex || hasPayloads )
1281:                    if (index != keyIndex)
1282:                        return IntervalIterators.TRUE;
1283:                    if (ASSERTS)
1284:                        assert intervalIterator != null;
1285:                    intervalIterator.reset();
1286:                    return intervalIterator;
1287:                }
1288:
1289:                public ReferenceSet<Index> indices() {
1290:                    return index.singletonSet;
1291:                }
1292:            }
1293:
1294:            private IndexIterator documents(final CharSequence term,
1295:                    final int termNumber) throws IOException {
1296:                indexIterator.term(term);
1297:                indexIterator.position(termNumber);
1298:                return indexIterator;
1299:            }
1300:
1301:            public IndexIterator documents(final int term) throws IOException {
1302:                return documents(null, term);
1303:            }
1304:
1305:            public IndexIterator documents(final CharSequence term)
1306:                    throws IOException {
1307:                if (closed)
1308:                    throw new IllegalStateException("This "
1309:                            + getClass().getSimpleName() + " has been closed");
1310:                if (index.termMap != null) {
1311:                    final int termIndex = (int) index.termMap.getLong(term);
1312:                    if (termIndex == -1)
1313:                        return index.emptyIndexIterator;
1314:                    return documents(term, termIndex);
1315:                }
1316:                throw new UnsupportedOperationException("Index " + index
1317:                        + " has no term map");
1318:            }
1319:
1320:            @Override
1321:            public IndexIterator nextIterator() throws IOException {
1322:                return indexIterator.advance();
1323:            }
1324:
1325:            public String toString() {
1326:                return getClass().getSimpleName() + "[" + index + "]";
1327:            }
1328:
1329:            public void close() throws IOException {
1330:                super.close();
1331:                indexIterator.ibs.close();
1332:                indexIterator.positions.close();
1333:            }
1334:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.