001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.lib.lexer.batch;
043:
044: import java.io.IOException;
045: import java.io.Reader;
046: import org.netbeans.api.lexer.TokenId;
047: import org.netbeans.lib.lexer.LexerInputOperation;
048: import org.netbeans.lib.lexer.TokenList;
049: import org.netbeans.lib.lexer.token.AbstractToken;
050: import org.netbeans.spi.lexer.LexerInput;
051:
052: /**
053: * Lexer input operation over a {@link java.io.Reader}.
054: *
055: * @author Miloslav Metelka
056: * @version 1.00
057: */
058:
059: public final class SkimLexerInputOperation<T extends TokenId> extends
060: LexerInputOperation<T> {
061:
062: private static final char[] EMPTY_CHAR_ARRAY = new char[0];
063:
064: /**
065: * Default size for reading char array.
066: */
067: private static final int DEFAULT_READ_CHAR_ARRAY_SIZE = 4096;
068:
069: /**
070: * Minimum size to be read (to have space for reading).
071: */
072: private static final int MIN_READ_SIZE = 512;
073:
074: private static final int DEFAULT_CLUSTER_SIZE = 4096;
075:
076: /**
077: * Maximum fragmentation factor for token character arrays.
078: * <br>
079: * If there is not enough space in the tokenCharArray
080: * to copy a token's characters there then if the token's length
081: * will be greater than this threshold then the token will get
082: * an extra character buffer just for itself and there will
083: * still be chance to use the present tokenCharArray for tokens
084: * with lower length.
085: */
086: private static final int MAX_UNUSED_CLUSTER_SIZE_FRACTION = 50;
087:
088: /**
089: * Reader as a primary source of characters that are further
090: * copied and cached.
091: */
092: private Reader reader;
093:
094: /**
095: * Array holding the read characters.
096: */
097: private char[] readCharArray;
098:
099: /**
100: * Character sequence holding the characters to be read.
101: */
102: private CharSequence readCharSequence;
103:
104: /**
105: * Index of a first character in the token being currently recognized.
106: */
107: private int readStartIndex;
108:
109: /**
110: * End of valid chars in readCharArray (points to first invalid char).
111: */
112: private int readEndIndex;
113:
114: /**
115: * Whether EOF was read from reader already or not.
116: */
117: private boolean eofRead;
118:
119: /**
120: * Actual token cluster where the tokens are being placed.
121: */
122: private SkimTokenList<T> cluster;
123:
124: private int clusterTextEndIndex;
125:
126: private int defaultClusterSize = DEFAULT_CLUSTER_SIZE;
127:
128: /**
129: * Starting offset of the cluster currently being used.
130: */
131: private int clusterStartOffset;
132:
133: /**
134: * How much the offset is ahead of the token's text offset
135: * in the cluster. The tokens that get skipped and flyweight tokens
136: * increase this value because their text is not physically copied
137: * into the clusters character data but they increase the offset.
138: */
139: private int offsetShift;
140:
141: public SkimLexerInputOperation(TokenList<T> tokenList, Reader reader) {
142: super (tokenList, 0, null);
143: this .reader = reader;
144: this .readCharArray = new char[DEFAULT_READ_CHAR_ARRAY_SIZE];
145: }
146:
147: public SkimLexerInputOperation(TokenList<T> tokenList,
148: CharSequence readCharSequence) {
149: super (tokenList, 0, null);
150: this .readCharSequence = readCharSequence;
151: this .readEndIndex = readCharSequence.length();
152: }
153:
154: public int read(int index) { // index >= 0 is guaranteed by contract
155: index += readStartIndex;
156: if (index < readEndIndex) {
157: return (readCharArray != null) ? readCharArray[index]
158: : readCharSequence.charAt(index);
159:
160: } else { // must read next or return EOF
161: if (!eofRead) {
162: eofRead = (readCharArray != null) ? readNextCharArray()
163: : true; // using readCharSequence -> no more chars
164:
165: return read(index);
166:
167: } else {
168: return LexerInput.EOF;
169: }
170: }
171: }
172:
173: public char readExisting(int index) {
174: return (readCharArray != null) ? readCharArray[index]
175: : readCharSequence.charAt(index);
176: }
177:
178: public void approveToken(AbstractToken<T> token) {
179: int tokenLength = token.length();
180: if (isSkipToken(token)) {
181: preventFlyToken();
182: skipChars(tokenLength());
183:
184: } else if (token.isFlyweight()) {
185: assert isFlyTokenAllowed();
186: flyTokenAdded();
187: skipChars(tokenLength);
188:
189: } else { // non-flyweight token => must be L0Token instance
190: if (clusterTextEndIndex != 0) { // valid cluster exists
191: // Check whether token fits into cluster's char array
192: if (tokenLength + clusterTextEndIndex > cluster
193: .getText().length) {
194: // Cannot fit the token's text into current cluster
195: finishCluster();
196: }
197: }
198:
199: if (clusterTextEndIndex == 0) { // allocate new cluster
200: int clusterSize = defaultClusterSize;
201: if (clusterSize < tokenLength) { // cluster just for one token
202: clusterSize = tokenLength;
203: }
204: defaultClusterSize = clusterSize;
205: cluster = new SkimTokenList<T>(
206: (CopyTextTokenList<T>) tokenList(),
207: clusterStartOffset, new char[clusterSize]);
208: }
209:
210: // Now it's clear that the token will fit into the cluster's text
211: // TODO for DirectCharSequence use more efficient way
212: char[] clusterText = cluster.getText();
213: if (readCharArray != null) {
214: System.arraycopy(readCharArray, readStartIndex,
215: clusterText, clusterTextEndIndex, tokenLength);
216: } else { // using readCharSequence
217: for (int i = 0; i < tokenLength; i++) {
218: clusterText[clusterTextEndIndex + i] = readCharSequence
219: .charAt(readStartIndex + i);
220: }
221: }
222:
223: int rawOffset = (offsetShift << 16) | clusterTextEndIndex;
224: token.setTokenList(cluster);
225: token.setRawOffset(rawOffset);
226: clusterTextEndIndex += tokenLength;
227: clearFlySequence();
228: }
229:
230: readStartIndex += tokenLength;
231: tokenApproved();
232: }
233:
234: private void skipChars(int skipLength) {
235: if (clusterTextEndIndex != 0) { // cluster already populated
236: if (offsetShift + skipLength > Short.MAX_VALUE) {
237: // Cannot advance offset shift without overflowing -> cluster is finished
238: finishCluster();
239: clusterStartOffset += skipLength;
240:
241: } else { // relOffset will fit into current cluster
242: offsetShift += skipLength;
243: }
244:
245: } else { // cluster is null -> can shift cluster's start offset
246: clusterStartOffset += skipLength;
247: }
248: }
249:
250: public void finish() {
251: if (clusterTextEndIndex != 0) {
252: finishCluster();
253: }
254: }
255:
256: private void finishCluster() {
257: // If there would be too much unused space in the cluster's char array
258: // then it will be reallocated.
259: int clusterTextLength = cluster.getText().length;
260: if (clusterTextLength / MAX_UNUSED_CLUSTER_SIZE_FRACTION > (clusterTextLength - clusterTextEndIndex)) { // Fragmentation -> reallocate cluster's char array
261: char[] newText = new char[clusterTextEndIndex];
262: System.arraycopy(cluster.getText(), 0, newText, 0,
263: clusterTextEndIndex);
264: cluster.setText(newText);
265: }
266: clusterStartOffset += clusterTextEndIndex + offsetShift;
267: clusterTextEndIndex = 0;
268: offsetShift = 0;
269: cluster = null; // cluster no longer valid
270: }
271:
272: private boolean readNextCharArray() {
273: // Copy everything from present readStartIndex till readEndIndex
274: int retainLength = readEndIndex - readStartIndex;
275: int minReadSize = readCharArray.length - retainLength;
276: char[] newReadCharArray = readCharArray; // by default take original one
277: if (minReadSize < MIN_READ_SIZE) { // allocate new
278: // double the current array's size
279: newReadCharArray = new char[readCharArray.length * 2];
280: }
281: System.arraycopy(readCharArray, readStartIndex,
282: newReadCharArray, 0, retainLength);
283: readCharArray = newReadCharArray;
284: readStartIndex = 0;
285: readEndIndex = retainLength;
286:
287: boolean eof = false;
288: while (readEndIndex < readCharArray.length) {
289: int readSize;
290: try {
291: readSize = reader.read(readCharArray, readEndIndex,
292: readCharArray.length - readEndIndex);
293: } catch (IOException e) {
294: // The exception is silently ignored here
295: // This should generally not happen - a wrapping reader
296: // should be used that will catch and process the IO exceptions.
297: readSize = -1;
298: }
299: if (readSize == -1) {
300: eof = true;
301: try {
302: reader.close();
303: } catch (IOException e) {
304: // The exception is silently ignored here
305: // This should generally not happen - a wrapping reader
306: // should be used that will catch and process the IO exceptions.
307: }
308: break;
309: } else {
310: readEndIndex += readSize;
311: }
312: }
313: return eof;
314: }
315:
316: }
|