001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.lib.lexer;
043:
044: import java.util.List;
045: import java.util.Set;
046: import org.netbeans.api.lexer.InputAttributes;
047: import org.netbeans.api.lexer.Language;
048: import org.netbeans.api.lexer.LanguagePath;
049: import org.netbeans.api.lexer.TokenId;
050: import org.netbeans.lib.editor.util.GapList;
051: import org.netbeans.lib.lexer.token.ComplexToken;
052: import org.netbeans.spi.lexer.Lexer;
053: import org.netbeans.spi.lexer.LexerInput;
054: import org.netbeans.lib.lexer.token.AbstractToken;
055: import org.netbeans.lib.lexer.token.ComplexToken;
056: import org.netbeans.spi.lexer.LanguageHierarchy;
057: import org.netbeans.spi.lexer.LexerRestartInfo;
058: import org.netbeans.spi.lexer.TokenFactory;
059:
060: /**
061: * Implementation of the functionality related to lexer input.
062: *
063: * @author Miloslav Metelka
064: * @version 1.00
065: */
066:
067: public abstract class LexerInputOperation<T extends TokenId> implements
068: CharProvider {
069:
070: /** Flag for additional correctness checks (may degrade performance). */
071: private static final boolean testing = Boolean
072: .getBoolean("netbeans.debug.lexer.test");
073:
074: /**
075: * Current reading index in the operation.
076: * At all times it must be >=0.
077: */
078: private int readIndex;
079:
080: /**
081: * Maximum index from which the char was fetched for current
082: * (or previous) tokens recognition.
083: * <br>
084: * The index is updated lazily - only when EOF is reached
085: * and when backup() is called.
086: */
087: private int lookaheadIndex;
088:
089: /**
090: * Active preprocessor or null if there is no preprocessor.
091: */
092: private CharPreprocessorOperation preprocessorOperation;
093:
094: /**
095: * Computed and cached token length.
096: */
097: private int tokenLength;
098:
099: private final TokenList<T> tokenList;
100:
101: private final boolean mutableInput;
102:
103: private final Lexer<T> lexer;
104:
105: /**
106: * Start of the token being currently recognized.
107: */
108: private int tokenStartIndex;
109:
110: private boolean lexerFinished;
111:
112: /**
113: * How many flyweight tokens were created in a row.
114: */
115: private int flySequenceLength;
116:
117: private List<CharPreprocessorError> preprocessErrorList;
118:
119: /**
120: * Total count of preprocessors used during lexing.
121: * It's used to determine whether extra preprocessed chars need to be used.
122: */
123: protected int preprocessingLevelCount;
124:
125: private CharProvider.ExtraPreprocessedChars extraPreprocessedChars;
126:
127: private Language<T> language;
128:
129: public LexerInputOperation(TokenList<T> tokenList, int tokenIndex,
130: Object lexerRestartState) {
131: this .tokenList = tokenList;
132: this .mutableInput = (tokenList.modCount() != -1);
133: // Determine flySequenceLength setting
134: while (--tokenIndex >= 0
135: && LexerUtilsConstants.token(tokenList, tokenIndex)
136: .isFlyweight()) {
137: flySequenceLength++;
138: }
139:
140: LanguagePath languagePath = tokenList.languagePath();
141: language = LexerUtilsConstants.innerLanguage(languagePath);
142: LanguageHierarchy<T> languageHierarchy = LexerApiPackageAccessor
143: .get().languageHierarchy(language);
144: TokenFactory<T> tokenFactory = LexerSpiPackageAccessor.get()
145: .createTokenFactory(this );
146:
147: // Check whether character preprocessing is necessary
148: // CharPreprocessor p = LexerSpiPackageAccessor.get().createCharPreprocessor(languageHierarchy);
149: // if (p != null) {
150: // preprocessingLevelCount++;
151: // preprocessorOperation = new CharPreprocessorOperation(
152: // ((preprocessorOperation != null)
153: // ? (CharProvider)preprocessorOperation
154: // : this),
155: // p,
156: // this
157: // );
158: // }
159:
160: LexerInput lexerInput = LexerSpiPackageAccessor
161: .get()
162: .createLexerInput(
163: (preprocessorOperation != null) ? preprocessorOperation
164: : this );
165:
166: LexerRestartInfo<T> info = LexerSpiPackageAccessor.get()
167: .createLexerRestartInfo(lexerInput, tokenFactory,
168: lexerRestartState, tokenList.languagePath(),
169: inputAttributes());
170: lexer = LexerSpiPackageAccessor.get().createLexer(
171: languageHierarchy, info);
172: }
173:
174: public abstract int read(int index);
175:
176: public abstract char readExisting(int index);
177:
178: public abstract void approveToken(AbstractToken<T> token);
179:
180: public Set<T> skipTokenIds() {
181: return tokenList.skipTokenIds();
182: }
183:
184: public final int read() {
185: int c = read(readIndex++);
186: if (c == LexerInput.EOF) {
187: lookaheadIndex = readIndex; // count EOF char into lookahead
188: readIndex--; // readIndex must not include EOF
189: }
190: return c;
191: }
192:
193: public int deepRawLength(int length) {
194: // No preprocessing by default
195: return length;
196: }
197:
198: public int deepRawLengthShift(int index) {
199: // No preprocessing by default
200: return index;
201: }
202:
203: public final int readIndex() {
204: return readIndex;
205: }
206:
207: public final void backup(int count) {
208: if (lookaheadIndex < readIndex) {
209: lookaheadIndex = readIndex;
210: }
211: readIndex -= count;
212: }
213:
214: /**
215: * Get a distance between the index of the rightmost character already returned
216: * by previous {@link #read()} operations and the present read index.
217: * <br/>
218: * If there were no {@link #backup(int)} operation performed
219: * the lookahead will be zero except the case when EOF was already returned.
220: *
221: * @return >=0 number of characters between the rightmost reading index reached
222: * and the present read position.
223: * <br/>
224: * The EOF (when reached by reading) is treated as a single character
225: * in lookahead.
226: * <br/>
227: * If there is an active character preprocessor the returned value
228: * is a raw length of the lookahead.
229: */
230: public final int lookahead() {
231: return (lookaheadIndex > readIndex) ? ((preprocessorOperation != null) ? preprocessorOperation
232: .deepRawLength(lookaheadIndex - readIndex)
233: : (lookaheadIndex - readIndex))
234: : 0;
235: }
236:
237: public final int tokenLength() {
238: return tokenLength;
239: }
240:
241: public void tokenRecognized(int tokenLength) {
242: if (tokenLength > readIndex()) {
243: throw new IndexOutOfBoundsException("tokenLength="
244: + tokenLength // NOI18N
245: + " >" + readIndex());
246: }
247: this .tokenLength = tokenLength;
248: }
249:
250: public void tokenApproved() {
251: tokenStartIndex += tokenLength;
252: readIndex -= tokenLength;
253: lookaheadIndex -= tokenLength;
254: }
255:
256: protected final TokenList<T> tokenList() {
257: return tokenList;
258: }
259:
260: protected final int tokenStartIndex() {
261: return tokenStartIndex;
262: }
263:
264: public final void setTokenStartIndex(int tokenStartIndex) {
265: this .tokenStartIndex = tokenStartIndex;
266: }
267:
268: protected final CharPreprocessorOperation preprocessor() {
269: return preprocessorOperation;
270: }
271:
272: public final boolean isMutableInput() {
273: return mutableInput;
274: }
275:
276: public final boolean isStoreLookaheadAndState() {
277: return isMutableInput() || testing;
278: }
279:
280: public AbstractToken<T> nextToken() {
281: assert (!lexerFinished);
282: while (true) {
283: @SuppressWarnings("unchecked")
284: AbstractToken<T> token = (AbstractToken<T>) lexer()
285: .nextToken();
286: if (token == null) {
287: LexerUtilsConstants
288: .checkLexerInputFinished(
289: (preprocessorOperation != null) ? (CharProvider) preprocessorOperation
290: : this , this );
291: lexerFinished = true;
292: return null;
293: } else {
294: // Check that the id belongs to the language
295: if (token != TokenFactory.SKIP_TOKEN
296: && !language.tokenIds().contains(token.id())) {
297: String msgPrefix = "Invalid TokenId=" + token.id()
298: + " returned from lexer=" + lexer()
299: + " for language=" + language + ":\n";
300: if (token.id().ordinal() > language.maxOrdinal()) {
301: throw new IllegalStateException(msgPrefix
302: + "Language.maxOrdinal()="
303: + language.maxOrdinal() + " < "
304: + token.id().ordinal());
305: } else { // Ordinal ok but different id with that ordinal contained in language
306: throw new IllegalStateException(
307: msgPrefix
308: + "Language contains no or different tokenId with ordinal="
309: + token.id().ordinal()
310: + ": "
311: + language.tokenId(token.id()
312: .ordinal()));
313: }
314: }
315: approveToken(token);
316: }
317: if (token == TokenFactory.SKIP_TOKEN)
318: continue; // Fetch next token
319: return token;
320: }
321: }
322:
323: /**
324: * Notification that the token was recognized.
325: * @param tokenLength length of the recognized token.
326: * @param skip whether the token should be skipped
327: * @return true if the token holding preprocessed text should be created.
328: * If skip is true then false is returned.
329: */
330: public final boolean tokenRecognized(int tokenLength, boolean skip) {
331: if (preprocessorOperation != null) {
332: preprocessorOperation.tokenRecognized(tokenLength);
333: } else { // no preprocessor
334: tokenRecognized(tokenLength);
335: }
336:
337: // If the token is not skipped check whether preprocessed token
338: // should be created instead of the regular token.
339: if (!skip
340: && tokenLength != this .tokenLength
341: || (preprocessErrorList != null && preprocessErrorList
342: .get(0).index() < this .tokenLength)) {
343: if (extraPreprocessedChars == null
344: && preprocessingLevelCount > 1) {
345: // For more than one preprocessing level need to handle
346: // extra preprocessed chars before and after the main ones
347: // on the parent levels.
348: extraPreprocessedChars = new CharProvider.ExtraPreprocessedChars();
349: }
350: return true;
351: }
352: return false;
353: }
354:
355: public void notifyPreprocessorError(CharPreprocessorError error) {
356: if (preprocessErrorList == null) {
357: preprocessErrorList = new GapList<CharPreprocessorError>();
358: }
359: preprocessErrorList.add(error);
360: }
361:
362: // public final void initPreprocessedToken(AbstractToken<T> token) {
363: // CharPreprocessorError error = null;
364: // if (preprocessErrorList != null && preprocessErrorList.size() > 0) {
365: // for (int i = preprocessErrorList.size() - 1; i >= 0; i--) {
366: // error = preprocessErrorList.get(i);
367: // if (error.index() < tokenLength) {
368: // preprocessErrorList.remove(i);
369: // } else {// Above errors for this token
370: // // Relocate - subtract token length
371: // error.updateIndex(-tokenLength);
372: // error = null;
373: // }
374: // }
375: // }
376: //
377: // PreprocessedTextStorage storage = preprocessorOperation.createPreprocessedTextStorage(
378: // token.text(), extraPreprocessedChars);
379: //
380: // if (token.getClass() == ComplexToken.class) {
381: // ((ComplexToken)token).initPrep(storage, error);
382: // } else {
383: // ((PreprocessedTextToken)token).initPrep(storage, error);
384: // }
385: // }
386:
387: public void collectExtraPreprocessedChars(
388: CharProvider.ExtraPreprocessedChars epc,
389: int prepStartIndex, int prepEndIndex, int topPrepEndIndex) {
390: // No extra preprocessed characters
391: }
392:
393: public final LanguageOperation<T> languageOperation() {
394: return LexerUtilsConstants.innerLanguageOperation(tokenList
395: .languagePath());
396: }
397:
398: public final Object lexerState() {
399: return lexer.state();
400: }
401:
402: public final boolean isFlyTokenAllowed() {
403: return (flySequenceLength < LexerUtilsConstants.MAX_FLY_SEQUENCE_LENGTH);
404: }
405:
406: protected final void flyTokenAdded() {
407: flySequenceLength++;
408: }
409:
410: protected final void preventFlyToken() {
411: flySequenceLength = LexerUtilsConstants.MAX_FLY_SEQUENCE_LENGTH;
412: }
413:
414: protected final void clearFlySequence() {
415: flySequenceLength = 0;
416: }
417:
418: protected final boolean isSkipToken(AbstractToken<T> token) {
419: return (token == TokenFactory.SKIP_TOKEN);
420: }
421:
422: public final Lexer lexer() {
423: return lexer;
424: }
425:
426: public final InputAttributes inputAttributes() {
427: return tokenList.inputAttributes();
428: }
429:
430: public final void release() {
431: lexer.release();
432: }
433:
434: }
|