001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.lib.lexer;
043:
044: import java.util.Set;
045: import org.netbeans.api.lexer.LanguagePath;
046: import org.netbeans.api.lexer.InputAttributes;
047: import org.netbeans.api.lexer.TokenId;
048: import org.netbeans.lib.lexer.token.AbstractToken;
049:
050: /**
051: * Browsable list of tokens.
052: * <br>
053: * {@link org.netbeans.api.lexer.TokenSequence} delegates
054: * all its operations to this class so any service provider
055: * delivering this class will be able to produce token sequences.
056: *
057: * There are various implementations of the token list:
058: * <ul>
059: * <li>BatchTokenList - predecessor of batch token lists</li>
060: * <li>TextTokenList - token list over immutable char sequence</li>
061: * <li>CopyTextTokenList - token list over text input
062: * that needs to be copied. Characters that belong to tokens
063: * skipped due to skipTokenIds do not need to be copied.</li>
064: * <li>SkimTokenList - filter over CopyTextTokenList
065: * to store the token characters in multiple arrays
066: * and to correctly compute the tokens' starting offsets.</li>
067: * <li>IncTokenList - token list for mutable-input environment.</li>
068: * <li>EmbeddedTokenList - token list for a single language embedding
069: * suitable for both batch and incremental environments.</li>
070: * </ul>
071: *
072: * @author Miloslav Metelka
073: * @version 1.00
074: */
075:
076: public interface TokenList<T extends TokenId> {
077:
078: /**
079: * Language path of this token list.
080: */
081: LanguagePath languagePath();
082:
083: /**
084: * Get token or {@link EmbeddingContainer} at given index in this list.
085: * <br/>
086: * The method's implementation may need to be synchronized as multiple
087: * threads can access it at the same time.
088: * <br/>
089: * The requested index value may be arbitrarily high
090: * (e.g. when TokenSequence.move(index) is used for too high value).
091: *
092: * @param index >=0 index of the token in this list.
093: * @return valid token or {@link EmbeddingContainer}, or null if the index is too high.
094: */
095: Object tokenOrEmbeddingContainer(int index);
096:
097: /**
098: * Replace flyweight token at the given index with its non-flyweight copy.
099: * <br/>
100: * This may be requested by <code>TokenSequence.offsetToken()</code>.
101: *
102: * @param index >=0 index of the flyweight token in this list.
103: * @param flyToken non-null flyweight token.
104: * @param offset >=0 absolute offset where the flyweight token resides.
105: * @return non-flyweight token instance.
106: */
107: AbstractToken<T> replaceFlyToken(int index,
108: AbstractToken<T> flyToken, int offset);
109:
110: /**
111: * Wrap the token by a branch token list due to language embedding
112: * that exists for the token.
113: *
114: * @param index existing index in this token list at which the token
115: * should be wrapped with the embedding info.
116: * @param embeddingContainer embedding info that should wrap the token.
117: */
118: void wrapToken(int index, EmbeddingContainer<T> embeddingContainer);
119:
120: /**
121: * Get absolute offset of the token at the given index in the token list.
122: * <br>
123: * This method can only be called if the token at the given index
124: * was already fetched by {@link #tokenOrEmbeddingContainer(int)}.
125: */
126: int tokenOffset(int index);
127:
128: /**
129: * Get total count of tokens in the list.
130: * <br/>
131: * For token lists that create the tokens lazily
132: * this will lead to lexing till the end of the input.
133: */
134: int tokenCount();
135:
136: /**
137: * Return present number of tokens in the token list but do not create
138: * any new tokens (because of possible lazy token creation).
139: * <br/>
140: * This is necessary e.g. for <code>TokenSequence.move()</code>
141: * that needs a binary search for fast positioning
142: * but using {@link #tokenCount()} would lead to unnecessary creation
143: * of all tokens.
144: */
145: int tokenCountCurrent();
146:
147: /**
148: * Get number of modifications which mutated this token list.
149: * <br>
150: * Token sequence remembers this number when it gets constructed
151: * and checks this number when it moves between tokens
152: * and if there is an extra modification performed it throws
153: * <code>IllegalStateException</code>.
154: *
155: * <p>
156: * This is also used to check whether this token list corresponds to mutable input
157: * or not because unmodifiable lists return -1 from this method.
158: *
159: * <p>
160: * For branch token lists <code>updateStartOffsetShift()</code> (a method that is
161: * not declared by this interface) ensures that the value returned by this method
162: * is most up-to-date (equal to the root list's one).
163: *
164: * @return number of modifications performed to the list.
165: * <br/>
166: * Returns -1 if this list is constructed for immutable input and cannot be mutated.
167: */
168: int modCount();
169:
170: /**
171: * Get absolute offset of the child token with the given raw offset
172: * in the underlying input.
173: *
174: * @param rawOffset raw offset of the child token.
175: * @return absolute offset in the input.
176: */
177: int childTokenOffset(int rawOffset);
178:
179: /**
180: * Get character of a token from the character sequence represented
181: * by this support.
182: *
183: * @param rawOffset raw offset of the child token.
184: * The given offset value may need to be preprocessed before using (it depends
185: * on a nature of the token list).
186: * @param index index inside the token's text that should be returned.
187: * This value cannot be simply added to the previous parameter
188: * for mutable token lists as the value could erroneously point
189: * into a middle of the offset gap then.
190: * @return appropriate character that the token has requested.
191: */
192: char childTokenCharAt(int rawOffset, int index);
193:
194: /**
195: * Get the root token list of the token list hierarchy.
196: */
197: TokenList<?> root();
198:
199: /**
200: * Get token hierarchy operation for this token list or null
201: * if this token list does not have any token hierarchy.
202: */
203: TokenHierarchyOperation<?, ?> tokenHierarchyOperation();
204:
205: /**
206: * Extra attributes related to the input being lexed.
207: */
208: InputAttributes inputAttributes();
209:
210: /**
211: * Get lookahead information for the token at the existing token index.
212: * <br/>
213: * Lookahead is number of characters that the lexer has read
214: * past the end of the given token in order to recognize it in the text.
215: * <br>
216: * This information allows the lexer to know whether modifications
217: * past the end of the token can affect its validity.
218: *
219: * <p>
220: * In general only mutable token lists benefit from this information
221: * but non-mutable token lists may store the information as well for testing
222: * purposes.
223: * </p>
224: *
225: * @param index index of the existing token.
226: * @return >=0 number of characters that the lexer has read
227: * in order to recognize this token. Return zero if this token list
228: * does not maintain lookaheads.
229: */
230: int lookahead(int index);
231:
232: /**
233: * Get state information for the token at the existing token index.
234: * <br/>
235: * It is an object defining lexer's state after recognition
236: * of the given token.
237: * <br/>
238: * This information allows restarting the lexer at the end of the given token.
239: *
240: * <p>
241: * In general only mutable token lists benefit from this information
242: * but non-mutable token lists may store the information as well for testing
243: * purposes.
244: * </p>
245: *
246: * @param index index of the existing token.
247: * @return lexer's state after recognition of this token
248: * or null for default state. Return null if this token list
249: * does not maintain states.
250: */
251: Object state(int index);
252:
253: /**
254: * Returns true if the underlying token list does not contain offset ranges
255: * that would not be covered by tokens.
256: * <br/>
257: * Such uncovered ranges could occur if a batch token list would use token id filter.
258: * <br/>
259: * If the token list is continuous the TokenSequence
260: * can compute token offsets more efficiently.
261: */
262: boolean isContinuous();
263:
264: /**
265: * Get set of token ids to be skipped during token creation.
266: */
267: Set<T> skipTokenIds();
268:
269: /**
270: * Get offset where the first token of this token list should start.
271: * <br/>
272: * If token filtering is used then the first token may start at higher offset.
273: * <br/>
274: * It's guaranteed that there will be no token starting below this offset.
275: */
276: int startOffset();
277:
278: /**
279: * Get offset where the last token of this token list should end.
280: * <br/>
281: * If token filtering is used then the last token may end at lower offset.
282: * <br/>
283: * It's guaranteed that there will be no token ending above this offset.
284: */
285: int endOffset();
286:
287: /**
288: * Check if this token list is removed from token hierarchy.
289: * <br/>
290: * Should only be called under the lock of the root token list.
291: *
292: * @return true if the token list was removed or false otherwise.
293: */
294: boolean isRemoved();
295:
296: }
|