001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.lib.lexer;
043:
044: import org.netbeans.lib.editor.util.ArrayUtilities;
045: import org.netbeans.lib.lexer.token.AbstractToken;
046: import org.netbeans.lib.lexer.CharPreprocessor;
047: import org.netbeans.spi.lexer.LexerInput;
048:
049: /**
050: * Character preprocessor delegates all its operation
051: * to this class.
052: * <br/>
053: * Each preprocessor operation has its parent character provider
054: * which is LexerInputOperation if this is there's just one char preprocessor.
055: * <br/>
056: * There can be more preprocessors chained above the LexerInputOperation.
057: * <br/>
058: * The LexerInput operates on top of the top char provider (preprocessor).
059: * @author Miloslav Metelka
060: * @version 1.00
061: */
062:
063: public final class CharPreprocessorOperation implements CharProvider {
064:
065: /** Flag for additional correctness checks (may degrade performance). */
066: private static final boolean testing = Boolean
067: .getBoolean("netbeans.debug.lexer.test");
068:
069: /**
070: * Parent char provider from which the characters are read.
071: */
072: private CharProvider parent;
073:
074: /** The actual preprocessor that this operation wraps. */
075: private CharPreprocessor preprocessor;
076:
077: /**
078: * Current reading index among the characters offered by this operation
079: * relative to the current token's begining.
080: */
081: private int readIndex;
082:
083: /**
084: * Max read index that either the client was requesting or which resulted
085: * by calling more than one {@link #outputOriginal(char)}
086: * in the {@link #preprocessChar()}.
087: */
088: private int lookaheadIndex;
089:
090: /**
091: * Read index corresponding to the first char that really had to be preprocessed
092: * for the current token (and is the first one in the prepChars array).
093: */
094: private int prepStartIndex;
095:
096: /**
097: * Read index corresponding to the first non-preprocessed character
098: * after the preprocessed characters.
099: * <br/>
100: * This variable is only valid if (prepStartIndex != lookaheadIndex).
101: */
102: private int prepEndIndex;
103:
104: /**
105: * Characters translated by this preprocessor.
106: * The token's initial characters that do not need any translation
107: * are not stored in this array. Once there is a char resulting
108: * from translation then all the subsequent chars will be stored
109: * in this array even if they did not require translation.
110: * <br/>
111: * Once the token gets created or skipped the indexes are cleared
112: * but the allocated array is retained.
113: */
114: private char[] prepChars = ArrayUtilities.emptyCharArray();
115:
116: /**
117: * Extra read lengths of the input characters corresponding to each preprocessed char
118: * on the output.
119: * <br/>
120: * The length shifts are related to the parent but at the end of the processing
121: * of a particular token they need to retrieve the raw lengths against
122: * the original input text and the extraRawLengthShifts gets populated
123: * if necessary.
124: */
125: private int[] rawLengthShifts;
126:
127: /**
128: * Last character passed to outputOriginal() or outputPreprocessed().
129: */
130: private int lastOutputChar;
131:
132: /** Computed and cached token length. */
133: private int tokenLength;
134:
135: private LexerInputOperation lexerInputOperation;
136:
137: private int tokenEndRawLengthShift;
138:
139: /**
140: * Construct new char preprocessor operation.
141: * @param lexerInputOperation may be null then error notification won't work.
142: */
143: CharPreprocessorOperation(CharProvider parent,
144: CharPreprocessor preprocessor,
145: LexerInputOperation lexerInputOperation) {
146: this .parent = parent;
147: this .preprocessor = preprocessor;
148: this .lexerInputOperation = lexerInputOperation;
149: // Init the preprocessor to use this operation
150: // LexerSpiPackageAccessor.get().init(preprocessor, this);
151: }
152:
153: /**
154: * Init the given token if necessary before {@link #tokenApproved()}
155: * gets called.
156: * <br/>
157: * This method is only called on the preprocessor's operation.
158: */
159: public void initApprovedToken(AbstractToken token) {
160:
161: }
162:
163: /**
164: * Read a single character for preprocessing from the underlying input.
165: * <br/>
166: * The character is obtained either from the real input source
167: * or from another (parent) char preprocessor.
168: *
169: * @return valid character or {@link LexerInput#EOF} if there are no more
170: * characters available on the input.
171: */
172: public int inputRead() {
173: return parent.read();
174: }
175:
176: public void inputBackup(int count) {
177: parent.backup(count);
178: }
179:
180: public void outputOriginal(int ch) {
181: lastOutputChar = ch;
182: if (ch != LexerInput.EOF) {
183: if (prepStartIndex == lookaheadIndex) { // collecting non-preprocessed
184: prepStartIndex++;
185: } else { // adding to existing prepChars
186: // leave prepEndIndex as it is now
187: }
188: lookaheadIndex++;
189: }
190: }
191:
192: public void outputPreprocessed(char ch, int extraInputLength) {
193: lastOutputChar = ch;
194: if (prepStartIndex == lookaheadIndex) { // no prepChars yet
195: prepEndIndex = prepStartIndex;
196: } else if (prepEndIndex < lookaheadIndex) {
197: // Add missing output chars
198: do {
199: addPrepChar(parent.readExisting(prepEndIndex), 0);
200: } while (prepEndIndex < lookaheadIndex);
201: } // else adding to the end of prepChars (last char was prep char)
202: addPrepChar(ch, extraInputLength);
203: lookaheadIndex++; // State that a prep char was added
204: }
205:
206: public int deepRawLength(int length) {
207: return parent.deepRawLength(parentLength(length));
208: }
209:
210: public int deepRawLengthShift(int index) {
211: return rawLengthShift(index) + parent.deepRawLengthShift(index);
212: }
213:
214: private int rawLengthShift(int index) {
215: if (index < prepStartIndex) {
216: return index;
217: } else if (index < prepEndIndex) {
218: return rawLengthShifts[index - prepStartIndex];
219: } else {
220: return totalRawLengthShift();
221: }
222: }
223:
224: /**
225: * Given length here translated into the length in parent.
226: */
227: private int parentLength(int length) {
228: if (length > prepStartIndex) {
229: if (length <= prepEndIndex) {
230: length += rawLengthShifts[length - 1 - prepStartIndex];
231: } else {
232: length += totalRawLengthShift();
233: }
234: }
235: return length;
236: }
237:
238: private int totalRawLengthShift() {
239: return rawLengthShifts[prepEndIndex - 1 - prepStartIndex];
240: }
241:
242: public void notifyError(String errorMessage) {
243: if (lexerInputOperation != null) {
244: int parentIndex = parent.readIndex(); // Get the
245: lexerInputOperation
246: .notifyPreprocessorError(new CharPreprocessorError(
247: errorMessage, parent
248: .deepRawLength(parentIndex)));
249: }
250: }
251:
252: public int read() {
253: // Check whether any characters need to be preprocessed first
254: if (readIndex == lookaheadIndex) {
255: // Most typical situation - preprocess char
256: // LexerSpiPackageAccessor.get().preprocessChar(preprocessor);
257: readIndex++;
258: // Expect only a single char to be put into lastOutputChar
259: if (readIndex == lookaheadIndex) {
260: return lastOutputChar;
261: } else { // possibly more chars processed or EOF found etc.
262: readIndex--;
263: // Check whether EOF was processed (returned)
264: if (readIndex == lookaheadIndex
265: && lastOutputChar == LexerInput.EOF) {
266: return LexerInput.EOF;
267: }
268: }
269:
270: }
271:
272: return readExisting(readIndex++);
273: }
274:
275: public char readExisting(int index) {
276: return (index < prepStartIndex)// below the first preprocessed character
277: ? parent.readExisting(index)
278: : (index < prepEndIndex) // inside prepChars
279: ? prepChars[index - prepStartIndex]
280: : parent.readExisting(index
281: + totalRawLengthShift());
282: }
283:
284: public int readIndex() {
285: return readIndex;
286: }
287:
288: public void backup(int count) {
289: readIndex -= count;
290: }
291:
292: public int tokenLength() {
293: return tokenLength;
294: }
295:
296: public void tokenRecognized(int tokenLength) {
297: this .tokenLength = tokenLength;
298: // Modify tokenLength for preprocessed characters
299: parent.tokenRecognized(parentLength(tokenLength));
300: }
301:
302: public PreprocessedTextStorage createPreprocessedTextStorage(
303: CharSequence rawText,
304: CharProvider.ExtraPreprocessedChars epc) {
305: int pStartIndex;
306: int pEndIndex;
307: int topEndIndex;
308: if (prepStartIndex >= tokenLength) {
309: if (prepEndIndex > tokenLength) {
310: updateTokenEndRawLengthShift();
311: pEndIndex = tokenLength - 1;
312: // Optimize the case when there are lookahead chars
313: // for the present token and the ending chars could possibly
314: // be non-preprocessed (prepEndIndex > tokenLength)
315: while (--pEndIndex >= prepStartIndex
316: && rawLengthShifts[pEndIndex] == tokenEndRawLengthShift) { // not preprocessed
317: }
318: pEndIndex += 2;
319: } else
320: // prepEndIndex <= tokenLength
321: pEndIndex = prepEndIndex;
322: topEndIndex = parentLength(pEndIndex);
323:
324: // Get deep raw lengths
325: for (int i = prepStartIndex; i < pEndIndex; i++) {
326: rawLengthShifts[i - prepStartIndex] = deepRawLength(i + 1)
327: - (i + 1);
328: }
329: pStartIndex = prepStartIndex;
330:
331: } else { // No preprocessed chars inside token
332: pStartIndex = tokenLength;
333: pEndIndex = tokenLength;
334: topEndIndex = tokenLength;
335: }
336:
337: PreprocessedTextStorage prepStorage;
338: if (epc != null) {
339: parent.collectExtraPreprocessedChars(epc, pStartIndex,
340: pEndIndex, topEndIndex);
341: prepStorage = PreprocessedTextStorage.create(rawText,
342: prepChars, pEndIndex - pStartIndex, pStartIndex,
343: rawLengthShifts, epc.extraPrepChars(), epc
344: .extraRawLengthShifts(), epc
345: .preStartIndex(), epc.postEndIndex());
346: epc.clear();
347:
348: } else { // no extra preprocessed chars
349: prepStorage = PreprocessedTextStorage.create(rawText,
350: prepChars, pEndIndex - pStartIndex, pStartIndex,
351: rawLengthShifts);
352: }
353: return prepStorage;
354: }
355:
356: private void updateTokenEndRawLengthShift() {
357: tokenEndRawLengthShift = rawLengthShifts[tokenLength - 1
358: - prepStartIndex];
359: }
360:
361: public void collectExtraPreprocessedChars(
362: ExtraPreprocessedChars epc, int prepStartIndex,
363: int prepEndIndex, int topPrepEndIndex) {
364: if (prepStartIndex < tokenLength) { // Some preprocessed characters
365: // Check for any pre-prepChars
366: int preCount = Math.max(prepStartIndex
367: - this .prepStartIndex, 0);
368: // Check for post-prepChars
369: int postCount;
370: if (this .prepEndIndex > tokenLength) {
371: updateTokenEndRawLengthShift();
372: postCount = tokenLength - prepEndIndex;
373: if (postCount > 0) {
374: int i = tokenLength - 2;
375: // Optimize the case when there are lookahead chars
376: // for the present token and the ending chars could possibly
377: // be non-preprocessed (prepEndIndex > tokenLength)
378: while (--i >= prepStartIndex
379: && postCount > 0
380: && rawLengthShifts[i] == tokenEndRawLengthShift) { // not preprocessed
381: postCount--;
382: }
383: } else
384: // postCount <= 0
385: postCount = 0;
386:
387: } else { // this.prepEndIndex <= tokenLength
388: postCount = this .prepEndIndex - prepEndIndex;
389: }
390:
391: assert (preCount >= 0 && postCount >= 0);
392: epc.ensureExtraLength(preCount + postCount);
393: while (--preCount >= 0) {
394: epc.insert(readExisting(prepStartIndex - 1),
395: deepRawLength(prepStartIndex) - prepStartIndex);
396: prepStartIndex--;
397: }
398: while (--postCount >= 0) {
399: epc.append(readExisting(prepEndIndex),
400: deepRawLength(prepEndIndex) - topPrepEndIndex);
401: prepEndIndex++;
402: topPrepEndIndex++;
403: }
404: }
405:
406: parent.collectExtraPreprocessedChars(epc, prepStartIndex,
407: prepEndIndex, topPrepEndIndex);
408: }
409:
410: /**
411: * This method is called after the token has been recognized
412: * to clear internal data related to processing of token's characters.
413: */
414: public void tokenApproved() {
415: if (prepStartIndex != lookaheadIndex) { // some prep chars (may be after token length)
416: if (prepStartIndex < tokenLength) { // prep chars before token end
417: if (prepEndIndex <= tokenLength) { // no preprocessed chars past token end
418: prepStartIndex = lookaheadIndex; // signal no preprocessed chars
419: } else { // prepEndIndex > tokenLength => initial prep chars in the next token
420: // updateTokenLengthParentShift() was already called in this case
421: for (int i = tokenLength; i < prepEndIndex; i++) {
422: rawLengthShifts[i] -= tokenEndRawLengthShift;
423: }
424: System
425: .arraycopy(prepChars, prepStartIndex,
426: prepChars, 0, prepEndIndex
427: - prepStartIndex);
428: System.arraycopy(rawLengthShifts, prepStartIndex,
429: rawLengthShifts, 0, prepEndIndex
430: - prepStartIndex);
431: prepStartIndex = 0;
432: prepEndIndex -= tokenLength;
433: }
434:
435: } else { // prepStartIndex >= tokenLength
436: prepStartIndex -= tokenLength;
437: prepEndIndex -= tokenLength;
438: }
439: } else
440: prepStartIndex -= tokenLength;
441:
442: readIndex -= tokenLength;
443: lookaheadIndex -= tokenLength;
444: parent.tokenApproved();
445:
446: if (testing)
447: consistencyCheck();
448: }
449:
450: /**
451: * Add preprocessed or passed char to prepChars
452: */
453: private void addPrepChar(char ch, int extraInputLength) {
454: int prepCharsLength = prepEndIndex - prepStartIndex;
455: if (prepCharsLength == prepChars.length) { // reallocate
456: prepChars = ArrayUtilities.charArray(prepChars);
457: rawLengthShifts = ArrayUtilities.intArray(rawLengthShifts);
458: }
459: prepChars[prepCharsLength] = ch;
460: int prevRawLengthShift = (prepCharsLength > 0) ? rawLengthShifts[prepCharsLength - 1]
461: : 0;
462: rawLengthShifts[prepCharsLength] = prevRawLengthShift
463: + extraInputLength;
464: prepEndIndex++;
465: }
466:
467: private void consistencyCheck() {
468: if (readIndex > lookaheadIndex) {
469: throw new IllegalStateException(
470: "readIndex > lookaheadIndex: " + this );
471: }
472: if (prepStartIndex > lookaheadIndex) {
473: throw new IllegalStateException(
474: "prepStartIndex > lookaheadIndex: " + this );
475: }
476: if (prepStartIndex != lookaheadIndex
477: && prepStartIndex >= prepEndIndex) {
478: throw new IllegalStateException(
479: "prepStartIndex >= prepEndIndex: " + this );
480: }
481: }
482:
483: public String toString() {
484: StringBuilder sb = new StringBuilder();
485: sb.append("readIndex=");
486: sb.append(readIndex);
487: sb.append(", lookaheadIndex=");
488: sb.append(lookaheadIndex);
489: sb.append(", prepStartIndex=");
490: sb.append(prepStartIndex);
491: sb.append(", prepEndIndex=");
492: sb.append(prepEndIndex);
493: return sb.toString();
494: }
495:
496: }
|