0001: /*
0002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
0003: *
0004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
0005: *
0006: * The contents of this file are subject to the terms of either the GNU
0007: * General Public License Version 2 only ("GPL") or the Common
0008: * Development and Distribution License("CDDL") (collectively, the
0009: * "License"). You may not use this file except in compliance with the
0010: * License. You can obtain a copy of the License at
0011: * http://www.netbeans.org/cddl-gplv2.html
0012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
0013: * specific language governing permissions and limitations under the
0014: * License. When distributing the software, include this License Header
0015: * Notice in each file and include the License file at
0016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
0017: * particular file as subject to the "Classpath" exception as provided
0018: * by Sun in the GPL Version 2 section of the License file that
0019: * accompanied this code. If applicable, add the following below the
0020: * License Header, with the fields enclosed by brackets [] replaced by
0021: * your own identifying information:
0022: * "Portions Copyrighted [year] [name of copyright owner]"
0023: *
0024: * Contributor(s):
0025: *
0026: * The Original Software is NetBeans. The Initial Developer of the Original
0027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2007 Sun
0028: * Microsystems, Inc. All Rights Reserved.
0029: *
0030: * If you wish your version of this file to be governed by only the CDDL
0031: * or only the GPL Version 2, indicate your decision by adding
0032: * "[Contributor] elects to include this software in this distribution
0033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
0034: * single choice of license, a recipient has the option to distribute
0035: * your version of this file under either the CDDL, the GPL Version 2 or
0036: * to extend the choice of license to its licensees as provided above.
0037: * However, if you add GPL Version 2 code and therefore, elected the GPL
0038: * Version 2 license, then the option applies only if the new code is
0039: * made subject to such option by the copyright holder.
0040: */
0041:
0042: package org.netbeans.modules.cnd.editor.cplusplus;
0043:
0044: import org.netbeans.editor.Syntax;
0045: import org.netbeans.editor.TokenID;
0046:
0047: /**
0048: * Syntax analyzer for C and C++ source files.
0049: * Tokens and internal states are given below.
0050: */
0051: public class CCSyntax extends Syntax {
0052:
0053: // Internal states
0054: private static final int ISI_WHITESPACE = 2; // inside white space
0055: private static final int ISI_LINE_COMMENT = 4; // inside line comment //
0056: private static final int ISI_BLOCK_COMMENT = 5; // inside block comment /* ... */
0057: private static final int ISI_STRING = 6; // inside string constant
0058: private static final int ISI_STRING_A_BSLASH = 7; // inside string constant after backslash
0059: private static final int ISI_CHAR = 8; // inside char constant
0060: private static final int ISI_CHAR_A_BSLASH = 9; // inside char constant after backslash
0061: private static final int ISI_IDENTIFIER = 10; // inside identifier
0062: private static final int ISA_SLASH = 11; // slash char
0063: private static final int ISA_EQ = 12; // after '='
0064: private static final int ISA_GT = 13; // after '>'
0065: private static final int ISA_GTGT = 14; // after '>>'
0066: private static final int ISA_LT = 16; // after '<'
0067: private static final int ISA_LTLT = 17; // after '<<'
0068: private static final int ISA_PLUS = 18; // after '+'
0069: private static final int ISA_MINUS = 19; // after '-'
0070: private static final int ISA_STAR = 20; // after '*'
0071: private static final int ISA_STAR_I_BLOCK_COMMENT = 21; // after '*'
0072: private static final int ISA_PIPE = 22; // after '|'
0073: private static final int ISA_PERCENT = 23; // after '%'
0074: private static final int ISA_AND = 24; // after '&'
0075: private static final int ISA_XOR = 25; // after '^'
0076: private static final int ISA_EXCLAMATION = 26; // after '!'
0077: private static final int ISA_ZERO = 27; // after '0'
0078: private static final int ISI_INT = 28; // integer number
0079: private static final int ISI_OCTAL = 29; // octal number
0080: private static final int ISI_DOUBLE = 30; // double number
0081: private static final int ISI_DOUBLE_EXP = 31; // double number
0082: private static final int ISI_HEX = 32; // hex number
0083: private static final int ISA_DOT = 33; // after '.'
0084: private static final int ISA_HASH = 34; // after "#"
0085: private static final int ISA_HASH_WS = ISA_HASH + 1; // after "#" and whitespace
0086: private static final int ISA_BACKSLASH = ISA_HASH_WS + 1; // after backslash
0087: private static final int ISA_LINE_CONTINUATION = ISA_BACKSLASH + 1; // after backslash eol
0088: private static final int ISA_COMMA = ISA_LINE_CONTINUATION + 1; // after backslash eol
0089: private static final int ISA_INCLUDE = ISA_COMMA + 1; // after #include
0090: private static final int ISA_INCLUDE_A_WS = ISA_INCLUDE + 1; // after #include and whitespaces
0091: private static final int ISI_SYS_INCLUDE = ISA_INCLUDE_A_WS + 1; // inside <filename> include directive
0092: private static final int ISI_USR_START_INCLUDE = ISI_SYS_INCLUDE + 1; // inside "filename" include directive at first '"'
0093: private static final int ISI_USR_INCLUDE = ISI_USR_START_INCLUDE + 1; // inside "filename" include directive
0094: private static final int ISA_COLON = ISI_USR_INCLUDE + 1; // after ':'
0095: private static final int ISA_ARROW = ISA_COLON + 1; // after '->'
0096:
0097: protected static final String IS_CPLUSPLUS = "C++"; // NOI18N
0098: protected static final String IS_C = "C"; // NOI18N
0099:
0100: protected String lang;
0101:
0102: public CCSyntax() {
0103: tokenContextPath = CCTokenContext.contextPath;
0104: lang = IS_CPLUSPLUS;
0105: }
0106:
0107: protected TokenID parseToken() {
0108: char actChar;
0109:
0110: while (offset < stopOffset) {
0111: actChar = buffer[offset];
0112:
0113: switch (state) {
0114: case INIT:
0115: switch (actChar) {
0116: case '"':
0117: state = ISI_STRING;
0118: break;
0119: case '\'':
0120: state = ISI_CHAR;
0121: break;
0122: case '/':
0123: state = ISA_SLASH;
0124: break;
0125: case '\\':
0126: state = ISA_BACKSLASH;
0127: break;
0128: case '=':
0129: state = ISA_EQ;
0130: break;
0131: case '>':
0132: state = ISA_GT;
0133: break;
0134: case '<':
0135: state = ISA_LT;
0136: break;
0137: case '+':
0138: state = ISA_PLUS;
0139: break;
0140: case '-':
0141: state = ISA_MINUS;
0142: break;
0143: case '*':
0144: state = ISA_STAR;
0145: break;
0146: case '|':
0147: state = ISA_PIPE;
0148: break;
0149: case '%':
0150: state = ISA_PERCENT;
0151: break;
0152: case '&':
0153: state = ISA_AND;
0154: break;
0155: case '^':
0156: state = ISA_XOR;
0157: break;
0158: case '~':
0159: offset++;
0160: return CCTokenContext.NEG;
0161: case '!':
0162: state = ISA_EXCLAMATION;
0163: break;
0164: case '0':
0165: state = ISA_ZERO;
0166: break;
0167: case '.':
0168: state = ISA_DOT;
0169: break;
0170: case ',':
0171: offset++;
0172: return CCTokenContext.COMMA;
0173: case ';':
0174: offset++;
0175: return CCTokenContext.SEMICOLON;
0176: case ':':
0177: //state = ISA_COMMA;
0178: state = ISA_COLON;
0179: break;
0180: case '?':
0181: offset++;
0182: return CCTokenContext.QUESTION;
0183: case '(':
0184: offset++;
0185: return CCTokenContext.LPAREN;
0186: case ')':
0187: offset++;
0188: return CCTokenContext.RPAREN;
0189: case '[':
0190: offset++;
0191: return CCTokenContext.LBRACKET;
0192: case ']':
0193: offset++;
0194: return CCTokenContext.RBRACKET;
0195: case '{':
0196: offset++;
0197: return CCTokenContext.LBRACE;
0198: case '}':
0199: offset++;
0200: return CCTokenContext.RBRACE;
0201: case '#':
0202: state = ISA_HASH;
0203: break;
0204:
0205: default:
0206: // Check for whitespace
0207: if (Character.isWhitespace(actChar)) {
0208: state = ISI_WHITESPACE;
0209: break;
0210: }
0211:
0212: // Check for digit
0213: if (Character.isDigit(actChar)) {
0214: state = ISI_INT;
0215: break;
0216: }
0217:
0218: // Check for identifier
0219:
0220: // At this point, you're probably wondering
0221: // "why is he using isJavaIdentifier here
0222: // when this is C++, not Java?
0223: // The answer is that isJavaIdentifierStart
0224: // is implemented very efficiently. Implementing
0225: // something equivalent requires a huge lookup
0226: // table, probably not worth the extra footprint
0227: // considering that isJavaIdentifierStart pretty
0228: // closely matches what is considered an
0229: // identifier in C++. The main difference seems
0230: // to be that allowable unicode characters that
0231: // are not ASCII *would* be allowed by this
0232: // function. Not worth the trouble IMHO.
0233:
0234: // XXX Perhaps I should write an efficient
0235: // recognizor here which ONLY considers ASCII
0236: // characters valid! (as identifiers that is).
0237: // That allows a small table (or even fairly simple
0238: // bit operations on the character value)
0239: // since I can take advantage of unicode's
0240: // ASCII range.
0241: // But this might require some convoluted logic since the
0242: // compiler PARTIALLY allows other code sets.
0243: // Nay, I say, isJ* is okay!
0244: if (Character.isJavaIdentifierStart(actChar)) {
0245: state = ISI_IDENTIFIER;
0246: break;
0247: }
0248:
0249: offset++;
0250: return CCTokenContext.INVALID_CHAR;
0251: } // end of case INIT
0252: break;
0253:
0254: case ISI_WHITESPACE: // white space
0255: if (!Character.isWhitespace(actChar)) {
0256: state = INIT;
0257: return CCTokenContext.WHITESPACE;
0258: }
0259: break;
0260:
0261: case ISI_LINE_COMMENT:
0262: switch (actChar) {
0263: case '\n':
0264: state = INIT;
0265: return CCTokenContext.LINE_COMMENT;
0266: }
0267: break;
0268:
0269: case ISI_BLOCK_COMMENT:
0270: switch (actChar) {
0271: case '*':
0272: state = ISA_STAR_I_BLOCK_COMMENT;
0273: break;
0274: }
0275: break;
0276:
0277: case ISI_STRING:
0278: switch (actChar) {
0279: case '\\':
0280: state = ISI_STRING_A_BSLASH;
0281: break;
0282: case '\n':
0283: state = INIT;
0284: supposedTokenID = CCTokenContext.STRING_LITERAL;
0285: // was commented in java return CCTokenContext.INCOMPLETE_STRING_LITERAL;
0286: return supposedTokenID;
0287: case '"':
0288: offset++;
0289: state = INIT;
0290: return CCTokenContext.STRING_LITERAL;
0291: }
0292: break;
0293:
0294: case ISI_STRING_A_BSLASH:
0295: switch (actChar) {
0296: case '"':
0297: case '\\':
0298: break;
0299: default:
0300: offset--;
0301: break;
0302: }
0303: state = ISI_STRING;
0304: break;
0305:
0306: case ISI_CHAR:
0307: switch (actChar) {
0308: case '\\':
0309: state = ISI_CHAR_A_BSLASH;
0310: break;
0311: case '\n':
0312: state = INIT;
0313: supposedTokenID = CCTokenContext.CHAR_LITERAL;
0314: // was commented in java return CCTokenContext.INCOMPLETE_CHAR_LITERAL;
0315: return supposedTokenID;
0316: case '\'':
0317: offset++;
0318: state = INIT;
0319: return CCTokenContext.CHAR_LITERAL;
0320: }
0321: break;
0322:
0323: case ISI_CHAR_A_BSLASH:
0324: switch (actChar) {
0325: case '\'':
0326: case '\\':
0327: break;
0328: default:
0329: offset--;
0330: break;
0331: }
0332: state = ISI_CHAR;
0333: break;
0334:
0335: case ISI_IDENTIFIER:
0336: // For a comment of why we use isJAVAidentifier here,
0337: // grep backwards for isJavaIdentifierStart
0338: if (!(Character.isJavaIdentifierPart(actChar))) {
0339: state = INIT;
0340: TokenID tid = matchKeyword(buffer, tokenOffset,
0341: offset - tokenOffset);
0342: if (tid == null) {
0343: tid = matchCPPKeyword(buffer, tokenOffset,
0344: offset - tokenOffset);
0345: if (tid != null
0346: && ((tid.getNumericID() == CCTokenContext.CPPINCLUDE_ID) || (tid
0347: .getNumericID() == CCTokenContext.CPPINCLUDE_NEXT_ID))) {
0348: state = ISA_INCLUDE;
0349: }
0350: }
0351: return (tid != null) ? tid
0352: : CCTokenContext.IDENTIFIER;
0353: }
0354: break;
0355:
0356: case ISA_INCLUDE:
0357: if (isSpaceChar(actChar)) {
0358: state = ISA_INCLUDE_A_WS;
0359: } else if (actChar == '"') {
0360: state = ISI_USR_INCLUDE;
0361: } else if (actChar == '<') {
0362: state = ISI_SYS_INCLUDE;
0363: } else {
0364: state = INIT;
0365: offset--;
0366: }
0367: break;
0368:
0369: case ISA_INCLUDE_A_WS:
0370: if (isSpaceChar(actChar)) {
0371: state = ISA_INCLUDE_A_WS;
0372: break;
0373: }
0374: switch (actChar) {
0375: case '<':
0376: state = ISI_SYS_INCLUDE;
0377: break;
0378: case '"':
0379: state = ISI_USR_START_INCLUDE;
0380: break;
0381: default:
0382: if (Character.isJavaIdentifierStart(actChar)) {
0383: state = ISI_IDENTIFIER;
0384: break;
0385: } else {
0386: // does not consume actChar, as it is part of next token
0387: state = INIT;
0388: }
0389: }
0390: return CCTokenContext.WHITESPACE;
0391:
0392: case ISI_SYS_INCLUDE:
0393: switch (actChar) {
0394: case '>':
0395: // consume actChar, as it is part of token
0396: offset++;
0397: state = INIT;
0398: supposedTokenID = CCTokenContext.SYS_INCLUDE;
0399: // check non-empty included file #include <>
0400: return ((offset - tokenOffset) <= 2) ? CCTokenContext.INCOMPLETE_SYS_INCLUDE
0401: : CCTokenContext.SYS_INCLUDE;
0402: case '\n':
0403: // new line without closed '"'
0404: // does not consume actChar, as it is part of next token
0405: state = INIT;
0406: supposedTokenID = CCTokenContext.SYS_INCLUDE;
0407: return CCTokenContext.INCOMPLETE_SYS_INCLUDE;
0408: default:
0409: }
0410: break;
0411:
0412: case ISI_USR_START_INCLUDE:
0413: switch (actChar) {
0414: case '"':
0415: // does not consume actChar, as it is done at the end of main while loop
0416: state = ISI_USR_INCLUDE;
0417: break;
0418: default:
0419: }
0420: break;
0421:
0422: case ISI_USR_INCLUDE:
0423: switch (actChar) {
0424: case '"':
0425: // consume actChar, as it is part of token
0426: offset++;
0427: state = INIT;
0428: supposedTokenID = CCTokenContext.USR_INCLUDE;
0429: // check non-empty included file #include ""
0430: return ((offset - tokenOffset) <= 2) ? CCTokenContext.INCOMPLETE_USR_INCLUDE
0431: : CCTokenContext.USR_INCLUDE;
0432: case '\n':
0433: // does not consume actChar, as it is part of next token
0434: state = INIT;
0435: supposedTokenID = CCTokenContext.USR_INCLUDE;
0436: return CCTokenContext.INCOMPLETE_USR_INCLUDE;
0437: default:
0438: }
0439: break;
0440:
0441: case ISA_SLASH:
0442: switch (actChar) {
0443: case '=':
0444: offset++;
0445: state = INIT;
0446: return CCTokenContext.DIV_EQ;
0447: case '/':
0448: state = ISI_LINE_COMMENT;
0449: break;
0450: case '*':
0451: state = ISI_BLOCK_COMMENT;
0452: break;
0453: default:
0454: state = INIT;
0455: return CCTokenContext.DIV;
0456: }
0457: break;
0458:
0459: case ISA_BACKSLASH:
0460: switch (actChar) {
0461: case '\n':
0462: state = ISA_LINE_CONTINUATION;
0463: break;
0464: default:
0465: state = INIT;
0466: return CCTokenContext.INVALID_BACKSLASH;
0467: }
0468: break;
0469:
0470: case ISA_LINE_CONTINUATION:
0471: state = INIT;
0472: return CCTokenContext.BACKSLASH;
0473:
0474: case ISA_HASH:
0475: // Check for whitespace, but not eol
0476: if (isSpaceChar(actChar)) {
0477: state = ISA_HASH_WS;
0478: break;
0479: }
0480:
0481: // Check for identifier
0482:
0483: // At this point, you're probably wondering
0484: // "why is he using isJavaIdentifier here
0485: // when this is C++, not Java?
0486: // The answer is that isJavaIdentifierStart
0487: // is implemented very efficiently. Implementing
0488: // something equivalent requires a huge lookup
0489: // table, probably not worth the extra footprint
0490: // considering that isJavaIdentifierStart pretty
0491: // closely matches what is considered an
0492: // identifier in C++. The main difference seems
0493: // to be that allowable unicode characters that
0494: // are not ASCII *would* be allowed by this
0495: // function. Not worth the trouble IMHO.
0496:
0497: // XXX Perhaps I should write an efficient
0498: // recognizor here which ONLY considers ASCII
0499: // characters valid! (as identifiers that is).
0500: // That allows a small table (or even fairly simple
0501: // bit operations on the character value)
0502: // since I can take advantage of unicode's
0503: // ASCII range.
0504: // But this might require some convoluted logic since the
0505: // compiler PARTIALLY allows other code sets.
0506: // Nay, I say, isJ* is okay!
0507: if (Character.isJavaIdentifierStart(actChar)) {
0508: state = ISI_IDENTIFIER;
0509: break;
0510: }
0511:
0512: state = INIT;
0513: if (actChar == '#') {
0514: offset++;
0515: return CCTokenContext.DOUBLE_HASH;
0516: }
0517: return CCTokenContext.HASH;
0518:
0519: case ISA_HASH_WS:
0520: // Check for whitespace, but not eol
0521: if (isSpaceChar(actChar)) {
0522: state = ISA_HASH_WS;
0523: break;
0524: }
0525:
0526: // Check for identifier
0527:
0528: // At this point, you're probably wondering
0529: // "why is he using isJavaIdentifier here
0530: // when this is C++, not Java?
0531: // The answer is that isJavaIdentifierStart
0532: // is implemented very efficiently. Implementing
0533: // something equivalent requires a huge lookup
0534: // table, probably not worth the extra footprint
0535: // considering that isJavaIdentifierStart pretty
0536: // closely matches what is considered an
0537: // identifier in C++. The main difference seems
0538: // to be that allowable unicode characters that
0539: // are not ASCII *would* be allowed by this
0540: // function. Not worth the trouble IMHO.
0541:
0542: // XXX Perhaps I should write an efficient
0543: // recognizor here which ONLY considers ASCII
0544: // characters valid! (as identifiers that is).
0545: // That allows a small table (or even fairly simple
0546: // bit operations on the character value)
0547: // since I can take advantage of unicode's
0548: // ASCII range.
0549: // But this might require some convoluted logic since the
0550: // compiler PARTIALLY allows other code sets.
0551: // Nay, I say, isJ* is okay!
0552: if (Character.isJavaIdentifierStart(actChar)) {
0553: state = ISI_IDENTIFIER;
0554: break;
0555: }
0556: state = INIT;
0557: return CCTokenContext.HASH;
0558:
0559: case ISA_EQ:
0560: switch (actChar) {
0561: case '=':
0562: offset++;
0563: return CCTokenContext.EQ_EQ;
0564: default:
0565: state = INIT;
0566: return CCTokenContext.EQ;
0567: }
0568: // break;
0569:
0570: case ISA_GT:
0571: switch (actChar) {
0572: case '>':
0573: state = ISA_GTGT;
0574: break;
0575: case '=':
0576: offset++;
0577: return CCTokenContext.GT_EQ;
0578: default:
0579: state = INIT;
0580: return CCTokenContext.GT;
0581: }
0582: break;
0583:
0584: case ISA_GTGT:
0585: switch (actChar) {
0586: case '=':
0587: offset++;
0588: return CCTokenContext.RSSHIFT_EQ;
0589: default:
0590: state = INIT;
0591: return CCTokenContext.RSSHIFT;
0592: }
0593: // break;
0594:
0595: case ISA_LT:
0596: switch (actChar) {
0597: case '<':
0598: state = ISA_LTLT;
0599: break;
0600: case '=':
0601: offset++;
0602: return CCTokenContext.LT_EQ;
0603: default:
0604: state = INIT;
0605: return CCTokenContext.LT;
0606: }
0607: break;
0608:
0609: case ISA_LTLT:
0610: switch (actChar) {
0611: case '<':
0612: state = INIT;
0613: offset++;
0614: return CCTokenContext.INVALID_OPERATOR;
0615: case '=':
0616: offset++;
0617: return CCTokenContext.LSHIFT_EQ;
0618: default:
0619: state = INIT;
0620: return CCTokenContext.LSHIFT;
0621: }
0622:
0623: case ISA_PLUS:
0624: switch (actChar) {
0625: case '+':
0626: offset++;
0627: return CCTokenContext.PLUS_PLUS;
0628: case '=':
0629: offset++;
0630: return CCTokenContext.PLUS_EQ;
0631: default:
0632: state = INIT;
0633: return CCTokenContext.PLUS;
0634: }
0635:
0636: case ISA_MINUS:
0637: switch (actChar) {
0638: case '-':
0639: offset++;
0640: return CCTokenContext.MINUS_MINUS;
0641: case '=':
0642: offset++;
0643: return CCTokenContext.MINUS_EQ;
0644: case '>':
0645: state = ISA_ARROW;
0646: break;
0647: default:
0648: state = INIT;
0649: return CCTokenContext.MINUS;
0650: }
0651: break;
0652:
0653: case ISA_ARROW:
0654: switch (actChar) {
0655: case '*':
0656: state = INIT;
0657: offset++;
0658: return CCTokenContext.ARROWMBR;
0659: default:
0660: state = INIT;
0661: return CCTokenContext.ARROW;
0662: }
0663:
0664: case ISA_COMMA:
0665: state = INIT;
0666: return CCTokenContext.COMMA;
0667:
0668: case ISA_COLON:
0669: switch (actChar) {
0670: case ':':
0671: offset++;
0672: return CCTokenContext.SCOPE;
0673: default:
0674: state = INIT;
0675: return CCTokenContext.COLON;
0676: }
0677:
0678: case ISA_STAR:
0679: switch (actChar) {
0680: case '=':
0681: offset++;
0682: return CCTokenContext.MUL_EQ;
0683: case '/':
0684: // either '*/' outside comment
0685: // or pointer like
0686: // int*/* commnet*/var;
0687: if ((offset + 1 < stopOffset)
0688: && (buffer[offset + 1] != '*')) {
0689: offset++;
0690: state = INIT;
0691: return CCTokenContext.INVALID_COMMENT_END; // '*/' outside comment
0692: } else {
0693: //nobreak;
0694: }
0695: default:
0696: state = INIT;
0697: return CCTokenContext.MUL;
0698: }
0699:
0700: case ISA_STAR_I_BLOCK_COMMENT:
0701: switch (actChar) {
0702: case '/':
0703: offset++;
0704: state = INIT;
0705: return CCTokenContext.BLOCK_COMMENT;
0706: default:
0707: offset--;
0708: state = ISI_BLOCK_COMMENT;
0709: break;
0710: }
0711: break;
0712:
0713: case ISA_PIPE:
0714: switch (actChar) {
0715: case '=':
0716: offset++;
0717: state = INIT;
0718: return CCTokenContext.OR_EQ;
0719: case '|':
0720: offset++;
0721: state = INIT;
0722: return CCTokenContext.OR_OR;
0723: default:
0724: state = INIT;
0725: return CCTokenContext.OR;
0726: }
0727: // break;
0728:
0729: case ISA_PERCENT:
0730: switch (actChar) {
0731: case '=':
0732: offset++;
0733: state = INIT;
0734: return CCTokenContext.MOD_EQ;
0735: default:
0736: state = INIT;
0737: return CCTokenContext.MOD;
0738: }
0739: // break;
0740:
0741: case ISA_AND:
0742: switch (actChar) {
0743: case '=':
0744: offset++;
0745: state = INIT;
0746: return CCTokenContext.AND_EQ;
0747: case '&':
0748: offset++;
0749: state = INIT;
0750: return CCTokenContext.AND_AND;
0751: default:
0752: state = INIT;
0753: return CCTokenContext.AND;
0754: }
0755: // break;
0756:
0757: case ISA_XOR:
0758: switch (actChar) {
0759: case '=':
0760: offset++;
0761: state = INIT;
0762: return CCTokenContext.XOR_EQ;
0763: default:
0764: state = INIT;
0765: return CCTokenContext.XOR;
0766: }
0767: // break;
0768:
0769: case ISA_EXCLAMATION:
0770: switch (actChar) {
0771: case '=':
0772: offset++;
0773: state = INIT;
0774: return CCTokenContext.NOT_EQ;
0775: default:
0776: state = INIT;
0777: return CCTokenContext.NOT;
0778: }
0779: // break;
0780:
0781: case ISA_ZERO:
0782: switch (actChar) {
0783: case '.':
0784: state = ISI_DOUBLE;
0785: break;
0786: case 'x':
0787: case 'X':
0788: state = ISI_HEX;
0789: break;
0790: case 'l':
0791: case 'L':
0792: offset++;
0793: state = INIT;
0794: return CCTokenContext.LONG_LITERAL;
0795: case 'f':
0796: case 'F':
0797: offset++;
0798: state = INIT;
0799: return CCTokenContext.FLOAT_LITERAL;
0800: case 'd':
0801: case 'D':
0802: offset++;
0803: state = INIT;
0804: return CCTokenContext.DOUBLE_LITERAL;
0805: case '8': // it's error to have '8' and '9' in octal number
0806: case '9':
0807: state = INIT;
0808: offset++;
0809: return CCTokenContext.INVALID_OCTAL_LITERAL;
0810: case 'e':
0811: case 'E':
0812: state = ISI_DOUBLE_EXP;
0813: break;
0814: default:
0815: if (Character.isDigit(actChar)) { // '8' and '9' already handled
0816: state = ISI_OCTAL;
0817: break;
0818: }
0819: state = INIT;
0820: return CCTokenContext.INT_LITERAL;
0821: }
0822: break;
0823:
0824: case ISI_INT:
0825: switch (actChar) {
0826: case 'l':
0827: case 'L':
0828: offset++;
0829: state = INIT;
0830: return CCTokenContext.LONG_LITERAL;
0831: case '.':
0832: state = ISI_DOUBLE;
0833: break;
0834: case 'f':
0835: case 'F':
0836: offset++;
0837: state = INIT;
0838: return CCTokenContext.FLOAT_LITERAL;
0839: case 'd':
0840: case 'D':
0841: offset++;
0842: state = INIT;
0843: return CCTokenContext.DOUBLE_LITERAL;
0844: case 'e':
0845: case 'E':
0846: state = ISI_DOUBLE_EXP;
0847: break;
0848: default:
0849: if (!(actChar >= '0' && actChar <= '9')) {
0850: state = INIT;
0851: return CCTokenContext.INT_LITERAL;
0852: }
0853: }
0854: break;
0855:
0856: case ISI_OCTAL:
0857: if (!(actChar >= '0' && actChar <= '7')) {
0858:
0859: state = INIT;
0860: return CCTokenContext.OCTAL_LITERAL;
0861: }
0862: break;
0863:
0864: case ISI_DOUBLE:
0865: switch (actChar) {
0866: case 'f':
0867: case 'F':
0868: offset++;
0869: state = INIT;
0870: return CCTokenContext.FLOAT_LITERAL;
0871: case 'd':
0872: case 'D':
0873: offset++;
0874: state = INIT;
0875: return CCTokenContext.DOUBLE_LITERAL;
0876: case 'e':
0877: case 'E':
0878: state = ISI_DOUBLE_EXP;
0879: break;
0880: default:
0881: if (!((actChar >= '0' && actChar <= '9') || actChar == '.')) {
0882:
0883: state = INIT;
0884: return CCTokenContext.DOUBLE_LITERAL;
0885: }
0886: }
0887: break;
0888:
0889: case ISI_DOUBLE_EXP:
0890: switch (actChar) {
0891: case 'f':
0892: case 'F':
0893: offset++;
0894: state = INIT;
0895: return CCTokenContext.FLOAT_LITERAL;
0896: case 'd':
0897: case 'D':
0898: offset++;
0899: state = INIT;
0900: return CCTokenContext.DOUBLE_LITERAL;
0901: default:
0902: if (!(Character.isDigit(actChar) || actChar == '-' || actChar == '+')) {
0903: state = INIT;
0904: return CCTokenContext.DOUBLE_LITERAL;
0905: }
0906: }
0907: break;
0908:
0909: case ISI_HEX:
0910: if (!((actChar >= 'a' && actChar <= 'f')
0911: || (actChar >= 'A' && actChar <= 'F') || Character
0912: .isDigit(actChar))) {
0913:
0914: state = INIT;
0915: return CCTokenContext.HEX_LITERAL;
0916: }
0917: break;
0918:
0919: case ISA_DOT:
0920: if (Character.isDigit(actChar)) {
0921: state = ISI_DOUBLE;
0922: } else if (actChar == '*') {
0923: state = INIT;
0924: offset++;
0925: return CCTokenContext.DOTMBR;
0926: } else { // only single dot
0927: state = INIT;
0928: return CCTokenContext.DOT;
0929: }
0930: break;
0931:
0932: } // end of switch(state)
0933:
0934: offset++;
0935: } // end of while(offset...)
0936:
0937: /** At this stage there's no more text in the scanned buffer.
0938: * Scanner first checks whether this is completely the last
0939: * available buffer.
0940: */
0941:
0942: if (lastBuffer) {
0943: switch (state) {
0944: case ISI_WHITESPACE:
0945: state = INIT;
0946: return CCTokenContext.WHITESPACE;
0947: case ISI_IDENTIFIER:
0948: state = INIT;
0949: TokenID kwd = matchKeyword(buffer, tokenOffset, offset
0950: - tokenOffset);
0951: if (kwd == null) {
0952: kwd = matchCPPKeyword(buffer, tokenOffset, offset
0953: - tokenOffset);
0954: if (kwd != null
0955: && kwd.getNumericID() == CCTokenContext.CPPINCLUDE_ID) {
0956: state = ISA_INCLUDE;
0957: }
0958: }
0959: return (kwd != null) ? kwd : CCTokenContext.IDENTIFIER;
0960: case ISI_LINE_COMMENT:
0961: return CCTokenContext.LINE_COMMENT; // stay in line-comment state
0962: case ISI_BLOCK_COMMENT:
0963: case ISA_STAR_I_BLOCK_COMMENT:
0964: return CCTokenContext.BLOCK_COMMENT; // stay in block-comment state
0965: case ISI_STRING:
0966: case ISI_STRING_A_BSLASH:
0967: return CCTokenContext.STRING_LITERAL; // hold the state
0968: case ISI_SYS_INCLUDE:
0969: return CCTokenContext.INCOMPLETE_SYS_INCLUDE; // hold the state
0970: case ISA_INCLUDE:
0971: return CCTokenContext.CPPINCLUDE; // hold the state
0972: case ISA_INCLUDE_A_WS:
0973: return CCTokenContext.WHITESPACE; // hold the state
0974: case ISI_USR_INCLUDE:
0975: return CCTokenContext.INCOMPLETE_USR_INCLUDE; // hold the state
0976: case ISI_CHAR:
0977: case ISI_CHAR_A_BSLASH:
0978: return CCTokenContext.CHAR_LITERAL; // hold the state
0979: case ISA_ZERO:
0980: case ISI_INT:
0981: state = INIT;
0982: return CCTokenContext.INT_LITERAL;
0983: case ISI_OCTAL:
0984: state = INIT;
0985: return CCTokenContext.OCTAL_LITERAL;
0986: case ISI_DOUBLE:
0987: case ISI_DOUBLE_EXP:
0988: state = INIT;
0989: return CCTokenContext.DOUBLE_LITERAL;
0990: case ISI_HEX:
0991: state = INIT;
0992: return CCTokenContext.HEX_LITERAL;
0993: case ISA_BACKSLASH:
0994: state = INIT;
0995: return CCTokenContext.BACKSLASH;
0996: case ISA_LINE_CONTINUATION:
0997: state = INIT;
0998: return CCTokenContext.LINE_CONTINUATION;
0999: case ISA_DOT:
1000: state = INIT;
1001: return CCTokenContext.DOT;
1002: case ISA_SLASH:
1003: state = INIT;
1004: return CCTokenContext.DIV;
1005: case ISA_EQ:
1006: state = INIT;
1007: return CCTokenContext.EQ;
1008: case ISA_GT:
1009: state = INIT;
1010: return CCTokenContext.GT;
1011: case ISA_GTGT:
1012: state = INIT;
1013: return CCTokenContext.RSSHIFT;
1014: case ISA_LT:
1015: state = INIT;
1016: return CCTokenContext.LT;
1017: case ISA_LTLT:
1018: state = INIT;
1019: return CCTokenContext.LSHIFT;
1020: case ISA_PLUS:
1021: state = INIT;
1022: return CCTokenContext.PLUS;
1023: case ISA_MINUS:
1024: state = INIT;
1025: return CCTokenContext.MINUS;
1026: case ISA_ARROW:
1027: state = INIT;
1028: return CCTokenContext.ARROW;
1029: case ISA_COMMA:
1030: state = INIT;
1031: return CCTokenContext.COMMA;
1032: case ISA_STAR:
1033: state = INIT;
1034: return CCTokenContext.MUL;
1035: case ISA_PIPE:
1036: state = INIT;
1037: return CCTokenContext.OR;
1038: case ISA_PERCENT:
1039: state = INIT;
1040: return CCTokenContext.MOD;
1041: case ISA_AND:
1042: state = INIT;
1043: return CCTokenContext.AND;
1044: case ISA_XOR:
1045: state = INIT;
1046: return CCTokenContext.XOR;
1047: case ISA_EXCLAMATION:
1048: state = INIT;
1049: return CCTokenContext.NOT;
1050: case ISA_HASH:
1051: state = INIT;
1052: return CCTokenContext.HASH;
1053: case ISA_COLON:
1054: state = INIT;
1055: return CCTokenContext.COLON;
1056: }
1057: }
1058:
1059: /* At this stage there's no more text in the scanned buffer, but
1060: * this buffer is not the last so the scan will continue on another buffer.
1061: * The scanner tries to minimize the amount of characters
1062: * that will be prescanned in the next buffer by returning the token
1063: * where possible.
1064: */
1065:
1066: switch (state) {
1067: case ISI_WHITESPACE:
1068: return CCTokenContext.WHITESPACE;
1069: }
1070:
1071: return null; // nothing found
1072: }
1073:
1074: public String getStateName(int stateNumber) {
1075: switch (stateNumber) {
1076: case ISI_WHITESPACE:
1077: return "ISI_WHITESPACE"; //NOI18N
1078: case ISI_LINE_COMMENT:
1079: return "ISI_LINE_COMMENT"; //NOI18N
1080: case ISI_BLOCK_COMMENT:
1081: return "ISI_BLOCK_COMMENT"; //NOI18N
1082: case ISI_STRING:
1083: return "ISI_STRING"; //NOI18N
1084: case ISI_STRING_A_BSLASH:
1085: return "ISI_STRING_A_BSLASH"; //NOI18N
1086: case ISI_CHAR:
1087: return "ISI_CHAR"; //NOI18N
1088: case ISI_CHAR_A_BSLASH:
1089: return "ISI_CHAR_A_BSLASH"; //NOI18N
1090: case ISI_IDENTIFIER:
1091: return "ISI_IDENTIFIER"; //NOI18N
1092: case ISA_SLASH:
1093: return "ISA_SLASH"; //NOI18N
1094: case ISA_BACKSLASH:
1095: return "ISA_BACKSLASH"; //NOI18N
1096: case ISA_EQ:
1097: return "ISA_EQ"; //NOI18N
1098: case ISA_GT:
1099: return "ISA_GT"; //NOI18N
1100: case ISA_GTGT:
1101: return "ISA_GTGT"; //NOI18N
1102: case ISA_LT:
1103: return "ISA_LT"; //NOI18N
1104: case ISA_LTLT:
1105: return "ISA_LTLT"; //NOI18N
1106: case ISA_PLUS:
1107: return "ISA_PLUS"; //NOI18N
1108: case ISA_MINUS:
1109: return "ISA_MINUS"; //NOI18N
1110: case ISA_COMMA:
1111: return "ISA_COMMA"; //NOI18N
1112: case ISA_STAR:
1113: return "ISA_STAR"; //NOI18N
1114: case ISA_STAR_I_BLOCK_COMMENT:
1115: return "ISA_STAR_I_BLOCK_COMMENT"; //NOI18N
1116: case ISA_PIPE:
1117: return "ISA_PIPE"; //NOI18N
1118: case ISA_PERCENT:
1119: return "ISA_PERCENT"; //NOI18N
1120: case ISA_AND:
1121: return "ISA_AND"; //NOI18N
1122: case ISA_XOR:
1123: return "ISA_XOR"; //NOI18N
1124: case ISA_EXCLAMATION:
1125: return "ISA_EXCLAMATION"; //NOI18N
1126: case ISA_ZERO:
1127: return "ISA_ZERO"; //NOI18N
1128: case ISI_INT:
1129: return "ISI_INT"; //NOI18N
1130: case ISI_OCTAL:
1131: return "ISI_OCTAL"; //NOI18N
1132: case ISI_DOUBLE:
1133: return "ISI_DOUBLE"; //NOI18N
1134: case ISI_DOUBLE_EXP:
1135: return "ISI_DOUBLE_EXP"; //NOI18N
1136: case ISI_HEX:
1137: return "ISI_HEX"; //NOI18N
1138: case ISA_DOT:
1139: return "ISA_DOT"; //NOI18N
1140: case ISA_HASH:
1141: return "ISA_HASH"; //NOI18N
1142: case ISA_INCLUDE:
1143: return "ISA_INCLUDE"; //NOI18N
1144: case ISA_INCLUDE_A_WS:
1145: return "ISA_INCLUDE_A_WS"; //NOI18N
1146: case ISI_SYS_INCLUDE:
1147: return "ISI_SYS_INCLUDE"; //NOI18N
1148: case ISI_USR_INCLUDE:
1149: return "ISI_USR_INCLUDE"; //NOI18N
1150: case ISI_USR_START_INCLUDE:
1151: return "ISI_USR_START_INCLUDE"; //NOI18N
1152: default:
1153: return super .getStateName(stateNumber);
1154: }
1155: }
1156:
1157: public TokenID matchKeyword(char[] buffer, int offset, int len) {
1158: // String kw = new String(buffer, offset, len);
1159: // System.err.println("matchKeyword[" + lang + "]: " + kw);
1160:
1161: if (len > 16)
1162: return null;
1163: if (len <= 1)
1164: return null;
1165: switch (buffer[offset++]) {
1166: case 'a':
1167: if (len <= 2)
1168: return null;
1169: switch (buffer[offset++]) {
1170: case 's': // keyword "asm" (C++ only)
1171: return (lang == IS_CPLUSPLUS && len == 3 && buffer[offset++] == 'm') ? CCTokenContext.ASM
1172: : null;
1173: case 'u': // keyword "auto"
1174: return (len == 4 && buffer[offset++] == 't' && buffer[offset++] == 'o') ? CCTokenContext.AUTO
1175: : null;
1176: default:
1177: return null;
1178: }
1179: case 'b':
1180: if (len <= 3)
1181: return null;
1182: switch (buffer[offset++]) {
1183: case 'o': // keyword "bool" (C++ only)
1184: return (lang == IS_CPLUSPLUS && len == 4
1185: && buffer[offset++] == 'o' && buffer[offset++] == 'l') ? CCTokenContext.BOOLEAN
1186: : null;
1187: case 'r': // keyword "break"
1188: return (len == 5 && buffer[offset++] == 'e'
1189: && buffer[offset++] == 'a' && buffer[offset++] == 'k') ? CCTokenContext.BREAK
1190: : null;
1191: default:
1192: return null;
1193: }
1194: case 'c':
1195: if (len <= 3)
1196: return null;
1197: switch (buffer[offset++]) {
1198: case 'a':
1199: switch (buffer[offset++]) {
1200: case 's': // keyword "case"
1201: return (len == 4 && buffer[offset++] == 'e') ? CCTokenContext.CASE
1202: : null;
1203: case 't': // keyword "catch" (C++ only)
1204: return (lang == IS_CPLUSPLUS && len == 5
1205: && buffer[offset++] == 'c' && buffer[offset++] == 'h') ? CCTokenContext.CATCH
1206: : null;
1207: default:
1208: return null;
1209: }
1210: case 'h': // keyword "char"
1211: return (len == 4 && buffer[offset++] == 'a' && buffer[offset++] == 'r') ? CCTokenContext.CHAR
1212: : null;
1213: case 'l': // keyword "class" (C++ only)
1214: return (lang == IS_CPLUSPLUS && len == 5
1215: && buffer[offset++] == 'a'
1216: && buffer[offset++] == 's' && buffer[offset++] == 's') ? CCTokenContext.CLASS
1217: : null;
1218: case 'o':
1219: if (len <= 4)
1220: return null;
1221: if (buffer[offset++] != 'n')
1222: return null;
1223: switch (buffer[offset++]) {
1224: case 's':
1225: if (lang == IS_CPLUSPLUS && len == 5) { // keyword "const" (C++ only)
1226: return (buffer[offset++] == 't') ? CCTokenContext.CONST
1227: : null;
1228: } else if (lang == IS_CPLUSPLUS && len == 10) { // keyword "const_cast" (C++ only)
1229: return (buffer[offset++] == 't'
1230: && buffer[offset++] == '_'
1231: && buffer[offset++] == 'c'
1232: && buffer[offset++] == 'a'
1233: && buffer[offset++] == 's' && buffer[offset++] == 't') ? CCTokenContext.CONST_CAST
1234: : null;
1235: } else {
1236: return null;
1237: }
1238: case 't': // keyword "continue"
1239: return (len == 8 && buffer[offset++] == 'i'
1240: && buffer[offset++] == 'n'
1241: && buffer[offset++] == 'u' && buffer[offset++] == 'e') ? CCTokenContext.CONTINUE
1242: : null;
1243: default:
1244: return null;
1245: }
1246: default:
1247: return null;
1248: }
1249: case 'd':
1250: if (len <= 1)
1251: return null;
1252: switch (buffer[offset++]) {
1253: case 'e':
1254: switch (buffer[offset++]) {
1255: case 'f': // keyword "default"
1256: return (len == 7 && buffer[offset++] == 'a'
1257: && buffer[offset++] == 'u'
1258: && buffer[offset++] == 'l' && buffer[offset++] == 't') ? CCTokenContext.DEFAULT
1259: : null;
1260: case 'l': // keyword "delete" (C++ only)
1261: return (lang == IS_CPLUSPLUS && len == 6
1262: && buffer[offset++] == 'e'
1263: && buffer[offset++] == 't' && buffer[offset++] == 'e') ? CCTokenContext.DELETE
1264: : null;
1265: default:
1266: return null;
1267: }
1268: case 'o':
1269: if (len == 2) { // keyword "do"
1270: return CCTokenContext.DO;
1271: }
1272: return (len == 6 // keyword "double"
1273: && buffer[offset++] == 'u'
1274: && buffer[offset++] == 'b'
1275: && buffer[offset++] == 'l' && buffer[offset++] == 'e') ? CCTokenContext.DOUBLE
1276: : null;
1277: case 'y': // keyword "dynamic_cast" (C++ only)
1278: return (lang == IS_CPLUSPLUS && len == 12
1279: && buffer[offset++] == 'n'
1280: && buffer[offset++] == 'a'
1281: && buffer[offset++] == 'm'
1282: && buffer[offset++] == 'i'
1283: && buffer[offset++] == 'c'
1284: && buffer[offset++] == '_'
1285: && buffer[offset++] == 'c'
1286: && buffer[offset++] == 'a'
1287: && buffer[offset++] == 's' && buffer[offset++] == 't') ? CCTokenContext.DYNAMIC_CAST
1288: : null;
1289: default:
1290: return null;
1291: }
1292: case 'e':
1293: if (len <= 3)
1294: return null;
1295: switch (buffer[offset++]) {
1296: case 'l': // keyword "else"
1297: return (len == 4 && buffer[offset++] == 's' && buffer[offset++] == 'e') ? CCTokenContext.ELSE
1298: : null;
1299: case 'n': // keyword "enum"
1300: return (len == 4 && buffer[offset++] == 'u' && buffer[offset++] == 'm') ? CCTokenContext.ENUM
1301: : null;
1302: case 'x':
1303: switch (buffer[offset++]) {
1304: case 'p':
1305: switch (buffer[offset++]) {
1306: case 'l': // keyword "explicit" (C++ only)
1307: return (lang == IS_CPLUSPLUS && len == 8
1308: && buffer[offset++] == 'i'
1309: && buffer[offset++] == 'c'
1310: && buffer[offset++] == 'i' && buffer[offset++] == 't') ? CCTokenContext.EXPLICIT
1311: : null;
1312: case 'o': // keyword "export" (C++ only)
1313: return (lang == IS_CPLUSPLUS && len == 6
1314: && buffer[offset++] == 'r' && buffer[offset++] == 't') ? CCTokenContext.EXPORT
1315: : null;
1316: default:
1317: return null;
1318: }
1319: case 't': // keyword "extern"
1320: return (len == 6 && buffer[offset++] == 'e'
1321: && buffer[offset++] == 'r' && buffer[offset++] == 'n') ? CCTokenContext.EXTERN
1322: : null;
1323: default:
1324: return null;
1325: }
1326: default:
1327: return null;
1328: }
1329: case 'f':
1330: if (len <= 2)
1331: return null;
1332: switch (buffer[offset++]) {
1333: case 'a': // keyword "false" (C++ only)
1334: return (lang == IS_CPLUSPLUS && len == 5
1335: && buffer[offset++] == 'l'
1336: && buffer[offset++] == 's' && buffer[offset++] == 'e') ? CCTokenContext.FALSE
1337: : null;
1338: case 'l': // keyword "float"
1339: return (len == 5 && buffer[offset++] == 'o'
1340: && buffer[offset++] == 'a' && buffer[offset++] == 't') ? CCTokenContext.FLOAT
1341: : null;
1342: case 'o': // keyword "for"
1343: return (len == 3 && buffer[offset++] == 'r') ? CCTokenContext.FOR
1344: : null;
1345: case 'r': // keyword "friend" (C++ only)
1346: return (lang == IS_CPLUSPLUS && len == 6
1347: && buffer[offset++] == 'i'
1348: && buffer[offset++] == 'e'
1349: && buffer[offset++] == 'n' && buffer[offset++] == 'd') ? CCTokenContext.FRIEND
1350: : null;
1351: default:
1352: return null;
1353: }
1354: case 'g': // keyword "goto"
1355: return (len == 4 && buffer[offset++] == 'o'
1356: && buffer[offset++] == 't' && buffer[offset++] == 'o') ? CCTokenContext.GOTO
1357: : null;
1358: case 'i':
1359: switch (buffer[offset++]) {
1360: case 'f': // keyword "if"
1361: return (len == 2) ? CCTokenContext.IF : null;
1362: case 'n':
1363: switch (buffer[offset++]) {
1364: case 't': // keyword "int"
1365: return (len == 3) ? CCTokenContext.INT : null;
1366: case 'l': // keyword "inline"
1367: return (len == 6 && buffer[offset++] == 'i'
1368: && buffer[offset++] == 'n' && buffer[offset++] == 'e') ? CCTokenContext.INLINE
1369: : null;
1370: default:
1371: return null;
1372: }
1373: default:
1374: return null;
1375: }
1376: case 'l': // keyword "long"
1377: return (len == 4 && buffer[offset++] == 'o'
1378: && buffer[offset++] == 'n' && buffer[offset++] == 'g') ? CCTokenContext.LONG
1379: : null;
1380: case 'm': // keyword "mutable" (C++ only)
1381: return (lang == IS_CPLUSPLUS && len == 7
1382: && buffer[offset++] == 'u'
1383: && buffer[offset++] == 't'
1384: && buffer[offset++] == 'a'
1385: && buffer[offset++] == 'b'
1386: && buffer[offset++] == 'l' && buffer[offset++] == 'e') ? CCTokenContext.MUTABLE
1387: : null;
1388: case 'n':
1389: if (len <= 2)
1390: return null;
1391: switch (buffer[offset++]) {
1392: case 'a': // keyword "namespace" (C++ only)
1393: return (lang == IS_CPLUSPLUS && len == 9
1394: && buffer[offset++] == 'm'
1395: && buffer[offset++] == 'e'
1396: && buffer[offset++] == 's'
1397: && buffer[offset++] == 'p'
1398: && buffer[offset++] == 'a'
1399: && buffer[offset++] == 'c' && buffer[offset++] == 'e') ? CCTokenContext.NAMESPACE
1400: : null;
1401: case 'e': // keyword "new" (C++ only)
1402: return (lang == IS_CPLUSPLUS && len == 3 && buffer[offset++] == 'w') ? CCTokenContext.NEW
1403: : null;
1404: default:
1405: return null;
1406: }
1407: case 'o': // keyword "operator" (C++ only)
1408: return (lang == IS_CPLUSPLUS && len == 8
1409: && buffer[offset++] == 'p'
1410: && buffer[offset++] == 'e'
1411: && buffer[offset++] == 'r'
1412: && buffer[offset++] == 'a'
1413: && buffer[offset++] == 't'
1414: && buffer[offset++] == 'o' && buffer[offset++] == 'r') ? CCTokenContext.OPERATOR
1415: : null;
1416: case 'p':
1417: if (lang == IS_C || len <= 5)
1418: return null;
1419: switch (buffer[offset++]) {
1420: case 'r':
1421: if (len <= 6)
1422: return null;
1423: switch (buffer[offset++]) {
1424: case 'i': // keyword "private" (C++ only)
1425: return (len == 7 && buffer[offset++] == 'v'
1426: && buffer[offset++] == 'a'
1427: && buffer[offset++] == 't' && buffer[offset++] == 'e') ? CCTokenContext.PRIVATE
1428: : null;
1429: case 'o': // keyword "protected" (C++ only)
1430: return (len == 9 && buffer[offset++] == 't'
1431: && buffer[offset++] == 'e'
1432: && buffer[offset++] == 'c'
1433: && buffer[offset++] == 't'
1434: && buffer[offset++] == 'e' && buffer[offset++] == 'd') ? CCTokenContext.PROTECTED
1435: : null;
1436: default:
1437: return null;
1438: }
1439: case 'u': // keyword "public" (C++ only)
1440: return (len == 6 && buffer[offset++] == 'b'
1441: && buffer[offset++] == 'l'
1442: && buffer[offset++] == 'i' && buffer[offset++] == 'c') ? CCTokenContext.PUBLIC
1443: : null;
1444: default:
1445: return null;
1446: }
1447: case 'r':
1448: if (len < 6 || buffer[offset++] != 'e')
1449: return null;
1450:
1451: switch (buffer[offset++]) {
1452: case 'g': // keyword "register"
1453: return (len == 8 && buffer[offset++] == 'i'
1454: && buffer[offset++] == 's'
1455: && buffer[offset++] == 't'
1456: && buffer[offset++] == 'e' && buffer[offset++] == 'r') ? CCTokenContext.REGISTER
1457: : null;
1458: case 'i': // keyword "reinterpret_cast" (C++ only)
1459: return (lang == IS_CPLUSPLUS && len == 16
1460: && buffer[offset++] == 'n'
1461: && buffer[offset++] == 't'
1462: && buffer[offset++] == 'e'
1463: && buffer[offset++] == 'r'
1464: && buffer[offset++] == 'p'
1465: && buffer[offset++] == 'r'
1466: && buffer[offset++] == 'e'
1467: && buffer[offset++] == 't'
1468: && buffer[offset++] == '_'
1469: && buffer[offset++] == 'c'
1470: && buffer[offset++] == 'a'
1471: && buffer[offset++] == 's' && buffer[offset++] == 't') ? CCTokenContext.REINTERPRET_CAST
1472: : null;
1473: case 's': // keyword "restrict"
1474: return (lang == IS_C && len == 8
1475: && buffer[offset++] == 't'
1476: && buffer[offset++] == 'r'
1477: && buffer[offset++] == 'i'
1478: && buffer[offset++] == 'c' && buffer[offset++] == 't') ? CCTokenContext.RESTRICT
1479: : null;
1480: case 't': // keyword "return"
1481: return (len == 6 && buffer[offset++] == 'u'
1482: && buffer[offset++] == 'r' && buffer[offset++] == 'n') ? CCTokenContext.RETURN
1483: : null;
1484: default:
1485: return null;
1486: }
1487: case 's':
1488: if (len <= 4)
1489: return null;
1490: switch (buffer[offset++]) {
1491: case 'h': // keyword "short"
1492: return (len == 5 && buffer[offset++] == 'o'
1493: && buffer[offset++] == 'r' && buffer[offset++] == 't') ? CCTokenContext.SHORT
1494: : null;
1495: case 'i':
1496: switch (buffer[offset++]) {
1497: case 'z': // keyword "sizeof"
1498: return (len == 6 && buffer[offset++] == 'e'
1499: && buffer[offset++] == 'o' && buffer[offset++] == 'f') ? CCTokenContext.SIZEOF
1500: : null;
1501: case 'g': // keyword "signed"
1502: return (len == 6 && buffer[offset++] == 'n'
1503: && buffer[offset++] == 'e' && buffer[offset++] == 'd') ? CCTokenContext.SIGNED
1504: : null;
1505: default:
1506: return null;
1507: }
1508: case 't':
1509: switch (buffer[offset++]) {
1510: case 'r': // keyword "struct"
1511: return (len == 6 && buffer[offset++] == 'u'
1512: && buffer[offset++] == 'c' && buffer[offset++] == 't') ? CCTokenContext.STRUCT
1513: : null;
1514: case 'a':
1515: if (len == 6) { // keyword "static"
1516: return (buffer[offset++] == 't'
1517: && buffer[offset++] == 'i' && buffer[offset++] == 'c') ? CCTokenContext.STATIC
1518: : null;
1519: } else if (lang == IS_CPLUSPLUS && len == 11) { // keyword "static_cast" (C++ only)
1520: return (buffer[offset++] == 't'
1521: && buffer[offset++] == 'i'
1522: && buffer[offset++] == 'c'
1523: && buffer[offset++] == '_'
1524: && buffer[offset++] == 'c'
1525: && buffer[offset++] == 'a'
1526: && buffer[offset++] == 's' && buffer[offset++] == 't') ? CCTokenContext.STATIC_CAST
1527: : null;
1528: } else {
1529: return null;
1530: }
1531: default:
1532: return null;
1533: }
1534: case 'w': // keyword "switch"
1535: return (len == 6 && buffer[offset++] == 'i'
1536: && buffer[offset++] == 't'
1537: && buffer[offset++] == 'c' && buffer[offset++] == 'h') ? CCTokenContext.SWITCH
1538: : null;
1539: default:
1540: return null;
1541: }
1542: case 't':
1543: if (len <= 2)
1544: return null;
1545: switch (buffer[offset++]) {
1546: case 'e': // keyword "template" (C++ only)
1547: return (lang == IS_CPLUSPLUS && len == 8
1548: && buffer[offset++] == 'm'
1549: && buffer[offset++] == 'p'
1550: && buffer[offset++] == 'l'
1551: && buffer[offset++] == 'a'
1552: && buffer[offset++] == 't' && buffer[offset++] == 'e') ? CCTokenContext.TEMPLATE
1553: : null;
1554: case 'h':
1555: switch (buffer[offset++]) {
1556: case 'i': // keyword "this" (C++ only)
1557: return (lang == IS_CPLUSPLUS && len == 4 && buffer[offset++] == 's') ? CCTokenContext.THIS
1558: : null;
1559: case 'r': // keyword "throw" (C++ only)
1560: return (lang == IS_CPLUSPLUS && len == 5
1561: && buffer[offset++] == 'o' && buffer[offset++] == 'w') ? CCTokenContext.THROW
1562: : null;
1563: default:
1564: return null;
1565: }
1566: case 'r':
1567: switch (buffer[offset++]) {
1568: case 'u': // keyword "true" (C++ only)
1569: return (lang == IS_CPLUSPLUS && len == 4 && buffer[offset++] == 'e') ? CCTokenContext.TRUE
1570: : null;
1571: case 'y': // keyword "try" (C++ only)
1572: return (lang == IS_CPLUSPLUS && len == 3) ? CCTokenContext.TRY
1573: : null;
1574: default:
1575: return null;
1576: }
1577: case 'y':
1578: if (len <= 5 || buffer[offset++] != 'p'
1579: || buffer[offset++] != 'e') {
1580: return null;
1581: } else {
1582: switch (buffer[offset++]) {
1583: case 'd': // keyword "typedef"
1584: return (len == 7 && buffer[offset++] == 'e' && buffer[offset++] == 'f') ? CCTokenContext.TYPEDEF
1585: : null;
1586: case 'i': // keyword "typeid" (C++ only)
1587: return (lang == IS_CPLUSPLUS && len == 6 && buffer[offset++] == 'd') ? CCTokenContext.TYPEID
1588: : null;
1589: case 'n': // keyword "typename" (C++ only)
1590: return (lang == IS_CPLUSPLUS && len == 8
1591: && buffer[offset++] == 'a'
1592: && buffer[offset++] == 'm' && buffer[offset++] == 'e') ? CCTokenContext.TYPENAME
1593: : null;
1594: case 'o': // keyword "typeof" (C++ only)
1595: return (lang == IS_CPLUSPLUS && len == 6 && buffer[offset++] == 'f') ? CCTokenContext.TYPEOF
1596: : null;
1597: default:
1598: return null;
1599: }
1600: }
1601: default:
1602: return null;
1603: }
1604: case 'u':
1605: if (len <= 4)
1606: return null;
1607: switch (buffer[offset++]) {
1608: case 's': // keyword "using" (C++ only)
1609: return (lang == IS_CPLUSPLUS && len == 5
1610: && buffer[offset++] == 'i'
1611: && buffer[offset++] == 'n' && buffer[offset++] == 'g') ? CCTokenContext.USING
1612: : null;
1613: case 'n':
1614: switch (buffer[offset++]) {
1615: case 'i': // keyword "union"
1616: return (len == 5 && buffer[offset++] == 'o' && buffer[offset++] == 'n') ? CCTokenContext.UNION
1617: : null;
1618: case 's': // keyword "unsigned"
1619: return (len == 8 && buffer[offset++] == 'i'
1620: && buffer[offset++] == 'g'
1621: && buffer[offset++] == 'n'
1622: && buffer[offset++] == 'e' && buffer[offset++] == 'd') ? CCTokenContext.UNSIGNED
1623: : null;
1624: default:
1625: return null;
1626: }
1627: default:
1628: return null;
1629: }
1630: case 'v':
1631: if (len <= 3)
1632: return null;
1633: switch (buffer[offset++]) {
1634: case 'i': // keyword "virtual" (C++ only)
1635: return (lang == IS_CPLUSPLUS && len == 7
1636: && buffer[offset++] == 'r'
1637: && buffer[offset++] == 't'
1638: && buffer[offset++] == 'u'
1639: && buffer[offset++] == 'a' && buffer[offset++] == 'l') ? CCTokenContext.VIRTUAL
1640: : null;
1641: case 'o':
1642: switch (buffer[offset++]) {
1643: case 'i': // keyword "void"
1644: return (len == 4 && buffer[offset++] == 'd') ? CCTokenContext.VOID
1645: : null;
1646: case 'l': // keyword "volatile"
1647: return (len == 8 && buffer[offset++] == 'a'
1648: && buffer[offset++] == 't'
1649: && buffer[offset++] == 'i'
1650: && buffer[offset++] == 'l' && buffer[offset++] == 'e') ? CCTokenContext.VOLATILE
1651: : null;
1652: default:
1653: return null;
1654: }
1655: default:
1656: return null;
1657: }
1658: case 'w':
1659: if (len <= 4)
1660: return null;
1661: switch (buffer[offset++]) {
1662: case 'c': // keyword "wchar_t" (C++ only)
1663: return (lang == IS_CPLUSPLUS && len == 7
1664: && buffer[offset++] == 'h'
1665: && buffer[offset++] == 'a'
1666: && buffer[offset++] == 'r'
1667: && buffer[offset++] == '_' && buffer[offset++] == 't') ? CCTokenContext.WCHAR_T
1668: : null;
1669: case 'h': // keyword "while"
1670: return (len == 5 && buffer[offset++] == 'i'
1671: && buffer[offset++] == 'l' && buffer[offset++] == 'e') ? CCTokenContext.WHILE
1672: : null;
1673: default:
1674: return null;
1675: }
1676: case '_':
1677: if (len <= 4) {
1678: return null;
1679: }
1680: switch (buffer[offset++]) {
1681: case 'B': // keyword "_Bool" (C only)
1682: return (lang == IS_C && len == 5
1683: && buffer[offset++] == 'o'
1684: && buffer[offset++] == 'o' && buffer[offset++] == 'l') ? CCTokenContext._BOOL
1685: : null;
1686: case 'C': // keyword "_Complex" (C only)
1687: return (lang == IS_C && len == 8
1688: && buffer[offset++] == 'o'
1689: && buffer[offset++] == 'm'
1690: && buffer[offset++] == 'p'
1691: && buffer[offset++] == 'l'
1692: && buffer[offset++] == 'e' && buffer[offset++] == 'x') ? CCTokenContext._COMPLEX
1693: : null;
1694: case 'I': // keyword "_Imaginary" (C only)
1695: return (lang == IS_C && len == 10
1696: && buffer[offset++] == 'm'
1697: && buffer[offset++] == 'a'
1698: && buffer[offset++] == 'g'
1699: && buffer[offset++] == 'i'
1700: && buffer[offset++] == 'n'
1701: && buffer[offset++] == 'a'
1702: && buffer[offset++] == 'r' && buffer[offset++] == 'y') ? CCTokenContext._IMAGINARY
1703: : null;
1704: }
1705: default:
1706: return null;
1707: }
1708: }
1709:
1710: /* Match C preprocessor tokens. These are:
1711: #define name token-string
1712: #define name(argument [, argument] ... ) token-string
1713: #undef name
1714: #include "filename"
1715: #include <filename>
1716: #line integer-constant "filename"
1717: #if constant-expression
1718: #ifdef name
1719: #ifndef name
1720: #elif constant-expression
1721: #else
1722: #endif
1723: #error
1724: #warning
1725: plus the special names
1726: __LINE__
1727: __FILE__
1728:
1729: In addition, also recognize #pragma
1730: */
1731:
// define, elif, else, endif, error, if, ifdef, ifndef, include, include_next, line, pragma, undef, warning
1733: public static TokenID matchCPPKeyword(char[] buffer, int offset,
1734: int len) {
1735: // System.err.print("In matchCPPKeyword: ");
1736: // int x;
1737: // for (x = offset; x <offset+len; x++) {
1738: // System.err.print(buffer[x]);
1739: // }
1740: // System.err.println("");
1741:
1742: if (buffer[offset] != '#') {
1743: return null;
1744: }
1745: len--;
1746: offset++;
1747:
1748: // skip all whitespaces
1749: while (len > 0 && Character.isWhitespace(buffer[offset])) {
1750: offset++;
1751: len--;
1752: }
1753: TokenID defCPPToken = CCTokenContext.CPPIDENTIFIER;
1754: if (len > 15)
1755: return defCPPToken;
1756: if (len <= 1)
1757: return defCPPToken;
1758: switch (buffer[offset++]) {
1759: case 'd': // define
1760: return (len == 6 && buffer[offset++] == 'e'
1761: && buffer[offset++] == 'f'
1762: && buffer[offset++] == 'i'
1763: && buffer[offset++] == 'n' && buffer[offset++] == 'e') ? CCTokenContext.CPPDEFINE
1764: : defCPPToken;
1765: case 'e': // elif, else, endif, error
1766: if (len <= 3)
1767: return defCPPToken;
1768: switch (buffer[offset++]) {
1769: case 'l': // elif, else
1770: switch (buffer[offset++]) {
1771: case 's': // else
1772: return (len == 4 && buffer[offset++] == 'e') ? CCTokenContext.CPPELSE
1773: : defCPPToken;
1774: case 'i': // endif
1775: return (len == 4 && buffer[offset++] == 'f') ? CCTokenContext.CPPELIF
1776: : defCPPToken;
1777: default:
1778: return defCPPToken;
1779: }
1780: case 'n': // endif
1781: return (len == 5 && buffer[offset++] == 'd'
1782: && buffer[offset++] == 'i' && buffer[offset++] == 'f') ? CCTokenContext.CPPENDIF
1783: : defCPPToken;
1784: case 'r': // error
1785: return (len == 5 && buffer[offset++] == 'r'
1786: && buffer[offset++] == 'o' && buffer[offset++] == 'r') ? CCTokenContext.CPPERROR
1787: : defCPPToken;
1788: default:
1789: return defCPPToken;
1790: }
1791: case 'i': // if, ifdef, ifndef, include
1792: switch (buffer[offset++]) {
1793: case 'f': // if, ifdef, ifndef
1794: if (len == 2) {
1795: return CCTokenContext.CPPIF;
1796: }
1797: switch (buffer[offset++]) {
1798: case 'd':
1799: return (len == 5 && buffer[offset++] == 'e' && buffer[offset++] == 'f') ? CCTokenContext.CPPIFDEF
1800: : defCPPToken;
1801: case 'n':
1802: return (len == 6 && buffer[offset++] == 'd'
1803: && buffer[offset++] == 'e' && buffer[offset++] == 'f') ? CCTokenContext.CPPIFNDEF
1804: : defCPPToken;
1805: default:
1806: return defCPPToken;
1807: }
1808: case 'n': // include
1809: if (len >= 7 && buffer[offset++] == 'c'
1810: && buffer[offset++] == 'l'
1811: && buffer[offset++] == 'u'
1812: && buffer[offset++] == 'd'
1813: && buffer[offset++] == 'e') {
1814: if (len == 7) {
1815: return CCTokenContext.CPPINCLUDE;
1816: } else if (len == 12 && buffer[offset++] == '_'
1817: && buffer[offset++] == 'n'
1818: && buffer[offset++] == 'e'
1819: && buffer[offset++] == 'x'
1820: && buffer[offset++] == 't') {
1821: return CCTokenContext.CPPINCLUDE_NEXT;
1822: } else {
1823: return defCPPToken;
1824: }
1825: } else {
1826: return defCPPToken;
1827: }
1828: default:
1829: return defCPPToken;
1830: }
1831: case 'l': // line
1832: if (len != 4) {
1833: return defCPPToken;
1834: }
1835: return (buffer[offset++] == 'i' && buffer[offset++] == 'n' && buffer[offset++] == 'e') ? CCTokenContext.CPPLINE
1836: : defCPPToken;
1837: case 'p': // pragma
1838: return (len == 6 && buffer[offset++] == 'r'
1839: && buffer[offset++] == 'a'
1840: && buffer[offset++] == 'g'
1841: && buffer[offset++] == 'm' && buffer[offset++] == 'a') ? CCTokenContext.CPPPRAGMA
1842: : defCPPToken;
1843: case 'u': // undef
1844: if (len != 5)
1845: return defCPPToken;
1846: return (buffer[offset++] == 'n' && buffer[offset++] == 'd'
1847: && buffer[offset++] == 'e' && buffer[offset++] == 'f') ? CCTokenContext.CPPUNDEF
1848: : defCPPToken;
1849: case 'w': // warning
1850: if (len != 7)
1851: return defCPPToken;
1852: return (buffer[offset++] == 'a' && buffer[offset++] == 'r'
1853: && buffer[offset++] == 'n'
1854: && buffer[offset++] == 'i'
1855: && buffer[offset++] == 'n' && buffer[offset++] == 'g') ? CCTokenContext.CPPWARNING
1856: : defCPPToken;
1857: default:
1858: return defCPPToken;
1859: }
1860: }
1861:
1862: public static boolean isSpaceChar(char actChar) {
1863: return Character.isSpaceChar(actChar) || actChar == '\t';
1864: }
1865:
1866: public static boolean isLineSeparator(char actChar) {
1867: return actChar == '\n' || actChar == '\r';
1868: }
1869: }
|