001: /*
002: * FindBugs - Find Bugs in Java programs
003: * Copyright (C) 2003-2007 University of Maryland
004: *
005: * This library is free software; you can redistribute it and/or
006: * modify it under the terms of the GNU Lesser General Public
007: * License as published by the Free Software Foundation; either
008: * version 2.1 of the License, or (at your option) any later version.
009: *
010: * This library is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
013: * Lesser General Public License for more details.
014: *
015: * You should have received a copy of the GNU Lesser General Public
016: * License along with this library; if not, write to the Free Software
017: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
018: */
019:
020: package edu.umd.cs.findbugs.jaif;
021:
022: import java.io.BufferedReader;
023: import java.io.IOException;
024: import java.io.Reader;
025: import java.util.regex.Matcher;
026: import java.util.regex.Pattern;
027:
028: /**
029: * Lexical scanner for external annotation files.
030: *
031: * @author David Hovemeyer
032: * @see http://pag.csail.mit.edu/jsr308/annotation-file-utilities/
033: */
034: public class JAIFScanner {
035:
036: static class TokenPattern {
037: private Pattern pattern;
038: private JAIFTokenKind kind;
039:
040: public TokenPattern(String regex, JAIFTokenKind kind) {
041: this .pattern = Pattern.compile("^" + regex);
042: this .kind = kind;
043: }
044:
045: public JAIFTokenKind getKind(String lexeme) {
046: return kind;
047: }
048:
049: public Pattern getPattern() {
050: return pattern;
051: }
052: }
053:
054: // See http://java.sun.com/docs/books/jls/third_edition/html/lexical.html
055: // Hexidecimal floating-point literals are not implemented.
056: // Unicode escapes are not implemented (but could be implemented in the fillLineBuf() method).
057:
058: private static final String ID_START = "[@A-Za-z_\\$]";
059: private static final String ID_REST = "[A-Za-z0-9_\\$]";
060: private static final String DIGIT = "[0-9]";
061: private static final String DIGITS = DIGIT + "+";
062: private static final String DIGITS_OPT = DIGIT + "*";
063: private static final String SIGN_OPT = "[+-]?";
064: private static final String DOT = "\\.";
065: private static final String EXP_PART = "([Ee]" + SIGN_OPT + DIGITS
066: + ")";
067: private static final String EXP_PART_OPT = EXP_PART + "?";
068: private static final String FLOAT_TYPE_SUFFIX = "[FfDd]";
069: private static final String FLOAT_TYPE_SUFFIX_OPT = FLOAT_TYPE_SUFFIX
070: + "?";
071: private static final String OCTAL_DIGITS = "[0-7]+";
072: private static final String HEX_SIGNIFIER = "0[Xx]";
073: private static final String HEX_DIGITS = "[0-9A-Fa-f]+";
074: private static final String INT_TYPE_SUFFIX_OPT = "[Ll]?";
075: private static final String INPUT_CHAR = "[^\\\\\\\"]";// anything other than backslash or double-quote character
076: private static final String OCT_ESCAPE = "([0-7]|[0-3]?[0-7][0-7])";
077: private static final String ESCAPE_SEQ = "(\\\\[btnfr\"'\\\\]|\\\\"
078: + OCT_ESCAPE + ")";
079: private static final String STRING_CHARS_OPT = "(" + INPUT_CHAR
080: + "|" + ESCAPE_SEQ + ")*";
081:
082: private static final TokenPattern[] TOKEN_PATTERNS = {
083: // Misc. syntax
084: new TokenPattern(":", JAIFTokenKind.COLON),
085: new TokenPattern("\\(", JAIFTokenKind.LPAREN),
086: new TokenPattern("\\)", JAIFTokenKind.RPAREN),
087: new TokenPattern(",", JAIFTokenKind.COMMA),
088: new TokenPattern("=", JAIFTokenKind.EQUALS),
089:
090: // Identifiers and keywords
091: new TokenPattern(ID_START + "(" + ID_REST + ")*",
092: JAIFTokenKind.IDENTIFIER_OR_KEYWORD),
093:
094: // FP literals
095: new TokenPattern(DIGITS + DOT + DIGITS_OPT + EXP_PART_OPT
096: + FLOAT_TYPE_SUFFIX_OPT,
097: JAIFTokenKind.FLOATING_POINT_LITERAL),
098: new TokenPattern(DOT + DIGITS + EXP_PART_OPT
099: + FLOAT_TYPE_SUFFIX_OPT,
100: JAIFTokenKind.FLOATING_POINT_LITERAL),
101: new TokenPattern(DIGITS + EXP_PART + FLOAT_TYPE_SUFFIX_OPT,
102: JAIFTokenKind.FLOATING_POINT_LITERAL),
103: new TokenPattern(DIGITS + EXP_PART_OPT + FLOAT_TYPE_SUFFIX,
104: JAIFTokenKind.FLOATING_POINT_LITERAL),
105:
106: // This must come after the FP literal patterns
107: new TokenPattern(DOT, JAIFTokenKind.DOT),
108:
109: // Integer literals
110: new TokenPattern("0" + OCTAL_DIGITS + INT_TYPE_SUFFIX_OPT,
111: JAIFTokenKind.OCTAL_LITERAL),
112: new TokenPattern(HEX_SIGNIFIER + HEX_DIGITS
113: + INT_TYPE_SUFFIX_OPT, JAIFTokenKind.HEX_LITERAL),
114: new TokenPattern(DIGITS + INT_TYPE_SUFFIX_OPT,
115: JAIFTokenKind.DECIMAL_LITERAL),
116:
117: // String literals
118: new TokenPattern("\"" + STRING_CHARS_OPT + "\"",
119: JAIFTokenKind.STRING_LITERAL), };
120:
121: private BufferedReader reader;
122: private JAIFToken next;
123: private String lineBuf;
124: private int lineNum;
125:
126: /**
127: * @param reader
128: */
129: public JAIFScanner(Reader reader) {
130: this .reader = new BufferedReader(reader);
131: this .lineNum = 0;
132: }
133:
134: public int getLineNumber() {
135: return lineNum;
136: }
137:
138: public JAIFToken nextToken() throws IOException,
139: JAIFSyntaxException {
140: if (next == null) {
141: fetchToken();
142: }
143: JAIFToken result = next;
144: next = null;
145: return result;
146: }
147:
148: public JAIFToken peekToken() throws IOException,
149: JAIFSyntaxException {
150: if (next == null) {
151: fetchToken();
152: }
153: return next;
154: }
155:
156: public boolean atEOF() throws IOException {
157: fillLineBuf();
158: return lineBuf == null;
159: }
160:
161: private void fillLineBuf() throws IOException {
162: if (lineBuf == null) {
163: lineBuf = reader.readLine();
164: if (lineBuf != null) {
165: ++lineNum;
166: }
167: }
168: }
169:
170: private boolean isHorizWhitespace(char c) {
171: return c == ' ' || c == '\t';
172: }
173:
174: private void fetchToken() throws IOException, JAIFSyntaxException {
175: assert next == null;
176:
177: fillLineBuf();
178: if (lineBuf == null) {
179: throw new JAIFSyntaxException(this ,
180: "Unexpected end of file");
181: }
182:
183: // Strip leading whitespace, if any
184: int wsCount = 0;
185: while (wsCount < lineBuf.length()
186: && isHorizWhitespace(lineBuf.charAt(wsCount))) {
187: wsCount++;
188: }
189: if (wsCount > 0) {
190: lineBuf = lineBuf.substring(wsCount);
191: }
192: //System.out.println("Consumed " + wsCount + " characters of horizontal whitespace");
193:
194: if (lineBuf.equals("")) {
195: // Reached end of line.
196: next = new JAIFToken(JAIFTokenKind.NEWLINE, "\n", lineNum);
197: lineBuf = null;
198: return;
199: }
200:
201: // Try matching line buffer against all known patterns
202: // until we fine one that matches.
203: for (TokenPattern tokenPattern : TOKEN_PATTERNS) {
204: Matcher m = tokenPattern.getPattern().matcher(lineBuf);
205: if (m.find()) {
206: String lexeme = m.group();
207: lineBuf = lineBuf.substring(lexeme.length());
208: next = new JAIFToken(tokenPattern.getKind(lexeme),
209: lexeme, lineNum);
210: return;
211: }
212: }
213:
214: throw new JAIFSyntaxException(this ,
215: "Unrecognized token (trying to match text `" + lineBuf
216: + "')");
217: }
218: }
|