001: /*
002: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
003: *
004: * Copyright 1997-2007 Sun Microsystems, Inc. All rights reserved.
005: *
006: * The contents of this file are subject to the terms of either the GNU
007: * General Public License Version 2 only ("GPL") or the Common
008: * Development and Distribution License("CDDL") (collectively, the
009: * "License"). You may not use this file except in compliance with the
010: * License. You can obtain a copy of the License at
011: * http://www.netbeans.org/cddl-gplv2.html
012: * or nbbuild/licenses/CDDL-GPL-2-CP. See the License for the
013: * specific language governing permissions and limitations under the
014: * License. When distributing the software, include this License Header
015: * Notice in each file and include the License file at
016: * nbbuild/licenses/CDDL-GPL-2-CP. Sun designates this
017: * particular file as subject to the "Classpath" exception as provided
018: * by Sun in the GPL Version 2 section of the License file that
019: * accompanied this code. If applicable, add the following below the
020: * License Header, with the fields enclosed by brackets [] replaced by
021: * your own identifying information:
022: * "Portions Copyrighted [year] [name of copyright owner]"
023: *
024: * Contributor(s):
025: *
026: * The Original Software is NetBeans. The Initial Developer of the Original
027: * Software is Sun Microsystems, Inc. Portions Copyright 1997-2006 Sun
028: * Microsystems, Inc. All Rights Reserved.
029: *
030: * If you wish your version of this file to be governed by only the CDDL
031: * or only the GPL Version 2, indicate your decision by adding
032: * "[Contributor] elects to include this software in this distribution
033: * under the [CDDL or GPL Version 2] license." If you do not indicate a
034: * single choice of license, a recipient has the option to distribute
035: * your version of this file under either the CDDL, the GPL Version 2 or
036: * to extend the choice of license to its licensees as provided above.
037: * However, if you add GPL Version 2 code and therefore, elected the GPL
038: * Version 2 license, then the option applies only if the new code is
039: * made subject to such option by the copyright holder.
040: */
041:
042: package org.netbeans.modules.languages.parser;
043:
044: import java.util.Map;
045: import org.netbeans.api.languages.ParseException;
046: import org.netbeans.api.languages.CharInput;
047: import java.util.Collections;
048: import java.util.HashSet;
049: import java.util.Iterator;
050: import java.util.Set;
051: import org.netbeans.modules.languages.TokenType;
052: import org.netbeans.modules.languages.parser.StringInput;
053:
054: public class Pattern {
055:
056: private static final Character STAR = new Character((char) 0);
057: private static NodeFactory<Integer> nodeFactory = new NodeFactory<Integer>() {
058: private int counter = 1;
059:
060: public Integer createNode() {
061: return Integer.valueOf(counter++);
062: }
063: };
064:
065: public static Pattern create() {
066: return new Pattern();
067: }
068:
069: public static Pattern create(String input) throws ParseException {
070: if (input.length() == 0)
071: throw new ParseException("Empty pattern.");
072: return create(new StringInput(input));
073: }
074:
075: public static Pattern create(CharInput input) throws ParseException {
076: Pattern p = createIn(input);
077: DG<Integer, Character, Integer, TokenType> ndg = DGUtils
078: .<Integer, Character, Integer, TokenType> reduce(p.dg,
079: nodeFactory);
080: return new Pattern(ndg);
081: }
082:
083: private static Pattern createCaseInsensitive(StringBuffer input)
084: throws ParseException {
085: int length = input.length();
086: Pattern pattern = new Pattern();
087: for (int x = 0; x < length; x++) {
088: char c = input.charAt(x);
089: char up = Character.toUpperCase(c);
090: char down = Character.toLowerCase(c);
091: if (up != down) {
092: pattern = pattern.append(new Pattern(new Character(up))
093: .merge(new Pattern(new Character(down))));
094: } else {
095: pattern = pattern.append(new Pattern(new Character(c)));
096: }
097: }
098: return pattern;
099: }
100:
101: private static Pattern createIn(CharInput input)
102: throws ParseException {
103: Pattern pattern = new Pattern();
104: Pattern last = null;
105: char ch = input.next();
106: while (ch != 0) {
107: switch (ch) {
108: case ' ':
109: case '\t':
110: case '\n':
111: case '\r':
112: input.read();
113: break;
114: case '*':
115: input.read();
116: if (last == null)
117: throw new ParseException("Unexpected character '"
118: + ch + "'.");
119: last = last.star();
120: break;
121: case '?':
122: input.read();
123: if (last == null)
124: throw new ParseException("Unexpected character '"
125: + ch + "'.");
126: last = last.question();
127: break;
128: case '+':
129: input.read();
130: if (last == null)
131: throw new ParseException("Unexpected character '"
132: + ch + "'.");
133: last = last.plus();
134: break;
135: case '(':
136: input.read();
137: if (last != null)
138: pattern = pattern.append(last);
139: last = createIn(input);
140: if (input.next() != ')')
141: throw new ParseException("Unexpected character '"
142: + input.next() + "'.");
143: input.read();
144: break;
145: // case '<':
146: // input.read ();
147: // if (last != null) pattern = pattern.append (last);
148: // last = new Pattern (readToken (input));
149: // if (input.read () != '>')
150: // throw new ParseException ("> expected: " + input);
151: // break;
152: case '\'':
153: case '"':
154: input.read();
155: if (last != null)
156: pattern = pattern.append(last);
157: last = Pattern.create();
158: StringBuffer buf = new StringBuffer();
159: ch = input.next();
160: while (ch != '"' && ch != '\'') {
161: if (ch == 0)
162: throw new ParseException(
163: "Unexpected character '" + ch + "'.");
164: if (ch == '\\') {
165: input.read();
166: switch (input.next()) {
167: case '\\':
168: input.read();
169: buf.append('\\');
170: break;
171: case 'n':
172: input.read();
173: buf.append('\n');
174: break;
175: case 'r':
176: input.read();
177: buf.append('\r');
178: break;
179: case 't':
180: input.read();
181: buf.append('\t');
182: break;
183: case '"':
184: input.read();
185: buf.append('"');
186: break;
187: case '\'':
188: input.read();
189: buf.append('\'');
190: break;
191: case 'u':
192: input.read();
193: int ch1 = 0;
194: for (int i = 16 * 16 * 16; i >= 1; i /= 16) {
195: char c = input.next();
196: int ii = 0;
197: if ('0' <= c && c <= '9') {
198: ii = c - '0';
199: } else if ('a' <= c && c <= 'f') {
200: ii = c - 'a' + 10;
201: } else if ('A' <= c && c <= 'F') {
202: ii = c - 'A' + 10;
203: } else {
204: throw new ParseException(
205: "Unexpected character after \\u:"
206: + c);
207: }
208: ch1 += ii * i;
209: input.read();
210: }
211: buf.append((char) ch1);
212: break;
213: default:
214: throw new ParseException(
215: "Unexpected character after \\:"
216: + input.next());
217: }
218: } else {
219: buf.append(input.read());
220: }
221: ch = input.next();
222: }
223: input.read();
224: ch = input.next();
225: if (ch == 'i') {
226: input.read();
227: last = last.append(createCaseInsensitive(buf));
228: } else {
229: int length = buf.length();
230: Pattern pat = new Pattern();
231: for (int x = 0; x < length; x++) {
232: pat = pat.append(new Pattern(new Character(buf
233: .charAt(x))));
234: }
235: last = last.append(pat);
236: }
237: break;
238: case '|':
239: input.read();
240: if (last != null)
241: pattern = pattern.append(last);
242: last = null;
243: pattern = pattern.merge(Pattern.createIn(input));
244: return pattern;
245: case '-':
246: if (last != null)
247: pattern = pattern.append(last);
248: input.read();
249: skipWhitespaces(input);
250: ch = input.next();
251: if (ch != '\'' && ch != '"')
252: throw new ParseException("Unexpected character '"
253: + ch + "'.");
254: input.read();
255: ch = input.next();
256: if (ch == '\'' || ch == '"')
257: throw new ParseException("Unexpected character '"
258: + ch + "'.");
259: Character edge = new Character(input.next());
260: last = new Pattern(true, Collections
261: .<Character> singleton(edge));
262: last = last.star().append(new Pattern(edge));
263: input.read();
264: ch = input.next();
265: while (ch != '\'' && ch != '"') {
266: if (ch == 0)
267: throw new ParseException(
268: "Unexpected character '" + ch + "'.");
269: last = last.plus();
270: Integer endN = last.dg.getEnds().iterator().next();
271: Integer newE = last.nodeFactory.createNode();
272: last.dg.addNode(newE);
273: last.dg.addEdge(endN, newE, new Character(input
274: .next()));
275: last.dg.setEnds(Collections.singleton(newE));
276: input.read();
277: ch = input.next();
278: }
279: input.read();
280: break;
281: case ')':
282: if (last != null)
283: pattern = pattern.append(last);
284: return pattern;
285: case '.':
286: input.read();
287: if (last != null)
288: pattern = pattern.append(last);
289: last = new Pattern(Pattern.STAR);
290: break;
291: case '[':
292: input.read();
293: if (last != null)
294: pattern = pattern.append(last);
295: boolean not = false;
296: ch = input.next();
297: if (ch == '^') {
298: input.read();
299: ch = input.next();
300: not = true;
301: }
302: Set<Character> set = new HashSet<Character>();
303: char l = (char) 0;
304: boolean minus = false;
305: ch = input.next();
306: while (ch != ']' && ch != 0) {
307: switch (ch) {
308: case ' ':
309: case '\t':
310: case '\n':
311: case '\r':
312: input.read();
313: break;
314: case '\'':
315: case '"':
316: char ol = l;
317: if (l != 0 && !minus)
318: set.add(new Character(l));
319: input.read();
320: ch = input.next();
321: if (ch == '\\') {
322: input.read();
323: ch = input.next();
324: switch (ch) {
325: case 'n':
326: l = '\n';
327: break;
328: case 't':
329: l = '\t';
330: break;
331: case 'r':
332: l = '\r';
333: break;
334: case '\'':
335: l = '\'';
336: break;
337: case '\\':
338: l = '\\';
339: break;
340: case '"':
341: l = '"';
342: break;
343: case 'u':
344: l = 0;
345: for (int i = 16 * 16 * 16; i >= 1; i /= 16) {
346: input.read();
347: char c = input.next();
348: int ii = 0;
349: if ('0' <= c && c <= '9') {
350: ii = c - '0';
351: } else if ('a' <= c && c <= 'f') {
352: ii = c - 'a' + 10;
353: } else if ('A' <= c && c <= 'F') {
354: ii = c - 'A' + 10;
355: } else {
356: throw new ParseException(
357: "Unexpected character after \\u:"
358: + c);
359: }
360: l += ii * i;
361: }
362: break;
363: default:
364: throw new ParseException(
365: "Unexpected character '" + ch
366: + "'.");
367: } // switch
368: input.read();
369: } else
370: // if '\\'
371: l = input.read();
372: ch = input.next();
373: if (ch != '"' && ch != '\'')
374: throw new ParseException(
375: "Unexpected character '" + ch
376: + "'.");
377: input.read();
378: if (minus) {
379: addInterval(set, ol, l);
380: l = 0;
381: }
382: minus = false;
383: break; // case '"'
384: case '-':
385: input.read();
386: if (l == 0)
387: throw new ParseException(
388: "Unexpected character '-'.");
389: minus = true;
390: break;
391: // case '<':
392: // input.read ();
393: // if (minus) throw new ParseException (input.toString ());
394: // if (l != 0)
395: // set.add (new Character (l));
396: // set.add (readToken (input));
397: // if (input.read () != '>')
398: // throw new ParseException ("> expected: " + input);
399: // break;
400: default:
401: throw new ParseException(
402: "Unexpected character '" + ch + "'.");
403: } // switch
404: ch = input.next();
405: } // while
406: if (minus)
407: throw new ParseException("Unexpected character '"
408: + ch + "'.");
409: if (l != 0)
410: set.add(new Character(l));
411: input.read();
412: last = new Pattern(not, set);
413: break;
414: default:
415: throw new ParseException("Unexpected character '" + ch
416: + "'.");
417: // input.read ();
418: // if (last != null) pattern = pattern.append (last);
419: // last = new Pattern (new Character (ch));
420: } // switch
421: ch = input.next();
422: } // while
423: if (last != null)
424: pattern = pattern.append(last);
425: return pattern;
426: }
427:
428: // private static ASTToken readToken (CharInput input) throws ParseException {
429: // StringBuilder sb = new StringBuilder ();
430: // char ch = input.next ();
431: // while (ch != ',' && ch != '>') {
432: // if (ch == 0) throw new ParseException ("Unexpected end." + input.toString ());
433: // sb.append (ch);
434: // input.read ();
435: // ch = input.next ();
436: // }
437: // ch = input.next ();
438: // String type = sb.toString ().trim ();
439: // if (ch == '>') return ASTToken.create (type, null);
440: // input.read ();
441: // skipWhitespaces (input);
442: // sb = new StringBuilder ();
443: // ch = input.next ();
444: // boolean read = ch != '"' && ch != '\'';
445: // if (!read) {
446: // input.read ();
447: // ch = input.next ();
448: // }
449: // while (ch != '>' && ch != '"' && ch != '\'' && ch != ',') {
450: // if (ch == 0) throw new ParseException ("Unexpected end." + input.toString ());
451: // sb.append (ch);
452: // input.read ();
453: // ch = input.next ();
454: // }
455: // if (read && (ch == '"' || ch == '\'')) throw new ParseException ("Unexpected \":" + input.toString ());
456: // if (!read) input.read ();
457: // String identifier = null;
458: // String name = null;
459: // if (read) name = sb.toString ();
460: // else identifier = sb.toString ();
461: // if (!read && ch == ',') {
462: // ch = input.next ();
463: // sb = new StringBuilder ();
464: // while (ch != '>') {
465: // if (ch == 0) throw new ParseException ("Unexpected end." + input.toString ());
466: // sb.append (ch);
467: // input.read ();
468: // ch = input.next ();
469: // }
470: // name = sb.toString ();
471: // }
472: // return ASTToken.create (type, identifier);
473: // }
474:
475: private static Set<Character> whitespace = new HashSet<Character>();
476: static {
477: whitespace.add(new Character(' '));
478: whitespace.add(new Character('\n'));
479: whitespace.add(new Character('\r'));
480: whitespace.add(new Character('\t'));
481: }
482:
483: private static void skipWhitespaces(CharInput input) {
484: while (whitespace.contains(new Character(input.next())))
485: input.read();
486: }
487:
488: private static void addInterval(Set<Character> set, char from,
489: char to) throws ParseException {
490: if (from > to)
491: throw new ParseException("Invalid interval (" + from + ">"
492: + to + ").");
493: do {
494: set.add(new Character(from));
495: from++;
496: } while (from <= to);
497: }
498:
499: private DG<Integer, Character, Integer, TokenType> dg;// = DG.<Integer,Character,K,TokenType>createDG ();
500:
501: private Pattern(DG<Integer, Character, Integer, TokenType> dg) {
502: this .dg = dg;
503: }
504:
505: private Pattern() {
506: dg = DG
507: .<Integer, Character, Integer, TokenType> createDG(nodeFactory
508: .createNode());
509: // Integer start = nodeFactory.createNode ();
510: // dg.addNode (start);
511: // dg.setStart (start);
512: // dg.addEnd (start);
513: }
514:
515: private Pattern(Pattern p) {
516: dg = DGUtils.<Integer, Character, Integer, TokenType> cloneDG(
517: p.dg, false, nodeFactory);
518: }
519:
520: private Pattern(Character edge) {
521: Integer start = nodeFactory.createNode();
522: dg = DG
523: .<Integer, Character, Integer, TokenType> createDG(start);
524: Integer end = nodeFactory.createNode();
525: dg.addNode(end);
526: dg.addEdge(start, end, edge);
527: dg.setEnds(Collections.<Integer> singleton(end));
528: }
529:
530: private Pattern(boolean not, Set<Character> edges) {
531: Integer start = nodeFactory.createNode();
532: dg = DG
533: .<Integer, Character, Integer, TokenType> createDG(start);
534: Integer end = nodeFactory.createNode();
535: dg.addNode(end);
536: dg.setStart(start);
537: dg.setEnds(Collections.<Integer> emptySet());
538: Iterator<Character> it = edges.iterator();
539: while (it.hasNext()) {
540: Character edge = it.next();
541: dg.addEdge(start, end, edge);
542: }
543: if (not) {
544: Integer failedState = nodeFactory.createNode();
545: dg.addNode(failedState);
546: dg.addEdge(start, failedState, Pattern.STAR);
547: dg.addEnd(failedState);
548: } else
549: dg.addEnd(end);
550: }
551:
552: public Pattern clonePattern() {
553: return new Pattern(this );
554: }
555:
556: public Pattern star() {
557: DG<Integer, Character, Integer, TokenType> ndg = DGUtils
558: .<Integer, Character, Integer, TokenType> plus(dg,
559: STAR, nodeFactory);
560: ndg = DGUtils
561: .<Integer, Character, Integer, TokenType> merge(
562: DG
563: .<Integer, Character, Integer, TokenType> createDG(nodeFactory
564: .createNode()), ndg, STAR,
565: nodeFactory);
566: Pattern p = new Pattern(ndg);
567: return p;
568: }
569:
570: public Pattern plus() {
571: DG<Integer, Character, Integer, TokenType> ndg = DGUtils
572: .<Integer, Character, Integer, TokenType> plus(dg,
573: STAR, nodeFactory);
574: Pattern p = new Pattern(ndg);
575: return p;
576: }
577:
578: public Pattern question() {
579: DG<Integer, Character, Integer, TokenType> ndg = DGUtils
580: .<Integer, Character, Integer, TokenType> cloneDG(dg,
581: true, nodeFactory);
582: ndg.addEnd(ndg.getStartNode());
583: Pattern p = new Pattern(ndg);
584: return p;
585: }
586:
587: public Pattern merge(Pattern parser) {
588: DG<Integer, Character, Integer, TokenType> ndg = DGUtils
589: .<Integer, Character, Integer, TokenType> merge(dg,
590: parser.dg, STAR, nodeFactory);
591: Pattern p = new Pattern(ndg);
592: return p;
593: }
594:
595: public Pattern append(Pattern parser) {
596: DG<Integer, Character, Integer, TokenType> ndg = DGUtils
597: .<Integer, Character, Integer, TokenType> append(dg,
598: parser.dg, STAR, nodeFactory);
599: Pattern p = new Pattern(ndg);
600: return p;
601: }
602:
603: public boolean matches(String text) {
604: int i = 0;
605: Integer state = dg.getStartNode();
606: while (i < text.length()) {
607: state = dg.getNode(state, new Character(text.charAt(i++)));
608: if (state == null)
609: return false;
610: }
611: return dg.getEnds().contains(state);
612: }
613:
614: public Integer next(CharInput input) {
615: return next(dg.getStartNode(), input);
616: }
617:
618: public Integer next(Integer state, CharInput input) {
619: int lastIndex = input.getIndex();
620: Integer lastState = null;
621: while (state != null) {
622: if (dg.getEnds().contains(state)) {
623: lastState = state;
624: lastIndex = input.getIndex();
625: }
626: if (input.eof())
627: break;
628: Integer newState = dg.getNode(state, new Character(input
629: .next()));
630: if (newState != null)
631: state = newState;
632: else
633: state = dg.getNode(state, STAR);
634: if (state != null)
635: input.read();
636: }
637: input.setIndex(lastIndex);
638: return lastState;
639: }
640:
641: public String toString() {
642: return dg.toString();
643: }
644:
645: // public Object getValue (Object state, Object key) {
646: // return dg.getProperty (state, key);
647: // }
648:
649: // DG<Integer,Character,K,TokenType> getDG () {
650: // return dg;
651: // }
652:
653: public Object read(CharInput input) {
654: if (input.eof())
655: return null;
656: int originalIndex = input.getIndex();
657: int lastIndex = -1;
658: TokenType lastTT = null;
659: Integer node = dg.getStartNode();
660: while (!input.eof()) {
661: Character edge = new Character(input.next());
662: Integer nnode = dg.getNode(node, edge);
663: if (nnode == null) {
664: edge = Pattern.STAR;
665: nnode = dg.getNode(node, edge);
666: }
667:
668: if (input.getIndex() > originalIndex) {
669: TokenType bestTT = getBestTT(node);
670: if (bestTT != null) {
671: lastTT = bestTT;
672: lastIndex = input.getIndex();
673: }
674: }
675:
676: if (nnode == null
677: || (dg.getEdges(nnode).isEmpty() && dg
678: .getProperties(nnode).isEmpty())) {
679: if (lastTT == null) {
680: // error => reset position in CURRENT pattern (state)
681: return null;
682: }
683: input.setIndex(lastIndex);
684: return lastTT;
685: }
686:
687: input.read();
688: node = nnode;
689: }
690:
691: TokenType bestTT = getBestTT(node);
692: if (bestTT != null) {
693: lastTT = bestTT;
694: lastIndex = input.getIndex();
695: }
696:
697: if (lastTT == null)
698: return null;
699: return lastTT;
700: }
701:
702: private TokenType getBestTT(Integer node) {
703: Map tts = dg.getProperties(node);
704: TokenType best = null;
705: Iterator it = tts.keySet().iterator();
706: while (it.hasNext()) {
707: Integer i = (Integer) it.next();
708: TokenType tt = (TokenType) tts.get(i);
709: if (best == null || best.getPriority() > tt.getPriority())
710: best = tt;
711: }
712: return best;
713: }
714:
715: void mark(int priority, TokenType r) {
716: Iterator<Integer> it = dg.getEnds().iterator();
717: while (it.hasNext()) {
718: Integer s = it.next();
719: dg.setProperty(s, priority, r);
720: }
721: }
722: }
|