Source Code Cross Referenced for NFAFactory.java in  » Parser » antlr-3.0.1 » org » antlr » tool » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Sevlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Parser » antlr 3.0.1 » org.antlr.tool 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         [The "BSD licence"]
003:         Copyright (c) 2005-2006 Terence Parr
004:         All rights reserved.
005:
006:         Redistribution and use in source and binary forms, with or without
007:         modification, are permitted provided that the following conditions
008:         are met:
009:         1. Redistributions of source code must retain the above copyright
010:            notice, this list of conditions and the following disclaimer.
011:         2. Redistributions in binary form must reproduce the above copyright
012:            notice, this list of conditions and the following disclaimer in the
013:            documentation and/or other materials provided with the distribution.
014:         3. The name of the author may not be used to endorse or promote products
015:            derived from this software without specific prior written permission.
016:
017:         THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
018:         IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
019:         OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
020:         IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
021:         INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
022:         NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
023:         DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
024:         THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025:         (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
026:         THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027:         */
028:        package org.antlr.tool;
029:
030:        import org.antlr.analysis.*;
031:        import org.antlr.misc.IntSet;
032:        import org.antlr.misc.IntervalSet;
033:
034:        import java.util.Iterator;
035:        import java.util.List;
036:
037:        /** Routines to construct StateClusters from EBNF grammar constructs.
038:         *  No optimization is done to remove unnecessary epsilon edges.
039:         *
040:         *  TODO: add an optimization that reduces number of states and transitions
041:         *  will help with speed of conversion and make it easier to view NFA.  For
042:         *  example, o-A->o-->o-B->o should be o-A->o-B->o
043:         */
044:        public class NFAFactory {
045:            /** This factory is attached to a specifc NFA that it is building.
046:             *  The NFA will be filled up with states and transitions.
047:             */
048:            NFA nfa = null;
049:
050:            String currentRuleName = null;
051:
052:            /** Used to assign state numbers */
053:            protected int stateCounter = 0;
054:
055:            public NFAFactory(NFA nfa) {
056:                nfa.setFactory(this );
057:                this .nfa = nfa;
058:            }
059:
060:            public NFAState newState() {
061:                NFAState n = new NFAState(nfa);
062:                int state = stateCounter;
063:                n.stateNumber = state;
064:                stateCounter++;
065:                nfa.addState(n);
066:                n.setEnclosingRuleName(currentRuleName);
067:                return n;
068:            }
069:
070:            public int getNumberOfStates() {
071:                return stateCounter;
072:            }
073:
074:            /** Optimize an alternative (list of grammar elements).
075:             *
076:             *  Walk the chain of elements (which can be complicated loop blocks...)
077:             *  and throw away any epsilon transitions used to link up simple elements.
078:             *
079:             *  This only removes 195 states from the java.g's NFA, but every little
080:             *  bit helps.  Perhaps I can improve in the future.
081:             */
082:            public void optimizeAlternative(StateCluster alt) {
083:                NFAState s = alt.left;
084:                while (s != alt.right) {
085:                    // if it's a block element, jump over it and continue
086:                    if (s.endOfBlockStateNumber != State.INVALID_STATE_NUMBER) {
087:                        s = nfa.getState(s.endOfBlockStateNumber);
088:                        continue;
089:                    }
090:                    Transition t = s.transition(0);
091:                    if (t instanceof  RuleClosureTransition) {
092:                        s = ((RuleClosureTransition) t).getFollowState();
093:                        continue;
094:                    }
095:                    if (t.label.isEpsilon() && s.getNumberOfTransitions() == 1) {
096:                        // bypass epsilon transition and point to what the epsilon's
097:                        // target points to unless that epsilon transition points to
098:                        // a block or loop etc..  Also don't collapse epsilons that
099:                        // point at the last node of the alt
100:                        NFAState epsilonTarget = (NFAState) t.target;
101:                        if (epsilonTarget.endOfBlockStateNumber == State.INVALID_STATE_NUMBER
102:                                && epsilonTarget.transition(0) != null) {
103:                            s.setTransition0(epsilonTarget.transition(0));
104:                            /*
105:                            System.out.println("### opt "+s.stateNumber+"->"+
106:                            				   epsilonTarget.transition(0).target.stateNumber);
107:                             */
108:                        }
109:                    }
110:                    s = (NFAState) t.target;
111:                }
112:            }
113:
114:            /** From label A build Graph o-A->o */
115:            public StateCluster build_Atom(int label) {
116:                NFAState left = newState();
117:                NFAState right = newState();
118:                transitionBetweenStates(left, right, label);
119:                StateCluster g = new StateCluster(left, right);
120:                return g;
121:            }
122:
123:            /** From set build single edge graph o->o-set->o.  To conform to
124:             *  what an alt block looks like, must have extra state on left.
125:             */
126:            public StateCluster build_Set(IntSet set) {
127:                //NFAState start = newState();
128:                NFAState left = newState();
129:                //transitionBetweenStates(start, left, Label.EPSILON);
130:                NFAState right = newState();
131:                Transition e = new Transition(new Label(set), right);
132:                left.addTransition(e);
133:                StateCluster g = new StateCluster(left, right);
134:                return g;
135:            }
136:
137:            /** Can only complement block of simple alts; can complement build_Set()
138:             *  result, that is.  Get set and complement, replace old with complement.
139:            public StateCluster build_AlternativeBlockComplement(StateCluster blk) {
140:                State s0 = blk.left;
141:                IntSet set = getCollapsedBlockAsSet(s0);
142:                if ( set!=null ) {
143:                    // if set is available, then structure known and blk is a set
144:                    set = nfa.grammar.complement(set);
145:                    Label label = s0.transition(0).target.transition(0).label;
146:                    label.setSet(set);
147:                }
148:                return blk;
149:            }
150:             */
151:
152:            public StateCluster build_Range(int a, int b) {
153:                NFAState left = newState();
154:                NFAState right = newState();
155:                Transition e = new Transition(new Label(IntervalSet.of(a, b)),
156:                        right);
157:                left.addTransition(e);
158:                StateCluster g = new StateCluster(left, right);
159:                return g;
160:            }
161:
162:            /** From char 'c' build StateCluster o-intValue(c)->o
163:             */
164:            public StateCluster build_CharLiteralAtom(String charLiteral) {
165:                int c = Grammar.getCharValueFromGrammarCharLiteral(charLiteral);
166:                return build_Atom(c);
167:            }
168:
169:            /** From char 'c' build StateCluster o-intValue(c)->o
170:             *  can include unicode spec likes '\u0024' later.  Accepts
171:             *  actual unicode 16-bit now, of course, by default.
172:             *  TODO not supplemental char clean!
173:             */
174:            public StateCluster build_CharRange(String a, String b) {
175:                int from = Grammar.getCharValueFromGrammarCharLiteral(a);
176:                int to = Grammar.getCharValueFromGrammarCharLiteral(b);
177:                return build_Range(from, to);
178:            }
179:
180:            /** For a non-lexer, just build a simple token reference atom.
181:             *  For a lexer, a string is a sequence of char to match.  That is,
182:             *  "fog" is treated as 'f' 'o' 'g' not as a single transition in
183:             *  the DFA.  Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states
184:             *  for n characters.
185:             */
186:            public StateCluster build_StringLiteralAtom(String stringLiteral) {
187:                if (nfa.grammar.type == Grammar.LEXER) {
188:                    StringBuffer chars = Grammar
189:                            .getUnescapedStringFromGrammarStringLiteral(stringLiteral);
190:                    NFAState first = newState();
191:                    NFAState last = null;
192:                    NFAState prev = first;
193:                    for (int i = 0; i < chars.length(); i++) {
194:                        int c = chars.charAt(i);
195:                        NFAState next = newState();
196:                        transitionBetweenStates(prev, next, c);
197:                        prev = last = next;
198:                    }
199:                    return new StateCluster(first, last);
200:                }
201:
202:                // a simple token reference in non-Lexers
203:                int tokenType = nfa.grammar.getTokenType(stringLiteral);
204:                return build_Atom(tokenType);
205:            }
206:
207:            /** For reference to rule r, build
208:             *
209:             *  o-e->(r)  o
210:             *
211:             *  where (r) is the start of rule r and the trailing o is not linked
212:             *  to from rule ref state directly (it's done thru the transition(0)
213:             *  RuleClosureTransition.
214:             *
215:             *  If the rule r is just a list of tokens, it's block will be just
216:             *  a set on an edge o->o->o-set->o->o->o, could inline it rather than doing
217:             *  the rule reference, but i'm not doing this yet as I'm not sure
218:             *  it would help much in the NFA->DFA construction.
219:             *
220:             *  TODO add to codegen: collapse alt blks that are sets into single matchSet
221:             */
222:            public StateCluster build_RuleRef(int ruleIndex, NFAState ruleStart) {
223:                /*
224:                System.out.println("building ref to rule "+ruleIndex+": "+
225:                        nfa.getGrammar().getRuleName(ruleIndex));
226:                 */
227:                NFAState left = newState();
228:                // left.setDescription("ref to "+ruleStart.getDescription());
229:                NFAState right = newState();
230:                // right.setDescription("NFAState following ref to "+ruleStart.getDescription());
231:                Transition e = new RuleClosureTransition(ruleIndex, ruleStart,
232:                        right);
233:                left.addTransition(e);
234:                StateCluster g = new StateCluster(left, right);
235:                return g;
236:            }
237:
238:            /** From an empty alternative build StateCluster o-e->o */
239:            public StateCluster build_Epsilon() {
240:                NFAState left = newState();
241:                NFAState right = newState();
242:                transitionBetweenStates(left, right, Label.EPSILON);
243:                StateCluster g = new StateCluster(left, right);
244:                return g;
245:            }
246:
247:            /** Build what amounts to an epsilon transition with a semantic
248:             *  predicate action.  The pred is a pointer into the AST of
249:             *  the SEMPRED token.
250:             */
251:            public StateCluster build_SemanticPredicate(GrammarAST pred) {
252:                // don't count syn preds
253:                if (!pred.getText().toUpperCase().startsWith(
254:                        Grammar.SYNPRED_RULE_PREFIX.toUpperCase())) {
255:                    nfa.grammar.numberOfSemanticPredicates++;
256:                }
257:                NFAState left = newState();
258:                NFAState right = newState();
259:                Transition e = new Transition(new Label(pred), right);
260:                left.addTransition(e);
261:                StateCluster g = new StateCluster(left, right);
262:                return g;
263:            }
264:
265:            /** add an EOF transition to any rule end NFAState that points to nothing
266:             *  (i.e., for all those rules not invoked by another rule).  These
267:             *  are start symbols then.
268:             *
269:             *  Return the number of grammar entry points; i.e., how many rules are
270:             *  not invoked by another rule (they can only be invoked from outside).
271:             *  These are the start rules.
272:             */
273:            public int build_EOFStates(List rules) {
274:                int numberUnInvokedRules = 0;
275:                for (Iterator iterator = rules.iterator(); iterator.hasNext();) {
276:                    Rule r = (Rule) iterator.next();
277:                    String ruleName = r.name;
278:                    NFAState endNFAState = nfa.grammar
279:                            .getRuleStopState(ruleName);
280:                    // Is this rule a start symbol?  (no follow links)
281:                    if (endNFAState.transition(0) == null) {
282:                        // if so, then don't let algorithm fall off the end of
283:                        // the rule, make it hit EOF/EOT.
284:                        /*
285:                        if ( nfa.grammar.type==Grammar.LEXER ) {
286:                        	return; // 11/28/2005: try having only Tokens with EOT transition
287:                        }
288:                        if ( nfa.grammar.type!=Grammar.LEXER ||
289:                        	 ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) )
290:                        {
291:                        	build_EOFState(endNFAState);
292:                        }
293:                         */
294:                        build_EOFState(endNFAState);
295:                        // track how many rules have been invoked by another rule
296:                        numberUnInvokedRules++;
297:                    }
298:                }
299:                return numberUnInvokedRules;
300:            }
301:
302:            /** set up an NFA NFAState that will yield eof tokens or,
303:             *  in the case of a lexer grammar, an EOT token when the conversion
304:             *  hits the end of a rule.
305:             */
306:            private void build_EOFState(NFAState endNFAState) {
307:                NFAState end = newState();
308:                int label = Label.EOF;
309:                if (nfa.grammar.type == Grammar.LEXER) {
310:                    label = Label.EOT;
311:                    end.setEOTTargetState(true);
312:                }
313:                /*
314:                System.out.println("build "+nfa.grammar.getTokenDisplayName(label)+
315:                				   " loop on end of state "+endNFAState.getDescription()+
316:                				   " to state "+end.stateNumber);
317:                 */
318:                Transition toEnd = new Transition(label, end);
319:                endNFAState.addTransition(toEnd);
320:            }
321:
322:            /** From A B build A-e->B (that is, build an epsilon arc from right
323:             *  of A to left of B).
324:             *
325:             *  As a convenience, return B if A is null or return A if B is null.
326:             */
327:            public StateCluster build_AB(StateCluster A, StateCluster B) {
328:                if (A == null) {
329:                    return B;
330:                }
331:                if (B == null) {
332:                    return A;
333:                }
334:                transitionBetweenStates(A.right, B.left, Label.EPSILON);
335:                StateCluster g = new StateCluster(A.left, B.right);
336:                return g;
337:            }
338:
339:            /** From a set ('a'|'b') build
340:             *
341:             *  o->o-'a'..'b'->o->o (last NFAState is blockEndNFAState pointed to by all alts)
342:             */
343:            public StateCluster build_AlternativeBlockFromSet(StateCluster set) {
344:                if (set == null) {
345:                    return null;
346:                }
347:
348:                // single alt, no decision, just return only alt state cluster
349:                NFAState startOfAlt = newState(); // must have this no matter what
350:                transitionBetweenStates(startOfAlt, set.left, Label.EPSILON);
351:
352:                return new StateCluster(startOfAlt, set.right);
353:            }
354:
355:            /** From A|B|..|Z alternative block build
356:             *
357:             *  o->o-A->o->o (last NFAState is blockEndNFAState pointed to by all alts)
358:             *  |          ^
359:             *  o->o-B->o--|
360:             *  |          |
361:             *  ...        |
362:             *  |          |
363:             *  o->o-Z->o--|
364:             *
365:             *  So every alternative gets begin NFAState connected by epsilon
366:             *  and every alt right side points at a block end NFAState.  There is a
367:             *  new NFAState in the NFAState in the StateCluster for each alt plus one for the
368:             *  end NFAState.
369:             *
370:             *  Special case: only one alternative: don't make a block with alt
371:             *  begin/end.
372:             *
373:             *  Special case: if just a list of tokens/chars/sets, then collapse
374:             *  to a single edge'd o-set->o graph.
375:             *
376:             *  Set alt number (1..n) in the left-Transition NFAState.
377:             */
378:            public StateCluster build_AlternativeBlock(
379:                    List alternativeStateClusters) {
380:                StateCluster result = null;
381:                if (alternativeStateClusters == null
382:                        || alternativeStateClusters.size() == 0) {
383:                    return null;
384:                }
385:
386:                // single alt case
387:                if (alternativeStateClusters.size() == 1) {
388:                    // single alt, no decision, just return only alt state cluster
389:                    StateCluster g = (StateCluster) alternativeStateClusters
390:                            .get(0);
391:                    NFAState startOfAlt = newState(); // must have this no matter what
392:                    transitionBetweenStates(startOfAlt, g.left, Label.EPSILON);
393:
394:                    //System.out.println("### opt saved start/stop end in (...)");
395:                    return new StateCluster(startOfAlt, g.right);
396:                }
397:
398:                // even if we can collapse for lookahead purposes, we will still
399:                // need to predict the alts of this subrule in case there are actions
400:                // etc...  This is the decision that is pointed to from the AST node
401:                // (always)
402:                NFAState prevAlternative = null; // tracks prev so we can link to next alt
403:                NFAState firstAlt = null;
404:                NFAState blockEndNFAState = newState();
405:                blockEndNFAState.setDescription("end block");
406:                int altNum = 1;
407:                for (Iterator iter = alternativeStateClusters.iterator(); iter
408:                        .hasNext();) {
409:                    StateCluster g = (StateCluster) iter.next();
410:                    // add begin NFAState for this alt connected by epsilon
411:                    NFAState left = newState();
412:                    left.setDescription("alt " + altNum + " of ()");
413:                    transitionBetweenStates(left, g.left, Label.EPSILON);
414:                    transitionBetweenStates(g.right, blockEndNFAState,
415:                            Label.EPSILON);
416:                    // Are we the first alternative?
417:                    if (firstAlt == null) {
418:                        firstAlt = left; // track extreme left node of StateCluster
419:                    } else {
420:                        // if not first alternative, must link to this alt from previous
421:                        transitionBetweenStates(prevAlternative, left,
422:                                Label.EPSILON);
423:                    }
424:                    prevAlternative = left;
425:                    altNum++;
426:                }
427:
428:                // return StateCluster pointing representing entire block
429:                // Points to first alt NFAState on left, block end on right
430:                result = new StateCluster(firstAlt, blockEndNFAState);
431:
432:                firstAlt.decisionStateType = NFAState.BLOCK_START;
433:
434:                // set EOB markers for Jean
435:                firstAlt.endOfBlockStateNumber = blockEndNFAState.stateNumber;
436:
437:                return result;
438:            }
439:
440:            /** From (A)? build either:
441:             *
442:             *  o--A->o
443:             *  |     ^
444:             *  o---->|
445:             *
446:             *  or, if A is a block, just add an empty alt to the end of the block
447:             */
448:            public StateCluster build_Aoptional(StateCluster A) {
449:                StateCluster g = null;
450:                int n = nfa.grammar.getNumberOfAltsForDecisionNFA(A.left);
451:                if (n == 1) {
452:                    // no decision, just wrap in an optional path
453:                    //NFAState decisionState = newState();
454:                    NFAState decisionState = A.left; // resuse left edge
455:                    decisionState.setDescription("only alt of ()? block");
456:                    NFAState emptyAlt = newState();
457:                    emptyAlt.setDescription("epsilon path of ()? block");
458:                    NFAState blockEndNFAState = null;
459:                    blockEndNFAState = newState();
460:                    transitionBetweenStates(A.right, blockEndNFAState,
461:                            Label.EPSILON);
462:                    blockEndNFAState.setDescription("end ()? block");
463:                    //transitionBetweenStates(decisionState, A.left, Label.EPSILON);
464:                    transitionBetweenStates(decisionState, emptyAlt,
465:                            Label.EPSILON);
466:                    transitionBetweenStates(emptyAlt, blockEndNFAState,
467:                            Label.EPSILON);
468:
469:                    // set EOB markers for Jean
470:                    decisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber;
471:                    blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
472:
473:                    g = new StateCluster(decisionState, blockEndNFAState);
474:                } else {
475:                    // a decision block, add an empty alt
476:                    NFAState lastRealAlt = nfa.grammar
477:                            .getNFAStateForAltOfDecision(A.left, n);
478:                    NFAState emptyAlt = newState();
479:                    emptyAlt.setDescription("epsilon path of ()? block");
480:                    transitionBetweenStates(lastRealAlt, emptyAlt,
481:                            Label.EPSILON);
482:                    transitionBetweenStates(emptyAlt, A.right, Label.EPSILON);
483:
484:                    // set EOB markers for Jean (I think this is redundant here)
485:                    A.left.endOfBlockStateNumber = A.right.stateNumber;
486:                    A.right.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
487:
488:                    g = A; // return same block, but now with optional last path
489:                }
490:                g.left.decisionStateType = NFAState.OPTIONAL_BLOCK_START;
491:
492:                return g;
493:            }
494:
495:            /** From (A)+ build
496:             *
497:             *     |---|    (Transition 2 from A.right points at alt 1)
498:             *     v   |    (follow of loop is Transition 1)
499:             *  o->o-A-o->o
500:             *
501:             *  Meaning that the last NFAState in A points back to A's left Transition NFAState
502:             *  and we add a new begin/end NFAState.  A can be single alternative or
503:             *  multiple.
504:             *
505:             *  During analysis we'll call the follow link (transition 1) alt n+1 for
506:             *  an n-alt A block.
507:             */
508:            public StateCluster build_Aplus(StateCluster A) {
509:                NFAState left = newState();
510:                NFAState blockEndNFAState = newState();
511:                blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
512:
513:                // don't reuse A.right as loopback if it's right edge of another block
514:                if (A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK) {
515:                    // nested A* so make another tail node to be the loop back
516:                    // instead of the usual A.right which is the EOB for inner loop
517:                    NFAState extraRightEdge = newState();
518:                    transitionBetweenStates(A.right, extraRightEdge,
519:                            Label.EPSILON);
520:                    A.right = extraRightEdge;
521:                }
522:
523:                transitionBetweenStates(A.right, blockEndNFAState,
524:                        Label.EPSILON); // follow is Transition 1
525:                // turn A's block end into a loopback (acts like alt 2)
526:                transitionBetweenStates(A.right, A.left, Label.EPSILON); // loop back Transition 2
527:                transitionBetweenStates(left, A.left, Label.EPSILON);
528:
529:                A.right.decisionStateType = NFAState.LOOPBACK;
530:                A.left.decisionStateType = NFAState.BLOCK_START;
531:
532:                // set EOB markers for Jean
533:                A.left.endOfBlockStateNumber = A.right.stateNumber;
534:
535:                StateCluster g = new StateCluster(left, blockEndNFAState);
536:                return g;
537:            }
538:
539:            /** From (A)* build
540:             *
541:             *     |---|
542:             *     v   |
543:             *  o->o-A-o--o (Transition 2 from block end points at alt 1; follow is Transition 1)
544:             *  |         ^
545:             *  o---------| (optional branch is 2nd alt of optional block containing A+)
546:             *
547:             *  Meaning that the last (end) NFAState in A points back to A's
548:             *  left side NFAState and we add 3 new NFAStates (the
549:             *  optional branch is built just like an optional subrule).
550:             *  See the Aplus() method for more on the loop back Transition.
551:             *  The new node on right edge is set to RIGHT_EDGE_OF_CLOSURE so we
552:             *  can detect nested (A*)* loops and insert an extra node.  Previously,
553:             *  two blocks shared same EOB node.
554:             *
555:             *  There are 2 or 3 decision points in a A*.  If A is not a block (i.e.,
556:             *  it only has one alt), then there are two decisions: the optional bypass
557:             *  and then loopback.  If A is a block of alts, then there are three
558:             *  decisions: bypass, loopback, and A's decision point.
559:             *
560:             *  Note that the optional bypass must be outside the loop as (A|B)* is
561:             *  not the same thing as (A|B|)+.
562:             *
563:             *  This is an accurate NFA representation of the meaning of (A)*, but
564:             *  for generating code, I don't need a DFA for the optional branch by
565:             *  virtue of how I generate code.  The exit-loopback-branch decision
566:             *  is sufficient to let me make an appropriate enter, exit, loop
567:             *  determination.  See codegen.g
568:             */
569:            public StateCluster build_Astar(StateCluster A) {
570:                NFAState bypassDecisionState = newState();
571:                bypassDecisionState
572:                        .setDescription("enter loop path of ()* block");
573:                NFAState optionalAlt = newState();
574:                optionalAlt.setDescription("epsilon path of ()* block");
575:                NFAState blockEndNFAState = newState();
576:                blockEndNFAState.decisionStateType = NFAState.RIGHT_EDGE_OF_BLOCK;
577:
578:                // don't reuse A.right as loopback if it's right edge of another block
579:                if (A.right.decisionStateType == NFAState.RIGHT_EDGE_OF_BLOCK) {
580:                    // nested A* so make another tail node to be the loop back
581:                    // instead of the usual A.right which is the EOB for inner loop
582:                    NFAState extraRightEdge = newState();
583:                    transitionBetweenStates(A.right, extraRightEdge,
584:                            Label.EPSILON);
585:                    A.right = extraRightEdge;
586:                }
587:
588:                // convert A's end block to loopback
589:                A.right.setDescription("()* loopback");
590:                // Transition 1 to actual block of stuff
591:                transitionBetweenStates(bypassDecisionState, A.left,
592:                        Label.EPSILON);
593:                // Transition 2 optional to bypass
594:                transitionBetweenStates(bypassDecisionState, optionalAlt,
595:                        Label.EPSILON);
596:                transitionBetweenStates(optionalAlt, blockEndNFAState,
597:                        Label.EPSILON);
598:                // Transition 1 of end block exits
599:                transitionBetweenStates(A.right, blockEndNFAState,
600:                        Label.EPSILON);
601:                // Transition 2 of end block loops
602:                transitionBetweenStates(A.right, A.left, Label.EPSILON);
603:
604:                bypassDecisionState.decisionStateType = NFAState.BYPASS;
605:                A.left.decisionStateType = NFAState.BLOCK_START;
606:                A.right.decisionStateType = NFAState.LOOPBACK;
607:
608:                // set EOB markers for Jean
609:                A.left.endOfBlockStateNumber = A.right.stateNumber;
610:                bypassDecisionState.endOfBlockStateNumber = blockEndNFAState.stateNumber;
611:
612:                StateCluster g = new StateCluster(bypassDecisionState,
613:                        blockEndNFAState);
614:                return g;
615:            }
616:
617:            /** Build an NFA predictor for special rule called Tokens manually that
618:             *  predicts which token will succeed.  The refs to the rules are not
619:             *  RuleRefTransitions as I want DFA conversion to stop at the EOT
620:             *  transition on the end of each token, rather than return to Tokens rule.
621:             *  If I used normal build_alternativeBlock for this, the RuleRefTransitions
622:             *  would save return address when jumping away from Tokens rule.
623:             *
624:             *  All I do here is build n new states for n rules with an epsilon
625:             *  edge to the rule start states and then to the next state in the
626:             *  list:
627:             *
628:             *   o->(A)  (a state links to start of A and to next in list)
629:             *   |
630:             *   o->(B)
631:             *   |
632:             *   ...
633:             *   |
634:             *   o->(Z)
635:             *
636:             *  This is the NFA created for the artificial rule created in
637:             *  Grammar.addArtificialMatchTokensRule().
638:             *
639:             *  11/28/2005: removed so we can use normal rule construction for Tokens.
640:            public NFAState build_ArtificialMatchTokensRuleNFA() {
641:                int altNum = 1;
642:                NFAState firstAlt = null; // the start state for the "rule"
643:                NFAState prevAlternative = null;
644:                Iterator iter = nfa.grammar.getRules().iterator();
645:            	// TODO: add a single decision node/state for good description
646:                while (iter.hasNext()) {
647:            		Rule r = (Rule) iter.next();
648:                    String ruleName = r.name;
649:            		String modifier = nfa.grammar.getRuleModifier(ruleName);
650:                    if ( ruleName.equals(Grammar.ARTIFICIAL_TOKENS_RULENAME) ||
651:            			 (modifier!=null &&
652:            			  modifier.equals(Grammar.FRAGMENT_RULE_MODIFIER)) )
653:            		{
654:                        continue; // don't loop to yourself or do nontoken rules
655:                    }
656:                    NFAState ruleStartState = nfa.grammar.getRuleStartState(ruleName);
657:                    NFAState left = newState();
658:                    left.setDescription("alt "+altNum+" of artificial rule "+Grammar.ARTIFICIAL_TOKENS_RULENAME);
659:                    transitionBetweenStates(left, ruleStartState, Label.EPSILON);
660:                    // Are we the first alternative?
661:                    if ( firstAlt==null ) {
662:                        firstAlt = left; // track extreme top left node as rule start
663:                    }
664:                    else {
665:                        // if not first alternative, must link to this alt from previous
666:                        transitionBetweenStates(prevAlternative, left, Label.EPSILON);
667:                    }
668:                    prevAlternative = left;
669:                    altNum++;
670:                }
671:            	firstAlt.decisionStateType = NFAState.BLOCK_START;
672:
673:                return firstAlt;
674:            }
675:             */
676:
677:            /** Build an atom with all possible values in its label */
678:            public StateCluster build_Wildcard() {
679:                NFAState left = newState();
680:                NFAState right = newState();
681:                Label label = new Label(nfa.grammar.getTokenTypes()); // char or tokens
682:                Transition e = new Transition(label, right);
683:                left.addTransition(e);
684:                StateCluster g = new StateCluster(left, right);
685:                return g;
686:            }
687:
688:            /** Given a collapsed block of alts (a set of atoms), pull out
689:             *  the set and return it.
690:             */
691:            protected IntSet getCollapsedBlockAsSet(State blk) {
692:                State s0 = blk;
693:                if (s0 != null && s0.transition(0) != null) {
694:                    State s1 = s0.transition(0).target;
695:                    if (s1 != null && s1.transition(0) != null) {
696:                        Label label = s1.transition(0).label;
697:                        if (label.isSet()) {
698:                            return label.getSet();
699:                        }
700:                    }
701:                }
702:                return null;
703:            }
704:
705:            private void transitionBetweenStates(NFAState a, NFAState b,
706:                    int label) {
707:                Transition e = new Transition(label, b);
708:                a.addTransition(e);
709:            }
710:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.