Source Code Cross Referenced for Term.java in  » Scripting » jruby » jregex » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Scripting » jruby » jregex 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


0001:        /**
0002:         * Copyright (c) 2001, Sergey A. Samokhodkin
0003:         * All rights reserved.
0004:         * 
0005:         * Redistribution and use in source and binary forms, with or without modification, 
0006:         * are permitted provided that the following conditions are met:
0007:         * 
0008:         * - Redistributions of source code must retain the above copyright notice, 
0009:         * this list of conditions and the following disclaimer. 
0010:         * - Redistributions in binary form 
0011:         * must reproduce the above copyright notice, this list of conditions and the following 
0012:         * disclaimer in the documentation and/or other materials provided with the distribution.
0013:         * - Neither the name of jregex nor the names of its contributors may be used 
0014:         * to endorse or promote products derived from this software without specific prior 
0015:         * written permission. 
0016:         * 
0017:         * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 
0018:         * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
0019:         * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
0020:         * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
0021:         * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
0022:         * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
0023:         * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
0024:         * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 
0025:         * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0026:         * 
0027:         * @version 1.2_01
0028:         */package jregex;
0029:
0030:        import java.util.*;
0031:
0032:        class Term implements  REFlags {
0033:            //runtime Term types
0034:            static final int CHAR = 0;
0035:            static final int BITSET = 1;
0036:            static final int BITSET2 = 2;
0037:            static final int ANY_CHAR = 4;
0038:            static final int ANY_CHAR_NE = 5;
0039:
0040:            static final int REG = 6;
0041:            static final int REG_I = 7;
0042:            static final int FIND = 8;
0043:            static final int FINDREG = 9;
0044:            static final int SUCCESS = 10;
0045:
0046:            /*optimization-transparent types*/
0047:            static final int BOUNDARY = 11;
0048:            static final int DIRECTION = 12;
0049:            static final int UBOUNDARY = 13;
0050:            static final int UDIRECTION = 14;
0051:
0052:            static final int GROUP_IN = 15;
0053:            static final int GROUP_OUT = 16;
0054:            static final int VOID = 17;
0055:
0056:            static final int START = 18;
0057:            static final int END = 19;
0058:            static final int END_EOL = 20;
0059:            static final int LINE_START = 21;
0060:            static final int LINE_END = 22;
0061:            static final int LAST_MATCH_END = 23;
0062:
0063:            static final int CNT_SET_0 = 24;
0064:            static final int CNT_INC = 25;
0065:            static final int CNT_GT_EQ = 26;
0066:            static final int READ_CNT_LT = 27;
0067:
0068:            static final int CRSTORE_CRINC = 28; //store on 'actual' search entry
0069:            static final int CR_SET_0 = 29;
0070:            static final int CR_LT = 30;
0071:            static final int CR_GT_EQ = 31;
0072:
0073:            /*optimization-nontransparent types*/
0074:            static final int BRANCH = 32;
0075:            static final int BRANCH_STORE_CNT = 33;
0076:            static final int BRANCH_STORE_CNT_AUX1 = 34;
0077:
0078:            static final int PLOOKAHEAD_IN = 35;
0079:            static final int PLOOKAHEAD_OUT = 36;
0080:            static final int NLOOKAHEAD_IN = 37;
0081:            static final int NLOOKAHEAD_OUT = 38;
0082:            static final int PLOOKBEHIND_IN = 39;
0083:            static final int PLOOKBEHIND_OUT = 40;
0084:            static final int NLOOKBEHIND_IN = 41;
0085:            static final int NLOOKBEHIND_OUT = 42;
0086:            static final int INDEPENDENT_IN = 43; //functionally the same as NLOOKAHEAD_IN
0087:            static final int INDEPENDENT_OUT = 44;
0088:
0089:            static final int REPEAT_0_INF = 45;
0090:            static final int REPEAT_MIN_INF = 46;
0091:            static final int REPEAT_MIN_MAX = 47;
0092:            static final int REPEAT_REG_MIN_INF = 48;
0093:            static final int REPEAT_REG_MIN_MAX = 49;
0094:
0095:            static final int BACKTRACK_0 = 50;
0096:            static final int BACKTRACK_MIN = 51;
0097:            static final int BACKTRACK_FIND_MIN = 52;
0098:            static final int BACKTRACK_FINDREG_MIN = 53;
0099:            static final int BACKTRACK_REG_MIN = 54;
0100:
0101:            static final int MEMREG_CONDITION = 55;
0102:            static final int LOOKAHEAD_CONDITION_IN = 56;
0103:            static final int LOOKAHEAD_CONDITION_OUT = 57;
0104:            static final int LOOKBEHIND_CONDITION_IN = 58;
0105:            static final int LOOKBEHIND_CONDITION_OUT = 59;
0106:
0107:            //optimization
0108:            static final int FIRST_TRANSPARENT = BOUNDARY;
0109:            static final int LAST_TRANSPARENT = CR_GT_EQ;
0110:
0111:            // compiletime: length of vars[] (see makeTree())
0112:            static final int VARS_LENGTH = 4;
0113:
0114:            // compiletime variable indices:
0115:            private static final int MEMREG_COUNT = 0; //refers current memreg index
0116:            private static final int CNTREG_COUNT = 1; //refers current counters number
0117:            private static final int DEPTH = 2; //refers current depth: (((depth=3)))
0118:            private static final int LOOKAHEAD_COUNT = 3; //refers current lookahead index
0119:
0120:            private static final int LIMITS_LENGTH = 3;
0121:            private static final int LIMITS_PARSE_RESULT_INDEX = 2;
0122:            private static final int LIMITS_OK = 1;
0123:            private static final int LIMITS_FAILURE = 2;
0124:
0125:            //static CustomParser[] customParsers=new CustomParser[256];
0126:
0127:            // **** CONTROL FLOW **** 
0128:
0129:            // next-to-execute and next-if-failed commands;
0130:            Term next, failNext;
0131:
0132:            // **** TYPES ****
0133:
0134:            int type = VOID;
0135:            boolean inverse;
0136:
0137:            // used with type=CHAR
0138:            char c;
0139:
0140:            // used with type=FIND
0141:            int distance;
0142:            boolean eat;
0143:
0144:            // used with type=BITSET(2);
0145:            boolean[] bitset;
0146:            boolean[][] bitset2;
0147:            boolean[] categoryBitset; //types(unicode categories)
0148:
0149:            // used with type=BALANCE;
0150:            char[] brackets;
0151:
0152:            // used for optimization with type=BITSET,BITSET2
0153:            int weight;
0154:
0155:            // **** MEMORISATION ****
0156:
0157:            // memory slot, used with type=REG,GROUP_IN,GROUP_OUT
0158:            int memreg = -1;
0159:
0160:            // **** COUNTERS ****
0161:
0162:            // max|min number of iterations
0163:            // used with CNT_GT_EQ ,REPEAT_* etc.;
0164:            int minCount, maxCount;
0165:
0166:            // used with REPEAT_*,REPEAT_REG_*;
0167:            Term target;
0168:
0169:            // a counter slot to increment & compare with maxCount (CNT_INC etc.);
0170:            int cntreg = 0;
0171:
0172:            // lookahead group id;
0173:            int lookaheadId;
0174:
0175:            // **** COMPILE HELPERS ****
0176:
0177:            protected Term prev, in, out, out1, first, current;
0178:
0179:            //new!!
0180:            protected Term branchOut;
0181:
0182:            //protected  boolean newBranch=false,closed=false;
0183:            //protected  boolean newBranch=false;
0184:
0185:            //for debugging
0186:            static int instances;
0187:            int instanceNum;
0188:
/**
 * Creates a term with the field-initializer defaults.
 * The new term is linked to itself as both its entry ({@code in})
 * and its exit ({@code out}).
 */
Term() {
    // for debugging: tag each instance with its creation ordinal
    instanceNum = instances++;
    out = this;
    in = this;
}

/**
 * Creates a term exactly as {@link #Term()} does and then assigns
 * its runtime type.
 *
 * @param type one of the term type constants declared in this class
 */
Term(int type) {
    this();
    this.type = type;
}
0200:
0201:            static void makeTree(String s, int flags, Pattern re)
0202:                    throws PatternSyntaxException {
0203:                char[] data = s.toCharArray();
0204:                makeTree(data, 0, data.length, flags, re);
0205:            }
0206:
0207:            static void makeTree(char[] data, int offset, int end, int flags,
0208:                    Pattern re) throws PatternSyntaxException {
0209:                // memreg,counter,depth,lookahead
0210:                int[] vars = { 1, 0, 0, 0 }; //don't use counters[0]
0211:
0212:                //collect iterators for subsequent optimization
0213:                Vector iterators = new Vector();
0214:                Hashtable groupNames = new Hashtable();
0215:
0216:                Pretokenizer t = new Pretokenizer(data, offset, end);
0217:                Term term = makeTree(t, data, vars, flags, new Group(),
0218:                        iterators, groupNames);
0219:                // term=(0-...-0)
0220:
0221:                // convert closing outer bracket into success term
0222:                term.out.type = SUCCESS;
0223:                // term=(0-...-!!!
0224:
0225:                //throw out opening bracket
0226:                Term first = term.next;
0227:                // term=...-!!!
0228:
0229:                // Optimisation: 
0230:                Term optimized = first;
0231:                Optimizer opt = Optimizer.find(first);
0232:                if (opt != null)
0233:                    optimized = opt.makeFirst(first);
0234:
0235:                Enumeration en = iterators.elements();
0236:                while (en.hasMoreElements()) {
0237:                    Iterator i = (Iterator) en.nextElement();
0238:                    i.optimize();
0239:                }
0240:                // ===
0241:
0242:                re.root = optimized;
0243:                re.root0 = first;
0244:                re.memregs = vars[MEMREG_COUNT];
0245:                re.counters = vars[CNTREG_COUNT];
0246:                re.lookaheads = vars[LOOKAHEAD_COUNT];
0247:                re.namedGroupMap = groupNames;
0248:            }
0249:
0250:            private static Term makeTree(Pretokenizer t, char[] data,
0251:                    int[] vars, int flags, Term term, Vector iterators,
0252:                    Hashtable groupNames) throws PatternSyntaxException {
0253:                //System.out.println("Term.makeTree(): flags="+flags);
0254:                if (vars.length != VARS_LENGTH)
0255:                    throw new IllegalArgumentException("vars.length should be "
0256:                            + VARS_LENGTH + ", not " + vars.length);
0257:                //Term term=new Term(isMemReg? vars[MEMREG_COUNT]: -1);
0258:                // use memreg 0 as unsignificant
0259:                //Term term=new Group(isMemReg? vars[MEMREG_COUNT]: 0);
0260:                while (true) {
0261:                    t.next();
0262:                    term.append(t.tOffset, t.tOutside, data, vars, flags,
0263:                            iterators, groupNames);
0264:                    switch (t.ttype) {
0265:                    case Pretokenizer.FLAGS:
0266:                        flags = t.flags(flags);
0267:                        continue;
0268:                    case Pretokenizer.CLASS_GROUP:
0269:                        t.next();
0270:                        Term clg = new Term();
0271:                        CharacterClass
0272:                                .parseGroup(data, t.tOffset, t.tOutside, clg,
0273:                                        (flags & IGNORE_CASE) > 0,
0274:                                        (flags & IGNORE_SPACES) > 0,
0275:                                        (flags & UNICODE) > 0,
0276:                                        (flags & XML_SCHEMA) > 0);
0277:                        term.append(clg);
0278:                        continue;
0279:                    case Pretokenizer.PLAIN_GROUP:
0280:                        vars[DEPTH]++;
0281:                        //System.out.println("PLAIN_GROUP, t.tOffset="+t.tOffset+", t.tOutside="+t.tOutside+", t.flags("+flags+")="+t.flags(flags));
0282:                        term.append(makeTree(t, data, vars, t.flags(flags),
0283:                                new Group(), iterators, groupNames));
0284:                        break;
0285:                    case Pretokenizer.NAMED_GROUP:
0286:                        String gname = t.groupName;
0287:                        int id;
0288:                        if (Character.isDigit(gname.charAt(0))) {
0289:                            try {
0290:                                id = Integer.parseInt(gname);
0291:                            } catch (NumberFormatException e) {
0292:                                throw new PatternSyntaxException(
0293:                                        "group name starts with digit but is not a number");
0294:                            }
0295:                            if (groupNames.contains(new Integer(id))) {
0296:                                if (t.groupDeclared)
0297:                                    throw new PatternSyntaxException(
0298:                                            "group redeclaration: "
0299:                                                    + gname
0300:                                                    + "; use ({=id}...) for multiple group assignments");
0301:                            }
0302:                            if (vars[MEMREG_COUNT] <= id)
0303:                                vars[MEMREG_COUNT] = id + 1;
0304:                        } else {
0305:                            Integer no = (Integer) groupNames.get(gname);
0306:                            if (no == null) {
0307:                                id = vars[MEMREG_COUNT]++;
0308:                                groupNames.put(t.groupName, new Integer(id));
0309:                            } else {
0310:                                if (t.groupDeclared)
0311:                                    throw new PatternSyntaxException(
0312:                                            "group redeclaration "
0313:                                                    + gname
0314:                                                    + "; use ({=name}...) for group reassignments");
0315:                                id = no.intValue();
0316:                            }
0317:                        }
0318:                        vars[DEPTH]++;
0319:                        term.append(makeTree(t, data, vars, flags,
0320:                                new Group(id), iterators, groupNames));
0321:                        break;
0322:                    case '(':
0323:                        vars[DEPTH]++;
0324:                        term.append(makeTree(t, data, vars, flags, new Group(
0325:                                vars[MEMREG_COUNT]++), iterators, groupNames));
0326:                        break;
0327:                    case Pretokenizer.POS_LOOKAHEAD:
0328:                        vars[DEPTH]++;
0329:                        term.append(makeTree(t, data, vars, flags,
0330:                                new Lookahead(vars[LOOKAHEAD_COUNT]++, true),
0331:                                iterators, groupNames));
0332:                        break;
0333:                    case Pretokenizer.NEG_LOOKAHEAD:
0334:                        vars[DEPTH]++;
0335:                        term.append(makeTree(t, data, vars, flags,
0336:                                new Lookahead(vars[LOOKAHEAD_COUNT]++, false),
0337:                                iterators, groupNames));
0338:                        break;
0339:                    case Pretokenizer.POS_LOOKBEHIND:
0340:                        vars[DEPTH]++;
0341:                        term.append(makeTree(t, data, vars, flags,
0342:                                new Lookbehind(vars[LOOKAHEAD_COUNT]++, true),
0343:                                iterators, groupNames));
0344:                        break;
0345:                    case Pretokenizer.NEG_LOOKBEHIND:
0346:                        vars[DEPTH]++;
0347:                        term.append(makeTree(t, data, vars, flags,
0348:                                new Lookbehind(vars[LOOKAHEAD_COUNT]++, false),
0349:                                iterators, groupNames));
0350:                        break;
0351:                    case Pretokenizer.INDEPENDENT_REGEX:
0352:                        vars[DEPTH]++;
0353:                        term.append(makeTree(t, data, vars, flags,
0354:                                new IndependentGroup(vars[LOOKAHEAD_COUNT]++),
0355:                                iterators, groupNames));
0356:                        break;
0357:                    case Pretokenizer.CONDITIONAL_GROUP:
0358:                        vars[DEPTH]++;
0359:                        t.next();
0360:                        Term fork = null;
0361:                        boolean positive = true;
0362:                        switch (t.ttype) {
0363:                        case Pretokenizer.NEG_LOOKAHEAD:
0364:                            positive = false;
0365:                        case Pretokenizer.POS_LOOKAHEAD:
0366:                            vars[DEPTH]++;
0367:                            Lookahead la = new Lookahead(
0368:                                    vars[LOOKAHEAD_COUNT]++, positive);
0369:                            makeTree(t, data, vars, flags, la, iterators,
0370:                                    groupNames);
0371:                            fork = new ConditionalExpr(la);
0372:                            break;
0373:                        case Pretokenizer.NEG_LOOKBEHIND:
0374:                            positive = false;
0375:                        case Pretokenizer.POS_LOOKBEHIND:
0376:                            vars[DEPTH]++;
0377:                            Lookbehind lb = new Lookbehind(
0378:                                    vars[LOOKAHEAD_COUNT]++, positive);
0379:                            makeTree(t, data, vars, flags, lb, iterators,
0380:                                    groupNames);
0381:                            fork = new ConditionalExpr(lb);
0382:                            break;
0383:                        case '(':
0384:                            t.next();
0385:                            if (t.ttype != ')')
0386:                                throw new PatternSyntaxException(
0387:                                        "malformed condition");
0388:                            int memregNo;
0389:                            if (Character.isDigit(data[t.tOffset]))
0390:                                memregNo = makeNumber(t.tOffset, t.tOutside,
0391:                                        data);
0392:                            else {
0393:                                String gn = new String(data, t.tOffset,
0394:                                        t.tOutside - t.tOffset);
0395:                                Integer gno = (Integer) groupNames.get(gn);
0396:                                if (gno == null)
0397:                                    throw new PatternSyntaxException(
0398:                                            "unknown group name in conditional expr.: "
0399:                                                    + gn);
0400:                                memregNo = gno.intValue();
0401:                            }
0402:                            fork = new ConditionalExpr(memregNo);
0403:                            break;
0404:                        default:
0405:                            throw new PatternSyntaxException(
0406:                                    "malformed conditional expression: "
0407:                                            + t.ttype + " '" + (char) t.ttype
0408:                                            + "'");
0409:                        }
0410:                        term.append(makeTree(t, data, vars, flags, fork,
0411:                                iterators, groupNames));
0412:                        break;
0413:                    case '|':
0414:                        term.newBranch();
0415:                        break;
0416:                    case Pretokenizer.END:
0417:                        if (vars[DEPTH] > 0)
0418:                            throw new PatternSyntaxException(
0419:                                    "unbalanced parenthesis");
0420:                        term.close();
0421:                        return term;
0422:                    case ')':
0423:                        if (vars[DEPTH] <= 0)
0424:                            throw new PatternSyntaxException(
0425:                                    "unbalanced parenthesis");
0426:                        term.close();
0427:                        vars[DEPTH]--;
0428:                        return term;
0429:                    case Pretokenizer.COMMENT:
0430:                        while (t.ttype != ')')
0431:                            t.next();
0432:                        continue;
0433:                    default:
0434:                        throw new PatternSyntaxException("unknown token type: "
0435:                                + t.ttype);
0436:                    }
0437:                }
0438:            }
0439:
0440:            static int makeNumber(int off, int out, char[] data) {
0441:                int n = 0;
0442:                for (int i = off; i < out; i++) {
0443:                    int d = data[i] - '0';
0444:                    if (d < 0 || d > 9)
0445:                        return -1;
0446:                    n *= 10;
0447:                    n += d;
0448:                }
0449:                return n;
0450:            }
0451:
0452:            protected void append(int offset, int end, char[] data, int[] vars,
0453:                    int flags, Vector iterators, Hashtable gmap)
0454:                    throws PatternSyntaxException {
0455:                //System.out.println("append("+new String(data,offset,end-offset)+")");
0456:                //System.out.println("current="+this.current);
0457:                int[] limits = new int[3];
0458:                int i = offset;
0459:                Term tmp, current = this .current;
0460:                while (i < end) {
0461:                    char c = data[i];
0462:                    boolean greedy = true;
0463:                    switch (c) {
0464:                    //operations
0465:                    case '*':
0466:                        if (current == null)
0467:                            throw new PatternSyntaxException(
0468:                                    "missing term before *");
0469:                        i++;
0470:                        if (i < end) {
0471:                            switch (data[i]) {
0472:                            case '?':
0473:                                greedy ^= true;
0474:                                i++;
0475:                                break;
0476:                            case '*':
0477:                            case '+':
0478:                                throw new PatternSyntaxException(
0479:                                        "nested *?+ in regexp");
0480:                            }
0481:                        }
0482:                        tmp = greedy ? makeGreedyStar(vars, current, iterators)
0483:                                : makeLazyStar(vars, current);
0484:                        current = replaceCurrent(tmp);
0485:                        break;
0486:
0487:                    case '+':
0488:                        if (current == null)
0489:                            throw new PatternSyntaxException(
0490:                                    "missing term before +");
0491:                        i++;
0492:                        if (i < end) {
0493:                            switch (data[i]) {
0494:                            case '?':
0495:                                greedy ^= true;
0496:                                i++;
0497:                                break;
0498:                            case '*':
0499:                            case '+':
0500:                                throw new PatternSyntaxException(
0501:                                        "nested *?+ in regexp");
0502:                            }
0503:                        }
0504:                        tmp = greedy ? makeGreedyPlus(vars, current, iterators)
0505:                                : makeLazyPlus(vars, current);
0506:                        current = replaceCurrent(tmp);
0507:                        break;
0508:
0509:                    case '?':
0510:                        if (current == null)
0511:                            throw new PatternSyntaxException(
0512:                                    "missing term before ?");
0513:                        i++;
0514:                        if (i < end) {
0515:                            switch (data[i]) {
0516:                            case '?':
0517:                                greedy ^= true;
0518:                                i++;
0519:                                break;
0520:                            case '*':
0521:                            case '+':
0522:                                throw new PatternSyntaxException(
0523:                                        "nested *?+ in regexp");
0524:                            }
0525:                        }
0526:
0527:                        tmp = greedy ? makeGreedyQMark(vars, current)
0528:                                : makeLazyQMark(vars, current);
0529:                        current = replaceCurrent(tmp);
0530:                        break;
0531:
0532:                    case '{':
0533:                        limits[0] = 0;
0534:                        limits[1] = -1;
0535:                        int le = parseLimits(i + 1, end, data, limits);
0536:                        if (limits[LIMITS_PARSE_RESULT_INDEX] == LIMITS_OK) { //parse ok
0537:                            if (current == null)
0538:                                throw new PatternSyntaxException(
0539:                                        "missing term before {}");
0540:                            i = le;
0541:                            if (i < end && data[i] == '?') {
0542:                                greedy ^= true;
0543:                                i++;
0544:                            }
0545:                            tmp = greedy ? makeGreedyLimits(vars, current,
0546:                                    limits, iterators) : makeLazyLimits(vars,
0547:                                    current, limits);
0548:                            current = replaceCurrent(tmp);
0549:                            break;
0550:                        } else { //unicode class or named backreference
0551:                            if (data[i + 1] == '\\') { //'{\name}' - backreference
0552:                                int p = i + 2;
0553:                                if (p == end)
0554:                                    throw new PatternSyntaxException(
0555:                                            "'group_id' expected");
0556:                                while (Character.isWhitespace(data[p])) {
0557:                                    p++;
0558:                                    if (p == end)
0559:                                        throw new PatternSyntaxException(
0560:                                                "'group_id' expected");
0561:                                }
0562:                                BackReference br = new BackReference(-1,
0563:                                        (flags & IGNORE_CASE) > 0);
0564:                                i = parseGroupId(data, p, end, br, gmap);
0565:                                current = append(br);
0566:                                continue;
0567:                            } else {
0568:                                Term t = new Term();
0569:                                i = CharacterClass.parseName(data, i, end, t,
0570:                                        false, (flags & IGNORE_SPACES) > 0);
0571:                                current = append(t);
0572:                                continue;
0573:                            }
0574:                        }
0575:
0576:                    case ' ':
0577:                    case '\t':
0578:                    case '\r':
0579:                    case '\n':
0580:                        if ((flags & IGNORE_SPACES) > 0) {
0581:                            i++;
0582:                            continue;
0583:                        }
0584:                        //else go on as default
0585:
0586:                        //symbolic items
0587:                    default:
0588:                        tmp = new Term();
0589:                        i = parseTerm(data, i, end, tmp, flags);
0590:
0591:                        if (tmp.type == END && i < end) {
0592:                            if ((flags & IGNORE_SPACES) > 0) {
0593:                                i++;
0594:                                while (i < end) {
0595:                                    c = data[i];
0596:                                    switch (c) {
0597:                                    case ' ':
0598:                                    case '\t':
0599:                                    case '\r':
0600:                                    case '\n':
0601:                                        i++;
0602:                                        continue;
0603:                                    default:
0604:                                        throw new PatternSyntaxException(
0605:                                                "'$' is not a last term in the group: <"
0606:                                                        + new String(data,
0607:                                                                offset,
0608:                                                                end - offset)
0609:                                                        + ">");
0610:                                    }
0611:                                }
0612:                            } else {
0613:                                throw new PatternSyntaxException(
0614:                                        "'$' is not a last term in the group: <"
0615:                                                + new String(data, offset, end
0616:                                                        - offset) + ">");
0617:                            }
0618:                        }
0619:                        //"\A" 
0620:                        //if(tmp.type==START && i>(offset+1)){
0621:                        //   throw new PatternSyntaxException("'^' is not a first term in the group: <"+new String(data,offset,end-offset)+">");
0622:                        //}
0623:                        current = append(tmp);
0624:                        break;
0625:                    }
0626:                    //System.out.println("next term: "+next);
0627:                    //System.out.println("  next.out="+next.out);
0628:                    //System.out.println("  next.out1="+next.out1);
0629:                    //System.out.println("  next.branchOut="+next.branchOut);
0630:                }
0631:                //System.out.println(in.toStringAll());
0632:                //System.out.println("current="+current);
0633:                //System.out.println();
0634:            }
0635:
0636:            private static int parseGroupId(char[] data, int i, int end,
0637:                    Term term, Hashtable gmap) throws PatternSyntaxException {
0638:                int id;
0639:                int nstart = i;
0640:                if (Character.isDigit(data[i])) {
0641:                    while (Character.isDigit(data[i])) {
0642:                        i++;
0643:                        if (i == end)
0644:                            throw new PatternSyntaxException(
0645:                                    "group_id expected");
0646:                    }
0647:                    id = makeNumber(nstart, i, data);
0648:                } else {
0649:                    while (Character.isJavaIdentifierPart(data[i])) {
0650:                        i++;
0651:                        if (i == end)
0652:                            throw new PatternSyntaxException(
0653:                                    "group_id expected");
0654:                    }
0655:                    String s = new String(data, nstart, i - nstart);
0656:                    Integer no = (Integer) gmap.get(s);
0657:                    if (no == null)
0658:                        throw new PatternSyntaxException(
0659:                                "backreference to unknown group: " + s);
0660:                    id = no.intValue();
0661:                }
0662:                while (Character.isWhitespace(data[i])) {
0663:                    i++;
0664:                    if (i == end)
0665:                        throw new PatternSyntaxException("'}' expected");
0666:                }
0667:
0668:                int c = data[i++];
0669:
0670:                if (c != '}')
0671:                    throw new PatternSyntaxException("'}' expected");
0672:
0673:                term.memreg = id;
0674:                return i;
0675:            }
0676:
0677:            protected Term append(Term term) throws PatternSyntaxException {
0678:                //System.out.println("append("+term.toStringAll()+"), this="+toStringAll());
0679:                //Term prev=this.prev;
0680:                Term current = this .current;
0681:                if (current == null) {
0682:                    //System.out.println("2");
0683:                    //System.out.println("  term="+term);
0684:                    //System.out.println("  term.in="+term.in);
0685:                    in.next = term;
0686:                    term.prev = in;
0687:                    this .current = term;
0688:                    //System.out.println("  result: "+in.toStringAll()+"\r\n");
0689:                    return term;
0690:                }
0691:                //System.out.println("3");
0692:                link(current, term);
0693:                //this.prev=current;
0694:                this .current = term;
0695:                //System.out.println(in.toStringAll());
0696:                //System.out.println("current="+this.current);
0697:                //System.out.println();
0698:                return term;
0699:            }
0700:
0701:            protected Term replaceCurrent(Term term)
0702:                    throws PatternSyntaxException {
0703:                //System.out.println("replaceCurrent("+term+"), current="+current+", current.prev="+current.prev);
0704:                //Term prev=this.prev;
0705:                Term prev = current.prev;
0706:                if (prev != null) {
0707:                    Term in = this .in;
0708:                    if (prev == in) {
0709:                        //in.next=term;
0710:                        //term.prev=in;
0711:                        in.next = term.in;
0712:                        term.in.prev = in;
0713:                    } else
0714:                        link(prev, term);
0715:                }
0716:                this .current = term;
0717:                //System.out.println("   new current="+this.current);
0718:                return term;
0719:            }
0720:
0721:            protected void newBranch() throws PatternSyntaxException {
0722:                //System.out.println("newBranch()");
0723:                close();
0724:                startNewBranch();
0725:                //System.out.println(in.toStringAll());
0726:                //System.out.println("current="+current);
0727:                //System.out.println();
0728:            }
0729:
0730:            protected void close() throws PatternSyntaxException {
0731:                //System.out.println("close(), current="+current+", this="+toStringAll());
0732:                //System.out.println();
0733:                //System.out.println("close()");
0734:                //System.out.println("current="+this.current);
0735:                //System.out.println("prev="+this.prev);
0736:                //System.out.println();
0737:                /*
0738:                Term prev=this.prev;
0739:                if(prev!=null){
0740:                   Term current=this.current;
0741:                   if(current!=null){
0742:                      link(prev,current);
0743:                      prev=current;
0744:                      this.current=null;
0745:                   }
0746:                   link(prev,out);
0747:                   this.prev=null;
0748:                }
0749:                 */
0750:                Term current = this .current;
0751:                if (current != null)
0752:                    linkd(current, out);
0753:                else
0754:                    in.next = out;
0755:                //System.out.println(in.toStringAll());
0756:                //System.out.println("current="+this.current);
0757:                //System.out.println("prev="+this.prev);
0758:                //System.out.println();
0759:            }
0760:
0761:            private final static void link(Term term, Term next) {
0762:                linkd(term, next.in);
0763:                next.prev = term;
0764:            }
0765:
0766:            private final static void linkd(Term term, Term next) {
0767:                //System.out.println("linkDirectly(\""+term+"\" -> \""+next+"\")");
0768:                Term prev_out = term.out;
0769:                if (prev_out != null) {
0770:                    //System.out.println("   prev_out="+prev_out);
0771:                    prev_out.next = next;
0772:                }
0773:                Term prev_out1 = term.out1;
0774:                if (prev_out1 != null) {
0775:                    //System.out.println("   prev_out1="+prev_out1);
0776:                    prev_out1.next = next;
0777:                }
0778:                Term prev_branch = term.branchOut;
0779:                if (prev_branch != null) {
0780:                    //System.out.println("   prev_branch="+prev_branch);
0781:                    prev_branch.failNext = next;
0782:                }
0783:            }
0784:
0785:            protected void startNewBranch() throws PatternSyntaxException {
0786:                //System.out.println("newBranch()");
0787:                //System.out.println("before startNewBranch(), this="+toStringAll());
0788:                //System.out.println();
0789:                Term tmp = in.next;
0790:                Term b = new Branch();
0791:                in.next = b;
0792:                b.next = tmp;
0793:                b.in = null;
0794:                b.out = null;
0795:                b.out1 = null;
0796:                b.branchOut = b;
0797:                current = b;
0798:                //System.out.println("startNewBranch(), this="+toStringAll());
0799:                //System.out.println();
0800:            }
0801:
0802:            private final static Term makeGreedyStar(int[] vars, Term term,
0803:                    Vector iterators) throws PatternSyntaxException {
0804:                //vars[STACK_SIZE]++;
0805:                switch (term.type) {
0806:                case REPEAT_0_INF:
0807:                case REPEAT_MIN_INF:
0808:                case REPEAT_MIN_MAX:
0809:                case REPEAT_REG_MIN_INF:
0810:                case REPEAT_REG_MIN_MAX:
0811:                case INDEPENDENT_IN:
0812:                case GROUP_IN: {
0813:                    Term b = new Branch();
0814:                    b.next = term.in;
0815:                    term.out.next = b;
0816:
0817:                    b.in = b;
0818:                    b.out = null;
0819:                    b.out1 = null;
0820:                    b.branchOut = b;
0821:
0822:                    return b;
0823:                }
0824:                default: {
0825:                    Iterator i = new Iterator(term, 0, -1, iterators);
0826:                    return i;
0827:                }
0828:                }
0829:            }
0830:
0831:            private final static Term makeLazyStar(int[] vars, Term term) {
0832:                //vars[STACK_SIZE]++;
0833:                switch (term.type) {
0834:                case REPEAT_0_INF:
0835:                case REPEAT_MIN_INF:
0836:                case REPEAT_MIN_MAX:
0837:                case REPEAT_REG_MIN_INF:
0838:                case REPEAT_REG_MIN_MAX:
0839:                case GROUP_IN: {
0840:                    Term b = new Branch();
0841:                    b.failNext = term.in;
0842:                    term.out.next = b;
0843:
0844:                    b.in = b;
0845:                    b.out = b;
0846:                    b.out1 = null;
0847:                    b.branchOut = null;
0848:
0849:                    return b;
0850:                }
0851:                default: {
0852:                    Term b = new Branch();
0853:                    b.failNext = term;
0854:                    term.next = b;
0855:
0856:                    b.in = b;
0857:                    b.out = b;
0858:                    b.out1 = null;
0859:                    b.branchOut = null;
0860:
0861:                    return b;
0862:                }
0863:                }
0864:            }
0865:
0866:            private final static Term makeGreedyPlus(int[] vars, Term term,
0867:                    Vector iterators) throws PatternSyntaxException {
0868:                //vars[STACK_SIZE]++;
0869:                switch (term.type) {
0870:                case REPEAT_0_INF:
0871:                case REPEAT_MIN_INF:
0872:                case REPEAT_MIN_MAX:
0873:                case REPEAT_REG_MIN_INF:
0874:                case REPEAT_REG_MIN_MAX:
0875:                case INDEPENDENT_IN://?
0876:                case GROUP_IN: {
0877:                    //System.out.println("makeGreedyPlus():");
0878:                    //System.out.println("   in="+term.in);
0879:                    //System.out.println("   out="+term.out);
0880:                    Term b = new Branch();
0881:                    b.next = term.in;
0882:                    term.out.next = b;
0883:
0884:                    b.in = term.in;
0885:                    b.out = null;
0886:                    b.out1 = null;
0887:                    b.branchOut = b;
0888:
0889:                    //System.out.println("   returning "+b.in);
0890:
0891:                    return b;
0892:                }
0893:                default: {
0894:                    return new Iterator(term, 1, -1, iterators);
0895:                }
0896:                }
0897:            }
0898:
0899:            private final static Term makeLazyPlus(int[] vars, Term term) {
0900:                //vars[STACK_SIZE]++;
0901:                switch (term.type) {
0902:                case REPEAT_0_INF:
0903:                case REPEAT_MIN_INF:
0904:                case REPEAT_MIN_MAX:
0905:                case REPEAT_REG_MIN_INF:
0906:                case REPEAT_REG_MIN_MAX:
0907:                case GROUP_IN: {
0908:                    Term b = new Branch();
0909:                    term.out.next = b;
0910:                    b.failNext = term.in;
0911:
0912:                    b.in = term.in;
0913:                    b.out = b;
0914:                    b.out1 = null;
0915:                    b.branchOut = null;
0916:
0917:                    return b;
0918:                }
0919:                case REG:
0920:                default: {
0921:                    Term b = new Branch();
0922:                    term.next = b;
0923:                    b.failNext = term;
0924:
0925:                    b.in = term;
0926:                    b.out = b;
0927:                    b.out1 = null;
0928:                    b.branchOut = null;
0929:
0930:                    return b;
0931:                }
0932:                }
0933:            }
0934:
0935:            private final static Term makeGreedyQMark(int[] vars, Term term) {
0936:                //vars[STACK_SIZE]++;
0937:                switch (term.type) {
0938:                case REPEAT_0_INF:
0939:                case REPEAT_MIN_INF:
0940:                case REPEAT_MIN_MAX:
0941:                case REPEAT_REG_MIN_INF:
0942:                case REPEAT_REG_MIN_MAX:
0943:                case GROUP_IN: {
0944:                    Term b = new Branch();
0945:                    b.next = term.in;
0946:
0947:                    b.in = b;
0948:                    b.out = term.out;
0949:                    b.out1 = null;
0950:                    b.branchOut = b;
0951:
0952:                    return b;
0953:                }
0954:                case REG:
0955:                default: {
0956:                    Term b = new Branch();
0957:                    b.next = term;
0958:
0959:                    b.in = b;
0960:                    b.out = term;
0961:                    b.out1 = null;
0962:                    b.branchOut = b;
0963:
0964:                    return b;
0965:                }
0966:                }
0967:            }
0968:
0969:            private final static Term makeLazyQMark(int[] vars, Term term) {
0970:                //vars[STACK_SIZE]++;
0971:                switch (term.type) {
0972:                case REPEAT_0_INF:
0973:                case REPEAT_MIN_INF:
0974:                case REPEAT_MIN_MAX:
0975:                case REPEAT_REG_MIN_INF:
0976:                case REPEAT_REG_MIN_MAX:
0977:                case GROUP_IN: {
0978:                    Term b = new Branch();
0979:                    b.failNext = term.in;
0980:
0981:                    b.in = b;
0982:                    b.out = b;
0983:                    b.out1 = term.out;
0984:                    b.branchOut = null;
0985:
0986:                    return b;
0987:                }
0988:                case REG:
0989:                default: {
0990:                    Term b = new Branch();
0991:                    b.failNext = term;
0992:
0993:                    b.in = b;
0994:                    b.out = b;
0995:                    b.out1 = term;
0996:                    b.branchOut = null;
0997:
0998:                    return b;
0999:                }
1000:                }
1001:            }
1002:
1003:            private final static Term makeGreedyLimits(int[] vars, Term term,
1004:                    int[] limits, Vector iterators)
1005:                    throws PatternSyntaxException {
1006:                //vars[STACK_SIZE]++;
1007:                int m = limits[0];
1008:                int n = limits[1];
1009:                switch (term.type) {
1010:                case REPEAT_0_INF:
1011:                case REPEAT_MIN_INF:
1012:                case REPEAT_MIN_MAX:
1013:                case REPEAT_REG_MIN_INF:
1014:                case REPEAT_REG_MIN_MAX:
1015:                case GROUP_IN: {
1016:                    int cntreg = vars[CNTREG_COUNT]++;
1017:                    Term reset = new Term(CR_SET_0);
1018:                    reset.cntreg = cntreg;
1019:                    Term b = new Term(BRANCH);
1020:
1021:                    Term inc = new Term(CRSTORE_CRINC);
1022:                    inc.cntreg = cntreg;
1023:
1024:                    reset.next = b;
1025:
1026:                    if (n >= 0) {
1027:                        Term lt = new Term(CR_LT);
1028:                        lt.cntreg = cntreg;
1029:                        lt.maxCount = n;
1030:                        b.next = lt;
1031:                        lt.next = term.in;
1032:                    } else {
1033:                        b.next = term.in;
1034:                    }
1035:                    term.out.next = inc;
1036:                    inc.next = b;
1037:
1038:                    if (m >= 0) {
1039:                        Term gt = new Term(CR_GT_EQ);
1040:                        gt.cntreg = cntreg;
1041:                        gt.maxCount = m;
1042:                        b.failNext = gt;
1043:
1044:                        reset.in = reset;
1045:                        reset.out = gt;
1046:                        reset.out1 = null;
1047:                        reset.branchOut = null;
1048:                    } else {
1049:                        reset.in = reset;
1050:                        reset.out = null;
1051:                        reset.out1 = null;
1052:                        reset.branchOut = b;
1053:                    }
1054:                    return reset;
1055:                }
1056:                default: {
1057:                    return new Iterator(term, limits[0], limits[1], iterators);
1058:                }
1059:                }
1060:            }
1061:
1062:            private final static Term makeLazyLimits(int[] vars, Term term,
1063:                    int[] limits) {
1064:                //vars[STACK_SIZE]++;
1065:                int m = limits[0];
1066:                int n = limits[1];
1067:                switch (term.type) {
1068:                case REPEAT_0_INF:
1069:                case REPEAT_MIN_INF:
1070:                case REPEAT_MIN_MAX:
1071:                case REPEAT_REG_MIN_INF:
1072:                case REPEAT_REG_MIN_MAX:
1073:                case GROUP_IN: {
1074:                    int cntreg = vars[CNTREG_COUNT]++;
1075:                    Term reset = new Term(CR_SET_0);
1076:                    reset.cntreg = cntreg;
1077:                    Term b = new Term(BRANCH);
1078:                    Term inc = new Term(CRSTORE_CRINC);
1079:                    inc.cntreg = cntreg;
1080:
1081:                    reset.next = b;
1082:
1083:                    if (n >= 0) {
1084:                        Term lt = new Term(CR_LT);
1085:                        lt.cntreg = cntreg;
1086:                        lt.maxCount = n;
1087:                        b.failNext = lt;
1088:                        lt.next = term.in;
1089:                    } else {
1090:                        b.failNext = term.in;
1091:                    }
1092:                    term.out.next = inc;
1093:                    inc.next = b;
1094:
1095:                    if (m >= 0) {
1096:                        Term gt = new Term(CR_GT_EQ);
1097:                        gt.cntreg = cntreg;
1098:                        gt.maxCount = m;
1099:                        b.next = gt;
1100:
1101:                        reset.in = reset;
1102:                        reset.out = gt;
1103:                        reset.out1 = null;
1104:                        reset.branchOut = null;
1105:
1106:                        return reset;
1107:                    } else {
1108:                        reset.in = reset;
1109:                        reset.out = b;
1110:                        reset.out1 = null;
1111:                        reset.branchOut = null;
1112:
1113:                        return reset;
1114:                    }
1115:                }
1116:                case REG:
1117:                default: {
1118:                    Term reset = new Term(CNT_SET_0);
1119:                    Term b = new Branch(BRANCH_STORE_CNT);
1120:                    Term inc = new Term(CNT_INC);
1121:
1122:                    reset.next = b;
1123:
1124:                    if (n >= 0) {
1125:                        Term lt = new Term(READ_CNT_LT);
1126:                        lt.maxCount = n;
1127:                        b.failNext = lt;
1128:                        lt.next = term;
1129:                        term.next = inc;
1130:                        inc.next = b;
1131:                    } else {
1132:                        b.next = term;
1133:                        term.next = inc;
1134:                        inc.next = term;
1135:                    }
1136:
1137:                    if (m >= 0) {
1138:                        Term gt = new Term(CNT_GT_EQ);
1139:                        gt.maxCount = m;
1140:                        b.next = gt;
1141:
1142:                        reset.in = reset;
1143:                        reset.out = gt;
1144:                        reset.out1 = null;
1145:                        reset.branchOut = null;
1146:
1147:                        return reset;
1148:                    } else {
1149:                        reset.in = reset;
1150:                        reset.out = b;
1151:                        reset.out1 = null;
1152:                        reset.branchOut = null;
1153:
1154:                        return reset;
1155:                    }
1156:                }
1157:                }
1158:            }
1159:
1160:            private final int parseTerm(char[] data, int i, int out, Term term,
1161:                    int flags) throws PatternSyntaxException {
1162:                char c = data[i++];
1163:                boolean inv = false;
1164:                switch (c) {
1165:                case '[':
1166:                    return CharacterClass.parseClass(data, i, out, term,
1167:                            (flags & IGNORE_CASE) > 0,
1168:                            (flags & IGNORE_SPACES) > 0, (flags & UNICODE) > 0,
1169:                            (flags & XML_SCHEMA) > 0);
1170:
1171:                case '.':
1172:                    term.type = (flags & DOTALL) > 0 ? ANY_CHAR : ANY_CHAR_NE;
1173:                    break;
1174:
1175:                case '$':
1176:                    //term.type=mods[MULTILINE_IND]? LINE_END: END; //??
1177:                    term.type = (flags & MULTILINE) > 0 ? LINE_END : END_EOL;
1178:                    break;
1179:
1180:                case '^':
1181:                    term.type = (flags & MULTILINE) > 0 ? LINE_START : START;
1182:                    break;
1183:
1184:                case '\\':
1185:                    if (i >= out)
1186:                        throw new PatternSyntaxException(
1187:                                "Escape without a character");
1188:                    c = data[i++];
1189:                    esc: switch (c) {
1190:                    case 'f':
1191:                        c = '\f'; // form feed
1192:                        break;
1193:
1194:                    case 'n':
1195:                        c = '\n'; // new line
1196:                        break;
1197:
1198:                    case 'r':
1199:                        c = '\r'; // carriage return
1200:                        break;
1201:
1202:                    case 't':
1203:                        c = '\t'; // tab
1204:                        break;
1205:
1206:                    case 'u':
1207:                        if (i + 4 >= out)
1208:                            throw new PatternSyntaxException(
1209:                                    "To few characters for u-escape");
1210:
1211:                        c = (char) ((CharacterClass.toHexDigit(data[i++]) << 12)
1212:                                + (CharacterClass.toHexDigit(data[i++]) << 8)
1213:                                + (CharacterClass.toHexDigit(data[i++]) << 4) + CharacterClass
1214:                                .toHexDigit(data[i++]));
1215:                        break;
1216:
1217:                    case 'v':
1218:                        if (i + 6 >= out)
1219:                            throw new PatternSyntaxException(
1220:                                    "To few characters for u-escape");
1221:                        c = (char) ((CharacterClass.toHexDigit(data[i++]) << 24)
1222:                                + (CharacterClass.toHexDigit(data[i++]) << 16)
1223:                                + (CharacterClass.toHexDigit(data[i++]) << 12)
1224:                                + (CharacterClass.toHexDigit(data[i++]) << 8)
1225:                                + (CharacterClass.toHexDigit(data[i++]) << 4) + CharacterClass
1226:                                .toHexDigit(data[i++]));
1227:                        break;
1228:
1229:                    case 'x': { // hex 2-digit number -> char
1230:                        if (i >= out)
1231:                            throw new PatternSyntaxException(
1232:                                    "To few characters for x-escape");
1233:                        int hex = 0;
1234:                        char d;
1235:                        if ((d = data[i++]) == '{') {
1236:                            while (i < out && (d = data[i++]) != '}') {
1237:                                hex = (hex << 4) + CharacterClass.toHexDigit(d);
1238:                                if (hex > 0xffff)
1239:                                    throw new PatternSyntaxException(
1240:                                            "\\x{<out of range>}");
1241:                            }
1242:                        } else {
1243:                            if (i >= out)
1244:                                throw new PatternSyntaxException(
1245:                                        "To few characters for x-escape");
1246:                            hex = (CharacterClass.toHexDigit(d) << 4)
1247:                                    + CharacterClass.toHexDigit(data[i++]);
1248:                        }
1249:                        c = (char) hex;
1250:                        break;
1251:                    }
1252:                    case '0':
1253:                    case 'o': // oct 2- or 3-digit number -> char
1254:                        int oct = 0;
1255:                        for (;;) {
1256:                            char d = data[i];
1257:                            if (d >= '0' && d <= '7') {
1258:                                i++;
1259:                                oct *= 8;
1260:                                oct += d - '0';
1261:                                if (oct > 0xffff)
1262:                                    break;
1263:                                if (i >= out)
1264:                                    break;
1265:                            } else
1266:                                break;
1267:                        }
1268:                        c = (char) oct;
1269:                        break;
1270:
1271:                    case 'm': // decimal number -> char
1272:                        int dec = 0;
1273:                        for (;;) {
1274:                            char d = data[i++];
1275:                            if (d >= '0' && d <= '9') {
1276:                                dec *= 10;
1277:                                dec += d - '0';
1278:                                if (dec > 0xffff)
1279:                                    break;
1280:                                if (i >= out)
1281:                                    break;
1282:                            } else
1283:                                break;
1284:                        }
1285:                        i--;
1286:                        c = (char) dec;
1287:                        break;
1288:
1289:                    case 'c': // ctrl-char
1290:                        c = (char) (data[i++] & 0x1f);
1291:                        break;
1292:
1293:                    case 'D': // non-digit
1294:                        inv = true;
1295:                        // go on
1296:                    case 'd': // digit
1297:                        CharacterClass.makeDigit(term, inv,
1298:                                (flags & UNICODE) > 0);
1299:                        return i;
1300:
1301:                    case 'S': // non-space
1302:                        inv = true;
1303:                        // go on
1304:                    case 's': // space
1305:                        CharacterClass.makeSpace(term, inv,
1306:                                (flags & UNICODE) > 0);
1307:                        return i;
1308:
1309:                    case 'W': // non-letter
1310:                        inv = true;
1311:                        // go on
1312:                    case 'w': // letter
1313:                        CharacterClass.makeWordChar(term, inv,
1314:                                (flags & UNICODE) > 0);
1315:                        return i;
1316:
1317:                    case 'B': // non-(word boundary)
1318:                        inv = true;
1319:                        // go on
1320:                    case 'b': // word boundary
1321:                        CharacterClass.makeWordBoundary(term, inv,
1322:                                (flags & UNICODE) > 0);
1323:                        return i;
1324:                        /* NOT SUPPORTED IN RUBY                  
1325:                        case '<':   // non-(word boundary)
1326:                        CharacterClass.makeWordStart(term,(flags&UNICODE)>0);
1327:                        return i;
1328:                        
1329:                        case '>':   // word boundary
1330:                        CharacterClass.makeWordEnd(term,(flags&UNICODE)>0);
1331:                        return i;
1332:                         */
1333:                    case 'A': // text beginning
1334:                        term.type = START;
1335:                        return i;
1336:
1337:                    case 'Z': // text end
1338:                        term.type = END_EOL;
1339:                        return i;
1340:
1341:                    case 'z': // text end
1342:                        term.type = END;
1343:                        return i;
1344:
1345:                    case 'G': // end of last match
1346:                        term.type = LAST_MATCH_END;
1347:                        return i;
1348:
1349:                    case 'P': // \\P{..}
1350:                        inv = true;
1351:                    case 'p': // \\p{..}
1352:                        i = CharacterClass.parseName(data, i, out, term, inv,
1353:                                (flags & IGNORE_SPACES) > 0);
1354:                        return i;
1355:
1356:                    default:
1357:                        if (c >= '1' && c <= '9') {
1358:                            int n = c - '0';
1359:                            while ((i < out) && (c = data[i]) >= '0'
1360:                                    && c <= '9') {
1361:                                n = (n * 10) + c - '0';
1362:                                i++;
1363:                            }
1364:                            term.type = (flags & IGNORE_CASE) > 0 ? REG_I : REG;
1365:                            term.memreg = n;
1366:                            return i;
1367:                        }
1368:                        /*
1369:                        if(c<256){
1370:                           CustomParser termp=customParsers[c];
1371:                           if(termp!=null){
1372:                              i=termp.parse(i,data,term);
1373:                              return i;
1374:                           }
1375:                        }
1376:                         */
1377:                    }
1378:                    term.type = CHAR;
1379:                    term.c = c;
1380:                    break;
1381:
1382:                default:
1383:                    if ((flags & IGNORE_CASE) == 0) {
1384:                        term.type = CHAR;
1385:                        term.c = c;
1386:                    } else {
1387:                        CharacterClass.makeICase(term, c);
1388:                    }
1389:                    break;
1390:                }
1391:                return i;
1392:            }
1393:
1394:            // one of {n},{n,},{,n},{n1,n2}
1395:            protected static final int parseLimits(int i, int end, char[] data,
1396:                    int[] limits) throws PatternSyntaxException {
1397:                if (limits.length != LIMITS_LENGTH)
1398:                    throw new IllegalArgumentException("maxTimess.length="
1399:                            + limits.length + ", should be 2");
1400:                limits[LIMITS_PARSE_RESULT_INDEX] = LIMITS_OK;
1401:                int ind = 0;
1402:                int v = 0;
1403:                char c;
1404:                while (i < end) {
1405:                    c = data[i++];
1406:                    switch (c) {
1407:                    case ' ':
1408:                        continue;
1409:
1410:                    case ',':
1411:                        if (ind > 0)
1412:                            throw new PatternSyntaxException(
1413:                                    "illegal construction: {.. , , ..}");
1414:                        limits[ind++] = v;
1415:                        v = -1;
1416:                        continue;
1417:
1418:                    case '}':
1419:                        limits[ind] = v;
1420:                        if (ind == 0)
1421:                            limits[1] = v;
1422:                        return i;
1423:
1424:                    default:
1425:                        if (c > '9' || c < '0') {
1426:                            //throw new PatternSyntaxException("illegal symbol in iterator: '{"+c+"}'");
1427:                            limits[LIMITS_PARSE_RESULT_INDEX] = LIMITS_FAILURE;
1428:                            return i;
1429:                        }
1430:                        if (v < 0)
1431:                            v = 0;
1432:                        v = v * 10 + (c - '0');
1433:                    }
1434:                }
1435:                throw new PatternSyntaxException("malformed quantifier");
1436:            }
1437:
1438:            public String toString() {
1439:                StringBuffer b = new StringBuffer(100);
1440:                b.append(instanceNum);
1441:                b.append(": ");
1442:                if (inverse)
1443:                    b.append('^');
1444:                switch (type) {
1445:                case VOID:
1446:                    b.append("[]");
1447:                    b.append(" , ");
1448:                    break;
1449:                case CHAR:
1450:                    b.append(CharacterClass.stringValue(c));
1451:                    b.append(" , ");
1452:                    break;
1453:                case ANY_CHAR:
1454:                    b.append("dotall, ");
1455:                    break;
1456:                case ANY_CHAR_NE:
1457:                    b.append("dot-eols, ");
1458:                    break;
1459:                case BITSET:
1460:                    b.append('[');
1461:                    b.append(CharacterClass.stringValue0(bitset));
1462:                    b.append(']');
1463:                    b.append(" , weight=");
1464:                    b.append(weight);
1465:                    b.append(" , ");
1466:                    break;
1467:                case BITSET2:
1468:                    b.append('[');
1469:                    b.append(CharacterClass.stringValue2(bitset2));
1470:                    b.append(']');
1471:                    b.append(" , weight=");
1472:                    b.append(weight);
1473:                    b.append(" , ");
1474:                    break;
1475:                case START:
1476:                    b.append("abs.start");
1477:                    break;
1478:                case END:
1479:                    b.append("abs.end");
1480:                    break;
1481:                case END_EOL:
1482:                    b.append("abs.end-eol");
1483:                    break;
1484:                case LINE_START:
1485:                    b.append("line start");
1486:                    break;
1487:                case LINE_END:
1488:                    b.append("line end");
1489:                    break;
1490:                case LAST_MATCH_END:
1491:                    if (inverse)
1492:                        b.append("non-");
1493:                    b.append("BOUNDARY");
1494:                    break;
1495:                case BOUNDARY:
1496:                    if (inverse)
1497:                        b.append("non-");
1498:                    b.append("BOUNDARY");
1499:                    break;
1500:                case UBOUNDARY:
1501:                    if (inverse)
1502:                        b.append("non-");
1503:                    b.append("UBOUNDARY");
1504:                    break;
1505:                case DIRECTION:
1506:                    b.append("DIRECTION");
1507:                    break;
1508:                case UDIRECTION:
1509:                    b.append("UDIRECTION");
1510:                    break;
1511:                case FIND:
1512:                    b.append(">>>{");
1513:                    b.append(target);
1514:                    b.append("}, <<");
1515:                    b.append(distance);
1516:                    if (eat) {
1517:                        b.append(",eat");
1518:                    }
1519:                    b.append(", ");
1520:                    break;
1521:                case REPEAT_0_INF:
1522:                    b.append("rpt{");
1523:                    b.append(target);
1524:                    b.append(",0,inf}");
1525:                    if (failNext != null) {
1526:                        b.append(", =>");
1527:                        b.append(failNext.instanceNum);
1528:                        b.append(", ");
1529:                    }
1530:                    break;
1531:                case REPEAT_MIN_INF:
1532:                    b.append("rpt{");
1533:                    b.append(target);
1534:                    b.append(",");
1535:                    b.append(minCount);
1536:                    b.append(",inf}");
1537:                    if (failNext != null) {
1538:                        b.append(", =>");
1539:                        b.append(failNext.instanceNum);
1540:                        b.append(", ");
1541:                    }
1542:                    break;
1543:                case REPEAT_MIN_MAX:
1544:                    b.append("rpt{");
1545:                    b.append(target);
1546:                    b.append(",");
1547:                    b.append(minCount);
1548:                    b.append(",");
1549:                    b.append(maxCount);
1550:                    b.append("}");
1551:                    if (failNext != null) {
1552:                        b.append(", =>");
1553:                        b.append(failNext.instanceNum);
1554:                        b.append(", ");
1555:                    }
1556:                    break;
1557:                case REPEAT_REG_MIN_INF:
1558:                    b.append("rpt{$");
1559:                    b.append(memreg);
1560:                    b.append(',');
1561:                    b.append(minCount);
1562:                    b.append(",inf}");
1563:                    if (failNext != null) {
1564:                        b.append(", =>");
1565:                        b.append(failNext.instanceNum);
1566:                        b.append(", ");
1567:                    }
1568:                    break;
1569:                case REPEAT_REG_MIN_MAX:
1570:                    b.append("rpt{$");
1571:                    b.append(memreg);
1572:                    b.append(',');
1573:                    b.append(minCount);
1574:                    b.append(',');
1575:                    b.append(maxCount);
1576:                    b.append("}");
1577:                    if (failNext != null) {
1578:                        b.append(", =>");
1579:                        b.append(failNext.instanceNum);
1580:                        b.append(", ");
1581:                    }
1582:                    break;
1583:                case BACKTRACK_0:
1584:                    b.append("back(0)");
1585:                    break;
1586:                case BACKTRACK_MIN:
1587:                    b.append("back(");
1588:                    b.append(minCount);
1589:                    b.append(")");
1590:                    break;
1591:                case BACKTRACK_REG_MIN:
1592:                    b.append("back");
1593:                    b.append("_$");
1594:                    b.append(memreg);
1595:                    b.append("(");
1596:                    b.append(minCount);
1597:                    b.append(")");
1598:                    break;
1599:                case GROUP_IN:
1600:                    b.append('(');
1601:                    if (memreg > 0)
1602:                        b.append(memreg);
1603:                    b.append('-');
1604:                    b.append(" , ");
1605:                    break;
1606:                case GROUP_OUT:
1607:                    b.append('-');
1608:                    if (memreg > 0)
1609:                        b.append(memreg);
1610:                    b.append(')');
1611:                    b.append(" , ");
1612:                    break;
1613:                case PLOOKAHEAD_IN:
1614:                    b.append('(');
1615:                    b.append("=");
1616:                    b.append(lookaheadId);
1617:                    b.append(" , ");
1618:                    break;
1619:                case PLOOKAHEAD_OUT:
1620:                    b.append('=');
1621:                    b.append(lookaheadId);
1622:                    b.append(')');
1623:                    b.append(" , ");
1624:                    break;
1625:                case NLOOKAHEAD_IN:
1626:                    b.append("(!");
1627:                    b.append(lookaheadId);
1628:                    b.append(" , ");
1629:                    if (failNext != null) {
1630:                        b.append(", =>");
1631:                        b.append(failNext.instanceNum);
1632:                        b.append(", ");
1633:                    }
1634:                    break;
1635:                case NLOOKAHEAD_OUT:
1636:                    b.append('!');
1637:                    b.append(lookaheadId);
1638:                    b.append(')');
1639:                    b.append(" , ");
1640:                    break;
1641:                case PLOOKBEHIND_IN:
1642:                    b.append('(');
1643:                    b.append("<=");
1644:                    b.append(lookaheadId);
1645:                    b.append(" , dist=");
1646:                    b.append(distance);
1647:                    b.append(" , ");
1648:                    break;
1649:                case PLOOKBEHIND_OUT:
1650:                    b.append("<=");
1651:                    b.append(lookaheadId);
1652:                    b.append(')');
1653:                    b.append(" , ");
1654:                    break;
1655:                case NLOOKBEHIND_IN:
1656:                    b.append("(<!");
1657:                    b.append(lookaheadId);
1658:                    b.append(" , dist=");
1659:                    b.append(distance);
1660:                    b.append(" , ");
1661:                    if (failNext != null) {
1662:                        b.append(", =>");
1663:                        b.append(failNext.instanceNum);
1664:                        b.append(", ");
1665:                    }
1666:                    break;
1667:                case NLOOKBEHIND_OUT:
1668:                    b.append("<!");
1669:                    b.append(lookaheadId);
1670:                    b.append(')');
1671:                    b.append(" , ");
1672:                    break;
1673:                case MEMREG_CONDITION:
1674:                    b.append("(reg");
1675:                    b.append(memreg);
1676:                    b.append("?)");
1677:                    if (failNext != null) {
1678:                        b.append(", =>");
1679:                        b.append(failNext.instanceNum);
1680:                        b.append(", ");
1681:                    }
1682:                    break;
1683:                case LOOKAHEAD_CONDITION_IN:
1684:                    b.append("(cond");
1685:                    b.append(lookaheadId);
1686:                    b.append(((Lookahead) this ).isPositive ? '=' : '!');
1687:                    b.append(" , ");
1688:                    if (failNext != null) {
1689:                        b.append(", =>");
1690:                        b.append(failNext.instanceNum);
1691:                        b.append(", ");
1692:                    }
1693:                    break;
1694:                case LOOKAHEAD_CONDITION_OUT:
1695:                    b.append("cond");
1696:                    b.append(lookaheadId);
1697:                    b.append(")");
1698:                    if (failNext != null) {
1699:                        b.append(", =>");
1700:                        b.append(failNext.instanceNum);
1701:                        b.append(", ");
1702:                    }
1703:                    break;
1704:                case REG:
1705:                    b.append("$");
1706:                    b.append(memreg);
1707:                    b.append(", ");
1708:                    break;
1709:                case SUCCESS:
1710:                    b.append("END");
1711:                    break;
1712:                case BRANCH_STORE_CNT_AUX1:
1713:                    b.append("(aux1)");
1714:                case BRANCH_STORE_CNT:
1715:                    b.append("(cnt)");
1716:                case BRANCH:
1717:                    b.append("=>");
1718:                    if (failNext != null)
1719:                        b.append(failNext.instanceNum);
1720:                    else
1721:                        b.append("null");
1722:                    b.append(" , ");
1723:                    break;
1724:                default:
1725:                    b.append('[');
1726:                    switch (type) {
1727:                    case CNT_SET_0:
1728:                        b.append("cnt=0");
1729:                        break;
1730:                    case CNT_INC:
1731:                        b.append("cnt++");
1732:                        break;
1733:                    case CNT_GT_EQ:
1734:                        b.append("cnt>=" + maxCount);
1735:                        break;
1736:                    case READ_CNT_LT:
1737:                        b.append("->cnt<" + maxCount);
1738:                        break;
1739:                    case CRSTORE_CRINC:
1740:                        b.append("M(" + memreg + ")->,Cr(" + cntreg + ")->,Cr("
1741:                                + cntreg + ")++");
1742:                        break;
1743:                    case CR_SET_0:
1744:                        b.append("Cr(" + cntreg + ")=0");
1745:                        break;
1746:                    case CR_LT:
1747:                        b.append("Cr(" + cntreg + ")<" + maxCount);
1748:                        break;
1749:                    case CR_GT_EQ:
1750:                        b.append("Cr(" + cntreg + ")>=" + maxCount);
1751:                        break;
1752:                    default:
1753:                        b.append("unknown type: " + type);
1754:                    }
1755:                    b.append("] , ");
1756:                }
1757:                if (next != null) {
1758:                    b.append("->");
1759:                    b.append(next.instanceNum);
1760:                    b.append(", ");
1761:                }
1762:                //b.append("\r\n");
1763:                return b.toString();
1764:            }
1765:
1766:            public String toStringAll() {
1767:                return toStringAll(new Vector());
1768:            }
1769:
1770:            public String toStringAll(Vector v) {
1771:                v.addElement(new Integer(instanceNum));
1772:                String s = toString();
1773:                if (next != null) {
1774:                    if (!v.contains(new Integer(next.instanceNum))) {
1775:                        s += "\r\n";
1776:                        s += next.toStringAll(v);
1777:                    }
1778:                }
1779:                if (failNext != null) {
1780:                    if (!v.contains(new Integer(failNext.instanceNum))) {
1781:                        s += "\r\n";
1782:                        s += failNext.toStringAll(v);
1783:                    }
1784:                }
1785:                return s;
1786:            }
1787:        }
1788:
1789:        class Pretokenizer {
1790:            private static final int START = 1;
1791:            static final int END = 2;
1792:            static final int PLAIN_GROUP = 3;
1793:            static final int POS_LOOKAHEAD = 4;
1794:            static final int NEG_LOOKAHEAD = 5;
1795:            static final int POS_LOOKBEHIND = 6;
1796:            static final int NEG_LOOKBEHIND = 7;
1797:            static final int INDEPENDENT_REGEX = 8;
1798:            static final int COMMENT = 9;
1799:            static final int CONDITIONAL_GROUP = 10;
1800:            static final int FLAGS = 11;
1801:            static final int CLASS_GROUP = 12;
1802:            static final int NAMED_GROUP = 13;
1803:
1804:            int tOffset, tOutside, skip;
1805:            int offset, end;
1806:            int c;
1807:
1808:            int ttype = START;
1809:
1810:            char[] data;
1811:
1812:            //results
1813:            private int flags;
1814:            private boolean flagsChanged;
1815:
1816:            char[] brackets;
1817:            String groupName;
1818:            boolean groupDeclared;
1819:
1820:            Pretokenizer(char[] data, int offset, int end) {
1821:                if (offset < 0 || end > data.length)
1822:                    throw new IndexOutOfBoundsException("offset=" + offset
1823:                            + ", end=" + end + ", length=" + data.length);
1824:                this .offset = offset;
1825:                this .end = end;
1826:
1827:                this .tOffset = offset;
1828:                this .tOutside = offset;
1829:
1830:                this .data = data;
1831:            }
1832:
1833:            int flags(int def) {
1834:                return flagsChanged ? flags : def;
1835:            }
1836:
1837:            void next() throws PatternSyntaxException {
1838:                int tOffset = this .tOutside;
1839:                int skip = this .skip;
1840:
1841:                tOffset += skip;
1842:                flagsChanged = false;
1843:
1844:                int end = this .end;
1845:                char[] data = this .data;
1846:                boolean esc = false;
1847:                for (int i = tOffset; i < end; i++) {
1848:                    if (esc) {
1849:                        esc = false;
1850:                        continue;
1851:                    }
1852:                    char c = data[i];
1853:                    switch (c) {
1854:                    case '\\':
1855:                        esc = true;
1856:                        continue;
1857:                    case '|':
1858:                    case ')':
1859:                        ttype = c;
1860:                        this .tOffset = tOffset;
1861:                        this .tOutside = i;
1862:                        this .skip = 1;
1863:                        return;
1864:                    case '(':
1865:                        if (((i + 2) < end) && (data[i + 1] == '?')) {
1866:                            char c1 = data[i + 2];
1867:                            switch (c1) {
1868:                            case ':':
1869:                                ttype = PLAIN_GROUP;
1870:                                skip = 3; // "(?:" - skip 3 chars
1871:                                break;
1872:                            case '=':
1873:                                ttype = POS_LOOKAHEAD;
1874:                                skip = 3; // "(?="
1875:                                break;
1876:                            case '!':
1877:                                ttype = NEG_LOOKAHEAD;
1878:                                skip = 3; // "(?!"
1879:                                break;
1880:                            case '<':
1881:                                switch (c1 = data[i + 3]) {
1882:                                case '=':
1883:                                    ttype = POS_LOOKBEHIND;
1884:                                    skip = 4; // "(?<="
1885:                                    break;
1886:                                case '!':
1887:                                    ttype = NEG_LOOKBEHIND;
1888:                                    skip = 4; // "(?<!"
1889:                                    break;
1890:                                default:
1891:                                    throw new PatternSyntaxException(
1892:                                            "invalid character after '(?<' : "
1893:                                                    + c1);
1894:                                }
1895:                                break;
1896:                            case '>':
1897:                                ttype = INDEPENDENT_REGEX;
1898:                                skip = 3; // "(?>"
1899:                                break;
1900:                            case '#':
1901:                                ttype = COMMENT;
1902:                                skip = 3; // ="(?#".length, the makeTree() skips the rest by itself
1903:                                break;
1904:                            case '(':
1905:                                ttype = CONDITIONAL_GROUP;
1906:                                skip = 2; //"(?"+"(..." - skip "(?" (2 chars) and parse condition as a group
1907:                                break;
1908:                            case '[':
1909:                                ttype = CLASS_GROUP;
1910:                                skip = 2; // "(?"+"[..]+...-...&...)" - skip 2 chars and parse a class group
1911:                                break;
1912:                            default:
1913:                                int mOff,
1914:                                mLen;
1915:                                mLoop: for (int p = i + 2; p < end; p++) {
1916:                                    char c2 = data[p];
1917:                                    switch (c2) {
1918:                                    case '-':
1919:                                    case 'i':
1920:                                    case 'm':
1921:                                    case 's':
1922:                                    case 'x':
1923:                                    case 'u':
1924:                                    case 'X':
1925:                                        //System.out.println("case '+-imsxuX' ("+c2+")");
1926:                                        continue mLoop;
1927:
1928:                                    case ':':
1929:                                        mOff = i + 2;
1930:                                        mLen = p - mOff;
1931:                                        if (mLen > 0) {
1932:                                            flags = Pattern.parseFlags(data,
1933:                                                    mOff, mLen);
1934:                                            flagsChanged = true;
1935:                                        }
1936:                                        ttype = PLAIN_GROUP;
1937:                                        skip = mLen + 3; // "(?imsx:" mLen=4; skip= "(?".len + ":".len + mLen = 2+1+4=7
1938:                                        break mLoop;
1939:                                    case ')':
1940:                                        flags = Pattern.parseFlags(data,
1941:                                                mOff = (i + 2),
1942:                                                mLen = (p - mOff));
1943:                                        flagsChanged = true;
1944:                                        ttype = FLAGS;
1945:                                        skip = mLen + 3; // "(?imsx)" mLen=4, skip="(?".len+")".len+mLen=2+1+4=7
1946:                                        break mLoop;
1947:                                    default:
1948:                                        throw new PatternSyntaxException(
1949:                                                "wrong char after \"(?\": "
1950:                                                        + c2);
1951:                                    }
1952:                                }
1953:                                break;
1954:                            }
1955:                        } else if (((i + 2) < end) && (data[i + 1] == '{')) { //parse named group: ({name}....),({=name}....)
1956:                            int p = i + 2;
1957:                            skip = 3; //'({' + '}'
1958:                            int nstart, nend;
1959:                            boolean isDecl;
1960:                            c = data[p];
1961:                            //System.out.println("NG: p="+p+", c="+c);
1962:                            while (Character.isWhitespace(c)) {
1963:                                c = data[++p];
1964:                                skip++;
1965:                                if (p == end)
1966:                                    throw new PatternSyntaxException(
1967:                                            "malformed named group");
1968:                            }
1969:
1970:                            if (c == '=') {
1971:                                isDecl = false;
1972:                                c = data[++p];
1973:                                skip++;
1974:                                if (p == end)
1975:                                    throw new PatternSyntaxException(
1976:                                            "malformed named group");
1977:                            } else
1978:                                isDecl = true;
1979:
1980:                            nstart = p;
1981:                            while (Character.isJavaIdentifierPart(c)) {
1982:                                c = data[++p];
1983:                                skip++;
1984:                                if (p == end)
1985:                                    throw new PatternSyntaxException(
1986:                                            "malformed named group");
1987:                            }
1988:                            nend = p;
1989:                            while (Character.isWhitespace(c)) {
1990:                                c = data[++p];
1991:                                skip++;
1992:                                if (p == end)
1993:                                    throw new PatternSyntaxException(
1994:                                            "malformed named group");
1995:                            }
1996:                            if (c != '}')
1997:                                throw new PatternSyntaxException(
1998:                                        "'}' expected at " + (p - i) + " in "
1999:                                                + new String(data, i, end - i));
2000:
2001:                            this .groupName = new String(data, nstart, nend
2002:                                    - nstart);
2003:                            this .groupDeclared = isDecl;
2004:                            ttype = NAMED_GROUP;
2005:                        } else {
2006:                            ttype = '(';
2007:                            skip = 1;
2008:                        }
2009:                        this .tOffset = tOffset;
2010:                        this .tOutside = i;
2011:                        this .skip = skip;
2012:                        return;
2013:                    case '[':
2014:                        loop: for (;; i++) {
2015:                            if (i == end)
2016:                                throw new PatternSyntaxException(
2017:                                        "malformed character class");
2018:                            char c1 = data[i];
2019:                            switch (c1) {
2020:                            case '\\':
2021:                                i++;
2022:                                continue;
2023:                            case ']':
2024:                                break loop;
2025:                            }
2026:                        }
2027:                    }
2028:                }
2029:                ttype = END;
2030:                this .tOffset = tOffset;
2031:                this .tOutside = end;
2032:            }
2033:
2034:        }
2035:
2036:        class Branch extends Term {
2037:            Branch() {
2038:                type = BRANCH;
2039:            }
2040:
2041:            Branch(int type) {
2042:                switch (type) {
2043:                case BRANCH:
2044:                case BRANCH_STORE_CNT:
2045:                case BRANCH_STORE_CNT_AUX1:
2046:                    this .type = type;
2047:                    break;
2048:                default:
2049:                    throw new IllegalArgumentException("not a branch type: "
2050:                            + type);
2051:                }
2052:            }
2053:        }
2054:
2055:        class BackReference extends Term {
2056:            BackReference(int no, boolean icase) {
2057:                super (icase ? REG_I : REG);
2058:                memreg = no;
2059:            }
2060:        }
2061:
2062:        class Group extends Term {
2063:            Group() {
2064:                this (0);
2065:            }
2066:
2067:            Group(int memreg) {
2068:                type = GROUP_IN;
2069:                this .memreg = memreg;
2070:
2071:                //used in append()
2072:                current = null;
2073:                in = this ;
2074:                prev = null;
2075:
2076:                out = new Term();
2077:                out.type = GROUP_OUT;
2078:                out.memreg = memreg;
2079:            }
2080:        }
2081:
2082:        class ConditionalExpr extends Group {
2083:            protected Term node;
2084:            protected boolean newBranchStarted = false;
2085:            protected boolean linkAsBranch = true;
2086:
2087:            ConditionalExpr(Lookahead la) {
2088:                super (0);
2089:                //System.out.println("ConditionalExpr("+la+")");
2090:                /*
2091:                 * This all is rather tricky.
2092:                 * See how this types are handled in Matcher.
2093:                 * The shortcoming is that we strongly rely upon 
2094:                 * the internal structure of Lookahead.
2095:                 */
2096:                la.in.type = LOOKAHEAD_CONDITION_IN;
2097:                la.out.type = LOOKAHEAD_CONDITION_OUT;
2098:                if (la.isPositive) {
2099:                    node = la.in;
2100:                    linkAsBranch = true;
2101:
2102:                    //empty 2'nd branch
2103:                    node.failNext = out;
2104:                } else {
2105:                    node = la.out;
2106:                    linkAsBranch = false;
2107:
2108:                    //empty 2'nd branch
2109:                    node.next = out;
2110:                }
2111:
2112:                //node.prev=in;
2113:                //in.next=node;
2114:
2115:                la.prev = in;
2116:                in.next = la;
2117:
2118:                current = la;
2119:                //current=node;
2120:            }
2121:
2122:            ConditionalExpr(Lookbehind lb) {
2123:                super (0);
2124:                //System.out.println("ConditionalExpr("+la+")");
2125:                /*
2126:                 * This all is rather tricky.
2127:                 * See how this types are handled in Matcher.
2128:                 * The shortcoming is that we strongly rely upon 
2129:                 * the internal structure of Lookahead.
2130:                 */
2131:                lb.in.type = LOOKBEHIND_CONDITION_IN;
2132:                lb.out.type = LOOKBEHIND_CONDITION_OUT;
2133:                if (lb.isPositive) {
2134:                    node = lb.in;
2135:                    linkAsBranch = true;
2136:
2137:                    //empty 2'nd branch
2138:                    node.failNext = out;
2139:                } else {
2140:                    node = lb.out;
2141:                    linkAsBranch = false;
2142:
2143:                    //empty 2'nd branch
2144:                    node.next = out;
2145:                }
2146:
2147:                lb.prev = in;
2148:                in.next = lb;
2149:
2150:                current = lb;
2151:                //current=node;
2152:            }
2153:
2154:            ConditionalExpr(int memreg) {
2155:                super (0);
2156:                //System.out.println("ConditionalExpr("+memreg+")");
2157:                Term condition = new Term(MEMREG_CONDITION);
2158:                condition.memreg = memreg;
2159:                condition.out = condition;
2160:                condition.out1 = null;
2161:                condition.branchOut = null;
2162:
2163:                //default branch
2164:                condition.failNext = out;
2165:
2166:                node = current = condition;
2167:                linkAsBranch = true;
2168:
2169:                condition.prev = in;
2170:                in.next = condition;
2171:
2172:                current = condition;
2173:            }
2174:
2175:            protected void startNewBranch() throws PatternSyntaxException {
2176:                if (newBranchStarted)
2177:                    throw new PatternSyntaxException(
2178:                            "attempt to set a 3'd choice in a conditional expr.");
2179:                Term node = this .node;
2180:                node.out1 = null;
2181:                if (linkAsBranch) {
2182:                    node.out = null;
2183:                    node.branchOut = node;
2184:                } else {
2185:                    node.out = node;
2186:                    node.branchOut = null;
2187:                }
2188:                newBranchStarted = true;
2189:                //System.out.println("CondGrp.startNewBranch(): current="+current+", this="+this.toStringAll());
2190:                current = node;
2191:            }
2192:        }
2193:
2194:        class IndependentGroup extends Term {
2195:            IndependentGroup(int id) {
2196:                super (0);
2197:                in = this ;
2198:                out = new Term();
2199:                type = INDEPENDENT_IN;
2200:                out.type = INDEPENDENT_OUT;
2201:                lookaheadId = out.lookaheadId = id;
2202:            }
2203:        }
2204:
2205:        class Lookahead extends Term {
2206:            final boolean isPositive;
2207:
2208:            Lookahead(int id, boolean isPositive) {
2209:                this .isPositive = isPositive;
2210:                in = this ;
2211:                out = new Term();
2212:                if (isPositive) {
2213:                    type = PLOOKAHEAD_IN;
2214:                    out.type = PLOOKAHEAD_OUT;
2215:                } else {
2216:                    type = NLOOKAHEAD_IN;
2217:                    out.type = NLOOKAHEAD_OUT;
2218:                    branchOut = this ;
2219:                }
2220:                lookaheadId = id;
2221:                out.lookaheadId = id;
2222:            }
2223:        }
2224:
2225:        class Lookbehind extends Term {
2226:            final boolean isPositive;
2227:            private int prevDistance = -1;
2228:
2229:            Lookbehind(int id, boolean isPositive) {
2230:                distance = 0;
2231:                this .isPositive = isPositive;
2232:                in = this ;
2233:                out = new Term();
2234:                if (isPositive) {
2235:                    type = PLOOKBEHIND_IN;
2236:                    out.type = PLOOKBEHIND_OUT;
2237:                } else {
2238:                    type = NLOOKBEHIND_IN;
2239:                    out.type = NLOOKBEHIND_OUT;
2240:                    branchOut = this ;
2241:                }
2242:                lookaheadId = id;
2243:                out.lookaheadId = id;
2244:            }
2245:
2246:            protected Term append(Term t) throws PatternSyntaxException {
2247:                distance += length(t);
2248:                return super .append(t);
2249:            }
2250:
2251:            protected Term replaceCurrent(Term t) throws PatternSyntaxException {
2252:                distance += length(t) - length(current);
2253:                return super .replaceCurrent(t);
2254:            }
2255:
2256:            private static int length(Term t) throws PatternSyntaxException {
2257:                int type = t.type;
2258:                switch (type) {
2259:                case CHAR:
2260:                case BITSET:
2261:                case BITSET2:
2262:                case ANY_CHAR:
2263:                case ANY_CHAR_NE:
2264:                    return 1;
2265:                case BOUNDARY:
2266:                case DIRECTION:
2267:                case UBOUNDARY:
2268:                case UDIRECTION:
2269:                    return 0;
2270:                default:
2271:                    if (type >= FIRST_TRANSPARENT && type <= LAST_TRANSPARENT)
2272:                        return 0;
2273:                    throw new PatternSyntaxException(
2274:                            "variable length element within a lookbehind assertion");
2275:                }
2276:            }
2277:
2278:            protected void startNewBranch() throws PatternSyntaxException {
2279:                prevDistance = distance;
2280:                distance = 0;
2281:                super .startNewBranch();
2282:            }
2283:
2284:            protected void close() throws PatternSyntaxException {
2285:                int pd = prevDistance;
2286:                if (pd >= 0) {
2287:                    if (distance != pd)
2288:                        throw new PatternSyntaxException(
2289:                                "non-equal branch lengths within a lookbehind assertion");
2290:                }
2291:                super .close();
2292:            }
2293:        }
2294:
2295:        class Iterator extends Term {
2296:
2297:            Iterator(Term term, int min, int max, Vector collection)
2298:                    throws PatternSyntaxException {
2299:                collection.addElement(this );
2300:                switch (term.type) {
2301:                case CHAR:
2302:                case ANY_CHAR:
2303:                case ANY_CHAR_NE:
2304:                case BITSET:
2305:                case BITSET2: {
2306:                    target = term;
2307:                    Term back = new Term();
2308:                    if (min <= 0 && max < 0) {
2309:                        type = REPEAT_0_INF;
2310:                        back.type = BACKTRACK_0;
2311:                    } else if (min > 0 && max < 0) {
2312:                        type = REPEAT_MIN_INF;
2313:                        back.type = BACKTRACK_MIN;
2314:                        minCount = back.minCount = min;
2315:                    } else {
2316:                        type = REPEAT_MIN_MAX;
2317:                        back.type = BACKTRACK_MIN;
2318:                        minCount = back.minCount = min;
2319:                        maxCount = max;
2320:                    }
2321:
2322:                    failNext = back;
2323:
2324:                    in = this ;
2325:                    out = this ;
2326:                    out1 = back;
2327:                    branchOut = null;
2328:                    return;
2329:                }
2330:                case REG: {
2331:                    target = term;
2332:                    memreg = term.memreg;
2333:                    Term back = new Term();
2334:                    if (max < 0) {
2335:                        type = REPEAT_REG_MIN_INF;
2336:                        back.type = BACKTRACK_REG_MIN;
2337:                        minCount = back.minCount = min;
2338:                    } else {
2339:                        type = REPEAT_REG_MIN_MAX;
2340:                        back.type = BACKTRACK_REG_MIN;
2341:                        minCount = back.minCount = min;
2342:                        maxCount = max;
2343:                    }
2344:
2345:                    failNext = back;
2346:
2347:                    in = this ;
2348:                    out = this ;
2349:                    out1 = back;
2350:                    branchOut = null;
2351:                    return;
2352:                }
2353:                default:
2354:                    throw new PatternSyntaxException(
2355:                            "can't iterate this type: " + term.type);
2356:                }
2357:            }
2358:
2359:            void optimize() {
2360:                //System.out.println("optimizing myself: "+this);
2361:                //BACKTRACK_MIN_REG_FIND
2362:                Term back = failNext;
2363:                Optimizer opt = Optimizer.find(back.next);
2364:                if (opt == null)
2365:                    return;
2366:                failNext = opt.makeBacktrack(back);
2367:            }
2368:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.