Source Code Cross Referenced for CharacterClass.java in  » Scripting » jruby » jregex » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Scripting » jruby » jregex 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /**
002:         * Copyright (c) 2001, Sergey A. Samokhodkin
003:         * All rights reserved.
004:         * 
005:         * Redistribution and use in source and binary forms, with or without modification, 
006:         * are permitted provided that the following conditions are met:
007:         * 
008:         * - Redistributions of source code must retain the above copyright notice, 
009:         * this list of conditions and the following disclaimer. 
010:         * - Redistributions in binary form 
011:         * must reproduce the above copyright notice, this list of conditions and the following 
012:         * disclaimer in the documentation and/or other materials provided with the distribution.
013:         * - Neither the name of jregex nor the names of its contributors may be used 
014:         * to endorse or promote products derived from this software without specific prior 
015:         * written permission. 
016:         * 
017:         * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY 
018:         * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 
019:         * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
020:         * IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, 
021:         * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 
022:         * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 
023:         * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 
024:         * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY 
025:         * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
026:         * 
027:         * @version 1.2_01
028:         */package jregex;
029:
030:        import java.util.*;
031:
032:        class CharacterClass extends Term implements  UnicodeConstants {
033:            static final Bitset DIGIT = new Bitset();
034:            static final Bitset WORDCHAR = new Bitset();
035:            static final Bitset SPACE = new Bitset();
036:
037:            static final Bitset UDIGIT = new Bitset();
038:            static final Bitset UWORDCHAR = new Bitset();
039:            static final Bitset USPACE = new Bitset();
040:
041:            static final Bitset NONDIGIT = new Bitset();
042:            static final Bitset NONWORDCHAR = new Bitset();
043:            static final Bitset NONSPACE = new Bitset();
044:
045:            static final Bitset UNONDIGIT = new Bitset();
046:            static final Bitset UNONWORDCHAR = new Bitset();
047:            static final Bitset UNONSPACE = new Bitset();
048:
049:            private static boolean namesInitialized = false;
050:
051:            static final Hashtable namedClasses = new Hashtable();
052:            static final Vector unicodeBlocks = new Vector();
053:            static final Vector posixClasses = new Vector();
054:            static final Vector unicodeCategories = new Vector();
055:
056:            //modes; used in parseGroup(()
057:            private final static int ADD = 1;
058:            private final static int SUBTRACT = 2;
059:            private final static int INTERSECT = 3;
060:
061:            private static final String blockData = "0000..007F:InBasicLatin;0080..00FF:InLatin-1Supplement;0100..017F:InLatinExtended-A;"
062:                    + "0180..024F:InLatinExtended-B;0250..02AF:InIPAExtensions;02B0..02FF:InSpacingModifierLetters;"
063:                    + "0300..036F:InCombiningDiacriticalMarks;0370..03FF:InGreek;0400..04FF:InCyrillic;0530..058F:InArmenian;"
064:                    + "0590..05FF:InHebrew;0600..06FF:InArabic;0700..074F:InSyriac;0780..07BF:InThaana;0900..097F:InDevanagari;"
065:                    + "0980..09FF:InBengali;0A00..0A7F:InGurmukhi;0A80..0AFF:InGujarati;0B00..0B7F:InOriya;0B80..0BFF:InTamil;"
066:                    + "0C00..0C7F:InTelugu;0C80..0CFF:InKannada;0D00..0D7F:InMalayalam;0D80..0DFF:InSinhala;0E00..0E7F:InThai;"
067:                    + "0E80..0EFF:InLao;0F00..0FFF:InTibetan;1000..109F:InMyanmar;10A0..10FF:InGeorgian;1100..11FF:InHangulJamo;"
068:                    + "1200..137F:InEthiopic;13A0..13FF:InCherokee;1400..167F:InUnifiedCanadianAboriginalSyllabics;"
069:                    + "1680..169F:InOgham;16A0..16FF:InRunic;1780..17FF:InKhmer;1800..18AF:InMongolian;"
070:                    + "1E00..1EFF:InLatinExtendedAdditional;1F00..1FFF:InGreekExtended;2000..206F:InGeneralPunctuation;"
071:                    + "2070..209F:InSuperscriptsAndSubscripts;20A0..20CF:InCurrencySymbols;"
072:                    + "20D0..20FF:InCombiningMarksForSymbols;2100..214F:InLetterLikeSymbols;2150..218F:InNumberForms;"
073:                    + "2190..21FF:InArrows;2200..22FF:InMathematicalOperators;2300..23FF:InMiscellaneousTechnical;"
074:                    + "2400..243F:InControlPictures;2440..245F:InOpticalCharacterRecognition;"
075:                    + "2460..24FF:InEnclosedAlphanumerics;2500..257F:InBoxDrawing;2580..259F:InBlockElements;"
076:                    + "25A0..25FF:InGeometricShapes;2600..26FF:InMiscellaneousSymbols;2700..27BF:InDingbats;"
077:                    + "2800..28FF:InBraillePatterns;2E80..2EFF:InCJKRadicalsSupplement;2F00..2FDF:InKangxiRadicals;"
078:                    + "2FF0..2FFF:InIdeographicDescriptionCharacters;3000..303F:InCJKSymbolsAndPunctuation;"
079:                    + "3040..309F:InHiragana;30A0..30FF:InKatakana;3100..312F:InBopomofo;3130..318F:InHangulCompatibilityJamo;"
080:                    + "3190..319F:InKanbun;31A0..31BF:InBopomofoExtended;3200..32FF:InEnclosedCJKLettersAndMonths;"
081:                    + "3300..33FF:InCJKCompatibility;3400..4DB5:InCJKUnifiedIdeographsExtensionA;"
082:                    + "4E00..9FFF:InCJKUnifiedIdeographs;A000..A48F:InYiSyllables;A490..A4CF:InYiRadicals;"
083:                    + "AC00..D7A3:InHangulSyllables;D800..DB7F:InHighSurrogates;DB80..DBFF:InHighPrivateUseSurrogates;"
084:                    + "DC00..DFFF:InLowSurrogates;E000..F8FF:InPrivateUse;F900..FAFF:InCJKCompatibilityIdeographs;"
085:                    + "FB00..FB4F:InAlphabeticPresentationForms;FB50..FDFF:InArabicPresentationForms-A;"
086:                    + "FE20..FE2F:InCombiningHalfMarks;FE30..FE4F:InCJKCompatibilityForms;FE50..FE6F:InSmallFormVariants;"
087:                    + "FE70..FEFE:InArabicPresentationForms-B;FEFF..FEFF:InSpecials;FF00..FFEF:InHalfWidthAndFullWidthForms;"
088:                    + "FFF0..FFFD:InSpecials";
089:
static {
    // Positive sets: the plain names are configured with 'false',
    // the U-prefixed names with 'true'.
    DIGIT.setDigit(false);
    WORDCHAR.setWordChar(false);
    SPACE.setSpace(false);

    UDIGIT.setDigit(true);
    UWORDCHAR.setWordChar(true);
    USPACE.setSpace(true);

    // Negated sets: configured like their positive counterparts and then
    // marked non-positive.
    NONDIGIT.setDigit(false);
    NONDIGIT.setPositive(false);
    NONWORDCHAR.setWordChar(false);
    NONWORDCHAR.setPositive(false);
    NONSPACE.setSpace(false);
    NONSPACE.setPositive(false);

    UNONDIGIT.setDigit(true);
    UNONDIGIT.setPositive(false);
    UNONWORDCHAR.setWordChar(true);
    UNONWORDCHAR.setPositive(false);
    UNONSPACE.setSpace(true);
    UNONSPACE.setPositive(false);

    // The POSIX names are registered eagerly at class-load time; the
    // remaining named classes are set up by initNames() on first lookup.
    initPosixClasses();
}
116:
117:            private static void registerClass(String name, Bitset cls,
118:                    Vector realm) {
119:                namedClasses.put(name, cls);
120:                if (!realm.contains(name))
121:                    realm.addElement(name);
122:            }
123:
124:            private static void initPosixClasses() {
125:                Bitset lower = new Bitset();
126:                lower.setRange('a', 'z');
127:                registerClass("Lower", lower, posixClasses);
128:                Bitset upper = new Bitset();
129:                upper.setRange('A', 'Z');
130:                registerClass("Upper", upper, posixClasses);
131:                Bitset ascii = new Bitset();
132:                ascii.setRange((char) 0, (char) 0x7f);
133:                registerClass("ASCII", ascii, posixClasses);
134:                Bitset alpha = new Bitset();
135:                alpha.add(lower);
136:                alpha.add(upper);
137:                registerClass("Alpha", alpha, posixClasses);
138:                Bitset digit = new Bitset();
139:                digit.setRange('0', '9');
140:                registerClass("Digit", digit, posixClasses);
141:                Bitset alnum = new Bitset();
142:                alnum.add(alpha);
143:                alnum.add(digit);
144:                registerClass("Alnum", alnum, posixClasses);
145:                Bitset punct = new Bitset();
146:                punct.setChars("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~");
147:                registerClass("Punct", punct, posixClasses);
148:                Bitset graph = new Bitset();
149:                graph.add(alnum);
150:                graph.add(punct);
151:                registerClass("Graph", graph, posixClasses);
152:                registerClass("Print", graph, posixClasses);
153:                Bitset blank = new Bitset();
154:                blank.setChars(" \t");
155:                registerClass("Blank", blank, posixClasses);
156:                Bitset cntrl = new Bitset();
157:                cntrl.setRange((char) 0, (char) 0x1f);
158:                cntrl.setChar((char) 0x7f);
159:                registerClass("Cntrl", cntrl, posixClasses);
160:                Bitset xdigit = new Bitset();
161:                xdigit.setRange('0', '9');
162:                xdigit.setRange('a', 'f');
163:                xdigit.setRange('A', 'F');
164:                registerClass("XDigit", xdigit, posixClasses);
165:                Bitset space = new Bitset();
166:                space.setChars(" \t\n\r\f\u000b");
167:                registerClass("Space", space, posixClasses);
168:            }
169:
170:            private static void initNames() {
171:                initNamedCategory("C", new int[] { Cn, Cc, Cf, Co, Cs });
172:                initNamedCategory("Cn", Cn);
173:                initNamedCategory("Cc", Cc);
174:                initNamedCategory("Cf", Cf);
175:                initNamedCategory("Co", Co);
176:                initNamedCategory("Cs", Cs);
177:
178:                initNamedCategory("L", new int[] { Lu, Ll, Lt, Lm, Lo });
179:                initNamedCategory("Lu", Lu);
180:                initNamedCategory("Ll", Ll);
181:                initNamedCategory("Lt", Lt);
182:                initNamedCategory("Lm", Lm);
183:                initNamedCategory("Lo", Lo);
184:
185:                initNamedCategory("M", new int[] { Mn, Me, Mc });
186:                initNamedCategory("Mn", Mn);
187:                initNamedCategory("Me", Me);
188:                initNamedCategory("Mc", Mc);
189:
190:                initNamedCategory("N", new int[] { Nd, Nl, No });
191:                initNamedCategory("Nd", Nd);
192:                initNamedCategory("Nl", Nl);
193:                initNamedCategory("No", No);
194:
195:                initNamedCategory("Z", new int[] { Zs, Zl, Zp });
196:                initNamedCategory("Zs", Zs);
197:                initNamedCategory("Zl", Zl);
198:                initNamedCategory("Zp", Zp);
199:
200:                initNamedCategory("P", new int[] { Pd, Ps, Pi, Pe, Pf, Pc, Po });
201:                initNamedCategory("Pd", Pd);
202:                initNamedCategory("Ps", Ps);
203:                initNamedCategory("Pi", Pi);
204:                initNamedCategory("Pe", Pe);
205:                initNamedCategory("Pf", Pf);
206:                initNamedCategory("Pc", Pc);
207:                initNamedCategory("Po", Po);
208:
209:                initNamedCategory("S", new int[] { Sm, Sc, Sk, So });
210:                initNamedCategory("Sm", Sm);
211:                initNamedCategory("Sc", Sc);
212:                initNamedCategory("Sk", Sk);
213:                initNamedCategory("So", So);
214:
215:                Bitset bs = new Bitset();
216:                bs.setCategory(Cn);
217:                registerClass("UNASSIGNED", bs, unicodeCategories);
218:                bs = new Bitset();
219:                bs.setCategory(Cn);
220:                bs.setPositive(false);
221:                registerClass("ASSIGNED", bs, unicodeCategories);
222:
223:                StringTokenizer st = new StringTokenizer(blockData, ".,:;");
224:                while (st.hasMoreTokens()) {
225:                    try {
226:                        int first = Integer.parseInt(st.nextToken(), 16);
227:                        int last = Integer.parseInt(st.nextToken(), 16);
228:                        String name = st.nextToken();
229:                        initNamedBlock(name, first, last);
230:                    } catch (Exception e) {
231:                        e.printStackTrace();
232:                    }
233:                }
234:
235:                initNamedBlock("ALL", 0, 0xffff);
236:
237:                namesInitialized = true;
238:                //*/
239:            }
240:
241:            private static void initNamedBlock(String name, int first, int last) {
242:                if (first < Character.MIN_VALUE || first > Character.MAX_VALUE)
243:                    throw new IllegalArgumentException("wrong start code ("
244:                            + first + ") in block " + name);
245:                if (last < Character.MIN_VALUE || last > Character.MAX_VALUE)
246:                    throw new IllegalArgumentException("wrong end code ("
247:                            + last + ") in block " + name);
248:                if (last < first)
249:                    throw new IllegalArgumentException(
250:                            "end code < start code in block " + name);
251:                Bitset bs = (Bitset) namedClasses.get(name);
252:                if (bs == null) {
253:                    bs = new Bitset();
254:                    registerClass(name, bs, unicodeBlocks);
255:                }
256:                bs.setRange((char) first, (char) last);
257:            }
258:
259:            private static void initNamedCategory(String name, int cat) {
260:                Bitset bs = new Bitset();
261:                bs.setCategory(cat);
262:                registerClass(name, bs, unicodeCategories);
263:            }
264:
265:            private static void initNamedCategory(String name, int[] cats) {
266:                Bitset bs = new Bitset();
267:                for (int i = 0; i < cats.length; i++) {
268:                    bs.setCategory(cats[i]);
269:                }
270:                namedClasses.put(name, bs);
271:            }
272:
273:            private static Bitset getNamedClass(String name) {
274:                if (!namesInitialized)
275:                    initNames();
276:                return (Bitset) namedClasses.get(name);
277:            }
278:
279:            static void makeICase(Term term, char c) {
280:                Bitset bs = new Bitset();
281:                bs.setChar(Character.toLowerCase(c));
282:                bs.setChar(Character.toUpperCase(c));
283:                bs.setChar(Character.toTitleCase(c));
284:                Bitset.unify(bs, term);
285:            }
286:
287:            static void makeDigit(Term term, boolean inverse, boolean unicode) {
288:                Bitset digit = unicode ? inverse ? UNONDIGIT : UDIGIT
289:                        : inverse ? NONDIGIT : DIGIT;
290:                Bitset.unify(digit, term);
291:            }
292:
293:            static void makeSpace(Term term, boolean inverse, boolean unicode) {
294:                Bitset space = unicode ? inverse ? UNONSPACE : USPACE
295:                        : inverse ? NONSPACE : SPACE;
296:                Bitset.unify(space, term);
297:            }
298:
299:            static void makeWordChar(Term term, boolean inverse, boolean unicode) {
300:                Bitset wordChar = unicode ? inverse ? UNONWORDCHAR : UWORDCHAR
301:                        : inverse ? NONWORDCHAR : WORDCHAR;
302:                Bitset.unify(wordChar, term);
303:            }
304:
305:            static void makeWordBoundary(Term term, boolean inverse,
306:                    boolean unicode) {
307:                makeWordChar(term, inverse, unicode);
308:                term.type = unicode ? UBOUNDARY : BOUNDARY;
309:            }
310:
311:            static void makeWordStart(Term term, boolean unicode) {
312:                makeWordChar(term, false, unicode);
313:                term.type = unicode ? UDIRECTION : DIRECTION;
314:            }
315:
316:            static void makeWordEnd(Term term, boolean unicode) {
317:                makeWordChar(term, true, unicode);
318:                term.type = unicode ? UDIRECTION : DIRECTION;
319:            }
320:
321:            final static void parseGroup(char[] data, int i, int out,
322:                    Term term, boolean icase, boolean skipspaces,
323:                    boolean unicode, boolean xml) throws PatternSyntaxException {
324:                Bitset sum = new Bitset();
325:                Bitset bs = new Bitset();
326:                int mode = ADD;
327:                char c;
328:                for (; i < out;) {
329:                    switch (c = data[i++]) {
330:                    case '+':
331:                        mode = ADD;
332:                        continue;
333:                    case '-':
334:                        mode = SUBTRACT;
335:                        continue;
336:                    case '&':
337:                        mode = INTERSECT;
338:                        continue;
339:                    case '[':
340:                        bs.reset();
341:                        i = parseClass(data, i, out, bs, icase, skipspaces,
342:                                unicode, xml);
343:                        switch (mode) {
344:                        case ADD:
345:                            sum.add(bs);
346:                            break;
347:                        case SUBTRACT:
348:                            sum.subtract(bs);
349:                            break;
350:                        case INTERSECT:
351:                            sum.intersect(bs);
352:                            break;
353:                        }
354:                        continue;
355:                    case ')':
356:                        throw new PatternSyntaxException(
357:                                "unbalanced class group");
358:                    }
359:                }
360:                Bitset.unify(sum, term);
361:            }
362:
363:            final static int parseClass(char[] data, int i, int out, Term term,
364:                    boolean icase, boolean skipspaces, boolean unicode,
365:                    boolean xml) throws PatternSyntaxException {
366:                Bitset bs = new Bitset();
367:                i = parseClass(data, i, out, bs, icase, skipspaces, unicode,
368:                        xml);
369:                Bitset.unify(bs, term);
370:                return i;
371:            }
372:
373:            final static int parseName(char[] data, int i, int out, Term term,
374:                    boolean inverse, boolean skipspaces)
375:                    throws PatternSyntaxException {
376:                StringBuffer sb = new StringBuffer();
377:                i = parseName(data, i, out, sb, skipspaces);
378:                Bitset bs = getNamedClass(sb.toString());
379:                if (bs == null)
380:                    throw new PatternSyntaxException("unknow class: {" + sb
381:                            + "}");
382:                Bitset.unify(bs, term);
383:                term.inverse = inverse;
384:                return i;
385:            }
386:
387:            /*
388:             * @param mode add/subtract
389:             */
390:            private final static int parseClass(char[] data, int i, int out,
391:                    Bitset bs, boolean icase, boolean skipspaces,
392:                    boolean unicode, boolean xml) throws PatternSyntaxException {
393:                //System.out.println("parseClass("+new String(data)+","+i+","+out+",....)");
394:                char c;
395:                int prev = -1;
396:                boolean isFirst = true, setFirst = false, inRange = false;
397:                Bitset bs1 = null;
398:                StringBuffer sb = null;
399:                for (; i < out; isFirst = setFirst, setFirst = false) {
400:                    //System.out.println("   c="+data[i]);
401:                    handle_special: switch (c = data[i++]) {
402:                    case ']':
403:                        //if(inRange) throw new PatternSyntaxException("[...-] is illegal");
404:                        if (isFirst)
405:                            break; //treat as normal char
406:                        if (inRange) {
407:                            bs.setChar('-');
408:                        }
409:                        if (prev >= 0) {
410:                            char c1 = (char) prev;
411:                            if (icase) {
412:                                bs.setChar(Character.toLowerCase(c1));
413:                                bs.setChar(Character.toUpperCase(c1));
414:                                bs.setChar(Character.toTitleCase(c1));
415:                            } else
416:                                bs.setChar(c1);
417:                        }
418:                        return i;
419:
420:                    case '-':
421:                        if (isFirst)
422:                            break;
423:                        //if(isFirst) throw new PatternSyntaxException("[-...] is illegal");
424:                        if (inRange)
425:                            break;
426:                        //if(inRange) throw new PatternSyntaxException("[...--...] is illegal");
427:                        inRange = true;
428:                        continue;
429:
430:                    case '[':
431:                        if (inRange && xml) { //[..-[..]]
432:                            if (prev >= 0)
433:                                bs.setChar((char) prev);
434:                            if (bs1 == null)
435:                                bs1 = new Bitset();
436:                            else
437:                                bs1.reset();
438:                            i = parseClass(data, i, out, bs1, icase,
439:                                    skipspaces, unicode, xml);
440:                            //System.out.println("     i="+i);
441:                            bs.subtract(bs1);
442:                            inRange = false;
443:                            prev = -1;
444:                            continue;
445:                        } else
446:                            break handle_special;
447:
448:                    case '^':
449:                        //if(!isFirst) throw new PatternSyntaxException("'^' isn't a first char in a class def");
450:                        //bs.setPositive(false);
451:                        //setFirst=true;
452:                        //continue;
453:                        if (isFirst) {
454:                            bs.setPositive(false);
455:                            setFirst = true;
456:                            continue;
457:                        }
458:                        //treat as normal char
459:                        break;
460:
461:                    case ' ':
462:                    case '\r':
463:                    case '\n':
464:                    case '\t':
465:                    case '\f':
466:                        if (skipspaces)
467:                            continue;
468:                        else
469:                            break handle_special;
470:                    case '\\':
471:                        Bitset negatigeClass = null;
472:                        boolean inv = false;
473:                        handle_escape: switch (c = data[i++]) {
474:                        case 'r':
475:                            c = '\r';
476:                            break handle_special;
477:
478:                        case 'n':
479:                            c = '\n';
480:                            break handle_special;
481:
482:                        case 'e':
483:                            c = '\u001B';
484:                            break handle_special;
485:
486:                        case 't':
487:                            c = '\t';
488:                            break handle_special;
489:
490:                        case 'f':
491:                            c = '\f';
492:                            break handle_special;
493:
494:                        case 'u':
495:                            if (i >= out - 4)
496:                                throw new PatternSyntaxException(
497:                                        "incomplete escape sequence \\uXXXX");
498:                            c = (char) ((toHexDigit(c) << 12)
499:                                    + (toHexDigit(data[i++]) << 8)
500:                                    + (toHexDigit(data[i++]) << 4) + toHexDigit(data[i++]));
501:                            break handle_special;
502:
503:                        case 'v':
504:                            c = (char) ((toHexDigit(c) << 24)
505:                                    + (toHexDigit(data[i++]) << 16)
506:                                    + (toHexDigit(data[i++]) << 12)
507:                                    + (toHexDigit(data[i++]) << 8)
508:                                    + (toHexDigit(data[i++]) << 4) + toHexDigit(data[i++]));
509:                            break handle_special;
510:
511:                        case 'b':
512:                            c = 8; // backspace
513:                            break handle_special;
514:
515:                        case 'x': { // hex 2-digit number
516:                            int hex = 0;
517:                            char d;
518:                            if ((d = data[i++]) == '{') {
519:                                while ((d = data[i++]) != '}') {
520:                                    hex = (hex << 4) + toHexDigit(d);
521:                                }
522:                                if (hex > 0xffff)
523:                                    throw new PatternSyntaxException(
524:                                            "\\x{<out of range>}");
525:                            } else {
526:                                hex = (toHexDigit(d) << 4)
527:                                        + toHexDigit(data[i++]);
528:                            }
529:                            c = (char) hex;
530:                            break handle_special;
531:                        }
532:                        case '0': // oct 2- or 3-digit number
533:                        case 'o': // oct 2- or 3-digit number
534:                            int oct = 0;
535:                            for (;;) {
536:                                char d = data[i++];
537:                                if (d >= '0' && d <= '7') {
538:                                    oct *= 8;
539:                                    oct += d - '0';
540:                                    if (oct > 0xffff)
541:                                        break;
542:                                } else {
543:                                    i--;
544:                                    break;
545:                                }
546:                            }
547:                            c = (char) oct;
548:                            break handle_special;
549:
550:                        case 'm': // decimal number -> char
551:                            int dec = 0;
552:                            for (;;) {
553:                                char d = data[i++];
554:                                if (d >= '0' && d <= '9') {
555:                                    dec *= 10;
556:                                    dec += d - '0';
557:                                    if (dec > 0xffff)
558:                                        break;
559:                                } else {
560:                                    i--;
561:                                    break;
562:                                }
563:                            }
564:                            c = (char) dec;
565:                            break handle_special;
566:
567:                        case 'c': // ctrl-char
568:                            c = (char) (data[i++] & 0x1f);
569:                            break handle_special;
570:
571:                        //classes;
572:                        //
573:                        case 'D': // non-digit
574:                            negatigeClass = unicode ? UNONDIGIT : NONDIGIT;
575:                            break handle_escape;
576:
577:                        case 'S': // space
578:                            negatigeClass = unicode ? UNONSPACE : NONSPACE;
579:                            break handle_escape;
580:
581:                        case 'W': // space
582:                            negatigeClass = unicode ? UNONWORDCHAR
583:                                    : NONWORDCHAR;
584:                            break handle_escape;
585:
586:                        case 'd': // digit
587:                            if (inRange)
588:                                throw new PatternSyntaxException(
589:                                        "illegal range: [..." + prev
590:                                                + "-\\d...]");
591:                            bs.setDigit(unicode);
592:                            continue;
593:
594:                        case 's': // digit
595:                            if (inRange)
596:                                throw new PatternSyntaxException(
597:                                        "illegal range: [..." + prev
598:                                                + "-\\s...]");
599:                            bs.setSpace(unicode);
600:                            continue;
601:
602:                        case 'w': // digit
603:                            if (inRange)
604:                                throw new PatternSyntaxException(
605:                                        "illegal range: [..." + prev
606:                                                + "-\\w...]");
607:                            bs.setWordChar(unicode);
608:                            continue;
609:
610:                        case 'P': // \\P{..}
611:                            inv = true;
612:                        case 'p': // \\p{..}
613:                            if (inRange)
614:                                throw new PatternSyntaxException(
615:                                        "illegal range: [..." + prev
616:                                                + "-\\w...]");
617:                            if (sb == null)
618:                                sb = new StringBuffer();
619:                            else
620:                                sb.setLength(0);
621:                            i = parseName(data, i, out, sb, skipspaces);
622:                            Bitset nc = getNamedClass(sb.toString());
623:                            if (nc == null)
624:                                throw new PatternSyntaxException(
625:                                        "unknown named class: {" + sb + "}");
626:                            bs.add(nc, inv);
627:                            continue;
628:
629:                        default:
630:                            //other escaped treat as normal
631:                            break handle_special;
632:                        }
633:                        //negatigeClass;
634:                        //\S,\D,\W
635:                        if (inRange)
636:                            throw new PatternSyntaxException(
637:                                    "illegal range: [..." + prev + "-\\" + c
638:                                            + "...]");
639:                        bs.add(negatigeClass);
640:                        continue;
641:                        /* should probably not be here...
642:                        case '{':   //
643:                        if(inRange) throw new PatternSyntaxException("illegal range: [..."+prev+"-\\w...]");
644:                        if(sb==null) sb=new StringBuffer();
645:                        else sb.setLength(0);
646:                        i=parseName(data,i-1,out,sb,skipspaces);
647:                        Bitset nc=getNamedClass(sb.toString());
648:                        if(nc==null) throw new PatternSyntaxException("unknown named class: {"+sb+"}");
649:                        bs.add(nc,false);
650:                        continue;
651:                         */
652:                    default:
653:                    }
654:                    //c is a normal char
655:                    //System.out.println("      normal c="+c+", inRange="+inRange+", prev="+(char)prev);
656:                    if (prev < 0) {
657:                        prev = c;
658:                        inRange = false;
659:                        continue;
660:                    }
661:                    if (!inRange) {
662:                        char c1 = (char) prev;
663:                        if (icase) {
664:                            bs.setChar(Character.toLowerCase(c1));
665:                            bs.setChar(Character.toUpperCase(c1));
666:                            bs.setChar(Character.toTitleCase(c1));
667:                        } else
668:                            bs.setChar(c1);
669:                        prev = c;
670:                    } else {
671:                        if (prev > c)
672:                            throw new PatternSyntaxException("illegal range: "
673:                                    + prev + ">" + c);
674:                        char c0 = (char) prev;
675:                        inRange = false;
676:                        prev = -1;
677:                        if (icase) {
678:                            bs.setRange(Character.toLowerCase(c0), Character
679:                                    .toLowerCase(c));
680:                            bs.setRange(Character.toUpperCase(c0), Character
681:                                    .toUpperCase(c));
682:                            bs.setRange(Character.toTitleCase(c0), Character
683:                                    .toTitleCase(c));
684:                        } else
685:                            bs.setRange(c0, c);
686:                    }
687:                }
688:                throw new PatternSyntaxException(
689:                        "unbalanced brackets in a class def");
690:            }
691:
692:            final static int parseName(char[] data, int i, int out,
693:                    StringBuffer sb, boolean skipspaces)
694:                    throws PatternSyntaxException {
695:                char c;
696:                int start = -1;
697:                while (i < out) {
698:                    switch (c = data[i++]) {
699:                    case '{':
700:                        start = i;
701:                        continue;
702:                    case '}':
703:                        return i;
704:                    case ' ':
705:                    case '\r':
706:                    case '\n':
707:                    case '\t':
708:                    case '\f':
709:                        if (skipspaces)
710:                            continue;
711:                        //else pass on
712:                    default:
713:                        if (start < 0)
714:                            throw new PatternSyntaxException(
715:                                    "named class doesn't start with '{'");
716:                        sb.append(c);
717:                    }
718:                }
719:                throw new PatternSyntaxException("wrong class name: "
720:                        + new String(data, i, out - i));
721:            }
722:
723:            static String stringValue0(boolean[] arr) {
724:                /*
725:                 System.out.println("stringValue0():");
726:                 System.out.println("arr="+arr);
727:                 for(int i=0;i<BLOCK_SIZE;i++){
728:                 if(arr[i]) if(i>32 && i<127)System.out.print((char)i); else System.out.print("["+i+"]");
729:                 }
730:                 System.out.println();
731:                 */
732:                StringBuffer b = new StringBuffer();
733:                int c = 0;
734:
735:                loop: for (;;) {
736:                    while (!arr[c]) {
737:                        //System.out.println(c+": "+arr[c]);
738:                        c++;
739:                        if (c >= 0xff)
740:                            break loop;
741:                    }
742:                    int first = c;
743:                    while (arr[c]) {
744:                        //System.out.println(c+": "+arr[c]);
745:                        c++;
746:                        if (c > 0xff)
747:                            break;
748:                    }
749:                    int last = c - 1;
750:                    if (last == first)
751:                        b.append(stringValue(last));
752:                    else {
753:                        b.append(stringValue(first));
754:                        b.append('-');
755:                        b.append(stringValue(last));
756:                    }
757:                    if (c > 0xff)
758:                        break;
759:                }
760:                return b.toString();
761:            }
762:
763:            /* Mmm.. what is it? 
764:            static String stringValueC(boolean[] categories){
765:               StringBuffer sb=new StringBuffer();
766:               for(int i=0;i<categories.length;i++){
767:                  if(!categories[i]) continue;
768:                  String name=(String)unicodeCategoryNames.get(new Integer(i));
769:                  sb.append('{');
770:                  sb.append(name);
771:                  sb.append('}');
772:               }
773:               return sb.toString();
774:            }
775:             */
776:
777:            static String stringValue2(boolean[][] arr) {
778:                StringBuffer b = new StringBuffer();
779:                int c = 0;
780:                loop: for (;;) {
781:                    boolean marked = false;
782:                    for (;;) {
783:                        boolean[] marks = arr[c >> 8];
784:                        if (marks != null && marks[c & 255])
785:                            break;
786:                        c++;
787:                        if (c > 0xffff)
788:                            break loop;
789:                    }
790:                    int first = c;
791:                    for (; c <= 0xffff;) {
792:                        boolean[] marks = arr[c >> 8];
793:                        if (marks == null || !marks[c & 255])
794:                            break;
795:                        c++;
796:                    }
797:                    int last = c - 1;
798:                    if (last == first)
799:                        b.append(stringValue(last));
800:                    else {
801:                        b.append(stringValue(first));
802:                        b.append('-');
803:                        b.append(stringValue(last));
804:                    }
805:                    if (c > 0xffff)
806:                        break;
807:                }
808:                return b.toString();
809:            }
810:
811:            static String stringValue(int c) {
812:                StringBuffer b = new StringBuffer(5);
813:                if (c < 32) {
814:                    switch (c) {
815:                    case '\r':
816:                        b.append("\\r");
817:                        break;
818:                    case '\n':
819:                        b.append("\\n");
820:                        break;
821:                    case '\t':
822:                        b.append("\\t");
823:                        break;
824:                    case '\f':
825:                        b.append("\\f");
826:                        break;
827:                    default:
828:                        b.append('(');
829:                        b.append((int) c);
830:                        b.append(')');
831:                    }
832:                } else if (c < 256) {
833:                    b.append((char) c);
834:                } else {
835:                    b.append('\\');
836:                    b.append('x');
837:                    b.append(Integer.toHexString(c));
838:                }
839:                return b.toString();
840:            }
841:
842:            static int toHexDigit(char d) throws PatternSyntaxException {
843:                int val = 0;
844:                if (d >= '0' && d <= '9')
845:                    val = d - '0';
846:                else if (d >= 'a' && d <= 'f')
847:                    val = 10 + d - 'a';
848:                else if (d >= 'A' && d <= 'F')
849:                    val = 10 + d - 'A';
850:                else
851:                    throw new PatternSyntaxException(
852:                            "hexadecimal digit expected: " + d);
853:                return val;
854:            }
855:
856:            public static void main(String[] args) {
857:                if (!namesInitialized)
858:                    initNames();
859:                if (args.length == 0) {
860:                    System.out.println("Class usage: \\p{Class},\\P{Class}");
861:                    printRealm(posixClasses, "Posix classes");
862:                    printRealm(unicodeCategories, "Unicode categories");
863:                    printRealm(unicodeBlocks, "Unicode blocks");
864:                } else {
865:                    for (int i = 0; i < args.length; i++) {
866:                        System.out.print(args[i]);
867:                        System.out.print(": ");
868:                        System.out
869:                                .println(namedClasses.containsKey(args[i]) ? "supported"
870:                                        : "not supported");
871:                    }
872:                }
873:                /*
874:                int[][] data=new int[CATEGORY_COUNT][BLOCK_SIZE+2];
875:                for(int i=Character.MIN_VALUE;i<=Character.MAX_VALUE;i++){
876:                   int cat=Character.getType((char)i);
877:                   data[cat][BLOCK_SIZE]++;
878:                   int b=(i>>8)&0xff;
879:                   if(data[cat][b]==0){
880:                      data[cat][b]=1;
881:                      data[cat][BLOCK_SIZE+1]++;
882:                   }
883:                }
884:                for(int i=0;i<CATEGORY_COUNT;i++){
885:                   System.out.print(unicodeCategoryNames.get(new Integer(i))+": ");
886:                   System.out.println(data[i][BLOCK_SIZE]+" chars, "+data[i][BLOCK_SIZE+1]+" blocks, "+(data[i][BLOCK_SIZE]/data[i][BLOCK_SIZE+1])+" chars/block");
887:                }
888:                 */
889:            }
890:
891:            private static void printRealm(Vector realm, String name) {
892:                System.out.println(name + ":");
893:                Enumeration e = realm.elements();
894:                while (e.hasMoreElements()) {
895:                    System.out.println("  " + e.nextElement());
896:                }
897:            }
898:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.