Source Code Cross Referenced for StandardTokenizerProperties.java (JTopas, package de.susebox.jtopas)


0001:        /*
0002:         * StandardTokenizerProperties.java: general-use TokenizerProperties implementation
0003:         *
0004:         * Copyright (C) 2002 Heiko Blau
0005:         *
0006:         * This file belongs to the JTopas Library.
0007:         * JTopas is free software; you can redistribute it and/or modify it 
0008:         * under the terms of the GNU Lesser General Public License as published by the 
0009:         * Free Software Foundation; either version 2.1 of the License, or (at your 
0010:         * option) any later version.
0011:         *
0012:         * This software is distributed in the hope that it will be useful, but WITHOUT
0013:         * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
0014:         * FITNESS FOR A PARTICULAR PURPOSE. 
0015:         * See the GNU Lesser General Public License for more details.
0016:         *
0017:         * You should have received a copy of the GNU Lesser General Public License along
0018:         * with JTopas. If not, write to the
0019:         *
0020:         *   Free Software Foundation, Inc.
0021:         *   59 Temple Place, Suite 330, 
0022:         *   Boston, MA 02111-1307 
0023:         *   USA
0024:         *
0025:         * or check the Internet: http://www.fsf.org
0026:         *
0027:         * Contact:
0028:         *   email: heiko@susebox.de 
0029:         */
0030:
0031:        package de.susebox.jtopas;
0032:
0033:        //-----------------------------------------------------------------------------
0034:        // Imports
0035:        //
0036:        import java.util.Arrays;
0037:        import java.util.ArrayList;
0038:        import java.util.Map;
0039:        import java.util.HashMap;
0040:        import java.util.Iterator;
0041:        import java.util.NoSuchElementException;
0042:
0043:        import de.susebox.java.lang.ExtRuntimeException;
0044:        import de.susebox.java.lang.ExtUnsupportedOperationException;
0045:        import de.susebox.java.lang.ExtIllegalArgumentException;
0046:
0047:        import de.susebox.jtopas.spi.DataMapper;
0048:        import de.susebox.jtopas.spi.DataProvider;
0049:        import de.susebox.jtopas.spi.PatternHandler;
0050:
0051:        import de.susebox.jtopas.impl.PatternMatcher;
0052:        import de.susebox.jtopas.impl.SequenceStore;
0053:        import de.susebox.jtopas.impl.NoCaseSequenceStore;
0054:
0055:        //-----------------------------------------------------------------------------
0056:        // Class StandardTokenizerProperties
0057:        //
0058:
0059:        /**<p>
0060:         * The class <code>StandardTokenizerProperties</code> provides a simple implementation
0061:         * of the {@link TokenizerProperties} interface for use in most situations.
0062:         *</p><p>
0063:         * Note that this class takes advantage of JTopas features that use Java 1.4 or
0064:         * higher. It can still be used in older runtime environments, but it cannot
0065:         * be compiled with JDK versions below 1.4.
0066:         *</p>
0067:         *
0068:         * @see TokenizerProperties
0069:         * @see Tokenizer
0070:         * @author Heiko Blau
0071:         */
0072:        public class StandardTokenizerProperties extends
0073:                AbstractTokenizerProperties implements  TokenizerProperties,
0074:                DataMapper {
0075:
0076:            //---------------------------------------------------------------------------
0077:            // Properties
0078:            //
0079:
0080:            /**
0081:             * Maximum length of a non-free pattern match. These are patterns that don't
0082:             * have the {@link TokenizerProperties#F_FREE_PATTERN} flag set. A common 
0083:             * example are number patterns.
0084:             */
0085:            public static final short MAX_NONFREE_MATCHLEN = 1024;
0086:
0087:            //---------------------------------------------------------------------------
0088:            // Constructors
0089:            //
0090:
0091:            /**
0092:             * Default constructor that initializes an instance with the default whitespace
0093:             * and separator sets. {@link Tokenizer} instances using this <code>StandardTokenizerProperties</code>
0094:             * object split text at spaces, tabs and line ending sequences as well
0095:             * as at punctuation characters.
0096:             */
0097:            public StandardTokenizerProperties() {
0098:                this (0);
0099:            }
0100:
0101:            /**
0102:             * This constructor takes the control flags to be used. It is a shortcut to:
0103:             * <pre>
0104:             *   TokenizerProperties props = new StandardTokenizerProperties();
0105:             *
0106:             *   props.setParseFlags(flags);
0107:             * </pre>
0108:             * See the {@link TokenizerProperties} interface for the supported flags.
0109:             *<br>
0110:             * The {@link TokenizerProperties#DEFAULT_WHITESPACES} and 
0111:             * {@link TokenizerProperties#DEFAULT_SEPARATORS} are used for whitespace and 
0112:             * separator handling unless explicit calls to {@link #setWhitespaces} and 
0113:             * {@link #setSeparators} follow.
0114:             *
0115:             * @param flags     tokenizer control flags
0116:             * @see   #setParseFlags
0117:             */
0118:            public StandardTokenizerProperties(int flags) {
0119:                this (flags, DEFAULT_WHITESPACES, DEFAULT_SEPARATORS);
0120:            }
0121:
0122:            /**
0123:             * This constructor takes the whitespace and separator sets to be used. It is 
0124:             * a shortcut to:
0125:             * <pre>
0126:             *   TokenizerProperties props = new StandardTokenizerProperties();
0127:             *
0128:             *   props.setWhitespaces(ws);
0129:             *   props.setSeparators(sep);
0130:             * </pre>
0131:             *
0132:             * @param flags       tokenizer control flags
0133:             * @param whitespaces the whitespace set
0134:             * @param separators  the set of separating characters
0135:             * @see   #setParseFlags
0136:             * @see   #setWhitespaces
0137:             * @see   #setSeparators
0138:             */
0139:            public StandardTokenizerProperties(int flags, String whitespaces,
0140:                    String separators) {
0141:                Arrays.fill(_charFlags, 0);
0142:                setParseFlags(flags);
0143:                setWhitespaces(whitespaces);
0144:                setSeparators(separators);
0145:            }
0146:
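            /*
             * Editor's illustrative sketch (not part of the original JTopas source): the
             * three constructors are shortcuts for configuring a default instance step by
             * step. The flag and character-set values below are examples only; the
             * resulting object is typically handed to a Tokenizer implementation such as
             * StandardTokenizer.
             *
             *   // one-step construction
             *   TokenizerProperties props1 =
             *       new StandardTokenizerProperties(Flags.F_NO_CASE, " \t\r\n", ",;:");
             *
             *   // equivalent step-by-step construction
             *   TokenizerProperties props2 = new StandardTokenizerProperties();
             *   props2.setParseFlags(Flags.F_NO_CASE);
             *   props2.setWhitespaces(" \t\r\n");
             *   props2.setSeparators(",;:");
             */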
0147:            //---------------------------------------------------------------------------
0148:            // Abstract methods of the base class
0149:            //
0150:
0151:            /**
0152:             * Retrieving a property by a given type and image. See the method description
0153:             * in {@link AbstractTokenizerProperties} for details.
0154:             *
0155:             * @param   type        the type the returned property should have
0156:             * @param   startImage  the (starting) image
0157:             * @return  the token description for the image or <code>null</code>
0158:             */
0159:            protected TokenizerProperty doGetProperty(int type,
0160:                    String startImage) {
0161:                TokenizerProperty prop = null;
0162:
0163:                switch (type) {
0164:                case Token.KEYWORD:
0165:                    if (_keywords[0] != null) {
0166:                        prop = _keywords[0].getKeyword(startImage);
0167:                    }
0168:                    if (prop == null && _keywords[1] != null) {
0169:                        prop = _keywords[1].getKeyword(startImage);
0170:                    }
0171:                    break;
0172:
0173:                case Token.STRING:
0174:                case Token.LINE_COMMENT:
0175:                case Token.BLOCK_COMMENT:
0176:                case Token.SPECIAL_SEQUENCE:
0177:                    if (_sequences[0] != null) {
0178:                        prop = _sequences[0].getSpecialSequence(startImage);
0179:                    }
0180:                    if (prop == null && _sequences[1] != null) {
0181:                        prop = _sequences[1].getSpecialSequence(startImage);
0182:                    }
0183:                    break;
0184:
0185:                case Token.PATTERN:
0186:                    for (int index = 0; index < _patterns.size(); ++index) {
0187:                        PatternMatcher data = (PatternMatcher) _patterns
0188:                                .get(index);
0189:
0190:                        prop = data.getProperty();
0191:                        if (prop.getImages()[0].equals(startImage)) {
0192:                            break;
0193:                        }
0194:                        prop = null;
0195:                    }
0196:                    break;
0197:
0198:                case Token.WHITESPACE:
0199:                case Token.SEPARATOR:
0200:                default:
0201:                    throw new ExtIllegalArgumentException(
0202:                            "Unsupported property type {0}. (Leading) image \"{1}\".",
0203:                            new Object[] { new Integer(type), startImage });
0204:                }
0205:
0206:                // either the required property or null
0207:                return prop;
0208:            }
0209:
0210:            /**
0211:             * Setting a new separator set. See the method description in 
0212:             * {@link AbstractTokenizerProperties} for details.
0213:             *
0214:             * @param   separators    the set of separators including ranges
0215:             * @return  the replaced separator set or <code>null</code>
0216:             */
0217:            protected String doSetSeparators(String separators) {
0218:                String oldValue;
0219:
0220:                // which separators should be set?
0221:                if ((_flags & Flags.F_NO_CASE) == 0) {
0222:                    oldValue = (_separatorsCase.length() > 0) ? _separatorsCase
0223:                            : _separatorsNoCase;
0224:                    _separatorsCase = separators;
0225:                    _separatorsNoCase = "";
0226:                } else {
0227:                    oldValue = (_separatorsNoCase.length() > 0) ? _separatorsNoCase
0228:                            : _separatorsCase;
0229:                    _separatorsCase = "";
0230:                    _separatorsNoCase = separators;
0231:                }
0232:
0233:                // mark separators in character table
0234:                putCharSet(oldValue, Token.SEPARATOR, false);
0235:                putCharSet(separators, Token.SEPARATOR, true);
0236:
0237:                // normalize the old value
0238:                if (oldValue == null || oldValue.length() == 0) {
0239:                    return null;
0240:                } else {
0241:                    return oldValue;
0242:                }
0243:            }
0244:
0245:            /**
0246:             * Setting a new whitespace set. See the method description in 
0247:             * {@link AbstractTokenizerProperties} for details.
0248:             *
0249:             * @param   whitespaces   the set of whitespaces including ranges
0250:             * @return  the replaced whitespace set or <code>null</code>
0251:             */
0252:            protected String doSetWhitespaces(String whitespaces) {
0253:                // set the right whitespaces
0254:                String oldValue;
0255:
0256:                if ((_flags & Flags.F_NO_CASE) == 0) {
0257:                    oldValue = (_whitespacesCase.length() > 0) ? _whitespacesCase
0258:                            : _whitespacesNoCase;
0259:                    _whitespacesCase = whitespaces;
0260:                    _whitespacesNoCase = "";
0261:                } else {
0262:                    oldValue = (_whitespacesNoCase.length() > 0) ? _whitespacesNoCase
0263:                            : _whitespacesCase;
0264:                    _whitespacesCase = "";
0265:                    _whitespacesNoCase = whitespaces;
0266:                }
0267:
0268:                // mark whitespaces in character table
0269:                putCharSet(oldValue, Token.WHITESPACE, false);
0270:                putCharSet(whitespaces, Token.WHITESPACE, true);
0271:
0272:                // return changes
0273:                if (oldValue == null || oldValue.length() == 0) {
0274:                    return null;
0275:                } else {
0276:                    return oldValue;
0277:                }
0278:            }
0279:
0280:            /**
0281:             * Registering a {@link TokenizerProperty}.
0282:             * See the method description in {@link AbstractTokenizerProperties}.
0283:             *
0284:             * @param   property   property to register
0285:             * @return  the replaced property or <code>null</code>
0286:             */
0287:            protected TokenizerProperty doAddProperty(TokenizerProperty property) {
0288:                switch (property.getType()) {
0289:                case Token.STRING:
0290:                case Token.LINE_COMMENT:
0291:                case Token.BLOCK_COMMENT:
0292:                case Token.SPECIAL_SEQUENCE:
0293:                    return addSpecialSequence(property);
0294:
0295:                case Token.KEYWORD:
0296:                    return addKeyword(property);
0297:
0298:                case Token.PATTERN:
0299:                    return addPattern(property);
0300:
0301:                case Token.WHITESPACE:
0302:                case Token.SEPARATOR:
0303:                default:
0304:                    throw new ExtIllegalArgumentException(
0305:                            "Unsupported property type {0}. (Leading) image \"{1}\".",
0306:                            new Object[] { new Integer(property.getType()),
0307:                                    property.getImages()[0] });
0308:                }
0309:            }
0310:
0311:            /**
0312:             * Deregistering a {@link TokenizerProperty} from the store.
0313:             * See the method description in {@link AbstractTokenizerProperties}.
0314:             *
0315:             * @param   property    property to remove
0316:             * @return  the replaced property or <code>null</code>
0317:             */
0318:            protected TokenizerProperty doRemoveProperty(
0319:                    TokenizerProperty property) {
0320:                // removing property according to type
0321:                TokenizerProperty prop = null;
0322:                String image = property.getImages()[0];
0323:
0324:                switch (property.getType()) {
0325:                case Token.LINE_COMMENT:
0326:                case Token.BLOCK_COMMENT:
0327:                case Token.STRING:
0328:                case Token.SPECIAL_SEQUENCE:
0329:                    if (_sequences[0] != null) {
0330:                        prop = _sequences[0].removeSpecialSequence(image);
0331:                    }
0332:                    if (prop == null && _sequences[1] != null) {
0333:                        prop = _sequences[1].removeSpecialSequence(image);
0334:                    }
0335:                    break;
0336:
0337:                case Token.KEYWORD:
0338:                    if (_keywords[0] != null) {
0339:                        prop = _keywords[0].removeKeyword(image);
0340:                    }
0341:                    if (prop == null && _keywords[1] != null) {
0342:                        prop = _keywords[1].removeKeyword(image);
0343:                    }
0344:                    break;
0345:
0346:                case Token.PATTERN:
0347:                    for (int index = 0; index < _patterns.size(); ++index) {
0348:                        PatternMatcher data = (PatternMatcher) _patterns
0349:                                .get(index);
0350:
0351:                        prop = data.getProperty();
0352:                        if (prop.getImages()[0].equals(image)) {
0353:                            _patterns.remove(index);
0354:                            break;
0355:                        } else {
0356:                            prop = null;
0357:                        }
0358:                    }
0359:                    break;
0360:
0361:                case Token.WHITESPACE:
0362:                case Token.SEPARATOR:
0363:                default:
0364:                    throw new ExtIllegalArgumentException(
0365:                            "Unsupported property type {0}. (Leading) image \"{1}\".",
0366:                            new Object[] { new Integer(property.getType()),
0367:                                    image });
0368:                }
0369:
0370:                // return removed property
0371:                return prop;
0372:            }
0373:
0374:            //---------------------------------------------------------------------------
0375:            // Methods of the TokenizerProperties interface
0376:            //
0377:
0378:            /**
0379:             * This method returns an {@link java.util.Iterator} of {@link TokenizerProperty}
0380:             * objects. See the method description in {@link TokenizerProperties}.
0381:             *
0382:             * @return enumeration of {@link TokenizerProperty} objects
0383:             */
0384:            public Iterator getStrings() {
0385:                return new SpecialSequencesIterator(this , _sequences,
0386:                        Token.STRING);
0387:            }
0388:
0389:            /**
0390:             * Obtaining the whitespace character set.
0391:             * See the method description in {@link TokenizerProperties}.
0392:             *
0393:             * @see #setWhitespaces
0394:             * @return the currently active whitespace set
0395:             */
0396:            public String getWhitespaces() {
0397:                synchronized (this ) {
0398:                    return _whitespacesCase + _whitespacesNoCase;
0399:                }
0400:            }
0401:
0402:            /**
0403:             * Obtaining the separator set of the <code>Tokenizer</code>.
0404:             * See the method description in {@link TokenizerProperties}.
0405:             *
0406:             * @see #setSeparators
0407:             * @return the currently used set of separating characters
0408:             */
0409:            public String getSeparators() {
0410:                synchronized (this ) {
0411:                    return _separatorsCase + _separatorsNoCase;
0412:                }
0413:            }
0414:
0415:            /**
0416:             * This method returns an {@link java.util.Iterator} of {@link TokenizerProperty}
0417:             * objects.
0418:             * See the method description in {@link TokenizerProperties}.
0419:             *
0420:             * @return enumeration of {@link TokenizerProperty} objects
0421:             */
0422:            public Iterator getLineComments() {
0423:                return new SpecialSequencesIterator(this , _sequences,
0424:                        Token.LINE_COMMENT);
0425:            }
0426:
0427:            /**
0428:             * This method returns an {@link java.util.Iterator} of {@link TokenizerProperty}
0429:             * objects.
0430:             * See the method description in {@link TokenizerProperties}.
0431:             *
0432:             * @return enumeration of {@link TokenizerProperty} objects
0433:             */
0434:            public Iterator getBlockComments() {
0435:                return new SpecialSequencesIterator(this , _sequences,
0436:                        Token.BLOCK_COMMENT);
0437:            }
0438:
0439:            /**
0440:             * This method returns an {@link java.util.Iterator} of {@link TokenizerProperty}
0441:             * objects.
0442:             * See the method description in {@link TokenizerProperties}.
0443:             *
0444:             * @return enumeration of {@link TokenizerProperty} objects
0445:             */
0446:            public Iterator getSpecialSequences() {
0447:                return new SpecialSequencesIterator(this , _sequences,
0448:                        Token.SPECIAL_SEQUENCE);
0449:            }
0450:
0451:            /**
0452:             * This method returns an {@link java.util.Iterator} of {@link TokenizerProperty}
0453:             * objects.
0454:             * See the method description in {@link TokenizerProperties}.
0455:             *
0456:             * @return iteration of {@link TokenizerProperty} objects
0457:             */
0458:            public Iterator getKeywords() {
0459:                return new SpecialSequencesIterator(this , _keywords,
0460:                        Token.KEYWORD);
0461:            }
0462:
0463:            /**
0464:             * This method returns an {@link java.util.Iterator} of {@link TokenizerProperty}
0465:             * objects. Each <code>TokenizerProperty</code> object contains a pattern and 
0466:             * its companion if such an associated object exists.
0467:             *
0468:             * @return enumeration of {@link TokenizerProperty} objects
0469:             */
0470:            public Iterator getPatterns() {
0471:                return new PatternIterator(this );
0472:            }
0473:
0474:            /**
0475:             * This method returns an {@link java.util.Iterator} of {@link TokenizerProperty}
0476:             * objects.
0477:             * See the method description in {@link TokenizerProperties}.
0478:             *
0479:             * @return enumeration of {@link TokenizerProperty} objects
0480:             */
0481:            public Iterator getProperties() {
0482:                return new FullIterator(this );
0483:            }
0484:
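            /*
             * Editor's illustrative sketch (not part of the original JTopas source):
             * enumerating every registered property through getProperties(). Here "props"
             * stands for any configured StandardTokenizerProperties instance; the accessors
             * getType() and getImages() are the ones already used elsewhere in this file.
             *
             *   for (Iterator it = props.getProperties(); it.hasNext(); ) {
             *       TokenizerProperty prop = (TokenizerProperty) it.next();
             *       System.out.println(prop.getType() + ": " + prop.getImages()[0]);
             *   }
             */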
0485:            //---------------------------------------------------------------------------
0486:            // Methods of the DataMapper interface
0487:            //
0488:
0489:            /**
0490:             * Setting the backing {@link TokenizerProperties} instance this <code>DataMapper</code> 
0491:             * is working with. Usually, the <code>DataMapper</code>
0492:             * interface is implemented by <code>TokenizerProperties</code> implementations,
0493:             * too. Otherwise the {@link Tokenizer} using the <code>TokenizerProperties</code>, 
0494:             * will construct a default <code>DataMapper</code> and propagate the 
0495:             * <code>TokenizerProperties</code> instance by calling this method.
0496:             *<br>
0497:             * The method should throw an {@link java.lang.UnsupportedOperationException}
0498:             * if this <code>DataMapper</code> is an extension to a <code>TokenizerProperties</code>
0499:             * implementation.
0500:             *
0501:             * @param   props   the {@link de.susebox.jtopas.TokenizerProperties}
0502:             * @throws  UnsupportedOperationException if this is a <code>DataMapper</code>
0503:             *          implemented by a {@link de.susebox.jtopas.TokenizerProperties}
0504:             *          implementation
0505:             * @throws  NullPointerException  if no {@link TokenizerProperties} are given
0506:             */
0507:            public void setTokenizerProperties(TokenizerProperties props)
0508:                    throws UnsupportedOperationException, NullPointerException {
0509:                throw new ExtUnsupportedOperationException(
0510:                        "Class {0} already defines the {1} interface.",
0511:                        new Object[] {
0512:                                StandardTokenizerProperties.class.getName(),
0513:                                DataMapper.class.getName() });
0514:            }
0515:
0516:            /**
0517:             * The method retrieves the backing {@link de.susebox.jtopas.TokenizerProperties}
0518:             * instance this <code>DataMapper</code> is working on. For implementations
0519:             * of the <code>TokenizerProperties</code> interface that also implement the
0520:             * <code>DataMapper</code> interface, this method returns the instance itself
0521:             * it is called on.
0522:             *<br>
0523:             * Otherwise the method returns the <code>TokenizerProperties</code> instance 
0524:             * passed through the last call to {@link #setTokenizerProperties} or <code>null</code>
0525:             * if no such call has taken place so far.
0526:             *
0527:             * @return the backing {@link de.susebox.jtopas.TokenizerProperties} or <code>null</code>
0528:             */
0529:            public TokenizerProperties getTokenizerProperties() {
0530:                return this ;
0531:            }
0532:
0533:            /**
0534:             * This method checks if the character is a whitespace. Implement your own
0535:             * code for situations where this default implementation is not fast enough
0536:             * or otherwise inadequate.
0537:             *
0538:             * @param testChar  check this character
0539:             * @return <code>true</code> if the given character is a whitespace,
0540:             *         <code>false</code> otherwise
0541:             */
0542:            public boolean isWhitespace(char testChar) {
0543:                try {
0544:                    return (_charFlags[testChar] & CHARFLAG_WHITESPACE) != 0;
0545:                } catch (ArrayIndexOutOfBoundsException ex) {
0546:                    Integer extFlags = (Integer) _extCharFlags.get(new Integer(
0547:                            testChar));
0548:                    return (extFlags != null && (extFlags.intValue() & CHARFLAG_WHITESPACE) != 0);
0549:                }
0550:            }
0551:
0552:            /**
0553:             * This method detects the number of whitespace characters the data range given
0554:             * through the {@link DataProvider} parameter starts with.
0555:             *
0556:             * @param   dataProvider  the source to get the data range from
0557:             * @return  number of whitespace characters starting from the given offset
0558:             * @throws  TokenizerException failure while reading data from the input stream
0559:             * @throws  NullPointerException  if no {@link DataProvider} is given
0560:             * @see     de.susebox.jtopas.spi.DataProvider
0561:             */
0562:            public int countLeadingWhitespaces(DataProvider dataProvider)
0563:                    throws NullPointerException {
0564:                int maxChars = dataProvider.getLength();
0565:                int len = 0;
0566:
0567:                while (len < maxChars
0568:                        && isWhitespace(dataProvider.getCharAt(len))) {
0569:                    len++;
0570:                }
0571:                return len;
0572:            }
0573:
0574:            /** 
0575:             * If a {@link Tokenizer} performs line counting, it is often necessary to
0576:             * know whether newline characters are considered to be whitespace. See {@link WhitespaceHandler}
0577:             * for details.
0578:             *
0579:             * @return  <code>true</code> if newline characters are in the current whitespace set,
0580:             *          <code>false</code> otherwise
0581:             *
0582:             */
0583:            public boolean newlineIsWhitespace() {
0584:                return (_charFlags['\n'] & CHARFLAG_WHITESPACE) != 0
0585:                        && (_charFlags['\r'] & CHARFLAG_WHITESPACE) != 0;
0586:            }
0587:
0588:            /**
0589:             * This method checks the given character if it is a separator.
0590:             *
0591:             * @param testChar  check this character
0592:             * @return <code>true</code> if the given character is a separator,
0593:             *         <code>false</code> otherwise
0594:             */
0595:            public boolean isSeparator(char testChar) {
0596:                try {
0597:                    return (_charFlags[testChar] & CHARFLAG_SEPARATOR) != 0;
0598:                } catch (ArrayIndexOutOfBoundsException ex) {
0599:                    Integer extFlags = (Integer) _extCharFlags.get(new Integer(
0600:                            testChar));
0601:                    return (extFlags != null && (extFlags.intValue() & CHARFLAG_SEPARATOR) != 0);
0602:                }
0603:            }
0604:
0605:            /**
0606:             * This method can be used by a {@link de.susebox.jtopas.Tokenizer} implementation 
0607:             * for a fast detection if special sequence checking must be performed at all. 
0608:             * If the method returns <code>false</code> time-consuming preparations can be 
0609:             * skipped.
0610:             *
0611:             * @return  <code>true</code> if there actually are special sequences, comments
0612:             *          or strings that can be tested for a match, <code>false</code> otherwise.
0613:             */
0614:            public boolean hasSequenceCommentOrString() {
0615:                synchronized (_sequences) {
0616:                    return (_sequences[0] != null || _sequences[1] != null);
0617:                }
0618:            }
0619:
0620:            /**
0621:             * This method checks if a given range of data starts with a special sequence,
0622:             * a comment or a string. These three types of token are tested together since
0623:             * both comment and string prefixes are ordinary special sequences. Only the 
0624:             * actions performed <strong>after</strong> a string or comment has been detected
0625:             * are different.
0626:             *<br>
0627:             * The method returns <code>null</code> if no special sequence, comment or string 
0628:             * matches the leading part of the data range given through the
0629:             * {@link DataProvider}.
0630:             *<br>
0631:             * In cases of strings or comments, the return value contains the description
0632:             * for the introducing character sequence, <strong>NOT</strong> the whole
0633:             * string or comment. The reading of the rest of the string or comment is done
0634:             * by the calling {@link de.susebox.jtopas.Tokenizer}.
0635:             *
0636:             * @param   dataProvider  the source to get the data range from
0637:             * @return  a {@link de.susebox.jtopas.TokenizerProperty} if a special sequence, 
0638:             *          comment or string could be detected, <code>null</code> otherwise
0639:             * @throws  TokenizerException failure while reading more data
0640:             * @throws  NullPointerException  if no {@link DataProvider} is given
0641:             */
0642:            public TokenizerProperty startsWithSequenceCommentOrString(
0643:                    DataProvider dataProvider) throws TokenizerException,
0644:                    NullPointerException {
0645:                // we need the longest possible match
0646:                synchronized (_sequences) {
0647:                    TokenizerProperty caseProp = (_sequences[0] != null) ? _sequences[0]
0648:                            .startsWithSequenceCommentOrString(dataProvider)
0649:                            : null;
0650:
0651:                    TokenizerProperty noCaseProp = (_sequences[1] != null) ? _sequences[1]
0652:                            .startsWithSequenceCommentOrString(dataProvider)
0653:                            : null;
0654:
0655:                    if (noCaseProp == null) {
0656:                        return caseProp;
0657:                    } else if (caseProp == null) {
0658:                        return noCaseProp;
0659:                    } else if (caseProp.getImages()[0].length() >= noCaseProp
0660:                            .getImages()[0].length()) {
0661:                        return caseProp;
0662:                    } else {
0663:                        return noCaseProp;
0664:                    }
0665:                }
0666:            }
0667:
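            /*
             * Editor's illustrative sketch (not part of the original JTopas source):
             * comments and strings are registered as special sequences, so this method only
             * reports the introducing sequence; the calling Tokenizer reads the rest. The
             * convenience methods below are assumed from the TokenizerProperties interface,
             * and "props" stands for a configured StandardTokenizerProperties instance.
             *
             *   props.addLineComment("//");
             *   props.addString("\"", "\"", "\\");
             *   // For input starting with "// rest of line", the method returns the
             *   // TokenizerProperty registered for "//", not the whole comment.
             */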
0668:            /**
0669:             * This method returns the length of the longest special sequence, comment or
0670:             * string prefix that is known to this <code>SequenceHandler</code>. When
0671:             * calling {@link #startsWithSequenceCommentOrString}, the passed {@link DataProvider}
0672:             * parameter will supply at least this number of characters (see {@link DataProvider#getLength}).
0673:             * If fewer characters are provided, the end of the input has been reached.
0674:             *
0675:             * @return  the number of characters needed in the worst case to identify a 
0676:             *          special sequence
0677:             */
0678:            public int getSequenceMaxLength() {
0679:                int maxLength = 0;
0680:
0681:                synchronized (_sequences) {
0682:                    if (_sequences[0] != null) {
0683:                        maxLength = _sequences[0].getSequenceMaxLength();
0684:                    }
0685:                    if (_sequences[1] != null
0686:                            && _sequences[1].getSequenceMaxLength() > maxLength) {
0687:                        maxLength = _sequences[1].getSequenceMaxLength();
0688:                    }
0689:                }
0690:                return maxLength;
0691:            }
0692:
0693:            /**
0694:             * This method can be used by a {@link de.susebox.jtopas.Tokenizer} implementation 
0695:             * for a fast detection if keyword matching must be performed at all. If the method
0696:             * returns <code>false</code> time-consuming preparations can be skipped.
0697:             *
0698:             * @return  <code>true</code> if there actually are keywords that can be tested
0699:             *          for a match, <code>false</code> otherwise.
0700:             */
0701:            public boolean hasKeywords() {
0702:                synchronized (_keywords) {
0703:                    return (_keywords[0] != null || _keywords[1] != null);
0704:                }
0705:            }
0706:
0707:            /**
0708:             * This method checks if the character range given through the 
0709:             * {@link DataProvider} comprises a keyword.
0710:             *
0711:             * @param   dataProvider  the source to get the data from, that are checked
0712:             * @return  a {@link de.susebox.jtopas.TokenizerProperty} if a keyword could be 
0713:             *          found, <code>null</code> otherwise
0714:             * @throws  TokenizerException failure while reading more data
0715:             * @throws  NullPointerException  if no {@link DataProvider} is given
0716:             */
0717:            public TokenizerProperty isKeyword(DataProvider dataProvider)
0718:                    throws TokenizerException, NullPointerException {
0719:                synchronized (_keywords) {
0720:                    TokenizerProperty prop;
0721:
0722:                    if (_keywords[0] != null) {
0723:                        prop = _keywords[0].isKeyword(dataProvider);
0724:                    } else {
0725:                        prop = null;
0726:                    }
0727:                    if (prop == null && _keywords[1] != null) {
0728:                        prop = _keywords[1].isKeyword(dataProvider);
0729:                    }
0730:                    return prop;
0731:                }
0732:            }
0733:
0734:            /**
0735:             * This method can be used by a {@link de.susebox.jtopas.Tokenizer} implementation 
0736:             * for a fast detection if pattern matching must be performed at all. If the method
0737:             * returns <code>false</code> time-consuming preparations can be skipped.
0738:             *
0739:             * @return  <code>true</code> if there actually are patterns that can be tested
0740:             *          for a match, <code>false</code> otherwise.
0741:             */
0742:            public boolean hasPattern() {
0743:                synchronized (_patterns) {
0744:                    return (_patterns.size() > 0);
0745:                }
0746:            }
0747:
0748:            /**
0749:             * This method checks if the start of a character range given through the 
0750:             * {@link DataProvider} matches a pattern.
0751:             *
0752:             * @param   dataProvider    the source to get the data from
0753:             * @return  a {@link PatternHandler.Result} object or <code>null</code> if no
0754:             *          match was found
0755:             * @throws  TokenizerException    generic exception
0756:             * @throws  NullPointerException  if no {@link DataProvider} is given
0757:             */
0758:            public PatternHandler.Result matches(DataProvider dataProvider)
0759:                    throws TokenizerException, NullPointerException {
0760:                synchronized (_patterns) {
0761:                    int longestMatch = 0;
0762:                    PatternHandler.Result bestResult = null;
0763:
0764:                    // only get the string if patterns are available
0765:                    for (int index = 0; index < _patterns.size(); ++index) {
0766:                        PatternMatcher data = (PatternMatcher) _patterns
0767:                                .get(index);
0768:                        PatternHandler.Result result = data
0769:                                .matches(dataProvider);
0770:
0771:                        if (result != null) {
0772:                            if (bestResult == null
0773:                                    || bestResult.getLengthOfMatch() < result
0774:                                            .getLengthOfMatch()) {
0775:                                bestResult = result;
0776:                            }
0777:                        }
0778:                    }
0779:
0780:                    // return the best result
0781:                    return bestResult;
0782:                }
0783:            }
0784:
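            /*
             * Editor's illustrative sketch (not part of the original JTopas source): since
             * matches() keeps the result with the greatest getLengthOfMatch(), a more
             * specific pattern wins over a shorter overlapping one. addPattern is assumed
             * from the TokenizerProperties interface; the regular expressions and the
             * "props" instance are examples only.
             *
             *   props.addPattern("[0-9]+");              // integer numbers
             *   props.addPattern("[0-9]+\\.[0-9]+");     // floating-point numbers
             *   // For input starting with "3.14", the floating-point pattern matches four
             *   // characters and is preferred over the one-character integer match.
             */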
0785:            //---------------------------------------------------------------------------
0786:            // Implementation
0787:            //
0788:
0789:            /**
0790:             * Registering a pattern with an associated object. The method assumes that the 
0791:             * given pattern property has been checked for not being null, having a non-empty 
0792:             * pattern image and normalized flags ({@link AbstractTokenizerProperties#normalizeFlags}).
0793:             * See the method description in {@link AbstractTokenizerProperties}.
0794:             *
0795:             * @param   patternProp     the regular expression to be added
0796:             * @return  the replaced pattern property or <code>null</code>
0797:             * @throws  IllegalArgumentException if pattern matching is not available
0798:             */
0799:            protected TokenizerProperty addPattern(TokenizerProperty patternProp)
0800:                    throws IllegalArgumentException {
0801:                // construct the pattern
0802:                PatternMatcher data = null;
0803:                String pattern = patternProp.getImages()[0];
0804:
0805:                try {
0806:                    data = new PatternMatcher(patternProp, getParseFlags());
0807:                } catch (Throwable ex) {
0808:                    throw new ExtIllegalArgumentException(ex,
0809:                            "Pattern matching is not available (use JDK 1.4 or above).");
0810:                }
0811:
0812:                // Register pattern. First search for existing one
0813:                for (int index = 0; index < _patterns.size(); ++index) {
0814:                    PatternMatcher oldData = (PatternMatcher) _patterns
0815:                            .get(index);
0816:                    TokenizerProperty oldProp = oldData.getProperty();
0817:
0818:                    if (oldProp.getImages()[0].equals(pattern)) {
0819:                        _patterns.set(index, data);
0820:                        return oldProp;
0821:                    }
0822:                }
0823:
0824:                // not found -> register new pattern
0825:                _patterns.add(data);
0826:                return null;
0827:            }
0828:
0829:            /**
0830:             * Registering a keyword property. The method assumes that the given keyword 
0831:             * property has been checked for not being null, having a non-empty keyword 
0832:             * image and normalized flags ({@link AbstractTokenizerProperties#normalizeFlags}).
0833:             *
0834:             * @param   keywordProp   keyword property to register
0835:             * @return  the replaced keyword property or <code>null</code>
0836:             */
0837:            protected TokenizerProperty addKeyword(TokenizerProperty keywordProp) {
0838:                // case-sensitive keyword?
0839:                boolean noCase = isFlagSet(keywordProp, Flags.F_NO_CASE);
0840:                int arrayIdx = noCase ? 1 : 0;
0841:
0842:                // first keyword?
0843:                if (_keywords[arrayIdx] == null) {
0844:                    if (noCase) {
0845:                        _keywords[arrayIdx] = new NoCaseSequenceStore(true);
0846:                    } else {
0847:                        _keywords[arrayIdx] = new SequenceStore(true);
0848:                    }
0849:                }
0850:
0851:                // add / replace property
0852:                return _keywords[arrayIdx].addKeyword(keywordProp);
0853:            }
0854:
0855:            /**
0856:             * This method adds or replaces strings, comments and ordinary special sequences.
0857:             * The method assumes that the given special sequence property has been checked 
0858:             * for not being null, having non-empty images and normalized flags 
0859:             * ({@link AbstractTokenizerProperties#normalizeFlags}).
0860:             *
0861:             * @param   property  the description of the new sequence
0862:             * @return  the replaced special sequence property or <code>null</code>
0863:             */
0864:            protected TokenizerProperty addSpecialSequence(
0865:                    TokenizerProperty property) {
0866:                // case-sensitive sequence?
0867:                boolean noCase = isFlagSet(property, Flags.F_NO_CASE);
0868:                int arrayIdx = noCase ? 1 : 0;
0869:
0870:                // first special sequence?
0871:                if (_sequences[arrayIdx] == null) {
0872:                    if (noCase) {
0873:                        _sequences[arrayIdx] = new NoCaseSequenceStore(false);
0874:                    } else {
0875:                        _sequences[arrayIdx] = new SequenceStore(false);
0876:                    }
0877:                }
0878:
0879:                // add / replace property
0880:                return _sequences[arrayIdx].addSpecialSequence(property);
0881:            }
0882:
0883:            /**
0884:             * Sets or removes the flags corresponding to type and case-sensitivity from the
0885:             * character flags tables.
0886:             *
0887:             * @param set   the character set to handle (may contain ranges)
0888:             * @param type  token type for the characters ({@link Token#WHITESPACE} or {@link Token#SEPARATOR})
0889:             * @param setIt if <code>true</code> the appropriate flags will be set, otherwise removed
0890:             */
0891:            private void putCharSet(String set, int type, boolean setIt) {
0892:                // which flags ?
0893:                int charFlags = 0;
0894:
0895:                switch (type) {
0896:                case Token.WHITESPACE:
0897:                    charFlags = CHARFLAG_WHITESPACE;
0898:                    break;
0899:                case Token.SEPARATOR:
0900:                    charFlags = CHARFLAG_SEPARATOR;
0901:                    break;
0902:                }
0903:
0904:                // analyze the given set
0905:                int length = (set != null) ? set.length() : 0;
0906:                char start, end, setChar;
0907:
0908:                for (int ii = 0; ii < length; ++ii) {
0909:                    setChar = set.charAt(ii);
0910:
0911:                    switch (setChar) {
0912:                    case '-':
0913:                        start = (ii > 0) ? set.charAt(ii - 1) : 0;
0914:                        end = (ii < length - 1) ? set.charAt(ii + 1) : 0xFFFF;
0915:                        ii += 2;
0916:                        break;
0917:
0918:                    case '\\':
0919:                        setChar = (ii + 1 >= length) ? 0 : set.charAt(ii + 1);
0920:                        ii++;
0921:                        /* no break */
0922:
0923:                    default:
0924:                        start = end = setChar;
0925:                    }
0926:
0927:                    // put flags
0928:                    for (char index = start; index <= end; ++index) {
0929:                        char currChar = index;
0930:
0931:                        do {
0932:                            if (currChar < _charFlags.length) {
0933:                                // one-byte characters 
0934:                                if (setIt) {
0935:                                    _charFlags[currChar] |= charFlags;
0936:                                } else {
0937:                                    _charFlags[currChar] &= ~charFlags;
0938:                                }
0939:
0940:                            } else {
0941:                                // longer characters
0942:                                Integer key = new Integer(currChar);
0943:                                Integer extFlags = (Integer) _extCharFlags
0944:                                        .get(key);
0945:                                // a missing entry counts as no flags set (avoids a NullPointerException)
0946:                                int oldExtFlags = (extFlags != null) ? extFlags.intValue() : 0;
0947:                                if (setIt) {
0948:                                    extFlags = new Integer(oldExtFlags | charFlags);
0949:                                } else {
0950:                                    extFlags = new Integer(oldExtFlags & ~charFlags);
0951:                                }
0952:
0953:                                _extCharFlags.put(key, extFlags);
0954:                            }
0955:
0956:                            // settings must be also done for the upper/lowercase variant 
0957:                            if (Character.isLowerCase(currChar)) {
0958:                                currChar = Character.toUpperCase(currChar);
0959:                            } else if (Character.isUpperCase(currChar)) {
0960:                                currChar = Character.toLowerCase(currChar);
0961:                            }
0962:                        } while ((_flags & Flags.F_NO_CASE) != 0
0963:                                && currChar != index);
0964:                    }
0965:                }
0966:            }
0967:
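            /*
             * Editor's illustrative sketch (not part of the original JTopas source): the
             * whitespace and separator sets accepted by setWhitespaces/setSeparators may
             * contain ranges, which putCharSet expands into the character flag tables.
             * "props" stands for a configured StandardTokenizerProperties instance.
             *
             *   props.setSeparators(",;:0-9");   // marks ',', ';', ':' and the digits 0-9
             *   props.isSeparator('5');          // true: '5' lies inside the range 0-9
             *   props.isSeparator('a');          // false
             *
             * With Flags.F_NO_CASE set, the upper/lowercase counterpart of each flagged
             * character is marked as well.
             */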
0968:            //---------------------------------------------------------------------------
0969:            // Class members
0970:            //
0971:
0972:            /**
0973:             * character flag for whitespaces
0974:             */
0975:            public static final int CHARFLAG_WHITESPACE = 1;
0976:
0977:            /**
0978:             * character flag for separators
0979:             */
0980:            public static final int CHARFLAG_SEPARATOR = 2;
0981:
0982:            //---------------------------------------------------------------------------
0983:            // Members
0984:            //
0985:
0986:            /**
0987:             * array containing the flags for whitespaces and separators
0988:             */
0989:            protected int _charFlags[] = new int[256];
0990:
0991:            /**
0992:             * Map with flags for characters beyond 256;
0993:             */
0994:            protected HashMap _extCharFlags = new HashMap();
0995:
0996:            /**
0997:             * current whitespace characters including character ranges.
0998:             */
0999:            protected String _whitespacesCase = DEFAULT_WHITESPACES;
1000:
1001:            /**
1002:             * current whitespace characters including character ranges. Case is ignored.
1003:             */
1004:            protected String _whitespacesNoCase = "";
1005:
1006:            /**
1007:             * current separator characters including character ranges.
1008:             */
1009:            protected String _separatorsCase = DEFAULT_SEPARATORS;
1010:
1011:            /**
1012:             * current separator characters including character ranges. Case is ignored.
1013:             */
1014:            protected String _separatorsNoCase = "";
1015:
1016:            /**
1017:             * The first element is the {@link de.susebox.jtopas.impl.SequenceStore} for 
1018:             * the case-sensitive sequences, the second is for the case-insensitive ones.
1019:             */
1020:            protected SequenceStore[] _sequences = new SequenceStore[2];
1021:
1022:            /**
1023:             * Like the array {@link #_sequences} this two-element Array contains two
1024:             * {@link de.susebox.jtopas.impl.SequenceStore}, the first for the case-sensitive 
1025:             * keywords, the second for the case-insensitive ones.
1026:             */
1027:            protected SequenceStore[] _keywords = new SequenceStore[2];
1028:
1029:            /**
1030:             * This array contains the patterns
1031:             */
1032:            protected ArrayList _patterns = new ArrayList();
1033:
1034:            /**
1035:             * Which regular expression parser to use
1036:             */
1037:            private Class _patternClass = null;
1038:
1039:            /**
1040:             * A buffer used for pattern matching
1041:             */
1042:            private StringBuffer _foundMatch = new StringBuffer();
1043:        }
1044:
1045:        //---------------------------------------------------------------------------
1046:        // inner classes
1047:        //
1048:
1049:        /**
1050:         * Instances of this inner class are returned by a call to 
1051:         * {@link TokenizerProperties#getProperties}.
1052:         * Each element of the enumeration contains a {@link TokenizerProperty} element.
1053:         */
1054:        final class FullIterator implements  Iterator {
1055:
1056:            /**
1057:             * constructor taking the calling {@link TokenizerProperties} object to retrieve
1058:             * the members holding {@link TokenizerProperty} elements which are iterated by 
1059:             * this <code>FullIterator</code> instance.
1060:             *
1061:             * @param parent  the calling {@link StandardTokenizerProperties} instance
1063:             */
1064:            public FullIterator(StandardTokenizerProperties parent) {
1065:                _parent = parent;
1066:
1067:                // create list of iterators
1068:                _iterators = new Object[3];
1069:                _iterators[0] = new SpecialSequencesIterator(parent,
1070:                        parent._keywords, Token.KEYWORD);
1071:                _iterators[1] = new SpecialSequencesIterator(parent,
1072:                        parent._sequences, 0);
1073:                _iterators[2] = new PatternIterator(parent);
1074:                _currIndex = 0;
1075:            }
1076:
1077:            /**
1078:             * Tests whether there is another element in the iterated set or not. See
1079:             * {@link java.util.Iterator} for details.
1080:             *
1081:             * @return <code>true</code> if another call to {@link #next} will return an object,
1082:             *        <code>false</code> otherwise
1083:             */
1084:            public boolean hasNext() {
1085:                synchronized (this ) {
1086:                    while (_currIndex < _iterators.length) {
1087:                        Iterator iter = (Iterator) _iterators[_currIndex];
1088:
1089:                        if (iter.hasNext()) {
1090:                            return true;
1091:                        }
1092:                        _currIndex++;
1093:                    }
1094:                    return false;
1095:                }
1096:            }
1097:
1098:            /**
1099:             * Retrieve the next element in the iterated set. See {@link java.util.Iterator} 
1100:             * for details.
1101:             *
1102:             * @return the next element or <code>null</code> if there is none
1103:             */
1104:            public Object next() {
1105:                if (hasNext()) {
1106:                    synchronized (this ) {
1107:                        Iterator iter = (Iterator) _iterators[_currIndex];
1108:                        return iter.next();
1109:                    }
1110:                } else {
1111:                    return null;
1112:                }
1113:            }
1114:
1115:            /**
1116:             * Removes the element most recently returned by {@link #next} from the 
1117:             * underlying collection. See {@link java.util.Iterator} for details.
1120:             */
1121:            public void remove() {
1122:                if (_currIndex < _iterators.length) {
1123:                    Iterator iter = (Iterator) _iterators[_currIndex];
1124:                    iter.remove();
1125:                }
1126:            }
1127:
1128:            // members
1129:            private StandardTokenizerProperties _parent = null;
1130:            private Object[] _iterators = null;
1131:            private int _currIndex = -1;
1132:        }
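
        /*
         * Usage sketch (illustrative addition, not part of the original JTopas source):
         * a FullIterator is obtained through TokenizerProperties#getProperties and walks
         * keywords, special sequences (including comments and strings) and patterns in
         * one pass. The keyword and line comment added below are arbitrary example
         * values, and the single-argument addKeyword/addLineComment overloads are
         * assumed to be available.
         *
         *    TokenizerProperties props = new StandardTokenizerProperties();
         *    props.addKeyword("if");                      // assumed overload
         *    props.addLineComment("//");                  // assumed overload
         *
         *    Iterator iter = props.getProperties();       // hands out a FullIterator
         *    while (iter.hasNext()) {
         *        TokenizerProperty prop = (TokenizerProperty) iter.next();
         *        System.out.println(prop);
         *    }
         */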
1133:
1134:        /**
1135:         * Instances of this inner class are returned by a call to {@link TokenizerProperties#getKeywords}
1136:         * or {@link TokenizerProperties#getPatterns}.
1137:         * Each element of the enumeration contains a {@link TokenizerProperty} element
1138:         * that in turn holds the keyword or pattern together with its companion.
1139:         */
1140:        final class MapIterator implements  Iterator {
1141:
1142:            /**
1143:             * constructor taking a case-sensitive and a case-insensitive {@link java.util.Map}
1144:             * which are iterated by this <code>MapIterator</code> instance.
1145:             *
1146:             * @param caseSensitiveMap    map with properties where case matters
1147:             * @param caseInsensitiveMap  map with properties where case doesn't matter
1148:             */
1149:            public MapIterator(StandardTokenizerProperties parent,
1150:                    Map caseSensitiveMap, Map caseInsensitiveMap) {
1151:                synchronized (this ) {
1152:                    _parent = parent;
1153:                    if (caseSensitiveMap != null) {
1154:                        _iterators[0] = caseSensitiveMap.values().iterator();
1155:                    }
1156:                    if (caseInsensitiveMap != null) {
1157:                        _iterators[1] = caseInsensitiveMap.values().iterator();
1158:                    }
1159:                }
1160:            }
1161:
1162:            /**
1163:             * the well known method from the {@link java.util.Iterator} interface.
1164:             *
1165:             * @return <code>true</code> if there are more {@link TokenizerProperty}
1166:             *         elements, <code>false</code> otherwise
1167:             */
1168:            public boolean hasNext() {
1169:                // check the current array
1170:                synchronized (_iterators) {
1171:                    if (_iterators[0] != null) {
1172:                        if (_iterators[0].hasNext()) {
1173:                            return true;
1174:                        } else {
1175:                            _iterators[0] = null;
1176:                        }
1177:                    }
1178:                    if (_iterators[1] != null) {
1179:                        if (_iterators[1].hasNext()) {
1180:                            return true;
1181:                        } else {
1182:                            _iterators[1] = null;
1183:                        }
1184:                    }
1185:                    return false;
1186:                }
1187:            }
1188:
1189:            /**
1190:             * Retrieve the next {@link TokenizerProperty} in this enumeration. 
1191:             *
1192:             * @return the next element as a <code>TokenizerProperty</code>
1193:             * @throws NoSuchElementException if there is no more element in this iterator
1194:             */
1195:            public Object next() {
1196:                if (!hasNext()) {
1197:                    throw new NoSuchElementException();
1198:                }
1199:
1200:                synchronized (this ) {
1201:                    if (_iterators[0] != null) {
1202:                        _currentData = (TokenizerProperty) _iterators[0].next();
1203:                    } else {
1204:                        _currentData = (TokenizerProperty) _iterators[1].next();
1205:                    }
1206:                    return _currentData;
1207:                }
1208:            }
1209:
1210:            /**
1211:             * This method is similar to {@link Tokenizer#removeKeyword}.
1212:             *
1213:             * @throws  IllegalStateException if {@link #next} has not been called before or
1214:             *          <code>remove</code> has been called already after the last <code>next</code>.
1215:             */
1216:            public void remove() {
1217:                synchronized (this ) {
1218:                    // if current element is not set
1219:                    if (_currentData == null) {
1220:                        throw new IllegalStateException();
1221:                    }
1222:
1223:                    if (_iterators[0] != null) {
1224:                        _iterators[0].remove();
1225:                    } else {
1226:                        _iterators[1].remove();
1227:                    }
1228:                    _parent.notifyListeners(new TokenizerPropertyEvent(
1229:                            TokenizerPropertyEvent.PROPERTY_REMOVED,
1230:                            _currentData));
1231:                    _currentData = null;
1232:                }
1233:            }
1234:
1235:            // members
1236:            private StandardTokenizerProperties _parent = null;
1237:            private Iterator[] _iterators = new Iterator[2];
1238:            private TokenizerProperty _currentData = null;
1239:        }
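
        /*
         * Usage sketch (illustrative addition, not part of the original JTopas source):
         * a MapIterator backs TokenizerProperties#getKeywords. Removing an element
         * through the iterator also notifies registered listeners with a
         * TokenizerPropertyEvent.PROPERTY_REMOVED event. The keyword values and the
         * single-argument addKeyword overload are assumptions for illustration.
         *
         *    TokenizerProperties props = new StandardTokenizerProperties();
         *    props.addKeyword("class");
         *    props.addKeyword("interface");
         *
         *    Iterator keywords = props.getKeywords();
         *    if (keywords.hasNext()) {
         *        keywords.next();
         *        keywords.remove();    // listeners receive PROPERTY_REMOVED
         *    }
         */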
1240:
1241:        /**
1242:         * Iterator for comments, strings and special sequences.
1243:         * Instances of this inner class are returned when a call to one of the methods
1244:         *<ul><li>
1245:         *    {@link #getBlockComments}
1246:         *</li><li>
1247:         *    {@link #getLineComments}
1248:         *</li><li>
1249:         *    {@link #getStrings}
1250:         *</li><li>
1251:         *    {@link #getSpecialSequences}
1252:         *</li></ul>
1253:         * is made. Each element of the enumeration contains a {@link TokenizerProperty}
1254:         * element that in turn holds the comment, special sequence etc. together with
1255:         * its companion.
1256:         */
1257:        final class SpecialSequencesIterator implements  Iterator {
1258:
1259:            /**
1260:             * constructor taking the calling {@link StandardTokenizerProperties} instance and the type of the
1261:             * {@link TokenizerProperty}. If the type is 0, special sequences, line and 
1262:             * block comments are all returned by one iterator.
1263:             *
1264:             * @param parent  the calling tokenizer
1265:             * @param stores  which array of {@link de.susebox.jtopas.impl.SequenceStore} to use
1266:             * @param type    type of the <code>TokenizerProperty</code> 
1267:             */
1268:            public SpecialSequencesIterator(StandardTokenizerProperties parent,
1269:                    SequenceStore[] stores, int type) {
1270:                _type = type;
1271:                _parent = parent;
1272:                _stores = stores;
1273:            }
1274:
1275:            /**
1276:             * the well known method from the {@link java.util.Iterator} interface.
1277:             *
1278:             * @return <code>true</code> if there are more {@link TokenizerProperty}
1279:             *         elements, <code>false</code> otherwise
1280:             */
1281:            public boolean hasNext() {
1282:                synchronized (this ) {
1283:                    if (_currentIterator != null && _currentIterator.hasNext()) {
1284:                        return true;
1285:                    }
1286:
1287:                    while (_stores != null && ++_currentIndex < _stores.length) {
1288:                        if (_stores[_currentIndex] != null) {
1289:                            _currentIterator = _stores[_currentIndex]
1290:                                    .getSpecialSequences(_type);
1291:                            if (_currentIterator.hasNext()) {
1292:                                return true;
1293:                            }
1294:                        }
1295:                    }
1296:                    return false;
1297:                }
1298:            }
1299:
1300:            /**
1301:             * Retrieve the next {@link TokenizerProperty} in this enumeration.
1302:             *
1303:             * @return a {@link TokenizerProperty} of the desired type
1304:             * @throws NoSuchElementException if there is no more element in this iterator
1305:             */
1306:            public Object next() throws NoSuchElementException {
1307:                synchronized (this ) {
1308:                    if (!hasNext()) {
1309:                        throw new NoSuchElementException();
1310:                    }
1311:                    _currentElement = (TokenizerProperty) _currentIterator
1312:                            .next();
1313:                    return _currentElement;
1314:                }
1315:            }
1316:
1317:            /**
1318:             * Remove the current special sequence entry from the collection. This is an
1319:             * alternative to {@link Tokenizer#removeSpecialSequence}.
1320:             *
1321:             * @throws  IllegalStateException if {@link #next} has not been called before or
1322:             *          <code>remove</code> has been called already after the last <code>next</code>.
1323:             */
1324:            public void remove() throws IllegalStateException {
1325:                synchronized (this ) {
1326:                    // if current element is not set
1327:                    if (_currentElement == null) {
1328:                        throw new IllegalStateException();
1329:                    }
1330:
1331:                    // remove current element
1332:                    try {
1333:                        _currentIterator.remove();
1334:                        _parent.notifyListeners(new TokenizerPropertyEvent(
1335:                                TokenizerPropertyEvent.PROPERTY_REMOVED,
1336:                                _currentElement));
1337:                        _currentElement = null;
1338:                    } catch (Exception ex) {
1339:                        throw new ExtRuntimeException(ex,
1340:                                "While trying to remove current element of a SpecialSequencesIterator.");
1341:                    }
1342:                }
1343:            }
1344:
1345:            // members
1346:            private StandardTokenizerProperties _parent = null;
1347:            private SequenceStore[] _stores = null;
1348:            private TokenizerProperty _currentElement = null;
1349:            private Iterator _currentIterator = null;
1350:            private int _currentIndex = -1;
1351:            private int _type = Token.UNKNOWN;
1352:        }
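
        /*
         * Usage sketch (illustrative addition, not part of the original JTopas source):
         * a SpecialSequencesIterator backs getLineComments, getBlockComments, getStrings
         * and getSpecialSequences, each restricted to one TokenizerProperty type. The
         * delimiters and the shown addLineComment/addString overloads are assumptions
         * for illustration.
         *
         *    TokenizerProperties props = new StandardTokenizerProperties();
         *    props.addLineComment("//");
         *    props.addString("\"", "\"", "\\");
         *
         *    Iterator lineComments = props.getLineComments();
         *    while (lineComments.hasNext()) {
         *        System.out.println(lineComments.next());   // TokenizerProperty for "//"
         *    }
         */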
1353:
1354:        /**
1355:         * An {@link java.util.Iterator} for patterns.
1356:         */
1357:        final class PatternIterator implements  Iterator {
1358:            /**
1359:             * constructor taking the calling {@link TokenizerProperties} object.
1360:             *
1361:             * @param parent  the caller
1362:             */
1363:            public PatternIterator(StandardTokenizerProperties parent) {
1364:                _parent = parent;
1365:                synchronized (parent._patterns) {
1366:                    _iterator = parent._patterns.iterator();
1367:                }
1368:            }
1369:
1370:            /**
1371:             * the well known method from the {@link java.util.Iterator} interface.
1372:             *
1373:             * @return <code>true</code> if there are more {@link TokenizerProperty}
1374:             *         elements, <code>false</code> otherwise
1375:             */
1376:            public boolean hasNext() {
1377:                return _iterator.hasNext();
1378:            }
1379:
1380:            /**
1381:             * Retrieve the next {@link TokenizerProperty} in this enumeration. 
1382:             *
1383:             * @return  the next pattern as a <code>TokenizerProperty</code>
1384:             * @throws NoSuchElementException if there is no more element in this iterator
1385:             */
1386:            public Object next() throws NoSuchElementException {
1387:                synchronized (this ) {
1388:                    _currentData = (PatternMatcher) _iterator.next();
1389:                    return _currentData.getProperty();
1390:                }
1391:            }
1392:
1393:            /**
1394:             * Remove the current pattern property; similar to {@link Tokenizer#removeKeyword}.
1395:             */
1396:            public void remove() {
1397:                synchronized (this ) {
1398:                    if (_currentData == null) { throw new IllegalStateException(); }  // Iterator contract: no remove() before next()
1399:                    _iterator.remove();
1400:                    _parent.notifyListeners(new TokenizerPropertyEvent(
1401:                            TokenizerPropertyEvent.PROPERTY_REMOVED, _currentData.getProperty()));
1402:                }
1403:            }
1404:
1405:            // members
1406:            private StandardTokenizerProperties _parent = null;
1407:            private Iterator _iterator = null;
1408:            private PatternMatcher _currentData = null;
1409:        }
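
        /*
         * Usage sketch (illustrative addition, not part of the original JTopas source):
         * a PatternIterator backs TokenizerProperties#getPatterns; next() unwraps the
         * TokenizerProperty from the internal PatternMatcher. The regular expression and
         * the single-argument addPattern overload are assumptions for illustration.
         *
         *    TokenizerProperties props = new StandardTokenizerProperties();
         *    props.addPattern("[0-9]+");
         *
         *    Iterator patterns = props.getPatterns();
         *    while (patterns.hasNext()) {
         *        TokenizerProperty pattern = (TokenizerProperty) patterns.next();
         *        System.out.println(pattern);
         *    }
         */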