Source Code Cross Referenced for XmlPullParser.java in  » J2EE » wicket » org » apache » wicket » markup » parser » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » J2EE » wicket » org.apache.wicket.markup.parser 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         * Licensed to the Apache Software Foundation (ASF) under one or more
003:         * contributor license agreements.  See the NOTICE file distributed with
004:         * this work for additional information regarding copyright ownership.
005:         * The ASF licenses this file to You under the Apache License, Version 2.0
006:         * (the "License"); you may not use this file except in compliance with
007:         * the License.  You may obtain a copy of the License at
008:         *
009:         *      http://www.apache.org/licenses/LICENSE-2.0
010:         *
011:         * Unless required by applicable law or agreed to in writing, software
012:         * distributed under the License is distributed on an "AS IS" BASIS,
013:         * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014:         * See the License for the specific language governing permissions and
015:         * limitations under the License.
016:         */
017:        package org.apache.wicket.markup.parser;
018:
019:        import java.io.BufferedInputStream;
020:        import java.io.ByteArrayInputStream;
021:        import java.io.IOException;
022:        import java.io.InputStream;
023:        import java.text.ParseException;
024:
025:        import org.apache.wicket.markup.MarkupElement;
026:        import org.apache.wicket.util.io.FullyBufferedReader;
027:        import org.apache.wicket.util.io.XmlReader;
028:        import org.apache.wicket.util.parse.metapattern.parsers.TagNameParser;
029:        import org.apache.wicket.util.parse.metapattern.parsers.VariableAssignmentParser;
030:        import org.apache.wicket.util.resource.ResourceStreamNotFoundException;
031:
032:        /**
033:         * A fairly shallow markup pull parser which parses a markup string of a given
034:         * type of markup (for example, html, xml, vxml or wml) into ComponentTag and
035:         * RawMarkup tokens.
036:         * 
037:         * @author Jonathan Locke
038:         * @author Juergen Donnerstag
039:         */
040:        public final class XmlPullParser extends AbstractMarkupFilter implements 
041:                IXmlPullParser {
042:            /** next() must be called at least once for the Type to be valid */
043:            public static final int NOT_INITIALIZED = 0;
044:
045:            /** <name ...> */
046:            public static final int TAG = 1;
047:
048:            /** Tag body in between two tags */
049:            public static final int BODY = 2;
050:
051:            /** <!-- ... --> */
052:            public static final int COMMENT = 3;
053:
054:            /** <![CDATA[ .. ]]> */
055:            public static final int CDATA = 4;
056:
057:            /** <?...> */
058:            public static final int PROCESSING_INSTRUCTION = 5;
059:
060:            /** all other tags which look like <!.. > */
061:            public static final int SPECIAL_TAG = 6;
062:
063:            /**
064:             * Reads the xml data from an input stream and converts the chars according
065:             * to its encoding (<?xml ... encoding="..." ?>)
066:             */
067:            private XmlReader xmlReader;
068:
069:            /**
070:             * A XML independent reader which loads the whole source data into memory
071:             * and which provides convinience methods to access the data.
072:             */
073:            private FullyBufferedReader input;
074:
075:            /** temporary variable which will hold the name of the closing tag. */
076:            private String skipUntilText;
077:
078:            /** The last substring selected from the input */
079:            private CharSequence lastText;
080:
081:            /** The type of what is in lastText */
082:            private int lastType = NOT_INITIALIZED;
083:
084:            /** If lastType == TAG, than ... */
085:            private XmlTag lastTag;
086:
087:            /**
088:             * Construct.
089:             */
090:            public XmlPullParser() {
091:            }
092:
093:            /**
094:             * 
095:             * @see org.apache.wicket.markup.parser.IXmlPullParser#getEncoding()
096:             */
097:            public String getEncoding() {
098:                return this .xmlReader.getEncoding();
099:            }
100:
101:            /**
102:             * 
103:             * @see org.apache.wicket.markup.parser.IXmlPullParser#getXmlDeclaration()
104:             */
105:            public String getXmlDeclaration() {
106:                return this .xmlReader.getXmlDeclaration();
107:            }
108:
109:            /**
110:             * 
111:             * @see org.apache.wicket.markup.parser.IXmlPullParser#getInputFromPositionMarker(int)
112:             */
113:            public final CharSequence getInputFromPositionMarker(final int toPos) {
114:                return this .input.getSubstring(toPos);
115:            }
116:
117:            /**
118:             * 
119:             * @see org.apache.wicket.markup.parser.IXmlPullParser#getInput(int, int)
120:             */
121:            public final CharSequence getInput(final int fromPos,
122:                    final int toPos) {
123:                return this .input.getSubstring(fromPos, toPos);
124:            }
125:
126:            /**
127:             * Whatever will be in between the current index and the closing tag, will
128:             * be ignored (and thus treated as raw markup (text). This is useful for
129:             * tags like 'script'.
130:             * 
131:             * @throws ParseException
132:             */
133:            private final void skipUntil() throws ParseException {
134:                // this is a tag with non-XHTML text as body - skip this until the
135:                // skipUntilText is found.
136:                final int startIndex = this .input.getPosition();
137:                final int tagNameLen = this .skipUntilText.length();
138:
139:                int pos = this .input.getPosition() - 1;
140:                String endTagText = null;
141:                int lastPos = 0;
142:                while (!skipUntilText.equalsIgnoreCase(endTagText)) {
143:                    pos = this .input.find("</", pos + 1);
144:                    if ((pos == -1)
145:                            || ((pos + (tagNameLen + 2)) >= this .input.size())) {
146:                        throw new ParseException(skipUntilText
147:                                + " tag not closed (line "
148:                                + this .input.getLineNumber() + ", column "
149:                                + this .input.getColumnNumber() + ")",
150:                                startIndex);
151:                    }
152:
153:                    lastPos = pos + 2;
154:                    endTagText = this .input.getSubstring(lastPos,
155:                            lastPos + tagNameLen).toString();
156:                }
157:
158:                this .input.setPosition(pos);
159:                this .lastText = this .input.getSubstring(startIndex, pos);
160:                this .lastType = BODY;
161:
162:                // Check that the tag is properly closed
163:                lastPos = this .input.find('>', lastPos + tagNameLen);
164:                if (lastPos == -1) {
165:                    throw new ParseException("Script tag not closed (line "
166:                            + this .input.getLineNumber() + ", column "
167:                            + this .input.getColumnNumber() + ")", startIndex);
168:                }
169:
170:                // Reset the state variable
171:                this .skipUntilText = null;
172:            }
173:
174:            /**
175:             * Gets the next tag from the input string.
176:             * 
177:             * @return The extracted tag (will always be of type XmlTag).
178:             * @throws ParseException
179:             */
180:            public final boolean next() throws ParseException {
181:                // Reached end of markup file?
182:                if (this .input.getPosition() >= this .input.size()) {
183:                    return false;
184:                }
185:
186:                if (this .skipUntilText != null) {
187:                    skipUntil();
188:                    return true;
189:                }
190:
191:                // Any more tags in the markup?
192:                final int openBracketIndex = this .input.find('<');
193:
194:                // Tag or Body?
195:                if (this .input.charAt(this .input.getPosition()) != '<') {
196:                    if (openBracketIndex == -1) {
197:                        // There is no next matching tag.
198:                        this .lastText = this .input.getSubstring(-1);
199:                        this .input.setPosition(this .input.size());
200:                        this .lastType = BODY;
201:                        return true;
202:                    }
203:
204:                    this .lastText = this .input.getSubstring(openBracketIndex);
205:                    this .input.setPosition(openBracketIndex);
206:                    this .lastType = BODY;
207:                    return true;
208:                }
209:
210:                // Determine the line number
211:                this .input.countLinesTo(openBracketIndex);
212:
213:                // Get index of closing tag and advance past the tag
214:                int closeBracketIndex = this .input.find('>',
215:                        openBracketIndex + 1);
216:                if (closeBracketIndex == -1) {
217:                    throw new ParseException(
218:                            "No matching close bracket at position "
219:                                    + openBracketIndex, this .input
220:                                    .getPosition());
221:                }
222:
223:                // Get the complete tag text
224:                this .lastText = this .input.getSubstring(openBracketIndex,
225:                        closeBracketIndex + 1);
226:
227:                // Get the tagtext between open and close brackets
228:                String tagText = this .lastText.subSequence(1,
229:                        this .lastText.length() - 1).toString();
230:                if (tagText.length() == 0) {
231:                    throw new ParseException(
232:                            "Found empty tag: '<>' at position "
233:                                    + openBracketIndex, this .input
234:                                    .getPosition());
235:                }
236:
237:                // Handle special tags like <!-- and <![CDATA ...
238:                final char firstChar = tagText.charAt(0);
239:                if ((firstChar == '!') || (firstChar == '?')) {
240:                    specialTagHandling(tagText, openBracketIndex,
241:                            closeBracketIndex);
242:                    return true;
243:                }
244:
245:                // Type of the tag, to be determined next
246:                final XmlTag.Type type;
247:
248:                // If the tag ends in '/', it's a "simple" tag like <foo/>
249:                if (tagText.endsWith("/")) {
250:                    type = XmlTag.OPEN_CLOSE;
251:                    tagText = tagText.substring(0, tagText.length() - 1);
252:                } else if (tagText.startsWith("/")) {
253:                    // The tag text starts with a '/', it's a simple close tag
254:                    type = XmlTag.CLOSE;
255:                    tagText = tagText.substring(1);
256:                } else {
257:                    // It must be an open tag
258:                    type = XmlTag.OPEN;
259:
260:                    // If open tag and starts with "s" like "script" or "style", than
261:                    // ...
262:                    if ((tagText.length() > 5)
263:                            && ((tagText.charAt(0) == 's') || (tagText
264:                                    .charAt(0) == 'S'))) {
265:                        final String lowerCase = tagText.substring(0, 6)
266:                                .toLowerCase();
267:                        if (lowerCase.startsWith("script")) {
268:                            // prepare to skip everything between the open and close tag
269:                            this .skipUntilText = "script";
270:                        } else if (lowerCase.startsWith("style")) {
271:                            // prepare to skip everything between the open and close tag
272:                            this .skipUntilText = "style";
273:                        }
274:                    }
275:                }
276:
277:                // Parse remaining tag text, obtaining a tag object or null
278:                // if it's invalid
279:                this .lastTag = parseTagText(tagText);
280:                if (this .lastTag != null) {
281:                    // Populate tag fields
282:                    this .lastTag.type = type;
283:                    this .lastTag.pos = openBracketIndex;
284:                    this .lastTag.length = this .lastText.length();
285:                    this .lastTag.text = this .lastText;
286:                    this .lastTag.lineNumber = this .input.getLineNumber();
287:                    this .lastTag.columnNumber = this .input.getColumnNumber();
288:
289:                    // Move to position after the tag
290:                    this .input.setPosition(closeBracketIndex + 1);
291:                    this .lastType = TAG;
292:                    return true;
293:                } else {
294:                    throw new ParseException("Malformed tag (line "
295:                            + this .input.getLineNumber() + ", column "
296:                            + this .input.getColumnNumber() + ")",
297:                            openBracketIndex);
298:                }
299:            }
300:
301:            /**
302:             * Handle special tags like <!-- --> or <![CDATA[..]]> or <?xml>
303:             * 
304:             * @param tagText
305:             * @param openBracketIndex
306:             * @param closeBracketIndex
307:             * @throws ParseException
308:             */
309:            private void specialTagHandling(String tagText,
310:                    final int openBracketIndex, int closeBracketIndex)
311:                    throws ParseException {
312:                // Handle comments
313:                if (tagText.startsWith("!--")) {
314:                    // Normal comment section.
315:                    // Skip ahead to "-->". Note that you can not simply test for
316:                    // tagText.endsWith("--") as the comment might contain a '>'
317:                    // inside.
318:                    int pos = this .input.find("-->", openBracketIndex + 1);
319:                    if (pos == -1) {
320:                        throw new ParseException(
321:                                "Unclosed comment beginning at line:"
322:                                        + input.getLineNumber() + " column:"
323:                                        + input.getColumnNumber(),
324:                                openBracketIndex);
325:                    }
326:
327:                    pos += 3;
328:                    this .lastText = this .input.getSubstring(openBracketIndex,
329:                            pos);
330:                    this .lastType = COMMENT;
331:
332:                    // Conditional comment? <!--[if ...]>..<![endif]-->
333:                    if (tagText.startsWith("!--[if ")
334:                            && tagText.endsWith("]")
335:                            && this .lastText.toString()
336:                                    .endsWith("<![endif]-->")) {
337:                        // Actually it is no longer a comment. It is now
338:                        // up to the browser to select the section appropriate.
339:                        this .input.setPosition(closeBracketIndex + 1);
340:                    } else {
341:                        this .input.setPosition(pos);
342:                    }
343:                    return;
344:                }
345:
346:                // The closing tag of a conditional comment <!--[if IE]>...<![endif]-->
347:                if (tagText.equals("![endif]--")) {
348:                    this .lastType = COMMENT;
349:                    this .input.setPosition(closeBracketIndex + 1);
350:                    return;
351:                }
352:
353:                // CDATA sections might contain "<" which is not part of an XML tag.
354:                // Make sure escaped "<" are treated right
355:                if (tagText.startsWith("![")) {
356:                    final String startText = (tagText.length() <= 8 ? tagText
357:                            : tagText.substring(0, 8));
358:                    if (startText.toUpperCase().equals("![CDATA[")) {
359:                        int pos1 = openBracketIndex;
360:                        do {
361:                            // Get index of closing tag and advance past the tag
362:                            closeBracketIndex = findChar('>', pos1);
363:
364:                            if (closeBracketIndex == -1) {
365:                                throw new ParseException(
366:                                        "No matching close bracket at line:"
367:                                                + input.getLineNumber()
368:                                                + " column:"
369:                                                + input.getColumnNumber(),
370:                                        this .input.getPosition());
371:                            }
372:
373:                            // Get the tagtext between open and close brackets
374:                            tagText = this .input.getSubstring(
375:                                    openBracketIndex + 1, closeBracketIndex)
376:                                    .toString();
377:
378:                            pos1 = closeBracketIndex + 1;
379:                        } while (tagText.endsWith("]]") == false);
380:
381:                        // Move to position after the tag
382:                        this .input.setPosition(closeBracketIndex + 1);
383:
384:                        this .lastText = tagText;
385:                        this .lastType = CDATA;
386:                        return;
387:                    }
388:                }
389:
390:                if (tagText.charAt(0) == '?') {
391:                    this .lastType = PROCESSING_INSTRUCTION;
392:
393:                    // Move to position after the tag
394:                    this .input.setPosition(closeBracketIndex + 1);
395:                    return;
396:                }
397:
398:                // Move to position after the tag
399:                this .lastType = SPECIAL_TAG;
400:                this .input.setPosition(closeBracketIndex + 1);
401:            }
402:
403:            /**
404:             * Gets the next tag from the input string.
405:             * 
406:             * @return The extracted tag (will always be of type XmlTag).
407:             * @throws ParseException
408:             */
409:            public final MarkupElement nextTag() throws ParseException {
410:                while (next()) {
411:                    switch (this .lastType) {
412:                    case TAG:
413:                        return this .lastTag;
414:
415:                    case BODY:
416:                        break;
417:
418:                    case COMMENT:
419:                        break;
420:
421:                    case CDATA:
422:                        break;
423:
424:                    case PROCESSING_INSTRUCTION:
425:                        break;
426:
427:                    case SPECIAL_TAG:
428:                        break;
429:                    }
430:                }
431:
432:                return null;
433:            }
434:
435:            /**
436:             * Find the char but ignore any text within ".." and '..'
437:             * 
438:             * @param ch
439:             *            The character to search
440:             * @param startIndex
441:             *            Start index
442:             * @return -1 if not found, else the index
443:             */
444:            private int findChar(final char ch, int startIndex) {
445:                char quote = 0;
446:
447:                for (; startIndex < this .input.size(); startIndex++) {
448:                    final char charAt = this .input.charAt(startIndex);
449:                    if (quote != 0) {
450:                        if (quote == charAt) {
451:                            quote = 0;
452:                        }
453:                    } else if ((charAt == '"') || (charAt == '\'')) {
454:                        quote = charAt;
455:                    } else if (charAt == ch) {
456:                        return startIndex;
457:                    }
458:                }
459:
460:                return -1;
461:            }
462:
463:            /**
464:             * Parse the given string.
465:             * <p>
466:             * Note: xml character encoding is NOT applied. It is assumed the input
467:             * provided does have the correct encoding already.
468:             * 
469:             * @param string
470:             *            The input string
471:             * @throws IOException
472:             *             Error while reading the resource
473:             * @throws ResourceStreamNotFoundException
474:             *             Resource not found
475:             */
476:            public void parse(final CharSequence string) throws IOException,
477:                    ResourceStreamNotFoundException {
478:                parse(new ByteArrayInputStream(string.toString().getBytes()),
479:                        null);
480:            }
481:
482:            /**
483:             * Reads and parses markup from an input stream, using UTF-8 encoding by
484:             * default when not specified in XML declaration.
485:             * 
486:             * @param in
487:             *            The input stream to read and parse
488:             * @throws IOException
489:             * @throws ResourceStreamNotFoundException
490:             */
491:            public void parse(final InputStream in) throws IOException,
492:                    ResourceStreamNotFoundException {
493:                // When XML declaration does not specify encoding, it defaults to UTF-8
494:                parse(in, "UTF-8");
495:            }
496:
497:            /**
498:             * Reads and parses markup from an input stream
499:             * 
500:             * @param inputStream
501:             *            The input stream to read and parse
502:             * @param encoding
503:             *            The default character encoding of the input
504:             * @throws IOException
505:             * @throws ResourceStreamNotFoundException
506:             */
507:            public void parse(final InputStream inputStream,
508:                    final String encoding) throws IOException,
509:                    ResourceStreamNotFoundException {
510:                try {
511:                    this .xmlReader = new XmlReader(new BufferedInputStream(
512:                            inputStream, 4000), encoding);
513:                    this .input = new FullyBufferedReader(this .xmlReader);
514:                } finally {
515:                    inputStream.close();
516:                    if (this .xmlReader != null) {
517:                        this .xmlReader.close();
518:                    }
519:                }
520:            }
521:
522:            /**
523:             * 
524:             * @see org.apache.wicket.markup.parser.IXmlPullParser#setPositionMarker()
525:             */
526:            public final void setPositionMarker() {
527:                this .input.setPositionMarker(this .input.getPosition());
528:            }
529:
530:            /**
531:             * 
532:             * @see org.apache.wicket.markup.parser.IXmlPullParser#setPositionMarker(int)
533:             */
534:            public final void setPositionMarker(final int pos) {
535:                this .input.setPositionMarker(pos);
536:            }
537:
538:            /**
539:             * 
540:             * @see java.lang.Object#toString()
541:             */
542:            public String toString() {
543:                return this .input.toString();
544:            }
545:
546:            /**
547:             * Parses the text between tags. For example, "a href=foo.html".
548:             * 
549:             * @param tagText
550:             *            The text between tags
551:             * @return A new Tag object or null if the tag is invalid
552:             * @throws ParseException
553:             */
554:            private XmlTag parseTagText(final String tagText)
555:                    throws ParseException {
556:                // Get the length of the tagtext
557:                final int tagTextLength = tagText.length();
558:
559:                // If we match tagname pattern
560:                final TagNameParser tagnameParser = new TagNameParser(tagText);
561:                if (tagnameParser.matcher().lookingAt()) {
562:                    final XmlTag tag = new XmlTag();
563:
564:                    // Extract the tag from the pattern matcher
565:                    tag.name = tagnameParser.getName();
566:                    tag.namespace = tagnameParser.getNamespace();
567:
568:                    // Are we at the end? Then there are no attributes, so we just
569:                    // return the tag
570:                    int pos = tagnameParser.matcher().end(0);
571:                    if (pos == tagTextLength) {
572:                        return tag;
573:                    }
574:
575:                    // Extract attributes
576:                    final VariableAssignmentParser attributeParser = new VariableAssignmentParser(
577:                            tagText);
578:                    while (attributeParser.matcher().find(pos)) {
579:                        // Get key and value using attribute pattern
580:                        String value = attributeParser.getValue();
581:
582:                        // In case like <html xmlns:wicket> will the value be null
583:                        if (value == null) {
584:                            value = "";
585:                        }
586:
587:                        // Set new position to end of attribute
588:                        pos = attributeParser.matcher().end(0);
589:
590:                        // Chop off double quotes or single quotes
591:                        if (value.startsWith("\"") || value.startsWith("\'")) {
592:                            value = value.substring(1, value.length() - 1);
593:                        }
594:
595:                        // Trim trailing whitespace
596:                        value = value.trim();
597:
598:                        // Get key
599:                        final String key = attributeParser.getKey();
600:
601:                        // Put the attribute in the attributes hash
602:                        if (null != tag.put(key, value)) {
603:                            throw new ParseException(
604:                                    "Same attribute found twice: " + key,
605:                                    this .input.getPosition());
606:                        }
607:
608:                        // The input has to match exactly (no left over junk after
609:                        // attributes)
610:                        if (pos == tagTextLength) {
611:                            return tag;
612:                        }
613:                    }
614:
615:                    return tag;
616:                }
617:
618:                return null;
619:            }
620:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.