Source Code Cross Referenced for HTMLParser.java in  » Testing » htmlunit » com » gargoylesoftware » htmlunit » html » Java Source Code / Java Documentation | Java Source Code and Java Documentation

Java Source Code / Java Documentation
1. 6.0 JDK Core
2. 6.0 JDK Modules
3. 6.0 JDK Modules com.sun
4. 6.0 JDK Modules com.sun.java
5. 6.0 JDK Modules sun
6. 6.0 JDK Platform
7. Ajax
8. Apache Harmony Java SE
9. Aspect oriented
10. Authentication Authorization
11. Blogger System
12. Build
13. Byte Code
14. Cache
15. Chart
16. Chat
17. Code Analyzer
18. Collaboration
19. Content Management System
20. Database Client
21. Database DBMS
22. Database JDBC Connection Pool
23. Database ORM
24. Development
25. EJB Server geronimo
26. EJB Server GlassFish
27. EJB Server JBoss 4.2.1
28. EJB Server resin 3.1.5
29. ERP CRM Financial
30. ESB
31. Forum
32. GIS
33. Graphic Library
34. Groupware
35. HTML Parser
36. IDE
37. IDE Eclipse
38. IDE Netbeans
39. Installer
40. Internationalization Localization
41. Inversion of Control
42. Issue Tracking
43. J2EE
44. JBoss
45. JMS
46. JMX
47. Library
48. Mail Clients
49. Net
50. Parser
51. PDF
52. Portal
53. Profiler
54. Project Management
55. Report
56. RSS RDF
57. Rule Engine
58. Science
59. Scripting
60. Search Engine
61. Security
62. Servlet Container
63. Source Control
64. Swing Library
65. Template Engine
66. Test Coverage
67. Testing
68. UML
69. Web Crawler
70. Web Framework
71. Web Mail
72. Web Server
73. Web Services
74. Web Services apache cxf 2.0.1
75. Web Services AXIS2
76. Wiki Engine
77. Workflow Engines
78. XML
79. XML UI
Java
Java Tutorial
Java Open Source
Jar File Download
Java Articles
Java Products
Java by API
Photoshop Tutorials
Maya Tutorials
Flash Tutorials
3ds-Max Tutorials
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
ASP.Net
ASP.NET Tutorial
JavaScript DHTML
JavaScript Tutorial
JavaScript Reference
HTML / CSS
HTML CSS Reference
C / ANSI-C
C Tutorial
C++
C++ Tutorial
Ruby
PHP
Python
Python Tutorial
Python Open Source
SQL Server / T-SQL
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
PostgreSQL
SQL / MySQL
MySQL Tutorial
VB.Net
VB.Net Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » Testing » htmlunit » com.gargoylesoftware.htmlunit.html 
Source Cross Referenced  Class Diagram Java Document (Java Doc) 


001:        /*
002:         * Copyright (c) 2002-2008 Gargoyle Software Inc. All rights reserved.
003:         *
004:         * Redistribution and use in source and binary forms, with or without
005:         * modification, are permitted provided that the following conditions are met:
006:         *
007:         * 1. Redistributions of source code must retain the above copyright notice,
008:         *    this list of conditions and the following disclaimer.
009:         * 2. Redistributions in binary form must reproduce the above copyright notice,
010:         *    this list of conditions and the following disclaimer in the documentation
011:         *    and/or other materials provided with the distribution.
012:         * 3. The end-user documentation included with the redistribution, if any, must
013:         *    include the following acknowledgment:
014:         *
015:         *       "This product includes software developed by Gargoyle Software Inc.
016:         *        (http://www.GargoyleSoftware.com/)."
017:         *
018:         *    Alternately, this acknowledgment may appear in the software itself, if
019:         *    and wherever such third-party acknowledgments normally appear.
020:         * 4. The name "Gargoyle Software" must not be used to endorse or promote
021:         *    products derived from this software without prior written permission.
022:         *    For written permission, please contact info@GargoyleSoftware.com.
023:         * 5. Products derived from this software may not be called "HtmlUnit", nor may
024:         *    "HtmlUnit" appear in their name, without prior written permission of
025:         *    Gargoyle Software Inc.
026:         *
027:         * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
028:         * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
029:         * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARGOYLE
030:         * SOFTWARE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
031:         * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
032:         * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
033:         * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
034:         * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
035:         * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
036:         * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
037:         */
038:        package com.gargoylesoftware.htmlunit.html;
039:
040:        import java.io.IOException;
041:        import java.io.StringReader;
042:        import java.lang.reflect.InvocationTargetException;
043:        import java.net.URL;
044:        import java.nio.charset.Charset;
045:        import java.util.HashMap;
046:        import java.util.Map;
047:        import java.util.Stack;
048:
049:        import org.apache.xerces.parsers.AbstractSAXParser;
050:        import org.apache.xerces.util.DefaultErrorHandler;
051:        import org.apache.xerces.xni.XNIException;
052:        import org.apache.xerces.xni.parser.XMLInputSource;
053:        import org.apache.xerces.xni.parser.XMLParseException;
054:        import org.cyberneko.html.HTMLConfiguration;
055:        import org.xml.sax.Attributes;
056:        import org.xml.sax.ContentHandler;
057:        import org.xml.sax.Locator;
058:        import org.xml.sax.SAXException;
059:        import org.xml.sax.ext.LexicalHandler;
060:
061:        import com.gargoylesoftware.htmlunit.Assert;
062:        import com.gargoylesoftware.htmlunit.ObjectInstantiationException;
063:        import com.gargoylesoftware.htmlunit.TextUtil;
064:        import com.gargoylesoftware.htmlunit.WebResponse;
065:        import com.gargoylesoftware.htmlunit.WebWindow;
066:
067:        /**
068:         * SAX parser implementation that uses the neko {@link org.cyberneko.html.HTMLConfiguration}
069:         * to parse HTML into a HtmlUnit-specific DOM (HU-DOM) tree.
070:         * <p>
071:         * <em>Note that the parser currently does not handle CDATA or comment sections, i.e. these
072:         * do not appear in the resulting DOM tree</em>
073:         *
074:         * @version $Revision: 2132 $
075:         * @author <a href="mailto:cse@dynabean.de">Christian Sell</a>
076:         * @author David K. Taylor
077:         * @author Chris Erskine
078:         * @author Ahmed Ashour
079:         */
080:        public final class HTMLParser {
081:
082:            private static final Map ELEMENT_FACTORIES = new HashMap();
083:            private static boolean IgnoreOutsideContent_;
084:
085:            static {
086:                ELEMENT_FACTORIES.put("input", InputElementFactory.instance);
087:
088:                final DefaultElementFactory defaultElementFactory = new DefaultElementFactory();
089:                ELEMENT_FACTORIES.put(HtmlAnchor.TAG_NAME,
090:                        defaultElementFactory);
091:                ELEMENT_FACTORIES.put(HtmlApplet.TAG_NAME,
092:                        defaultElementFactory);
093:                ELEMENT_FACTORIES.put(HtmlAddress.TAG_NAME,
094:                        defaultElementFactory);
095:                ELEMENT_FACTORIES.put(HtmlArea.TAG_NAME, defaultElementFactory);
096:                ELEMENT_FACTORIES.put(HtmlBase.TAG_NAME, defaultElementFactory);
097:                ELEMENT_FACTORIES.put(HtmlBaseFont.TAG_NAME,
098:                        defaultElementFactory);
099:                ELEMENT_FACTORIES.put(HtmlBidirectionalOverride.TAG_NAME,
100:                        defaultElementFactory);
101:                ELEMENT_FACTORIES.put(HtmlBlockQuote.TAG_NAME,
102:                        defaultElementFactory);
103:                ELEMENT_FACTORIES.put(HtmlBody.TAG_NAME, defaultElementFactory);
104:                ELEMENT_FACTORIES
105:                        .put(HtmlBreak.TAG_NAME, defaultElementFactory);
106:                ELEMENT_FACTORIES.put(HtmlButton.TAG_NAME,
107:                        defaultElementFactory);
108:                ELEMENT_FACTORIES.put(HtmlCaption.TAG_NAME,
109:                        defaultElementFactory);
110:                ELEMENT_FACTORIES.put(HtmlCenter.TAG_NAME,
111:                        defaultElementFactory);
112:                ELEMENT_FACTORIES.put(HtmlTableColumn.TAG_NAME,
113:                        defaultElementFactory);
114:                ELEMENT_FACTORIES.put(HtmlTableColumnGroup.TAG_NAME,
115:                        defaultElementFactory);
116:                ELEMENT_FACTORIES.put(HtmlDefinitionDescription.TAG_NAME,
117:                        defaultElementFactory);
118:                ELEMENT_FACTORIES.put(HtmlDeletedText.TAG_NAME,
119:                        defaultElementFactory);
120:                ELEMENT_FACTORIES.put(HtmlTextDirection.TAG_NAME,
121:                        defaultElementFactory);
122:                ELEMENT_FACTORIES.put(HtmlDivision.TAG_NAME,
123:                        defaultElementFactory);
124:                ELEMENT_FACTORIES.put(HtmlDefinitionList.TAG_NAME,
125:                        defaultElementFactory);
126:                ELEMENT_FACTORIES.put(HtmlDefinitionTerm.TAG_NAME,
127:                        defaultElementFactory);
128:                ELEMENT_FACTORIES.put(HtmlFieldSet.TAG_NAME,
129:                        defaultElementFactory);
130:                ELEMENT_FACTORIES.put(HtmlFont.TAG_NAME, defaultElementFactory);
131:                ELEMENT_FACTORIES.put(HtmlForm.TAG_NAME, defaultElementFactory);
132:                ELEMENT_FACTORIES
133:                        .put(HtmlFrame.TAG_NAME, defaultElementFactory);
134:                ELEMENT_FACTORIES.put(HtmlFrameSet.TAG_NAME,
135:                        defaultElementFactory);
136:                ELEMENT_FACTORIES.put(HtmlHeader1.TAG_NAME,
137:                        defaultElementFactory);
138:                ELEMENT_FACTORIES.put(HtmlHeader2.TAG_NAME,
139:                        defaultElementFactory);
140:                ELEMENT_FACTORIES.put(HtmlHeader3.TAG_NAME,
141:                        defaultElementFactory);
142:                ELEMENT_FACTORIES.put(HtmlHeader4.TAG_NAME,
143:                        defaultElementFactory);
144:                ELEMENT_FACTORIES.put(HtmlHeader5.TAG_NAME,
145:                        defaultElementFactory);
146:                ELEMENT_FACTORIES.put(HtmlHeader6.TAG_NAME,
147:                        defaultElementFactory);
148:                ELEMENT_FACTORIES.put(HtmlHead.TAG_NAME, defaultElementFactory);
149:                ELEMENT_FACTORIES.put(HtmlHorizontalRule.TAG_NAME,
150:                        defaultElementFactory);
151:                ELEMENT_FACTORIES.put(HtmlHtml.TAG_NAME, defaultElementFactory);
152:                ELEMENT_FACTORIES.put(HtmlInlineFrame.TAG_NAME,
153:                        defaultElementFactory);
154:                ELEMENT_FACTORIES
155:                        .put(HtmlImage.TAG_NAME, defaultElementFactory);
156:                ELEMENT_FACTORIES.put(HtmlInsertedText.TAG_NAME,
157:                        defaultElementFactory);
158:                ELEMENT_FACTORIES.put(HtmlIsIndex.TAG_NAME,
159:                        defaultElementFactory);
160:                ELEMENT_FACTORIES
161:                        .put(HtmlLabel.TAG_NAME, defaultElementFactory);
162:                ELEMENT_FACTORIES.put(HtmlLegend.TAG_NAME,
163:                        defaultElementFactory);
164:                ELEMENT_FACTORIES.put(HtmlListItem.TAG_NAME,
165:                        defaultElementFactory);
166:                ELEMENT_FACTORIES.put(HtmlLink.TAG_NAME, defaultElementFactory);
167:                ELEMENT_FACTORIES.put(HtmlMap.TAG_NAME, defaultElementFactory);
168:                ELEMENT_FACTORIES.put(HtmlMenu.TAG_NAME, defaultElementFactory);
169:                ELEMENT_FACTORIES.put(HtmlMeta.TAG_NAME, defaultElementFactory);
170:                ELEMENT_FACTORIES.put(HtmlNoFrames.TAG_NAME,
171:                        defaultElementFactory);
172:                ELEMENT_FACTORIES.put(HtmlNoScript.TAG_NAME,
173:                        defaultElementFactory);
174:                ELEMENT_FACTORIES.put(HtmlObject.TAG_NAME,
175:                        defaultElementFactory);
176:                ELEMENT_FACTORIES.put(HtmlOrderedList.TAG_NAME,
177:                        defaultElementFactory);
178:                ELEMENT_FACTORIES.put(HtmlOptionGroup.TAG_NAME,
179:                        defaultElementFactory);
180:                ELEMENT_FACTORIES.put(HtmlOption.TAG_NAME,
181:                        defaultElementFactory);
182:                ELEMENT_FACTORIES.put(HtmlParagraph.TAG_NAME,
183:                        defaultElementFactory);
184:                ELEMENT_FACTORIES.put(HtmlParameter.TAG_NAME,
185:                        defaultElementFactory);
186:                ELEMENT_FACTORIES.put(HtmlPreformattedText.TAG_NAME,
187:                        defaultElementFactory);
188:                ELEMENT_FACTORIES.put(HtmlInlineQuotation.TAG_NAME,
189:                        defaultElementFactory);
190:                ELEMENT_FACTORIES.put(HtmlScript.TAG_NAME,
191:                        defaultElementFactory);
192:                ELEMENT_FACTORIES.put(HtmlSelect.TAG_NAME,
193:                        defaultElementFactory);
194:                ELEMENT_FACTORIES.put(HtmlSpan.TAG_NAME, defaultElementFactory);
195:                ELEMENT_FACTORIES
196:                        .put(HtmlStyle.TAG_NAME, defaultElementFactory);
197:                ELEMENT_FACTORIES
198:                        .put(HtmlTitle.TAG_NAME, defaultElementFactory);
199:
200:                ELEMENT_FACTORIES
201:                        .put(HtmlTable.TAG_NAME, defaultElementFactory);
202:                ELEMENT_FACTORIES.put(HtmlTableBody.TAG_NAME,
203:                        defaultElementFactory);
204:                ELEMENT_FACTORIES.put(HtmlTableDataCell.TAG_NAME,
205:                        defaultElementFactory);
206:                ELEMENT_FACTORIES.put(HtmlTableHeaderCell.TAG_NAME,
207:                        defaultElementFactory);
208:                ELEMENT_FACTORIES.put(HtmlTableRow.TAG_NAME,
209:                        defaultElementFactory);
210:
211:                ELEMENT_FACTORIES.put(HtmlTextArea.TAG_NAME,
212:                        defaultElementFactory);
213:                ELEMENT_FACTORIES.put(HtmlTableFooter.TAG_NAME,
214:                        defaultElementFactory);
215:                ELEMENT_FACTORIES.put(HtmlTableHeader.TAG_NAME,
216:                        defaultElementFactory);
217:                ELEMENT_FACTORIES.put(HtmlUnorderedList.TAG_NAME,
218:                        defaultElementFactory);
219:            }
220:
221:            /**
222:             * Set the flag to control validation of the HTML content that is outside of the
223:             * BODY and HTML tags.  This flag is false by default to maintain compatibility with
224:             * current NekoHTML defaults.
225:             * @param ignoreOutsideContent - boolean flag to set
226:             */
227:            public static void setIgnoreOutsideContent(
228:                    final boolean ignoreOutsideContent) {
229:                IgnoreOutsideContent_ = ignoreOutsideContent;
230:            }
231:
232:            /**
233:             * Get the state of the flag to ignore content outside the BODY and HTML tags
234:             * @return - The current state
235:             */
236:            public static boolean getIgnoreOutsideContent() {
237:                return IgnoreOutsideContent_;
238:            }
239:
240:            /**
241:             * @param tagName an HTML element tag name
242:             * @return a factory for creating HtmlElements representing the given tag
243:             */
244:            public static IElementFactory getFactory(final String tagName) {
245:                final IElementFactory result = (IElementFactory) ELEMENT_FACTORIES
246:                        .get(tagName);
247:
248:                if (result != null) {
249:                    return result;
250:                } else {
251:                    return UnknownElementFactory.instance;
252:                }
253:            }
254:
255:            /**
256:             * You should never need to create one of these!
257:             */
258:            private HTMLParser() {
259:            }
260:
261:            /**
262:             * Parses the HTML content from the given string into an object tree representation.
263:             *
264:             * @param parent the parent for the new nodes
265:             * @param source the (X)HTML to be parsed
266:             * @throws SAXException if a SAX error occurs
267:             * @throws IOException if an IO error occurs
268:             */
269:            public static void parseFragment(final DomNode parent,
270:                    final String source) throws SAXException, IOException {
271:
272:                final URL url = parent.getPage().getWebResponse().getUrl();
273:                final HtmlUnitDOMBuilder domBuilder = new HtmlUnitDOMBuilder(
274:                        parent, url);
275:                domBuilder
276:                        .setFeature(
277:                                "http://cyberneko.org/html/features/balance-tags/document-fragment",
278:                                true);
279:                final XMLInputSource in = new XMLInputSource(null, parent
280:                        .getPage().getWebResponse().getUrl().toString(), null,
281:                        new StringReader(source), null);
282:
283:                domBuilder.parse(in);
284:            }
285:
286:            /**
287:             * parse the HTML content from the given WebResponse into an object tree representation
288:             *
289:             * @param webResponse the response data
290:             * @param webWindow the web window into which the page is to be loaded
291:             * @return the page object which forms the root of the DOM tree, or <code>null</code> if the &lt;HTML&gt;
292:             * tag is missing
293:             * @throws java.io.IOException io error
294:             */
295:            public static HtmlPage parse(final WebResponse webResponse,
296:                    final WebWindow webWindow) throws IOException {
297:                final HtmlPage page = new HtmlPage(webResponse.getUrl(),
298:                        webResponse, webWindow);
299:                webWindow.setEnclosedPage(page);
300:
301:                final HtmlUnitDOMBuilder domBuilder = new HtmlUnitDOMBuilder(
302:                        page, webResponse.getUrl());
303:                String charSet = webResponse.getContentCharSet();
304:                if (!Charset.isSupported(charSet)) {
305:                    charSet = TextUtil.DEFAULT_CHARSET;
306:                }
307:                final XMLInputSource in = new XMLInputSource(null, webResponse
308:                        .getUrl().toString(), null, webResponse
309:                        .getContentAsStream(), charSet);
310:
311:                try {
312:                    domBuilder.parse(in);
313:                } catch (final XNIException e) {
314:                    // extract enclosed exception
315:                    final Throwable origin = extractNestedException(e);
316:                    throw new RuntimeException("Failed parsing content from "
317:                            + webResponse.getUrl(), origin);
318:                }
319:                return domBuilder.page_;
320:            }
321:
322:            /**
323:             * Extract nested exception within an XNIException
324:             * (Nekohtml uses reflection and generated exceptions are wrapped many times
325:             * within XNIException and InvocationTargetException)
326:             * @param e the original XNIException
327:             * @return the cause exception
328:             */
329:            static Throwable extractNestedException(final Throwable e) {
330:                Throwable originalException = e;
331:                Throwable cause = ((XNIException) e).getException();
332:                while (cause != null) {
333:                    originalException = cause;
334:                    if (cause instanceof  XNIException) {
335:                        cause = ((XNIException) cause).getException();
336:                    } else if (cause instanceof  InvocationTargetException) {
337:                        cause = cause.getCause();
338:                    } else {
339:                        cause = null;
340:                    }
341:                }
342:                return originalException;
343:            }
344:
345:            /**
346:             * The parser and DOM builder. This class subclasses Xerces's AbstractSAXParser and implements
347:             * the ContentHandler interface. Thus all parser APIs are kept private. The ContentHandler methods
348:             * consume SAX events to build the page DOM
349:             */
350:            private static final class HtmlUnitDOMBuilder extends
351:                    AbstractSAXParser implements  ContentHandler, LexicalHandler {
352:                private final HtmlPage page_;
353:
354:                private Locator locator_;
355:                private final Stack stack_ = new Stack();
356:
357:                private DomNode currentNode_;
358:                private StringBuffer characters_;
359:                private boolean headParsed_ = false;
360:
361:                /**
362:                 * create a new builder for parsing the given response contents
363:                 * @param webResponse the response data
364:                 * @param webWindow the web window into which the page is to be loaded
365:                 */
366:                private HtmlUnitDOMBuilder(final DomNode page, final URL url) {
367:                    super (new HTMLConfiguration());
368:                    this .page_ = page.getPage();
369:
370:                    currentNode_ = page;
371:                    stack_.push(currentNode_);
372:
373:                    final HTMLParserListener listener = page_.getWebClient()
374:                            .getHTMLParserListener();
375:                    final boolean reportErrors;
376:                    if (listener != null) {
377:                        reportErrors = true;
378:                        fConfiguration.setErrorHandler(new HTMLErrorHandler(
379:                                listener, url));
380:                    } else {
381:                        reportErrors = false;
382:                    }
383:
384:                    try {
385:                        setFeature(
386:                                "http://cyberneko.org/html/features/augmentations",
387:                                true);
388:                        setProperty(
389:                                "http://cyberneko.org/html/properties/names/elems",
390:                                "lower");
391:                        setFeature(
392:                                "http://cyberneko.org/html/features/report-errors",
393:                                reportErrors);
394:                        setFeature(
395:                                "http://cyberneko.org/html/features/balance-tags/ignore-outside-content",
396:                                IgnoreOutsideContent_);
397:
398:                        setContentHandler(this );
399:                        setLexicalHandler(this ); //comments and CDATA
400:
401:                    } catch (final SAXException e) {
402:                        throw new ObjectInstantiationException(
403:                                "unable to create HTML parser", e);
404:                    }
405:                }
406:
407:                /**
408:                 * @return the document locator
409:                 */
410:                public Locator getLocator() {
411:                    return locator_;
412:                }
413:
414:                /**
415:                 * set the document locator
416:                 * @param locator
417:                 */
418:                public void setDocumentLocator(final Locator locator) {
419:                    locator_ = locator;
420:                }
421:
422:                /** @inheritDoc ContentHandler#startDocument() */
423:                public void startDocument() throws SAXException {
424:                }
425:
426:                /** @inheritDoc ContentHandler#startElement(String,String,String,Attributes) */
427:                public void startElement(final String namespaceURI,
428:                        final String localName, final String qName,
429:                        final Attributes atts) throws SAXException {
430:
431:                    handleCharacters();
432:
433:                    final String tagLower = localName.toLowerCase();
434:
435:                    if (tagLower.equals("head")) {
436:                        headParsed_ = true;
437:                    }
438:                    // add a head if none was there
439:                    else if (!headParsed_
440:                            && (tagLower.equals("body") || tagLower
441:                                    .equals("frameset"))) {
442:                        final IElementFactory factory = getElementFactory("head");
443:                        final HtmlElement newElement = factory.createElement(
444:                                page_, "head", null);
445:                        currentNode_.appendDomChild(newElement);
446:                        headParsed_ = true;
447:                    }
448:                    // add a <tbody> if a <tr> is directly in <table>
449:                    else if (tagLower.equals("tr")
450:                            && currentNode_.getNodeName().equals("table")) {
451:                        final IElementFactory factory = getElementFactory("tbody");
452:                        final HtmlElement newElement = factory.createElement(
453:                                page_, "tbody", null);
454:                        currentNode_.appendDomChild(newElement);
455:                        currentNode_ = newElement;
456:                        stack_.push(currentNode_);
457:                    }
458:
459:                    final IElementFactory factory = getElementFactory(tagLower);
460:                    final HtmlElement newElement = factory.createElement(page_,
461:                            tagLower, atts);
462:                    newElement.setStartLocation(locator_.getLineNumber(),
463:                            locator_.getColumnNumber());
464:                    currentNode_.appendDomChild(newElement);
465:                    currentNode_ = newElement;
466:                    stack_.push(currentNode_);
467:                }
468:
469:                /** @inheritDoc ContentHandler@endElement(String,String,String) */
470:                public void endElement(final String namespaceURI,
471:                        final String localName, final String qName)
472:                        throws SAXException {
473:
474:                    handleCharacters();
475:
476:                    final DomNode previousNode = (DomNode) stack_.pop(); //remove currentElement from stack
477:                    previousNode.setEndLocation(locator_.getLineNumber(),
478:                            locator_.getColumnNumber());
479:                    previousNode.onAllChildrenAddedToPage();
480:
481:                    // if we have added a extra node (tbody), we should remove it
482:                    if (!currentNode_.getNodeName().equalsIgnoreCase(localName)) {
483:                        stack_.pop(); //remove extra node from stack
484:                    }
485:
486:                    if (!stack_.isEmpty()) {
487:                        currentNode_ = (DomNode) stack_.peek();
488:                    }
489:                }
490:
491:                /** @inheritDoc ContentHandler#characters(char,int,int) */
492:                public void characters(final char ch[], final int start,
493:                        final int length) throws SAXException {
494:
495:                    if (characters_ == null) {
496:                        characters_ = new StringBuffer();
497:                    }
498:                    characters_.append(ch, start, length);
499:                }
500:
501:                /** @inheritDoc ContentHandler#ignorableWhitespace(char,int,int) */
502:                public void ignorableWhitespace(final char ch[],
503:                        final int start, final int length) throws SAXException {
504:
505:                    if (characters_ == null) {
506:                        characters_ = new StringBuffer();
507:                    }
508:                    characters_.append(ch, start, length);
509:                }
510:
511:                /**
512:                 * pick up the character data accumulated so far and add it to the
513:                 * current element as a text node
514:                 */
515:                private void handleCharacters() {
516:
517:                    if (characters_ != null && characters_.length() > 0) {
518:                        final DomText text = new DomText(page_, characters_
519:                                .toString());
520:                        currentNode_.appendDomChild(text);
521:                        characters_.setLength(0);
522:                    }
523:                }
524:
525:                /**
526:                 * @param tagName an HTML tag name, in lowercase
527:                 * @return the pre-registered element factory for the tag, or an UnknownElementFactory
528:                 */
529:                private IElementFactory getElementFactory(final String tagName) {
530:
531:                    final IElementFactory factory = (IElementFactory) ELEMENT_FACTORIES
532:                            .get(tagName);
533:
534:                    if (factory != null) {
535:                        return factory;
536:                    } else {
537:                        return UnknownElementFactory.instance;
538:                    }
539:                }
540:
541:                /** @inheritDoc ContentHandler#endDocument() */
542:                public void endDocument() throws SAXException {
543:                    handleCharacters();
544:                    final DomNode currentPage = page_;
545:                    currentPage.setEndLocation(locator_.getLineNumber(),
546:                            locator_.getColumnNumber());
547:                }
548:
549:                /** @inheritDoc ContentHandler#startPrefixMapping(String,String) */
550:                public void startPrefixMapping(final String prefix,
551:                        final String uri) throws SAXException {
552:                }
553:
554:                /** @inheritDoc ContentHandler#endPrefixMapping(String) */
555:                public void endPrefixMapping(final String prefix)
556:                        throws SAXException {
557:                }
558:
559:                /** @inheritDoc ContentHandler#processingInstrucction(String,String) */
560:                public void processingInstruction(final String target,
561:                        final String data) throws SAXException {
562:                }
563:
564:                /** @inheritDoc ContentHandler#skippedEntity(String) */
565:                public void skippedEntity(final String name)
566:                        throws SAXException {
567:                }
568:
569:                // LexicalHandler methods
570:
571:                /** @inheritDoc LexicalHandler#comment(char[],int,int) */
572:                public void comment(final char[] ch, final int start,
573:                        final int length) {
574:                    handleCharacters();
575:                    final DomComment comment = new DomComment(page_, String
576:                            .valueOf(ch, start, length));
577:                    currentNode_.appendDomChild(comment);
578:                }
579:
580:                /** @inheritDoc LexicalHandler#endCDATA() */
581:                public void endCDATA() {
582:                }
583:
584:                /** @inheritDoc LexicalHandler#endDTD() */
585:                public void endDTD() {
586:                }
587:
588:                /** @inheritDoc LexicalHandler#endEntity() */
589:                public void endEntity(final String name) {
590:                }
591:
592:                /** @inheritDoc LexicalHandler#startCDATA() */
593:                public void startCDATA() {
594:                }
595:
596:                /** @inheritDoc LexicalHandler#startDTD(String,String,String) */
597:                public void startDTD(final String name, final String publicId,
598:                        final String systemId) {
599:                }
600:
601:                /** @inheritDoc LexicalHandler#startEntity(String) */
602:                public void startEntity(final String name) {
603:                }
604:            }
605:        }
606:
607:        /**
608:         * Utility to transmit parsing errors to a {@link HTMLParserListener}.
609:         */
610:        class HTMLErrorHandler extends DefaultErrorHandler {
611:            private final HTMLParserListener listener_;
612:            private final URL url_;
613:
614:            HTMLErrorHandler(final HTMLParserListener listener, final URL url) {
615:                Assert.notNull("listener", listener);
616:                Assert.notNull("url", url);
617:                listener_ = listener;
618:                url_ = url;
619:            }
620:
621:            /** @see DefaultErrorHandler#error(String,String,XMLParseException) */
622:            public void error(final String domain, final String key,
623:                    final XMLParseException exception) throws XNIException {
624:                listener_.error(exception.getMessage(), url_, exception
625:                        .getLineNumber(), exception.getColumnNumber(), key);
626:            }
627:
628:            /** @see DefaultErrorHandler#warning(String,String,XMLParseException) */
629:            public void warning(final String domain, final String key,
630:                    final XMLParseException exception) throws XNIException {
631:                listener_.warning(exception.getMessage(), url_, exception
632:                        .getLineNumber(), exception.getColumnNumber(), key);
633:            }
634:        }
www.java2java.com | Contact Us
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.