001: /*
002: * Copyright (c) 2002-2008 Gargoyle Software Inc. All rights reserved.
003: *
004: * Redistribution and use in source and binary forms, with or without
005: * modification, are permitted provided that the following conditions are met:
006: *
007: * 1. Redistributions of source code must retain the above copyright notice,
008: * this list of conditions and the following disclaimer.
009: * 2. Redistributions in binary form must reproduce the above copyright notice,
010: * this list of conditions and the following disclaimer in the documentation
011: * and/or other materials provided with the distribution.
012: * 3. The end-user documentation included with the redistribution, if any, must
013: * include the following acknowledgment:
014: *
015: * "This product includes software developed by Gargoyle Software Inc.
016: * (http://www.GargoyleSoftware.com/)."
017: *
018: * Alternately, this acknowledgment may appear in the software itself, if
019: * and wherever such third-party acknowledgments normally appear.
020: * 4. The name "Gargoyle Software" must not be used to endorse or promote
021: * products derived from this software without prior written permission.
022: * For written permission, please contact info@GargoyleSoftware.com.
023: * 5. Products derived from this software may not be called "HtmlUnit", nor may
024: * "HtmlUnit" appear in their name, without prior written permission of
025: * Gargoyle Software Inc.
026: *
027: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
028: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
029: * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARGOYLE
030: * SOFTWARE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
031: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
032: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
033: * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
034: * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
035: * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
036: * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
037: */
038: package com.gargoylesoftware.htmlunit.html;
039:
040: import java.util.ArrayList;
041: import java.util.List;
042:
043: import com.gargoylesoftware.htmlunit.StringWebResponse;
044: import com.gargoylesoftware.htmlunit.WebClient;
045: import com.gargoylesoftware.htmlunit.WebResponse;
046: import com.gargoylesoftware.htmlunit.WebTestCase;
047: import com.gargoylesoftware.htmlunit.html.xpath.HtmlUnitXPath;
048:
049: /**
050: * Test class for {@link HTMLParser}.
051: *
052: * @version $Revision: 2132 $
053: * @author <a href="mailto:cse@dynabean.de">Christian Sell</a>
054: * @author Marc Guillemot
055: */
056: public class HTMLParserTest extends WebTestCase {
057:
058: /**
059: * Create an instance
060: * @param name The name of the test
061: */
062: public HTMLParserTest(final String name) {
063: super (name);
064: }
065:
066: /**
067: * test the new HTMLParser on a simple HTML string and use the Jaxen XPath navigator
068: * to validate results
069: * @throws Exception failure
070: */
071: public void testSimpleHTMLString() throws Exception {
072: final WebClient webClient = new WebClient();
073: final WebResponse webResponse = new StringWebResponse(
074: "<html><head><title>TITLE</title><noscript>TEST</noscript></head><body></body></html>");
075:
076: final HtmlPage page = HTMLParser.parse(webResponse, webClient
077: .getCurrentWindow());
078:
079: HtmlUnitXPath xpath = new HtmlUnitXPath("//noscript");
080: final String stringVal = xpath.stringValueOf(page);
081:
082: assertEquals("TEST", stringVal);
083:
084: xpath = new HtmlUnitXPath("//*[./text() = 'TEST']");
085: final HtmlElement node = (HtmlElement) xpath
086: .selectSingleNode(page);
087:
088: assertEquals(node.getTagName(), HtmlNoScript.TAG_NAME);
089: }
090:
091: /**
092: * Test when <form> inside <table> and before <tr>
093: * @throws Exception failure
094: */
095: public void testBadlyFormedHTML() throws Exception {
096: final String content = "<html><head><title>first</title>\n"
097: + "<script>\n"
098: + "function test()\n"
099: + "{\n"
100: + " alert(document.getElementById('myInput').form.id);\n"
101: + "}\n" + "</script>\n" + "</head>\n"
102: + "<body onload='test()'>\n" + "<table>\n"
103: + "<form name='myForm' action='foo' id='myForm'>\n"
104: + "<tr><td>\n"
105: + "<input type='text' name='myInput' id='myInput'/>\n"
106: + "</td></tr>\n" + "</form>\n" + "</table>\n"
107: + "</body></html>";
108:
109: final List collectedAlerts = new ArrayList();
110: final String[] expectedAlerts = { "myForm" };
111: createTestPageForRealBrowserIfNeeded(content, expectedAlerts);
112:
113: loadPage(content, collectedAlerts);
114:
115: assertEquals(expectedAlerts, collectedAlerts);
116: }
117:
118: /**
119: * Test when an illegal tag is found in head as some websites do
120: * @throws Exception failure
121: */
122: public void testUnknownTagInHead() throws Exception {
123: if (notYetImplemented()) {
124: return;
125: }
126:
127: // Note: the <meta> tag in this test is quite important because
128: // I could adapt the TagBalancer to make it work except with this <meta http-equiv...
129: // (it worked with <meta name=...)
130: final String content = "<html><head><mainA3>\n"
131: + "<meta http-equiv='Content-Type' content='text/html; charset=ISO-8859-1'>\n"
132: + "<title>first</title>\n" + "<script>\n"
133: + "function test()\n" + "{\n"
134: + " alert(document.title);\n" + "}\n" + "</script>\n"
135: + "</head>\n" + "<body onload='test()'>\n"
136: + "</body></html>";
137:
138: final List collectedAlerts = new ArrayList();
139: final String[] expectedAlerts = { "first" };
140: createTestPageForRealBrowserIfNeeded(content, expectedAlerts);
141:
142: loadPage(content, collectedAlerts);
143:
144: assertEquals(expectedAlerts, collectedAlerts);
145: }
146:
147: /**
148: * Test the HTMLParser by accessing the HtmlUnit home page and detecting the copyright
149: * string.
150: *
151: * @throws Exception failure
152: */
153: public void testHtmlUnitHomePage() throws Exception {
154: final HtmlPage page = loadUrl("http://htmlunit.sourceforge.net");
155: if (page != null) {
156: // No connectivity issues.
157: final HtmlUnitXPath xpath = new HtmlUnitXPath(
158: "//div[@id='footer']/div[@class='xright']");
159: final String stringVal = xpath.stringValueOf(page).trim();
160: assertEquals("\u00A9 2002-2007, Gargoyle Software Inc.",
161: stringVal);
162: }
163: }
164:
165: /**
166: * Works since NekoHtml 0.9.5
167: * @exception Exception If the test fails
168: */
169: public void testBadTagInHead() throws Exception {
170: final String htmlContent = "<html>\n"
171: + "<head><foo/>\n<title>foo\n</head>\n"
172: + "<body>\nfoo\n</body>\n</html>";
173:
174: final HtmlPage page = loadPage(htmlContent);
175: assertEquals("foo", page.getTitleText());
176: }
177: }
|