0001: /*
0002: * Copyright (c) 2002-2008 Gargoyle Software Inc. All rights reserved.
0003: *
0004: * Redistribution and use in source and binary forms, with or without
0005: * modification, are permitted provided that the following conditions are met:
0006: *
0007: * 1. Redistributions of source code must retain the above copyright notice,
0008: * this list of conditions and the following disclaimer.
0009: * 2. Redistributions in binary form must reproduce the above copyright notice,
0010: * this list of conditions and the following disclaimer in the documentation
0011: * and/or other materials provided with the distribution.
0012: * 3. The end-user documentation included with the redistribution, if any, must
0013: * include the following acknowledgment:
0014: *
0015: * "This product includes software developed by Gargoyle Software Inc.
0016: * (http://www.GargoyleSoftware.com/)."
0017: *
0018: * Alternately, this acknowledgment may appear in the software itself, if
0019: * and wherever such third-party acknowledgments normally appear.
0020: * 4. The name "Gargoyle Software" must not be used to endorse or promote
0021: * products derived from this software without prior written permission.
0022: * For written permission, please contact info@GargoyleSoftware.com.
0023: * 5. Products derived from this software may not be called "HtmlUnit", nor may
0024: * "HtmlUnit" appear in their name, without prior written permission of
0025: * Gargoyle Software Inc.
0026: *
0027: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
0028: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
0029: * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARGOYLE
0030: * SOFTWARE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
0031: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
0032: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
0033: * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
0034: * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
0035: * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
0036: * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
0037: */
0038: package com.gargoylesoftware.htmlunit;
0039:
0040: import java.io.BufferedInputStream;
0041: import java.io.ByteArrayInputStream;
0042: import java.io.File;
0043: import java.io.FileInputStream;
0044: import java.io.IOException;
0045: import java.io.InputStream;
0046: import java.io.Serializable;
0047: import java.lang.reflect.Constructor;
0048: import java.net.MalformedURLException;
0049: import java.net.URL;
0050: import java.net.URLConnection;
0051: import java.net.URLStreamHandler;
0052: import java.security.GeneralSecurityException;
0053: import java.util.ArrayList;
0054: import java.util.BitSet;
0055: import java.util.Collections;
0056: import java.util.HashMap;
0057: import java.util.HashSet;
0058: import java.util.Iterator;
0059: import java.util.List;
0060: import java.util.Map;
0061: import java.util.Set;
0062: import java.util.Stack;
0063: import java.util.StringTokenizer;
0064: import java.util.regex.Pattern;
0065:
0066: import org.apache.commons.httpclient.HttpStatus;
0067: import org.apache.commons.httpclient.NameValuePair;
0068: import org.apache.commons.httpclient.URI;
0069: import org.apache.commons.httpclient.URIException;
0070: import org.apache.commons.httpclient.auth.CredentialsProvider;
0071: import org.apache.commons.httpclient.cookie.CookiePolicy;
0072: import org.apache.commons.httpclient.protocol.Protocol;
0073: import org.apache.commons.httpclient.protocol.ProtocolSocketFactory;
0074: import org.apache.commons.httpclient.util.URIUtil;
0075: import org.apache.commons.io.FileUtils;
0076: import org.apache.commons.io.IOUtils;
0077: import org.apache.commons.lang.StringUtils;
0078: import org.apache.commons.logging.Log;
0079: import org.apache.commons.logging.LogFactory;
0080:
0081: import com.gargoylesoftware.htmlunit.html.FrameWindow;
0082: import com.gargoylesoftware.htmlunit.html.HTMLParser;
0083: import com.gargoylesoftware.htmlunit.html.HTMLParserListener;
0084: import com.gargoylesoftware.htmlunit.html.HtmlPage;
0085: import com.gargoylesoftware.htmlunit.javascript.JavaScriptEngine;
0086: import com.gargoylesoftware.htmlunit.javascript.host.Window;
0087: import com.gargoylesoftware.htmlunit.ssl.InsecureSSLProtocolSocketFactory;
0088: import com.gargoylesoftware.htmlunit.util.UrlUtils;
0089:
0090: /**
0091: * An object that represents a web browser.
0092: *
0093: * @version $Revision: 2161 $
0094: * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
0095: * @author <a href="mailto:gudujarlson@sf.net">Mike J. Bresnahan</a>
0096: * @author Dominique Broeglin
0097: * @author Noboru Sinohara
0098: * @author <a href="mailto:chen_jun@users.sourceforge.net">Chen Jun</a>
0099: * @author David K. Taylor
0100: * @author <a href="mailto:cse@dynabean.de">Christian Sell</a>
0101: * @author <a href="mailto:bcurren@esomnie.com">Ben Curren</a>
0102: * @author Marc Guillemot
0103: * @author Chris Erskine
0104: * @author Daniel Gredler
0105: * @author Sergey Gorelkin
0106: * @author Hans Donner
0107: * @author Paul King
0108: * @author Ahmed Ashour
0109: */
0110: public class WebClient implements Serializable {
0111:
/** Serialization version identifier. */
private static final long serialVersionUID = -7214321203864969635L;

/**
 * The cookie policy applied by HtmlUnit, which is the browser-compatible one. Code that needs
 * HtmlUnit's cookie policy should refer to this constant instead of assuming one of the
 * HttpClient {@link CookiePolicy} constants.
 */
public static final String HTMLUNIT_COOKIE_POLICY = CookiePolicy.BROWSER_COMPATIBILITY;

// Created lazily by getWebConnection(); transient, so it is not serialized.
private transient WebConnection webConnection_;
private boolean printContentOnFailingStatusCode_ = true;
private boolean throwExceptionOnFailingStatusCode_ = true;
private CredentialsProvider credentialsProvider_ = new DefaultCredentialsProvider();
private final String proxyHost_;
private final int proxyPort_;
// Maps each bypass pattern string to its compiled java.util.regex.Pattern.
private final Map proxyBypassHosts_;
// May be null when the script engine could not be created.
private JavaScriptEngine scriptEngine_;
private boolean javaScriptEnabled_ = true;
private boolean cookiesEnabled_ = true;
private boolean popupBlockerEnabled_;
private String homePage_;
// Header name -> header value, as registered through addRequestHeader().
private final Map requestHeaders_ = Collections.synchronizedMap(new HashMap(89));
private IncorrectnessListener incorrectnessListener_ = new IncorrectnessListenerImpl();

/**
 * Same value as the Firefox default for network.http.redirection-limit.
 */
private static final int ALLOWED_REDIRECTIONS_SAME_URL = 20;

private AlertHandler alertHandler_;
private ConfirmHandler confirmHandler_;
private PromptHandler promptHandler_;
private StatusHandler statusHandler_;
private AjaxController ajaxController_ = new AjaxController();

private BrowserVersion browserVersion_;
private boolean isRedirectEnabled_ = true;
private PageCreator pageCreator_ = new DefaultPageCreator();

private final Set webWindowListeners_ = new HashSet(5);
private final List webWindows_ = Collections.synchronizedList(new ArrayList());

private WebWindow currentWindow_;
// Also used as the lock object guarding its own accesses.
private Stack firstWindowStack_ = new Stack();
private int timeout_;
private HTMLParserListener htmlParserListener_;
private OnbeforeunloadHandler onbeforeunloadHandler_;
private Cache cache_ = new Cache();

// These handlers must be declared before the static initializer below, which uses one of them.
private static URLStreamHandler JavaScriptUrlStreamHandler_ =
    new com.gargoylesoftware.htmlunit.protocol.javascript.Handler();
private static URLStreamHandler AboutUrlStreamHandler_ =
    new com.gargoylesoftware.htmlunit.protocol.about.Handler();

/**
 * The URL for "about:blank".
 */
public static final URL URL_ABOUT_BLANK;
static {
    URL aboutBlank;
    try {
        aboutBlank = new URL(null, "about:blank", AboutUrlStreamHandler_);
    }
    catch (final MalformedURLException e) {
        // Not expected to happen, since an explicit stream handler is supplied.
        e.printStackTrace();
        aboutBlank = null;
    }
    URL_ABOUT_BLANK = aboutBlank;
}

// Shared WebResponse instance for "about:blank".
private static final WebResponse WEB_RESPONSE_FOR_ABOUT_BLANK =
    new StringWebResponse("", URL_ABOUT_BLANK);

private ScriptPreProcessor scriptPreProcessor_;
private Map activeXObjectMap_ = Collections.EMPTY_MAP;
private RefreshHandler refreshHandler_ = new ImmediateRefreshHandler();
private boolean throwExceptionOnScriptError_ = true;
0190:
/**
 * Creates a web client that simulates the browser version returned by
 * {@link BrowserVersion#getDefault()}.
 */
public WebClient() {
    this(BrowserVersion.getDefault());
}
0198:
0199: /**
0200: * Creates a web client instance using the specified {@link BrowserVersion}.
0201: * @param browserVersion The browser version to simulate.
0202: */
0203: public WebClient(final BrowserVersion browserVersion) {
0204: Assert.notNull("browserVersion", browserVersion);
0205:
0206: homePage_ = "http://www.gargoylesoftware.com/";
0207: browserVersion_ = browserVersion;
0208: proxyHost_ = null;
0209: proxyPort_ = 0;
0210: proxyBypassHosts_ = new HashMap();
0211: try {
0212: scriptEngine_ = createJavaScriptEngineIfPossible(this );
0213: } catch (final NoClassDefFoundError e) {
0214: scriptEngine_ = null;
0215: }
0216: // The window must be constructed after the script engine.
0217: currentWindow_ = new TopLevelWindow("", this );
0218: }
0219:
0220: /**
0221: * Create an instance that will use the specified {@link BrowserVersion} and proxy server
0222: * @param browserVersion The browser version to simulate
0223: * @param proxyHost The server that will act as proxy
0224: * @param proxyPort The port to use on the proxy server
0225: */
0226: public WebClient(final BrowserVersion browserVersion,
0227: final String proxyHost, final int proxyPort) {
0228: Assert.notNull("browserVersion", browserVersion);
0229: Assert.notNull("proxyHost", proxyHost);
0230:
0231: homePage_ = "http://www.gargoylesoftware.com/";
0232: browserVersion_ = browserVersion;
0233: proxyHost_ = proxyHost;
0234: proxyPort_ = proxyPort;
0235: proxyBypassHosts_ = new HashMap();
0236: try {
0237: scriptEngine_ = createJavaScriptEngineIfPossible(this );
0238: } catch (final NoClassDefFoundError e) {
0239: scriptEngine_ = null;
0240: }
0241: // The window must be constructed after the script engine.
0242: currentWindow_ = new TopLevelWindow("", this );
0243: }
0244:
0245: /**
0246: * Create a javascript engine if possible.
0247: *
0248: * @param webClient The webclient that we are creating the script engine for.
0249: * @return A javascript engine or null if one could not be created.
0250: */
0251: private static JavaScriptEngine createJavaScriptEngineIfPossible(
0252: final WebClient webClient) {
0253: try {
0254: Class.forName("org.mozilla.javascript.Context");
0255: return new JavaScriptEngine(webClient);
0256: } catch (final ClassNotFoundException e) {
0257: return null;
0258: } catch (final NoClassDefFoundError e) {
0259: return null;
0260: }
0261: }
0262:
0263: /**
0264: * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br/>
0265: *
0266: * <p>Return the object that will resolve all url requests<p>
0267: * @return The connection that will be used.
0268: */
0269: public synchronized WebConnection getWebConnection() {
0270: if (webConnection_ == null) {
0271: webConnection_ = new HttpWebConnection(this );
0272: }
0273: return webConnection_;
0274: }
0275:
0276: /**
0277: * Set the object that will resolve all url requests <p />
0278: *
0279: * This method is intended for unit testing HtmlUnit itself. It is not expected
0280: * to change but you shouldn't need to call it during normal use of HtmlUnit.
0281: *
0282: * @param webConnection The new web connection
0283: */
0284: public void setWebConnection(final WebConnection webConnection) {
0285: Assert.notNull("webConnection", webConnection);
0286: webConnection_ = webConnection;
0287: }
0288:
0289: /**
0290: * Send a request to a server and return a Page that represents the
0291: * response from the server. This page will be used to populate this frame.<p>
0292: *
0293: * The type of Page will depend on the content type of the http response. <p />
0294: *
0295: * <table>
0296: * <tr>
0297: * <th>Content type</th>
0298: * <th>Type of page</th>
0299: * </tr>
0300: * <tr>
0301: * <td>"text/html"</td>
0302: * <td>{@link com.gargoylesoftware.htmlunit.html.HtmlPage}</td>
0303: * </tr>
0304: * <tr>
0305: * <td>"text/xhtml"</td>
0306: * <td>{@link com.gargoylesoftware.htmlunit.html.HtmlPage}</td>
0307: * </tr>
0308: * <tr>
0309: * <td>"application/xhtml+xml"</td>
0310: * <td>{@link com.gargoylesoftware.htmlunit.html.HtmlPage} for now, in the
0311: * future it will be XML validated as well
0312: * </td>
0313: * </tr>
0314: * <tr>
0315: * <td>"text/*"</td>
0316: * <td>{@link com.gargoylesoftware.htmlunit.TextPage}</td>
0317: * </tr>
0318: * <tr>
0319: * <td>Anything else</td>
0320: * <td>{@link com.gargoylesoftware.htmlunit.UnexpectedPage}</td>
0321: * </tr>
0322: * </table>
0323: *
0324: *
0325: * @param webWindow The WebWindow to load this request into
0326: * @param parameters Parameter object for the web request
0327: * @return See above
0328: * @throws IOException If an IO error occurs
0329: * @throws FailingHttpStatusCodeException If the server returns a failing status code AND the property
0330: * {@link #setThrowExceptionOnFailingStatusCode(boolean)} is set to true
0331: *
0332: * @see WebRequestSettings
0333: */
0334: public Page getPage(final WebWindow webWindow,
0335: final WebRequestSettings parameters) throws IOException,
0336: FailingHttpStatusCodeException {
0337:
0338: final Page page = webWindow.getEnclosedPage();
0339: if (page != null && page instanceof HtmlPage) {
0340: final HtmlPage htmlPage = (HtmlPage) page;
0341: if (!htmlPage.isOnbeforeunloadAccepted()) {
0342: getLog()
0343: .debug(
0344: "The registered OnbeforeunloadHandler rejected to load a new page.");
0345: return page;
0346: }
0347: }
0348:
0349: getLog().debug(
0350: "Get page for window named '" + webWindow.getName()
0351: + "', using " + parameters);
0352:
0353: final WebResponse webResponse;
0354: final String protocol = parameters.getURL().getProtocol();
0355: if (protocol.equals("javascript")) {
0356: webResponse = makeWebResponseForJavaScriptUrl(webWindow,
0357: parameters.getURL(), parameters.getCharset());
0358: } else {
0359: webResponse = loadWebResponse(parameters);
0360: }
0361:
0362: printContentIfNecessary(webResponse);
0363: loadWebResponseInto(webResponse, webWindow);
0364: throwFailingHttpStatusCodeExceptionIfNecessary(webResponse);
0365:
0366: return webWindow.getEnclosedPage();
0367: }
0368:
0369: /**
0370: * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br/>
0371: *
0372: * <p>Open a new web window and populate it with a page loaded by
0373: * {@link #getPage(WebWindow,WebRequestSettings)}</p>
0374: *
0375: * @param opener The web window that initiated the request.
0376: * @param target The name of the window to be opened. This is the name that would
0377: * be passed into the javascript open() method.
0378: * @param params Any parameters
0379: * @return The new page.
0380: * @throws FailingHttpStatusCodeException If the server returns a failing status code AND the property
0381: * {@link #setThrowExceptionOnFailingStatusCode(boolean)} is set to true.
0382: * @throws IOException If an IO problem occurs.
0383: */
0384: public Page getPage(final WebWindow opener, final String target,
0385: final WebRequestSettings params)
0386: throws FailingHttpStatusCodeException, IOException {
0387: return getPage(openTargetWindow(opener, target, "_self"),
0388: params);
0389: }
0390:
0391: /**
0392: * Convenient method to build an URL and load it into the current WebWindow
0393: * @param url The url of the new content.
0394: * @return The new page.
0395: * @throws FailingHttpStatusCodeException If the server returns a failing status code AND the property
0396: * {@link #setThrowExceptionOnFailingStatusCode(boolean)} is set to true.
0397: * @throws IOException If an IO problem occurs.
0398: * @throws MalformedURLException if no url can be created from the provided string
0399: */
0400: public Page getPage(final String url) throws IOException,
0401: FailingHttpStatusCodeException, MalformedURLException {
0402: return getPage(new URL(url));
0403: }
0404:
0405: /**
0406: * Convenient method to load a URL into the current WebWindow
0407: * @param url The url of the new content.
0408: * @return The new page.
0409: * @throws FailingHttpStatusCodeException If the server returns a failing status code AND the property
0410: * {@link #setThrowExceptionOnFailingStatusCode(boolean)} is set to true.
0411: * @throws IOException If an IO problem occurs.
0412: */
0413: public Page getPage(final URL url) throws IOException,
0414: FailingHttpStatusCodeException {
0415: return getPage(getCurrentWindow(), new WebRequestSettings(url));
0416: }
0417:
0418: /**
0419: * Convenient method to load a web request into the current WebWindow
0420: * @param request The request parameters
0421: * @return The new page.
0422: * @throws FailingHttpStatusCodeException If the server returns a failing status code AND the property
0423: * {@link #setThrowExceptionOnFailingStatusCode(boolean)} is set to true.
0424: * @throws IOException If an IO problem occurs.
0425: * @see #getPage(WebWindow,WebRequestSettings)
0426: */
0427: public Page getPage(final WebRequestSettings request)
0428: throws IOException, FailingHttpStatusCodeException {
0429: return getPage(getCurrentWindow(), request);
0430: }
0431:
0432: /**
0433: * Use the specified WebResponse to create a Page object which will then
0434: * get inserted into the WebWindow. All initialization and event notification
0435: * will be handled here.
0436: *
0437: * @param webResponse The response that will be used to create the new page.
0438: * @param webWindow The window that the new page will be placed within.
0439: * @throws IOException If an IO error occurs.
0440: * @throws FailingHttpStatusCodeException If the server returns a failing status code AND the property
0441: * {@link #setThrowExceptionOnFailingStatusCode(boolean)} is set to true
0442: * @return The newly created page.
0443: */
0444: public Page loadWebResponseInto(final WebResponse webResponse,
0445: final WebWindow webWindow) throws IOException,
0446: FailingHttpStatusCodeException {
0447:
0448: Assert.notNull("webResponse", webResponse);
0449: Assert.notNull("webWindow", webWindow);
0450:
0451: final Page oldPage = webWindow.getEnclosedPage();
0452: if (oldPage != null) {
0453: // Remove the old windows before create new ones.
0454: oldPage.cleanUp();
0455: }
0456:
0457: final Page newPage = pageCreator_.createPage(webResponse,
0458: webWindow);
0459:
0460: synchronized (firstWindowStack_) {
0461: if (!firstWindowStack_.empty()
0462: && firstWindowStack_.peek() == null) {
0463: firstWindowStack_.pop();
0464: firstWindowStack_.push(webWindow);
0465: }
0466: }
0467:
0468: // the page being loaded may already have been replaced by an other one through js code
0469: if (webWindow.getEnclosedPage() == newPage) {
0470: newPage.initialize();
0471: }
0472:
0473: fireWindowContentChanged(new WebWindowEvent(webWindow,
0474: WebWindowEvent.CHANGE, oldPage, newPage));
0475: return newPage;
0476: }
0477:
0478: /**
0479: * Specify whether or not the content of the resulting document will be
0480: * printed to the console in the event of a failing response code.
0481: * Successful response codes are in the range 200-299. The default is true.
0482: *
0483: * @param enabled True to enable this feature
0484: */
0485: public void setPrintContentOnFailingStatusCode(final boolean enabled) {
0486: printContentOnFailingStatusCode_ = enabled;
0487: }
0488:
0489: /**
0490: * Return true if the content of the resulting document will be printed to
0491: * the console in the event of a failing response code.
0492: *
0493: * @return See above
0494: * @see #setPrintContentOnFailingStatusCode
0495: */
0496: public boolean getPrintContentOnFailingStatusCode() {
0497: return printContentOnFailingStatusCode_;
0498: }
0499:
0500: /**
0501: * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span>
0502: *
0503: * <p>Logs the response's content if its status code indicates a request failure and
0504: * {@link #getPrintContentOnFailingStatusCode()} returns <tt>true</tt>.
0505: *
0506: * @param webResponse the response whose content may be logged
0507: */
0508: public void printContentIfNecessary(final WebResponse webResponse) {
0509: final String contentType = webResponse.getContentType();
0510: final int statusCode = webResponse.getStatusCode();
0511: final boolean successful = (statusCode >= HttpStatus.SC_OK && statusCode < HttpStatus.SC_MULTIPLE_CHOICES);
0512: if (getPrintContentOnFailingStatusCode() && !successful) {
0513: getLog().info(
0514: "statusCode=[" + statusCode + "] contentType=["
0515: + contentType + "]");
0516: getLog().info(webResponse.getContentAsString());
0517: }
0518: }
0519:
0520: /**
0521: * Specify whether or not an exception will be thrown in the event of a
0522: * failing status code. Successful status codes are in the range 200-299.
0523: * The default is true.
0524: *
0525: * @param enabled True to enable this feature
0526: */
0527: public void setThrowExceptionOnFailingStatusCode(
0528: final boolean enabled) {
0529: throwExceptionOnFailingStatusCode_ = enabled;
0530: }
0531:
0532: /**
0533: * Return true if an exception will be thrown in the event of a failing response code.
0534: * @return See above
0535: * @see #setThrowExceptionOnFailingStatusCode
0536: */
0537: public boolean isThrowExceptionOnFailingStatusCode() {
0538: return throwExceptionOnFailingStatusCode_;
0539: }
0540:
0541: /**
0542: * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span>
0543: *
0544: * <p>Throws a {@link FailingHttpStatusCodeException} if the request's status code indicates a request
0545: * failure and {@link #isThrowExceptionOnFailingStatusCode()} returns <tt>true</tt>.
0546: *
0547: * @param webResponse the response which may trigger a {@link FailingHttpStatusCodeException}
0548: */
0549: public void throwFailingHttpStatusCodeExceptionIfNecessary(
0550: final WebResponse webResponse) {
0551: final int statusCode = webResponse.getStatusCode();
0552: final boolean successful = (statusCode >= HttpStatus.SC_OK && statusCode < HttpStatus.SC_MULTIPLE_CHOICES);
0553: if (isThrowExceptionOnFailingStatusCode() && !successful) {
0554: throw new FailingHttpStatusCodeException(webResponse);
0555: }
0556: }
0557:
0558: /**
0559: * Set a header which will be sent up on EVERY request from this client.
0560: *
0561: * @param name The name of the header
0562: * @param value The value of the header
0563: */
0564: public void addRequestHeader(final String name, final String value) {
0565: requestHeaders_.put(name, value);
0566: }
0567:
0568: /**
0569: * Remove a header
0570: *
0571: * @param name Name of the header
0572: * @see #addRequestHeader
0573: */
0574: public void removeRequestHeader(final String name) {
0575: requestHeaders_.remove(name);
0576: }
0577:
0578: /**
0579: * Sets the credentials provider that will provide authentication information when
0580: * trying to access protected information on a web server. This information is
0581: * required when the server is using Basic HTTP authentication, NTLM authentication,
0582: * or Digest authentication.
0583: * @param credentialsProvider The new credentials provider to use to authenticate.
0584: */
0585: public void setCredentialsProvider(
0586: final CredentialsProvider credentialsProvider) {
0587: Assert.notNull("credentialsProvider", credentialsProvider);
0588: credentialsProvider_ = credentialsProvider;
0589: }
0590:
0591: /**
0592: * Returns the credentials provider for this client instance. By default, this
0593: * method returns an instance of {@link DefaultCredentialsProvider}.
0594: * @return The credentials provider for this client instance.
0595: */
0596: public CredentialsProvider getCredentialsProvider() {
0597: return credentialsProvider_;
0598: }
0599:
0600: /**
0601: * Throw an exception with the specified message. If junit is found in the
0602: * classpath then a junit.framework.AssertionFailedError will be thrown
0603: * (the same behavior as calling fail() in junit). If junit is not found
0604: * then an IllegalStateException will be thrown instead of the
0605: * AssertionFailedError. <p>
0606: *
0607: * Override this to provide custom behavior.
0608: *
0609: * @param message The failure message
0610: * @deprecated
0611: * @see WebAssert
0612: */
0613: public void assertionFailed(final String message) {
0614: try {
0615: final Class clazz = Class
0616: .forName("junit.framework.AssertionFailedError");
0617: final Constructor constructor = clazz
0618: .getConstructor(new Class[] { String.class });
0619: final Error error = (Error) constructor
0620: .newInstance(new Object[] { message });
0621: throw error;
0622: } catch (final Exception e) {
0623: throw new IllegalStateException(message);
0624: }
0625: }
0626:
0627: /**
0628: * This method is intended for testing only - use at your own risk.
0629: * @return the current JavaScript engine (never <code>null</code>).
0630: */
0631: public JavaScriptEngine getJavaScriptEngine() {
0632: return scriptEngine_;
0633: }
0634:
0635: /**
0636: * This method is intended for testing only - use at your own risk.
0637: *
0638: * @param engine The new script engine to use.
0639: */
0640: public void setJavaScriptEngine(final JavaScriptEngine engine) {
0641: if (engine == null) {
0642: throw new NullPointerException(
0643: "Can't set JavaScriptEngine to null");
0644: }
0645: scriptEngine_ = engine;
0646: }
0647:
0648: /**
0649: * Enable/disable JavaScript support. By default, this property is enabled.
0650: *
0651: * @param enabled <tt>true</tt> to enable JavaScript support
0652: */
0653: public void setJavaScriptEnabled(final boolean enabled) {
0654: javaScriptEnabled_ = enabled;
0655: }
0656:
0657: /**
0658: * Returns <tt>true</tt> if JavaScript is enabled and the script engine was loaded successfully.
0659: *
0660: * @return <tt>true</tt> if JavaScript is enabled
0661: */
0662: public boolean isJavaScriptEnabled() {
0663: return javaScriptEnabled_ && scriptEngine_ != null;
0664: }
0665:
0666: /**
0667: * Enable/disable cookie support. By default, this property is enabled.
0668: *
0669: * @param enabled <tt>true</tt> to enable cookie support
0670: */
0671: public void setCookiesEnabled(final boolean enabled) {
0672: cookiesEnabled_ = enabled;
0673: }
0674:
0675: /**
0676: * Returns <tt>true</tt> if cookies are enabled.
0677: *
0678: * @return <tt>true</tt> if cookies are enabled
0679: */
0680: public boolean isCookiesEnabled() {
0681: return cookiesEnabled_;
0682: }
0683:
0684: /**
0685: * Enable/disable the popup window blocker. By default, the popup blocker is disabled, and popup
0686: * windows are allowed. When set to <tt>true</tt>, <tt>window.open()</tt> has no effect and
0687: * returns <tt>null</tt>.
0688: *
0689: * @param enabled <tt>true</tt> to enable the popup window blocker
0690: */
0691: public void setPopupBlockerEnabled(final boolean enabled) {
0692: popupBlockerEnabled_ = enabled;
0693: }
0694:
0695: /**
0696: * Returns <tt>true</tt> if the popup window blocker is enabled.
0697: *
0698: * @return <tt>true</tt> if the popup window blocker is enabled
0699: */
0700: public boolean isPopupBlockerEnabled() {
0701: return popupBlockerEnabled_;
0702: }
0703:
0704: /**
0705: * Returns the client's current homepage.
0706: * @return the client's current homepage.
0707: */
0708: public String getHomePage() {
0709: return homePage_;
0710: }
0711:
0712: /**
0713: * Sets the client's homepage.
0714: * @param homePage the new homepage URL
0715: */
0716: public void setHomePage(final String homePage) {
0717: homePage_ = homePage;
0718: }
0719:
0720: /**
0721: * Any hosts matched by the specified regular expression pattern will bypass the configured proxy.
0722: * @param pattern A regular expression pattern that matches the hostnames of the hosts which should
0723: * bypass the configured proxy.
0724: * @see Pattern
0725: */
0726: public void addHostsToProxyBypass(final String pattern) {
0727: proxyBypassHosts_.put(pattern, Pattern.compile(pattern));
0728: }
0729:
0730: /**
0731: * Any hosts matched by the specified regular expression pattern will no longer bypass the configured proxy.
0732: * @param pattern The previously added regular expression pattern.
0733: * @see Pattern
0734: */
0735: public void removeHostsFromProxyBypass(final String pattern) {
0736: proxyBypassHosts_.remove(pattern);
0737: }
0738:
0739: /**
0740: * Returns <tt>true</tt> if the host with the specified hostname should be accessed bypassing the
0741: * configured proxy.
0742: * @param hostname The name of the host to check.
0743: * @return <tt>true</tt> if the host with the specified hostname should be accessed bypassing the
0744: * configured proxy, <tt>false</tt> otherwise.
0745: */
0746: private boolean shouldBypassProxy(final String hostname) {
0747: boolean bypass = false;
0748: for (final Iterator i = proxyBypassHosts_.values().iterator(); i
0749: .hasNext();) {
0750: final Pattern p = (Pattern) i.next();
0751: if (p.matcher(hostname).find()) {
0752: bypass = true;
0753: break;
0754: }
0755: }
0756: return bypass;
0757: }
0758:
0759: /**
0760: * Set the alert handler for this webclient.
0761: * @param alertHandler The new alerthandler or null if none is specified.
0762: */
0763: public void setAlertHandler(final AlertHandler alertHandler) {
0764: alertHandler_ = alertHandler;
0765: }
0766:
0767: /**
0768: * Return the alert handler for this webclient.
0769: * @return the alert handler or null if one hasn't been set.
0770: */
0771: public AlertHandler getAlertHandler() {
0772: return alertHandler_;
0773: }
0774:
0775: /**
0776: * Set the handler that will be executed when the javascript method Window.confirm() is called.
0777: * @param handler The new handler or null if no handler is to be used.
0778: */
0779: public void setConfirmHandler(final ConfirmHandler handler) {
0780: confirmHandler_ = handler;
0781: }
0782:
0783: /**
0784: * Return the confirm handler.
0785: * @return the confirm handler or null if one hasn't been set.
0786: */
0787: public ConfirmHandler getConfirmHandler() {
0788: return confirmHandler_;
0789: }
0790:
0791: /**
0792: * Set the handler that will be executed when the javascript method Window.prompt() is called.
0793: * @param handler The new handler or null if no handler is to be used.
0794: */
0795: public void setPromptHandler(final PromptHandler handler) {
0796: promptHandler_ = handler;
0797: }
0798:
0799: /**
0800: * Return the prompt handler.
0801: * @return the prompt handler or null if one hasn't been set.
0802: */
0803: public PromptHandler getPromptHandler() {
0804: return promptHandler_;
0805: }
0806:
0807: /**
0808: * Set the status handler for this webclient.
0809: * @param statusHandler The new alerthandler or null if none is specified.
0810: */
0811: public void setStatusHandler(final StatusHandler statusHandler) {
0812: statusHandler_ = statusHandler;
0813: }
0814:
0815: /**
0816: * Return the status handler for this webclient.
0817: * @return the status handler or null if one hasn't been set.
0818: */
0819: public StatusHandler getStatusHandler() {
0820: return statusHandler_;
0821: }
0822:
0823: /**
0824: * Return the current browser version
0825: * @return the current browser version.
0826: */
0827: public BrowserVersion getBrowserVersion() {
0828: return browserVersion_;
0829: }
0830:
0831: /**
0832: * Return the "current" window for this client. This is the window that will be used
0833: * when getPage() is called without specifying a window.
0834: * @return The current window.
0835: */
0836: public WebWindow getCurrentWindow() {
0837: return currentWindow_;
0838: }
0839:
0840: /**
0841: * Set the current window for this client. This is the window that will be used when
0842: * getPage() is called without specifying a window.
0843: * @param window The new window.
0844: */
0845: public void setCurrentWindow(final WebWindow window) {
0846: Assert.notNull("window", window);
0847: currentWindow_ = window;
0848: }
0849:
0850: /**
0851: * Return the "first" window for this client. This is the first window
0852: * opened since pushClearFirstWindow() was last called.
0853: * @return The first window.
0854: */
0855: public WebWindow popFirstWindow() {
0856: synchronized (firstWindowStack_) {
0857: return (WebWindow) firstWindowStack_.pop();
0858: }
0859: }
0860:
0861: /**
0862: * Clear the first window for this client.
0863: */
0864: public void pushClearFirstWindow() {
0865: synchronized (firstWindowStack_) {
0866: firstWindowStack_.push(null);
0867: }
0868: }
0869:
0870: /**
0871: * Add a listener for WebWindowEvent's. All events from all windows associated with this
0872: * client will be sent to the specified listener.
0873: * @param listener A listener.
0874: */
0875: public void addWebWindowListener(final WebWindowListener listener) {
0876: Assert.notNull("listener", listener);
0877: webWindowListeners_.add(listener);
0878: }
0879:
0880: /**
0881: * Remove a listener for WebWindowEvent's.
0882: * @param listener A listener.
0883: */
0884: public void removeWebWindowListener(final WebWindowListener listener) {
0885: Assert.notNull("listener", listener);
0886: webWindowListeners_.remove(listener);
0887: }
0888:
0889: private void fireWindowContentChanged(final WebWindowEvent event) {
0890: final Iterator iterator = new ArrayList(webWindowListeners_)
0891: .iterator();
0892: while (iterator.hasNext()) {
0893: final WebWindowListener listener = (WebWindowListener) iterator
0894: .next();
0895: listener.webWindowContentChanged(event);
0896: }
0897: }
0898:
0899: private void fireWindowOpened(final WebWindowEvent event) {
0900: final Iterator iterator = new ArrayList(webWindowListeners_)
0901: .iterator();
0902: while (iterator.hasNext()) {
0903: final WebWindowListener listener = (WebWindowListener) iterator
0904: .next();
0905: listener.webWindowOpened(event);
0906: }
0907: }
0908:
0909: private void fireWindowClosed(final WebWindowEvent event) {
0910: final Iterator iterator = new ArrayList(webWindowListeners_)
0911: .iterator();
0912: while (iterator.hasNext()) {
0913: final WebWindowListener listener = (WebWindowListener) iterator
0914: .next();
0915: listener.webWindowClosed(event);
0916: }
0917: }
0918:
0919: /**
0920: * Open a new window with the specified name. If the url is non-null then attempt to load
0921: * a page from that location and put it in the new window.
0922: *
0923: * @param url The url to load content from or null if no content is to be loaded.
0924: * @param windowName The name of the new window
0925: * @return The new window.
0926: */
0927: public WebWindow openWindow(final URL url, final String windowName) {
0928: Assert.notNull("windowName", windowName);
0929: return openWindow(url, windowName, getCurrentWindow());
0930: }
0931:
0932: /**
0933: * Open a new window with the specified name. If the url is non-null then attempt to load
0934: * a page from that location and put it in the new window.
0935: *
0936: * @param url The url to load content from or null if no content is to be loaded.
0937: * @param windowName The name of the new window
0938: * @param opener The web window that is calling openWindow
0939: * @return The new window.
0940: */
0941: public WebWindow openWindow(final URL url, final String windowName,
0942: final WebWindow opener) {
0943: final WebWindow window = openTargetWindow(opener, windowName,
0944: "_blank");
0945: if (url != null) {
0946: try {
0947: final WebRequestSettings settings = new WebRequestSettings(
0948: url);
0949: final HtmlPage openerPage = (HtmlPage) opener
0950: .getEnclosedPage();
0951: if (!getBrowserVersion().isIE()) {
0952: settings
0953: .addAdditionalHeader("Referer", openerPage
0954: .getWebResponse().getUrl()
0955: .toExternalForm());
0956: }
0957: getPage(window, settings);
0958: } catch (final IOException e) {
0959: getLog().error(
0960: "Error when loading content into window", e);
0961: }
0962: } else {
0963: initializeEmptyWindow(window);
0964: }
0965: return window;
0966: }
0967:
0968: /**
0969: * Open the window with the specified name. The name may be a special
0970: * target name of _self, _parent, _top, or _blank. An empty or null
0971: * name is set to the default. The special target names are relative to
0972: * the opener window.
0973: *
0974: * @param opener The web window that is calling openWindow
0975: * @param windowName The name of the new window
0976: * @param defaultName The default target if no name is given
0977: * @return The new window.
0978: */
0979: private WebWindow openTargetWindow(final WebWindow opener,
0980: final String windowName, final String defaultName) {
0981:
0982: Assert.notNull("opener", opener);
0983: Assert.notNull("defaultName", defaultName);
0984:
0985: String windowToOpen = windowName;
0986: if (windowToOpen == null || windowToOpen.length() == 0) {
0987: windowToOpen = defaultName;
0988: }
0989:
0990: WebWindow webWindow = null;
0991: if (windowToOpen.equals("_self")) {
0992: webWindow = opener;
0993: windowToOpen = "";
0994: } else if (windowToOpen.equals("_parent")) {
0995: webWindow = opener.getParentWindow();
0996: windowToOpen = "";
0997: } else if (windowToOpen.equals("_top")) {
0998: webWindow = opener.getTopWindow();
0999: windowToOpen = "";
1000: } else if (windowToOpen.equals("_blank")) {
1001: // Leave window null to create a new window.
1002: windowToOpen = "";
1003: } else if (windowToOpen.length() != 0) {
1004: try {
1005: webWindow = getWebWindowByName(windowToOpen);
1006: } catch (final WebWindowNotFoundException e) {
1007: // Fall through - a new window will be created below
1008: }
1009: }
1010:
1011: if (webWindow == null) {
1012: webWindow = new TopLevelWindow(windowToOpen, this );
1013: fireWindowOpened(new WebWindowEvent(webWindow,
1014: WebWindowEvent.OPEN, null, null));
1015: }
1016:
1017: if (webWindow instanceof TopLevelWindow
1018: && webWindow != opener.getTopWindow()) {
1019: ((TopLevelWindow) webWindow).setOpener(opener);
1020: }
1021:
1022: return webWindow;
1023: }
1024:
1025: /**
1026: * Set whether or not redirections will be followed automatically on receipt of
1027: * a redirect status code from the server.
1028: * @param enabled true to enable automatic redirection.
1029: */
1030: public void setRedirectEnabled(final boolean enabled) {
1031: isRedirectEnabled_ = enabled;
1032: }
1033:
1034: /**
1035: * Return whether or not redirections will be followed automatically on receipt of
1036: * a redirect status code from the server.
1037: * @return true if automatic redirection is enabled.
1038: */
1039: public boolean isRedirectEnabled() {
1040: return isRedirectEnabled_;
1041: }
1042:
1043: /**
1044: * If set to <tt>true</tt>, the client will accept connections to any host, regardless of
1045: * whether they have valid certificates or not. This is especially useful when you are trying to
1046: * connect to a server with expired or corrupt certificates.
1047: *
1048: * @param useInsecureSSL whether or not to use insecure SSL
1049: * @throws GeneralSecurityException if a security error occurs
1050: */
1051: public void setUseInsecureSSL(final boolean useInsecureSSL)
1052: throws GeneralSecurityException {
1053: if (useInsecureSSL) {
1054: final ProtocolSocketFactory factory = new InsecureSSLProtocolSocketFactory();
1055: final Protocol https = new Protocol("https", factory, 443);
1056: Protocol.registerProtocol("https", https);
1057: } else {
1058: Protocol.unregisterProtocol("https");
1059: }
1060: }
1061:
1062: /**
1063: * Set the object that will be used to create pages. Set this if you want
1064: * to customize the type of page that is returned for a given content type.
1065: *
1066: * @param pageCreator The new page creator
1067: */
1068: public void setPageCreator(final PageCreator pageCreator) {
1069: Assert.notNull("pageCreator", pageCreator);
1070: pageCreator_ = pageCreator;
1071: }
1072:
1073: /**
1074: * Return the current page creator.
1075: *
1076: * @return the page creator
1077: */
1078: public PageCreator getPageCreator() {
1079: return pageCreator_;
1080: }
1081:
1082: /**
1083: * Return the first {@link WebWindow} that matches the specified name.
1084: *
1085: * @param name The name to search for.
1086: * @return The {@link WebWindow} with the specified name
1087: * @throws WebWindowNotFoundException If the {@link WebWindow} can't be found.
1088: */
1089: public WebWindow getWebWindowByName(final String name)
1090: throws WebWindowNotFoundException {
1091: Assert.notNull("name", name);
1092:
1093: final Iterator iterator = webWindows_.iterator();
1094: while (iterator.hasNext()) {
1095: final WebWindow webWindow = (WebWindow) iterator.next();
1096: if (webWindow.getName().equals(name)) {
1097: return webWindow;
1098: }
1099: }
1100:
1101: throw new WebWindowNotFoundException(name);
1102: }
1103:
1104: /**
1105: * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br/>
1106: *
1107: * Initialize a new web window for JavaScript.
1108: * @param webWindow The new WebWindow
1109: */
1110: public void initialize(final WebWindow webWindow) {
1111: Assert.notNull("webWindow", webWindow);
1112: if (scriptEngine_ != null) {
1113: scriptEngine_.initialize(webWindow);
1114: }
1115: }
1116:
1117: /**
1118: * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br/>
1119: *
1120: * Initialize a new page for JavaScript.
1121: * @param newPage The new page.
1122: */
1123: public void initialize(final Page newPage) {
1124: Assert.notNull("newPage", newPage);
1125: if (scriptEngine_ != null) {
1126: ((Window) newPage.getEnclosingWindow().getScriptObject())
1127: .initialize(newPage);
1128: }
1129: }
1130:
1131: /**
1132: * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br/>
1133: *
1134: * Initialize a new empty web window for JavaScript.
1135: * @param webWindow The new WebWindow
1136: */
1137: public void initializeEmptyWindow(final WebWindow webWindow) {
1138: Assert.notNull("webWindow", webWindow);
1139: if (scriptEngine_ != null) {
1140: initialize(webWindow);
1141: ((Window) webWindow.getScriptObject()).initialize();
1142: }
1143: }
1144:
1145: /**
1146: * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br/>
1147: *
1148: * Add a new web window to the list of available windows.
1149: * @param webWindow The new WebWindow
1150: */
1151: public void registerWebWindow(final WebWindow webWindow) {
1152: Assert.notNull("webWindow", webWindow);
1153: webWindows_.add(webWindow);
1154: }
1155:
1156: /**
1157: * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br/>
1158: *
1159: * Remove a web window from the list of available windows.
1160: * @param webWindow The WebWindow to remove
1161: */
1162: public void deregisterWebWindow(final WebWindow webWindow) {
1163: Assert.notNull("webWindow", webWindow);
1164: webWindows_.remove(webWindow);
1165:
1166: if (getCurrentWindow() == webWindow) {
1167: if (webWindows_.size() == 0) {
1168: // Create a new one - we always have to have at least one window.
1169: setCurrentWindow(new TopLevelWindow("", this ));
1170: } else {
1171: setCurrentWindow((WebWindow) webWindows_.get(0));
1172: }
1173: }
1174: fireWindowClosed(new WebWindowEvent(webWindow,
1175: WebWindowEvent.CLOSE, webWindow.getEnclosedPage(), null));
1176: }
1177:
1178: /**
1179: * Return the log object for this web client
1180: * @return The log object
1181: */
1182: protected final Log getLog() {
1183: return LogFactory.getLog(getClass());
1184: }
1185:
1186: private static URL makeUrl(final String urlString)
1187: throws MalformedURLException {
1188: Assert.notNull("urlString", urlString);
1189:
1190: if (TextUtil.startsWithIgnoreCase(urlString, "javascript:")) {
1191: return new URL(null, urlString, JavaScriptUrlStreamHandler_);
1192: } else if (TextUtil.startsWithIgnoreCase(urlString, "about:")) {
1193: return new URL(null, urlString, AboutUrlStreamHandler_);
1194: } else {
1195: return new URL(urlString);
1196: }
1197: }
1198:
1199: /**
1200: * Expands a relative url relative to the specified base. In most situations
1201: * this is the same as <code>new URL(baseUrl, relativeUrl)</code> but
1202: * there are some cases that URL doesn't handle correctly. See
1203: * <a href="http://www.faqs.org/rfcs/rfc1808.html">RFC1808</a>
1204: * regarding Relative Uniform Resource Locators for more information.
1205: *
1206: * @param baseUrl The base url
1207: * @param relativeUrl The relative url
1208: * @return See above
1209: * @throws MalformedURLException If an error occurred when creating a URL object
1210: */
1211: public static URL expandUrl(final URL baseUrl,
1212: final String relativeUrl) throws MalformedURLException {
1213:
1214: if (StringUtils.isEmpty(relativeUrl)) {
1215: return baseUrl;
1216: }
1217: String parseUrl = relativeUrl;
1218:
1219: // section 2.4.2 - parsing scheme
1220: final int schemeIndex = parseUrl.indexOf(":");
1221: if (schemeIndex != -1) {
1222: boolean isProtocolSpecified = true;
1223: for (int i = 0; i < schemeIndex; i++) {
1224: if (Character.isLetter(parseUrl.charAt(i)) == false) {
1225: isProtocolSpecified = false;
1226: break;
1227: }
1228: }
1229: if (isProtocolSpecified) {
1230: return makeUrl(parseUrl);
1231: }
1232: }
1233:
1234: // section 2.4.3 - parsing network location/login
1235: if (parseUrl.startsWith("//")) {
1236: return makeUrl(baseUrl.getProtocol() + ":" + parseUrl);
1237: }
1238:
1239: // section 2.4.1 - parsing fragment
1240: final int fragmentIndex = parseUrl.lastIndexOf("#");
1241: String reference = null;
1242: if (fragmentIndex != -1) {
1243: reference = StringUtils.substringAfterLast(parseUrl, "#");
1244: parseUrl = parseUrl.substring(0, fragmentIndex);
1245: }
1246:
1247: // section 2.4.4 - parsing query
1248: String stringQuery = null;
1249: final int queryIndex = parseUrl.lastIndexOf("?");
1250: if (queryIndex != -1) {
1251: stringQuery = parseUrl.substring(queryIndex);
1252: parseUrl = parseUrl.substring(0, queryIndex);
1253: }
1254:
1255: // section 2.4.5 - parsing parameters
1256: String stringParameters = null;
1257: final int parametersIndex = parseUrl.lastIndexOf(";");
1258: if (parametersIndex != -1) {
1259: stringParameters = parseUrl.substring(parametersIndex);
1260: parseUrl = parseUrl.substring(0, parametersIndex);
1261: }
1262:
1263: // section 2.4.6 - parse path
1264: final List tokens = new ArrayList();
1265: final String stringToTokenize;
1266: if (parseUrl.trim().length() == 0) {
1267: stringToTokenize = baseUrl.getPath();
1268: } else if (parseUrl.startsWith("/")) {
1269: stringToTokenize = parseUrl;
1270: } else {
1271: String path = baseUrl.getPath();
1272: if (!path.endsWith("/") && parseUrl.length() != 0) {
1273: path += "/..";
1274: }
1275: stringToTokenize = path + "/" + parseUrl;
1276: }
1277:
1278: final String pathToTokenize = stringToTokenize;
1279: final StringTokenizer tokenizer = new StringTokenizer(
1280: pathToTokenize, "/");
1281: while (tokenizer.hasMoreTokens()) {
1282: tokens.add(tokenizer.nextToken());
1283: }
1284:
1285: for (int i = 0; i < tokens.size(); i++) {
1286: final String oneToken = (String) tokens.get(i);
1287: if (oneToken.length() == 0 || oneToken.equals(".")) {
1288: tokens.remove(i--);
1289: } else if (oneToken.equals("..")) {
1290: tokens.remove(i--);
1291: if (i >= 0) {
1292: tokens.remove(i--);
1293: }
1294: }
1295: }
1296:
1297: final StringBuffer buffer = new StringBuffer();
1298: buffer.append(baseUrl.getProtocol());
1299: buffer.append("://");
1300: buffer.append(baseUrl.getHost());
1301: final int port = baseUrl.getPort();
1302: if (port != -1) {
1303: buffer.append(":");
1304: buffer.append(port);
1305: }
1306:
1307: final Iterator iterator = tokens.iterator();
1308: while (iterator.hasNext()) {
1309: buffer.append("/");
1310: buffer.append(iterator.next());
1311: }
1312:
1313: if (pathToTokenize.endsWith("/")) {
1314: buffer.append("/");
1315: }
1316:
1317: if (stringParameters != null) {
1318: buffer.append(stringParameters);
1319: }
1320: if (stringQuery != null) {
1321: buffer.append(stringQuery);
1322: }
1323: if (reference != null) {
1324: buffer.append("#").append(reference);
1325: }
1326: final String newUrlString = buffer.toString();
1327: return makeUrl(newUrlString);
1328: }
1329:
1330: private WebResponse makeWebResponseForAboutUrl(final URL url) {
1331: final String urlWithoutQuery = StringUtils.substringBefore(url
1332: .toExternalForm(), "?");
1333: if (!StringUtils.substringAfter(urlWithoutQuery, "about:")
1334: .equalsIgnoreCase("blank")) {
1335: throw new IllegalArgumentException(
1336: url.toExternalForm()
1337: + "is not supported, only about:blank is supported now.");
1338: }
1339: return WEB_RESPONSE_FOR_ABOUT_BLANK;
1340: }
1341:
1342: /**
1343: * Builds a WebResponse for a file URL.
1344: * This first implementation is basic.
1345: * It assumes that the file contains an html page encoded with the specified encoding.
1346: * @param url The file url
1347: * @param charset encoding to use
1348: * @return The web response
1349: * @throws IOException If an IO problem occurs
1350: */
1351: private WebResponse makeWebResponseForFileUrl(final URL url,
1352: final String charset) throws IOException {
1353:
1354: URL cleanUrl = url;
1355: if (cleanUrl.getQuery() != null) {
1356: // Get rid of the query portion before trying to load the file.
1357: cleanUrl = UrlUtils.getUrlWithNewQuery(cleanUrl, null);
1358: }
1359: if (cleanUrl.getRef() != null) {
1360: // Get rid of the ref portion before trying to load the file.
1361: cleanUrl = UrlUtils.getUrlWithNewRef(cleanUrl, null);
1362: }
1363:
1364: final File file = FileUtils.toFile(cleanUrl);
1365: final String contentType = guessContentType(file);
1366:
1367: if (contentType.startsWith("text")) {
1368: final String str = IOUtils.toString(new FileInputStream(
1369: file), charset);
1370: return new StringWebResponse(str, charset, url) {
1371: private static final long serialVersionUID = 5713127877370126236L;
1372:
1373: public String getContentType() {
1374: return contentType;
1375: }
1376: };
1377: } else {
1378: final byte[] data = IOUtils
1379: .toByteArray(new FileInputStream(file));
1380: return new BinaryWebResponse(data, url, contentType);
1381: }
1382: }
1383:
1384: /**
1385: * A simple WebResponse created from a byte array. Content is assumed to be
1386: * of some binary type.
1387: *
1388: * @author Paul King
1389: */
1390: private static final class BinaryWebResponse extends
1391: WebResponseImpl {
1392:
1393: private static final long serialVersionUID = 8000117717229261957L;
1394:
1395: private final byte[] data_;
1396:
1397: private static WebResponseData getWebResponseData(
1398: final byte[] data, final String contentType) {
1399: final List compiledHeaders = new ArrayList();
1400: compiledHeaders.add(new NameValuePair("Content-Type",
1401: contentType));
1402: return new WebResponseData(data, HttpStatus.SC_OK, "OK",
1403: compiledHeaders);
1404: }
1405:
1406: private BinaryWebResponse(final byte[] data,
1407: final URL originatingURL, final String contentType) {
1408: super (getWebResponseData(data, contentType),
1409: originatingURL, SubmitMethod.GET, 0);
1410: data_ = data;
1411: }
1412:
1413: public InputStream getContentAsStream() {
1414: return new ByteArrayInputStream(data_);
1415: }
1416: }
1417:
1418: /**
1419: * Tries to guess the content type of the file.<br/>
1420: * This utility could be located in an helper class but we can compare this functionality
1421: * for instance with the "Helper Applications" settings of Mozilla and therefore see it as a
1422: * property of the "browser".
1423: * @param file the file
1424: * @return "application/octet-stream" if nothing could be guessed.
1425: */
1426: public String guessContentType(final File file) {
1427: String contentType = null;
1428: InputStream inputStream = null;
1429: try {
1430: inputStream = new BufferedInputStream(new FileInputStream(
1431: file));
1432: contentType = URLConnection
1433: .guessContentTypeFromStream(inputStream);
1434: } catch (final IOException e) {
1435: // nothing, silently ignore
1436: } finally {
1437: IOUtils.closeQuietly(inputStream);
1438: }
1439:
1440: if (contentType == null) {
1441: contentType = URLConnection.guessContentTypeFromName(file
1442: .getName());
1443: }
1444: if (contentType == null) {
1445: if (file.getName().endsWith(".js")) {
1446: contentType = "text/javascript";
1447: } else {
1448: contentType = "application/octet-stream";
1449: }
1450: }
1451:
1452: return contentType;
1453: }
1454:
1455: private WebResponse makeWebResponseForJavaScriptUrl(
1456: final WebWindow webWindow, final URL url,
1457: final String charset) {
1458: if (!(webWindow instanceof FrameWindow)) {
1459: throw new IllegalArgumentException(
1460: "javascript urls can only be used to load content into frames and iframes");
1461: }
1462:
1463: final FrameWindow frameWindow = (FrameWindow) webWindow;
1464: final HtmlPage enclosingPage = frameWindow.getEnclosingPage();
1465: final ScriptResult scriptResult = enclosingPage
1466: .executeJavaScriptIfPossible(url.toExternalForm(),
1467: "javascript url", 1);
1468:
1469: final String contentString = scriptResult.getJavaScriptResult()
1470: .toString();
1471: return new StringWebResponse(contentString, charset);
1472: }
1473:
1474: /**
1475: * Loads a {@link WebResponse} from the server
1476: * @param webRequestSettings settings to use when making the request
1477: * @throws IOException if an IO problem occurs
1478: * @return The WebResponse
1479: */
1480: public WebResponse loadWebResponse(
1481: final WebRequestSettings webRequestSettings)
1482: throws IOException {
1483:
1484: final WebResponse responseFromCache = cache_
1485: .getCachedContent(webRequestSettings);
1486: if (responseFromCache != null) {
1487: return responseFromCache;
1488: }
1489:
1490: final WebResponse response;
1491: final String protocol = webRequestSettings.getURL()
1492: .getProtocol();
1493: if (protocol.equals("about")) {
1494: response = makeWebResponseForAboutUrl(webRequestSettings
1495: .getURL());
1496: } else if (protocol.equals("file")) {
1497: response = makeWebResponseForFileUrl(webRequestSettings
1498: .getURL(), webRequestSettings.getCharset());
1499: } else {
1500: response = loadWebResponseFromWebConnection(
1501: webRequestSettings, ALLOWED_REDIRECTIONS_SAME_URL);
1502: }
1503:
1504: cache_.cacheIfNeeded(webRequestSettings, response);
1505: return response;
1506: }
1507:
1508: /**
1509: * Loads a {@link WebResponse} from the server through the WebConnection.
1510: * @param webRequestSettings settings to use when making the request
1511: * @throws IOException if an IO problem occurs
1512: * @return The WebResponse
1513: */
1514: private WebResponse loadWebResponseFromWebConnection(
1515: final WebRequestSettings webRequestSettings,
1516: final int nbAllowedRedirections) throws IOException {
1517: final URL url = webRequestSettings.getURL();
1518: final SubmitMethod method = webRequestSettings
1519: .getSubmitMethod();
1520: final List parameters = webRequestSettings
1521: .getRequestParameters();
1522:
1523: Assert.notNull("url", url);
1524: Assert.notNull("method", method);
1525: Assert.notNull("parameters", parameters);
1526:
1527: getLog().debug("Load response for " + url.toExternalForm());
1528:
1529: // If the request settings don't specify a custom proxy, use the default client proxy...
1530: if (webRequestSettings.getProxyHost() == null) {
1531: // ...unless the host needs to bypass the configured client proxy!
1532: if (!shouldBypassProxy(webRequestSettings.getURL()
1533: .getHost())) {
1534: webRequestSettings.setProxyHost(proxyHost_);
1535: webRequestSettings.setProxyPort(proxyPort_);
1536: }
1537: }
1538:
1539: //TODO: this should probably be handled inside of WebRequestSettings and
1540: // could cause a bug if anything above here reads the url again
1541: final URL fixedUrl = encodeUrl(url);
1542: webRequestSettings.setURL(fixedUrl);
1543:
1544: // adds the headers that are sent on every request
1545: webRequestSettings.getAdditionalHeaders().putAll(
1546: requestHeaders_);
1547:
1548: final WebResponse webResponse = getWebConnection().getResponse(
1549: webRequestSettings);
1550: final int statusCode = webResponse.getStatusCode();
1551:
1552: if (statusCode >= HttpStatus.SC_MOVED_PERMANENTLY
1553: && statusCode <= HttpStatus.SC_TEMPORARY_REDIRECT
1554: && isRedirectEnabled()) {
1555: final URL newUrl;
1556: String locationString = null;
1557: try {
1558: locationString = webResponse
1559: .getResponseHeaderValue("Location");
1560: newUrl = expandUrl(fixedUrl, locationString);
1561: } catch (final MalformedURLException e) {
1562: getIncorrectnessListener()
1563: .notify(
1564: "Got a redirect status code ["
1565: + statusCode
1566: + " "
1567: + webResponse
1568: .getStatusMessage()
1569:
1570: + "] but the location is not a valid url ["
1571: + locationString
1572: + "]. Skipping redirection processing.",
1573: this );
1574: return webResponse;
1575: }
1576:
1577: getLog()
1578: .debug(
1579: "Got a redirect status code [" + statusCode
1580: + "] new location=["
1581: + locationString + "]");
1582:
1583: if (webRequestSettings.getSubmitMethod().equals(
1584: SubmitMethod.GET)
1585: && webResponse.getUrl().toExternalForm().equals(
1586: locationString)) {
1587:
1588: if (nbAllowedRedirections == 0) {
1589: getLog().warn(
1590: "Max redirections allowed to the same location reached for ["
1591: + locationString
1592: + "]. Skipping redirection.");
1593: } else {
1594: getLog().debug(
1595: "Got a redirect with location same as the page we just loaded. "
1596: + "Nb self redirection allowed: "
1597: + nbAllowedRedirections);
1598: return loadWebResponseFromWebConnection(
1599: webRequestSettings,
1600: nbAllowedRedirections - 1);
1601: }
1602: } else if ((statusCode == HttpStatus.SC_MOVED_PERMANENTLY || statusCode == HttpStatus.SC_TEMPORARY_REDIRECT)
1603: && method.equals(SubmitMethod.GET)) {
1604:
1605: final WebRequestSettings wrs = new WebRequestSettings(
1606: newUrl);
1607: wrs.setRequestParameters(parameters);
1608: return loadWebResponse(wrs);
1609: } else if (statusCode <= HttpStatus.SC_SEE_OTHER) {
1610: final WebRequestSettings wrs = new WebRequestSettings(
1611: newUrl);
1612: wrs.setSubmitMethod(SubmitMethod.GET);
1613: return loadWebResponse(wrs);
1614: }
1615: }
1616:
1617: return webResponse;
1618: }
1619:
1620: /**
1621: * Encodes illegal parameter in path or query string (if any) as done by browsers.
1622: * Example: changes "http://first?a=b c" to "http://first?a=b%20c"
1623: * @param url the url to encode
1624: * @return the provided url if no change needed, the fixed url else
1625: * @throws MalformedURLException if the new URL could note be instantiated
1626: * @throws URIException if the default protocol charset is not supported
1627: */
1628: protected URL encodeUrl(final URL url)
1629: throws MalformedURLException, URIException {
1630: final String path = url.getPath();
1631: final String fixedPath = encode(path, URI.allowed_abs_path);
1632: final String query = url.getQuery();
1633: final String fixedQuery = encode(query, URI.allowed_query);
1634:
1635: if (!StringUtils.equals(path, fixedPath)
1636: || !StringUtils.equals(query, fixedQuery)) {
1637: final StringBuffer newUrl = new StringBuffer();
1638: newUrl.append(url.getProtocol());
1639: newUrl.append("://");
1640: newUrl.append(url.getHost());
1641: if (url.getPort() != -1) {
1642: newUrl.append(":");
1643: newUrl.append(url.getPort());
1644: }
1645: newUrl.append(fixedPath);
1646: if (url.getUserInfo() != null) {
1647: newUrl.append(url.getUserInfo());
1648: }
1649: if (fixedQuery != null) {
1650: newUrl.append("?");
1651: newUrl.append(fixedQuery);
1652: }
1653: if (url.getRef() != null) {
1654: newUrl.append("#");
1655: newUrl.append(url.getRef());
1656: }
1657:
1658: return new URL(newUrl.toString());
1659: } else {
1660: return url;
1661: }
1662: }
1663:
1664: /**
1665: * Encodes unallowed characters in a string
1666: * @param str the string to encode
1667: * @param allowed the allowed characters
1668: * @return the encoded string
1669: * @throws URIException if encoding fails
1670: */
1671: private String encode(final String str, final BitSet allowed)
1672: throws URIException {
1673: if (str == null) {
1674: return null;
1675: }
1676: final BitSet bits = new BitSet(str.length());
1677: bits.set('%');
1678: bits.set('+');
1679: bits.or(allowed);
1680: return URIUtil.encode(str, bits);
1681: }
1682:
1683: /**
1684: * Return an immutable list of open web windows (top windows or not).
1685: * @return The web windows
1686: */
1687: public List getWebWindows() {
1688: return Collections.unmodifiableList(webWindows_);
1689: }
1690:
1691: /**
1692: * Set the handler to be used whenever a refresh is triggered. Refer
1693: * to the documentation for {@link RefreshHandler} for more details.
1694: * @param handler The new handler
1695: */
1696: public void setRefreshHandler(final RefreshHandler handler) {
1697: if (handler == null) {
1698: refreshHandler_ = new ImmediateRefreshHandler();
1699: } else {
1700: refreshHandler_ = handler;
1701: }
1702: }
1703:
1704: /**
1705: * Return the current refresh handler or null if one has not been set.
1706: * @return The current RefreshHandler or null
1707: */
1708: public RefreshHandler getRefreshHandler() {
1709: return refreshHandler_;
1710: }
1711:
1712: /**
1713: * Set the script pre processor for this webclient.
1714: * @param scriptPreProcessor The new preprocessor or null if none is specified
1715: */
1716: public void setScriptPreProcessor(
1717: final ScriptPreProcessor scriptPreProcessor) {
1718: scriptPreProcessor_ = scriptPreProcessor;
1719: }
1720:
1721: /**
1722: * Return the script pre processor for this webclient.
1723: * @return the pre processor or null of one hasn't been set.
1724: */
1725: public ScriptPreProcessor getScriptPreProcessor() {
1726: return scriptPreProcessor_;
1727: }
1728:
1729: /**
1730: * Set the active X object map for this webclient. The <code>Map</code> is used to map the
1731: * string passed into the <code>ActiveXObject</code> constructor to a java class name. Therefore
1732: * you can emulate <code>ActiveXObject</code>s in a web page's javascript by mapping the object
1733: * name to a java class to emulate the active X object.
1734: * @param activeXObjectMap The new preprocessor or null if none is specified
1735: */
1736: public void setActiveXObjectMap(final Map activeXObjectMap) {
1737: activeXObjectMap_ = activeXObjectMap;
1738: }
1739:
1740: /**
1741: * Return the active X object map for this webclient.
1742: * @return the active X object map.
1743: */
1744: public Map getActiveXObjectMap() {
1745: return activeXObjectMap_;
1746: }
1747:
1748: /**
1749: * Defines a listener for messages generated by the html parser.<br/>
1750: * <b>Note</b>: If {@link #getIgnoreOutsideContent()} returns <code>false</code>, the parser
1751: * will ignore closing <body> and <html> tags to be able to handle html content
1752: * incorrectly located after the end of the html file. As a consequence it will finally
1753: * notify as errors that <body> and <html> are not closed properly even if
1754: * they were correctly present.
1755: * @param listener the new listener, <code>null</code> if messages should be totally ignored.
1756: */
1757: public void setHTMLParserListener(final HTMLParserListener listener) {
1758: htmlParserListener_ = listener;
1759: }
1760:
1761: /**
1762: * Gets the configured listener for messages generated by the html parser.
1763: * @return <code>null</code> if no listener is defined (default value).
1764: */
1765: public HTMLParserListener getHTMLParserListener() {
1766: return htmlParserListener_;
1767: }
1768:
1769: /**
1770: * Set the flag on the HtmlParse to ignore the content that is outside of the BODY
1771: * and HTML tags.
1772: * @param ignoreOutsideContent The boolean flag to enable or disable the support of
1773: * content outside of the HTML and BODY tags
1774: */
1775: public static void setIgnoreOutsideContent(
1776: final boolean ignoreOutsideContent) {
1777: HTMLParser.setIgnoreOutsideContent(ignoreOutsideContent);
1778: }
1779:
1780: /**
1781: * Get the state of the flag to ignore content outside the BODY and HTML tags
1782: * @return - The current state
1783: */
1784: public static boolean getIgnoreOutsideContent() {
1785: return HTMLParser.getIgnoreOutsideContent();
1786: }
1787:
1788: /**
1789: * Gets the timeout value for the WebConnection
1790: *
1791: * @return The timeout value in milliseconds
1792: * @see WebClient#setTimeout(int)
1793: */
1794: public int getTimeout() {
1795: return timeout_;
1796: }
1797:
1798: /**
1799: * Sets the timeout of the WebConnection. Set to zero (the default) for an infinite wait.
1800: *
1801: * Note: The timeout is used twice. The first is for making the socket connection, the
1802: * second is for data retrieval. If the time is critical you must allow for twice the
1803: * time specified here.
1804: *
1805: * @param timeout The value of the timeout in milliseconds
1806: */
1807: public void setTimeout(final int timeout) {
1808: timeout_ = timeout;
1809: }
1810:
1811: /**
1812: * Indicates if an exception should be thrown when a script execution fails
1813: * (the default) or if it should be caught and just logged to allow page
1814: * execution to continue.
1815: * @return <code>true</code> if an exception is thrown on script error (the default)
1816: */
1817: public boolean isThrowExceptionOnScriptError() {
1818: return throwExceptionOnScriptError_;
1819: }
1820:
1821: /**
1822: * Changes the behavior of this webclient when a script error occurs.
1823: * @param newValue indicates if exception should be thrown or not
1824: */
1825: public void setThrowExceptionOnScriptError(final boolean newValue) {
1826: throwExceptionOnScriptError_ = newValue;
1827: }
1828:
1829: /**
1830: * Gets the current listener for encountered incorrectness (except HTML parsing messages that
1831: * are handled by the HTML parser listener). Default value is an instance of
1832: * {@link IncorrectnessListenerImpl}.
1833: * @return the current listener (not <code>null</code>).
1834: */
1835: public IncorrectnessListener getIncorrectnessListener() {
1836: return incorrectnessListener_;
1837: }
1838:
1839: /**
1840: * Returns the current HTML incorrectness listener.
1841: * @param listener the new value (not <code>null</code>)
1842: */
1843: public void setIncorrectnessListener(
1844: final IncorrectnessListener listener) {
1845: if (listener == null) {
1846: throw new NullPointerException(
1847: "Null incorrectness listener.");
1848: }
1849: incorrectnessListener_ = listener;
1850: }
1851:
1852: /**
1853: * Gets the current Ajax controller
1854: * @return the controller
1855: */
1856: public AjaxController getAjaxController() {
1857: return ajaxController_;
1858: }
1859:
1860: /**
1861: * Sets the current Ajax controller
1862: * <span style="color:red">EXPERIMENTAL - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br/>
1863: * @param newValue the controller
1864: */
1865: public void setAjaxController(final AjaxController newValue) {
1866: if (newValue == null) {
1867: throw new NullPointerException();
1868: }
1869: ajaxController_ = newValue;
1870: }
1871:
1872: /**
1873: * Set the onbeforeunload handler for this webclient.
1874: * @param onbeforeunloadHandler The new onbeforeunloadHandler or null if none is specified.
1875: */
1876: public void setOnbeforeunloadHandler(
1877: final OnbeforeunloadHandler onbeforeunloadHandler) {
1878: onbeforeunloadHandler_ = onbeforeunloadHandler;
1879: }
1880:
1881: /**
1882: * Return the onbeforeunload handler for this webclient.
1883: * @return the onbeforeunload handler or null if one hasn't been set.
1884: */
1885: public OnbeforeunloadHandler getOnbeforeunloadHandler() {
1886: return onbeforeunloadHandler_;
1887: }
1888:
1889: /**
1890: * Gets the cache currently used
1891: * @return the cache (may not be null)
1892: */
1893: public Cache getCache() {
1894: return cache_;
1895: }
1896:
1897: /**
1898: * Sets the cache to use
1899: * @param cache the new cache (must not be <code>null</code>)
1900: */
1901: public void setCache(final Cache cache) {
1902: if (cache == null) {
1903: throw new IllegalArgumentException(
1904: "cache should not be null!");
1905: }
1906: cache_ = cache;
1907: }
1908: }
|