001: /*
002: * Copyright (c) 2002-2008 Gargoyle Software Inc. All rights reserved.
003: *
004: * Redistribution and use in source and binary forms, with or without
005: * modification, are permitted provided that the following conditions are met:
006: *
007: * 1. Redistributions of source code must retain the above copyright notice,
008: * this list of conditions and the following disclaimer.
009: * 2. Redistributions in binary form must reproduce the above copyright notice,
010: * this list of conditions and the following disclaimer in the documentation
011: * and/or other materials provided with the distribution.
012: * 3. The end-user documentation included with the redistribution, if any, must
013: * include the following acknowledgment:
014: *
015: * "This product includes software developed by Gargoyle Software Inc.
016: * (http://www.GargoyleSoftware.com/)."
017: *
018: * Alternately, this acknowledgment may appear in the software itself, if
019: * and wherever such third-party acknowledgments normally appear.
020: * 4. The name "Gargoyle Software" must not be used to endorse or promote
021: * products derived from this software without prior written permission.
022: * For written permission, please contact info@GargoyleSoftware.com.
023: * 5. Products derived from this software may not be called "HtmlUnit", nor may
024: * "HtmlUnit" appear in their name, without prior written permission of
025: * Gargoyle Software Inc.
026: *
027: * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
028: * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
029: * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GARGOYLE
030: * SOFTWARE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
031: * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
032: * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
033: * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
034: * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
035: * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
036: * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
037: */
038: package com.gargoylesoftware.htmlunit;
039:
040: import java.io.FileNotFoundException;
041: import java.io.IOException;
042: import java.io.OutputStream;
043: import java.net.URL;
044: import java.util.ArrayList;
045: import java.util.Iterator;
046: import java.util.List;
047: import java.util.Map;
048:
049: import org.apache.commons.httpclient.Header;
050: import org.apache.commons.httpclient.HostConfiguration;
051: import org.apache.commons.httpclient.HttpClient;
052: import org.apache.commons.httpclient.HttpException;
053: import org.apache.commons.httpclient.HttpMethod;
054: import org.apache.commons.httpclient.HttpMethodBase;
055: import org.apache.commons.httpclient.HttpState;
056: import org.apache.commons.httpclient.HttpStatus;
057: import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
058: import org.apache.commons.httpclient.NameValuePair;
059: import org.apache.commons.httpclient.URI;
060: import org.apache.commons.httpclient.URIException;
061: import org.apache.commons.httpclient.auth.CredentialsProvider;
062: import org.apache.commons.httpclient.cookie.CookiePolicy;
063: import org.apache.commons.httpclient.methods.EntityEnclosingMethod;
064: import org.apache.commons.httpclient.methods.GetMethod;
065: import org.apache.commons.httpclient.methods.PostMethod;
066: import org.apache.commons.httpclient.methods.StringRequestEntity;
067: import org.apache.commons.httpclient.methods.multipart.FilePart;
068: import org.apache.commons.httpclient.methods.multipart.MultipartRequestEntity;
069: import org.apache.commons.httpclient.methods.multipart.Part;
070: import org.apache.commons.httpclient.methods.multipart.PartBase;
071: import org.apache.commons.httpclient.methods.multipart.StringPart;
072: import org.apache.commons.httpclient.params.HttpMethodParams;
073: import org.apache.commons.httpclient.util.EncodingUtil;
074: import org.apache.commons.logging.Log;
075: import org.apache.commons.logging.LogFactory;
076: import org.apache.commons.logging.impl.SimpleLog;
077:
078: /**
079: * <span style="color:red">INTERNAL API - SUBJECT TO CHANGE AT ANY TIME - USE AT YOUR OWN RISK.</span><br/>
080: *
081: * An object that handles the actual communication portion of page
082: * retrieval/submission
083: *
084: * @version $Revision: 2168 $
085: * @author <a href="mailto:mbowler@GargoyleSoftware.com">Mike Bowler</a>
086: * @author Noboru Sinohara
087: * @author David D. Kilzer
088: * @author Marc Guillemot
089: * @author Brad Clarke
090: * @author Ahmed Ashour
091: */
092: public class HttpWebConnection extends WebConnectionImpl {
093: private HttpClient httpClient_;
094:
095: private String virtualHost_;
096:
097: /**
098: * Create a new HTTP web connection instance.
099: * @param webClient The WebClient that is using this connection
100: */
101: public HttpWebConnection(final WebClient webClient) {
102: super (webClient);
103: }
104:
105: /**
106: * Submit a request and retrieve a response
107: *
108: * @param webRequestSettings Settings to make the request with
109: * @return See above
110: * @exception IOException If an IO error occurs
111: */
112: public WebResponse getResponse(
113: final WebRequestSettings webRequestSettings)
114: throws IOException {
115: final URL url = webRequestSettings.getURL();
116:
117: final HttpClient httpClient = getHttpClient();
118:
119: final HttpMethodBase httpMethod = makeHttpMethod(webRequestSettings);
120: try {
121: final HostConfiguration hostConfiguration = getHostConfiguration(webRequestSettings);
122: final long startTime = System.currentTimeMillis();
123: final int responseCode = httpClient.executeMethod(
124: hostConfiguration, httpMethod);
125: final long endTime = System.currentTimeMillis();
126: return makeWebResponse(responseCode, httpMethod, url,
127: endTime - startTime, webRequestSettings
128: .getCharset());
129: } catch (final HttpException e) {
130: // KLUDGE: hitting www.yahoo.com will cause an exception to be thrown while
131: // www.yahoo.com/ (note the trailing slash) will not. If an exception is
132: // caught here then check to see if this is the situation. If so, then retry
133: // it with a trailing slash. The bug manifests itself with httpClient
134: // complaining about not being able to find a line with HTTP/ on it.
135: if (url.getPath().length() == 0) {
136: final StringBuffer buffer = new StringBuffer();
137: buffer.append(url.getProtocol());
138: buffer.append("://");
139: buffer.append(url.getHost());
140: buffer.append("/");
141: if (url.getQuery() != null) {
142: buffer.append(url.getQuery());
143: }
144: //TODO: There might be a bug here since the original encoding type is lost.
145: final WebRequestSettings newRequest = new WebRequestSettings(
146: new URL(buffer.toString()));
147: newRequest.setSubmitMethod(webRequestSettings
148: .getSubmitMethod());
149: newRequest.setRequestParameters(webRequestSettings
150: .getRequestParameters());
151: newRequest.setAdditionalHeaders(webRequestSettings
152: .getAdditionalHeaders());
153: return getResponse(newRequest);
154: } else {
155: e.printStackTrace();
156: throw new RuntimeException("HTTP Error: "
157: + e.getMessage());
158: }
159: } finally {
160: onResponseGenerated(httpMethod);
161: }
162: }
163:
164: /**
165: * Called when the response has been generated. Default action is to release
166: * the HttpMethod's connection. Subclasses may override.
167: * @param httpMethod the httpMethod used.
168: */
169: protected void onResponseGenerated(final HttpMethodBase httpMethod) {
170: httpMethod.releaseConnection();
171: }
172:
173: /**
174: * Gets the host configuration for the request.
175: * Should we cache it?
176: * @param webRequestSettings the current request settings
177: * @return the host configuration to use for this request
178: */
179: private HostConfiguration getHostConfiguration(
180: final WebRequestSettings webRequestSettings) {
181: final HostConfiguration hostConfiguration = new HostConfiguration();
182: final URL url = webRequestSettings.getURL();
183: final URI uri;
184: try {
185: uri = new URI(url.toExternalForm(), false);
186: } catch (final URIException e) {
187: // Theoretically impossible but ....
188: throw new IllegalStateException(
189: "Unable to create URI from URL: "
190: + url.toExternalForm());
191: }
192: hostConfiguration.setHost(uri);
193: if (webRequestSettings.getProxyHost() != null) {
194: final String proxyHost = webRequestSettings.getProxyHost();
195: final int proxyPort = webRequestSettings.getProxyPort();
196: hostConfiguration.setProxy(proxyHost, proxyPort);
197: }
198: return hostConfiguration;
199: }
200:
201: /**
202: * Creates an <tt>HttpMethod</tt> instance according to the specified parameters.
203: * @param webRequestSettings the parameters.
204: * @return The <tt>HttpMethod</tt> instance constructed according to the specified parameters.
205: * @throws IOException
206: */
207: private HttpMethodBase makeHttpMethod(
208: final WebRequestSettings webRequestSettings)
209: throws IOException {
210:
211: String path = webRequestSettings.getURL().getPath();
212: if (path.length() == 0) {
213: path = "/";
214: } else if (path.startsWith("//")) {
215: path = "//" + path; // cf https://issues.apache.org/jira/browse/HTTPCLIENT-727
216: }
217: final HttpMethodBase httpMethod = buildHttpMethod(
218: webRequestSettings.getSubmitMethod(), path);
219: if (!(httpMethod instanceof EntityEnclosingMethod)) {
220: // this is the case for GET as well as TRACE, DELETE, OPTIONS and HEAD
221:
222: if (webRequestSettings.getRequestParameters().isEmpty()) {
223: final String queryString = webRequestSettings.getURL()
224: .getQuery();
225: httpMethod.setQueryString(queryString);
226: } else {
227: final NameValuePair[] pairs = new NameValuePair[webRequestSettings
228: .getRequestParameters().size()];
229: webRequestSettings.getRequestParameters()
230: .toArray(pairs);
231: httpMethod.setQueryString(pairs);
232: }
233: } else { // POST as well as PUT
234: final EntityEnclosingMethod method = (EntityEnclosingMethod) httpMethod;
235: method.getParams().setContentCharset(
236: webRequestSettings.getCharset());
237:
238: final String queryString = webRequestSettings.getURL()
239: .getQuery();
240: if (queryString != null) {
241: method.setQueryString(queryString);
242: }
243: if (webRequestSettings.getRequestBody() != null) {
244: final String body = webRequestSettings.getRequestBody();
245: final String charset = webRequestSettings.getCharset();
246: method.setRequestEntity(new StringRequestEntity(body,
247: null, charset));
248: }
249:
250: // Note that this has to be done in two loops otherwise it won't
251: // be able to support two elements with the same name.
252: if (webRequestSettings.getEncodingType() == FormEncodingType.URL_ENCODED
253: && method instanceof PostMethod) {
254: final PostMethod postMethod = (PostMethod) httpMethod;
255: Iterator iterator = webRequestSettings
256: .getRequestParameters().iterator();
257: while (iterator.hasNext()) {
258: final NameValuePair pair = (NameValuePair) iterator
259: .next();
260: postMethod.removeParameter(pair.getName(), pair
261: .getValue());
262: }
263:
264: iterator = webRequestSettings.getRequestParameters()
265: .iterator();
266: while (iterator.hasNext()) {
267: final NameValuePair pair = (NameValuePair) iterator
268: .next();
269: postMethod.addParameter(pair.getName(), pair
270: .getValue());
271: }
272: } else {
273: final List partList = new ArrayList();
274: final Iterator iterator = webRequestSettings
275: .getRequestParameters().iterator();
276: while (iterator.hasNext()) {
277: final PartBase newPart;
278: final KeyValuePair pair = (KeyValuePair) iterator
279: .next();
280: if (pair instanceof KeyDataPair) {
281: final KeyDataPair pairWithFile = (KeyDataPair) pair;
282: final String charset = webRequestSettings
283: .getCharset();
284: newPart = buildFilePart(pairWithFile, charset);
285: } else {
286: newPart = new StringPart(pair.getName(), pair
287: .getValue(), webRequestSettings
288: .getCharset());
289: newPart.setContentType(null); // Firefox and IE seem not to send a content type
290: }
291: newPart.setTransferEncoding(null); // Firefox and IE don't send transfer encoding headers
292: partList.add(newPart);
293: }
294: Part[] parts = new Part[partList.size()];
295: parts = (Part[]) partList.toArray(parts);
296: method.setRequestEntity(new MultipartRequestEntity(
297: parts, method.getParams()));
298: }
299: }
300:
301: httpMethod.setRequestHeader("User-Agent", getWebClient()
302: .getBrowserVersion().getUserAgent());
303:
304: writeRequestHeadersToHttpMethod(httpMethod, webRequestSettings
305: .getAdditionalHeaders());
306: httpMethod.setFollowRedirects(false);
307:
308: if (webRequestSettings.getCredentialsProvider() != null) {
309: httpMethod.getParams().setParameter(
310: CredentialsProvider.PROVIDER,
311: webRequestSettings.getCredentialsProvider());
312: }
313:
314: if (getWebClient().isCookiesEnabled()) {
315: // Cookies are enabled. Note that it's important that we enable single cookie headers,
316: // for compatibility purposes.
317: httpMethod.getParams().setBooleanParameter(
318: HttpMethodParams.SINGLE_COOKIE_HEADER, true);
319: if (webRequestSettings.getCookiePolicy() != null) {
320: // TODO: remove this down the line; the setter is deprecated for now.
321: httpMethod.getParams().setCookiePolicy(
322: webRequestSettings.getCookiePolicy());
323: } else {
324: httpMethod.getParams().setCookiePolicy(
325: WebClient.HTMLUNIT_COOKIE_POLICY);
326: }
327: } else {
328: // Cookies are disabled.
329: httpMethod.getParams().setCookiePolicy(
330: CookiePolicy.IGNORE_COOKIES);
331: }
332:
333: return httpMethod;
334: }
335:
336: FilePart buildFilePart(final KeyDataPair pairWithFile,
337: final String charset) throws FileNotFoundException {
338: final FilePart part = new FilePart(pairWithFile.getName(),
339: pairWithFile.getValue(), pairWithFile.getFile(),
340: pairWithFile.getContentType(), null) {
341:
342: /**
343: * This implementation overrides the super one by encoding filename
344: * according to the page charset.
345: * @see http://issues.apache.org/jira/browse/HTTPCLIENT-293
346: * {@inheritDoc}
347: */
348: protected void sendDispositionHeader(final OutputStream out)
349: throws IOException {
350: out.write(CONTENT_DISPOSITION_BYTES);
351: out.write(QUOTE_BYTES);
352: out.write(EncodingUtil.getAsciiBytes(getName()));
353: out.write(QUOTE_BYTES);
354: final String filename = getSource().getFileName();
355: if (filename != null) {
356: out.write(EncodingUtil.getAsciiBytes(FILE_NAME));
357: out.write(QUOTE_BYTES);
358: out.write(EncodingUtil.getBytes(getFileName(),
359: charset));
360: out.write(QUOTE_BYTES);
361: }
362: }
363:
364: private String getFileName() {
365: if (pairWithFile.getFile() == null) {
366: return pairWithFile.getValue();
367: } else if (getWebClient().getBrowserVersion().isIE()) {
368: return pairWithFile.getFile().getAbsolutePath();
369: } else {
370: return pairWithFile.getValue();
371: }
372: }
373: };
374: // Firefox and IE seem not to specify a charset for a file part
375: part.setCharSet(null);
376:
377: return part;
378: }
379:
380: private HttpMethodBase buildHttpMethod(
381: final SubmitMethod submitMethod, final String path) {
382: final HttpMethodBase method;
383:
384: if (SubmitMethod.GET == submitMethod) {
385: method = new GetMethod(path);
386: } else if (SubmitMethod.POST == submitMethod) {
387: method = new PostMethod(path);
388: } else {
389: throw new IllegalStateException(
390: "Submit method not yet supported: " + submitMethod);
391: }
392: return method;
393: }
394:
395: /**
396: * Lazily initialize the httpClient
397: * @return the initialized client
398: */
399: protected synchronized HttpClient getHttpClient() {
400: if (httpClient_ == null) {
401: httpClient_ = createHttpClient();
402:
403: // Disable informational messages from httpclient
404: final Log log = LogFactory.getLog("httpclient.wire");
405: if (log instanceof SimpleLog) {
406: ((SimpleLog) log).setLevel(SimpleLog.LOG_LEVEL_WARN);
407: }
408:
409: httpClient_.getHttpConnectionManager().getParams()
410: .setSoTimeout(getTimeout());
411: httpClient_.getHttpConnectionManager().getParams()
412: .setConnectionTimeout(getTimeout());
413:
414: if (virtualHost_ != null) {
415: httpClient_.getParams().setVirtualHost(virtualHost_);
416: }
417: }
418:
419: // Tell the client where to get its credentials from
420: // (it may have changed on the webClient since last call to getHttpClientFor(...))
421: httpClient_.getParams().setParameter(
422: CredentialsProvider.PROVIDER,
423: getWebClient().getCredentialsProvider());
424:
425: return httpClient_;
426: }
427:
428: /**
429: * Return the timeout to use for socket and connection timeouts for HttpConnectionManager.
430: * is overridden to 0 by StreamingWebConnection which keeps reading after a timeout and
431: * must have long running connections explicitly terminated.
432: * @return the WebClient's timeout.
433: */
434: protected int getTimeout() {
435: return getWebClient().getTimeout();
436: }
437:
438: /**
439: * Creates the httpClient that will be used by this WebConnection.
440: * Extensions may override this method to create the HttpClient with for instance a custom
441: * {@link org.apache.commons.httpclient.HttpConnectionManager} to perform some tracking
442: * (see feature request 1438216).
443: * @return the client
444: */
445: protected HttpClient createHttpClient() {
446: final MultiThreadedHttpConnectionManager connectionManager = new MultiThreadedHttpConnectionManager();
447: return new HttpClient(connectionManager);
448: }
449:
450: /**
451: * Return the log object for this class
452: * @return The log object
453: */
454: protected final Log getLog() {
455: return LogFactory.getLog(getClass());
456: }
457:
458: /**
459: * set the virtual host
460: * @param virtualHost The virtualHost to set.
461: */
462: public void setVirtualHost(final String virtualHost) {
463: virtualHost_ = virtualHost;
464: }
465:
466: /**
467: * Get the virtual host
468: * @return virtualHost The current virtualHost
469: */
470: public String getVirtualHost() {
471: return virtualHost_;
472: }
473:
474: /**
475: * Return the {@link HttpState} that is being used.
476: * @return The state.
477: */
478: public HttpState getState() {
479: return getHttpClient().getState();
480: }
481:
482: /**
483: * Converts the HttpMethod into a WebResponse
484: */
485: private WebResponse makeWebResponse(final int statusCode,
486: final HttpMethodBase method, final URL originatingURL,
487: final long loadTime, final String charset)
488: throws IOException {
489:
490: String statusMessage = method.getStatusText();
491: if (statusMessage == null || statusMessage.length() == 0) {
492: statusMessage = HttpStatus.getStatusText(statusCode);
493: }
494: if (statusMessage == null) {
495: statusMessage = "Unknown status code";
496: }
497: final List headers = new ArrayList();
498: final Header[] array = method.getResponseHeaders();
499: for (int i = 0; i < array.length; i++) {
500: headers.add(new NameValuePair(array[i].getName(), array[i]
501: .getValue()));
502: }
503: final WebResponseData responseData = newWebResponseDataInstance(
504: statusMessage, headers, statusCode, method);
505:
506: final SubmitMethod requestMethod = SubmitMethod
507: .getInstance(method.getName());
508: return newWebResponseInstance(charset, responseData, loadTime,
509: requestMethod, originatingURL);
510: }
511:
512: /**
513: * Construct an appropriate WebResponseData.
514: * May be overridden by subclasses to return a specialized WebResponseData.
515: * @param statusMessage StatusMessage from the response
516: * @param headers response headers
517: * @param statusCode response status code
518: * @param method request method
519: * @return The WebResponseData to use for this response.
520: * @throws IOException if there is a problem reading the response body.
521: */
522: protected WebResponseData newWebResponseDataInstance(
523: final String statusMessage, final List headers,
524: final int statusCode, final HttpMethodBase method)
525: throws IOException {
526: return new WebResponseData(method.getResponseBodyAsStream(),
527: statusCode, statusMessage, headers);
528: }
529:
530: /**
531: * Construct an appropriate WebResponse.
532: * May be overridden by subclasses to return a specialized WebResponse.
533: * @param responseData Data that was send back
534: * @param charset Charset used if not returned in the response.
535: * @param originatingURL Where this response came from
536: * @param requestMethod The method used to get this response
537: * @param loadTime How long the response took to be sent
538: * @return the new WebResponse.
539: */
540: protected WebResponse newWebResponseInstance(final String charset,
541: final WebResponseData responseData, final long loadTime,
542: final SubmitMethod requestMethod, final URL originatingURL) {
543: return new WebResponseImpl(responseData, charset,
544: originatingURL, requestMethod, loadTime);
545: }
546:
547: private void writeRequestHeadersToHttpMethod(
548: final HttpMethod httpMethod, final Map requestHeaders) {
549: synchronized (requestHeaders) {
550: final Iterator iterator = requestHeaders.entrySet()
551: .iterator();
552: while (iterator.hasNext()) {
553: final Map.Entry entry = (Map.Entry) iterator.next();
554: httpMethod.setRequestHeader((String) entry.getKey(),
555: (String) entry.getValue());
556: }
557: }
558: }
559:
560: }
|