0001: package com.sun.portal.portlet.rssportlet.filecache;
0002:
0003: import javax.servlet.*;
0004: import javax.servlet.http.*;
0005: import java.io.*;
0006: import java.util.*;
0007: import java.net.*;
0008:
0009: /**
0010: *@author Bill York
0011: *@created January 20, 2004
0012: */
0013:
0014: public class FileCache extends HttpServlet {
0015: protected HttpServletRequest req;
0016: protected HttpServletResponse res;
0017: private int timeoutValue;
0018: protected Properties props;
0019: private boolean preserveContentType;
0020: private Hashtable cachedFiles;
0021: protected boolean debug;
0022: private final static int CONTENT_BUFFER_SIZE = 2048;
0023: /** Description of the Field */
0024: public Hashtable cookieTable;
0025: /** Array of File extensions mapped to the MIMETypes */
0026: protected static String typeTable[][] = { { ".html", "text/html" },
0027: { ".htm", "text/html" }, { ".gif", "image/gif" },
0028: { ".txt", "text/plain" }, { ".jpg", "image/jpeg" },
0029: { ".xml", "text/xml" }, { ".wml", "text/vnd.wap.wml" } };
0030:
0031: /**
0032: * Init is called on servlet startup
0033: *
0034: *@param servletConf Description of the Parameter
0035: *@exception ServletException Description of the Exception
0036: */
0037: public void init(ServletConfig servletConf) throws ServletException {
0038: // Init servlet
0039: super .init(servletConf);
0040: super .log("FileCache start initialization");
0041: // Create empty cache table
0042: cachedFiles = new Hashtable();
0043: //Create cookie table
0044: cookieTable = new Hashtable();
0045: }
0046:
0047: private Properties setProperties(HttpServletRequest req) {
0048: // Set default properties
0049: Properties prop = new Properties();
0050: Enumeration e = req.getHeaderNames();
0051: String headerName = "";
0052: log("FileCache.setProperties(): Header info:");
0053: while (e.hasMoreElements()) {
0054: headerName = (String) e.nextElement();
0055: log(headerName + "=" + req.getHeader(headerName));
0056: }
0057:
0058: // Determine if debugging is enabled
0059: String debugEnabled = req.getHeader("debug");
0060: log("FileCache.setProperties():debugEnabled = " + debugEnabled);
0061: if (debugEnabled == null || debugEnabled.length() < 1) {
0062: debugEnabled = "false";
0063: }
0064: Boolean bdebug = new Boolean(debugEnabled);
0065: debug = bdebug.booleanValue();
0066: log("FileCache.setProperties(): debug = "
0067: + String.valueOf(debug));
0068:
0069: // Get cache timeout value
0070: String timeoutString = req.getHeader("timeoutvalue");
0071: if (debug) {
0072: log("FileCache.setProperties(): timeoutString ="
0073: + timeoutString);
0074: }
0075: if (timeoutString != null && timeoutString.length() > 0) {
0076: timeoutValue = Integer.parseInt(timeoutString);
0077: } else {
0078: timeoutValue = 60;
0079:
0080: }
0081: //log("FileCache.setProperties(): timeoutValue=" + String.valueOf(timeoutValue));
0082: //props.setProperty("FileCache.timeoutValue", String.valueOf(timeoutValue));
0083:
0084: // Load up properties file for non global values
0085: log("FileCache.setProperties(): encoding="
0086: + req.getCharacterEncoding());
0087: if (req.getCharacterEncoding() != null)
0088: prop.setProperty("FileCache.inputEncoding", req
0089: .getCharacterEncoding());
0090: else
0091: prop.setProperty("FileCache.inputEncoding", "ISO-8859-1");
0092:
0093: if (req.getHeader("rewriterruleset") != null) {
0094: prop.setProperty("FileCache.RewriterRuleset", req
0095: .getHeader("rewriterruleset"));
0096: } else {
0097: prop.setProperty("FileCache.RewriterRuleset",
0098: "default_ruleset");
0099: }
0100: if (req.getHeader("proxyHost") != null) {
0101: prop.setProperty("Fetcher.useProxy", "proxy");
0102: prop.setProperty("Fetcher.proxyHost", req
0103: .getHeader("proxyhost"));
0104: } else {
0105: prop.setProperty("Fetcher.useProxy", "");
0106: prop.setProperty("Fetcher.proxyHost", "");
0107: }
0108: if (req.getHeader("proxyport") != null) {
0109: prop.setProperty("Fetcher.proxyPort", req
0110: .getHeader("proxyport"));
0111: } else {
0112: prop.setProperty("Fetcher.proxyPort", "");
0113: }
0114:
0115: prop.setProperty("FileCache.DefaultContentType", "text/xml");
0116: prop.setProperty("Fetcher.cookiesToForwardAll", "true");
0117: prop.setProperty("Fetcher.cookiesToForwardList", "");
0118: return prop;
0119: }
0120:
0121: /**
0122: * doGet just forwards to doPost()
0123: *
0124: *@param request Description of the Parameter
0125: *@param response Description of the Parameter
0126: *@exception ServletException Description of the Exception
0127: *@exception IOException Description of the Exception
0128: */
0129: public void doGet(HttpServletRequest request,
0130: HttpServletResponse response) throws ServletException,
0131: IOException {
0132: doPost(request, response);
0133: }
0134:
0135: /**
0136: * Handles the initia lrequest
0137: *
0138: *@param request Description of the Parameter
0139: *@param response Description of the Parameter
0140: *@exception ServletException Description of the Exception
0141: *@exception IOException Description of the Exception
0142: */
0143: public void doPost(HttpServletRequest request,
0144: HttpServletResponse response) throws ServletException,
0145: IOException {
0146: req = request;
0147: res = response;
0148: if (props == null) {
0149: props = setProperties(request);
0150: log("FileCache.doPost(): Setting properties");
0151: }
0152: String action = req.getParameter("action");
0153: if (debug) {
0154: log("FileCache.doPost(): action=" + action);
0155: }
0156: String file = "";
0157:
0158: if (action == null || action.length() <= 0) {
0159: log("FileCache.doPost(): No argument given.");
0160: res.sendError(res.SC_BAD_REQUEST, "No action given.");
0161: } else if (action.equals("get")) {
0162: file = req.getParameter("file");
0163: if (debug) {
0164: log("FileCache.doPost(): file=\"" + file + "\"");
0165: }
0166: if (file != null && (file.length() > 0)) {
0167: String proto = null;
0168: try {
0169: proto = file.substring(0, file.indexOf(':'));
0170: } catch (IndexOutOfBoundsException iobe) {
0171: }
0172: if (proto.equals("file") || proto.equals("http")
0173: || proto.equals("https")) {
0174: FileCacheData fileCacheData = getFileCache(req,
0175: res, file);
0176: if (fileCacheData == null) {
0177: res.sendError(res.SC_NOT_FOUND,
0178: "Unable to retrieve " + file);
0179: } else {
0180: StringBuffer fileContents = fileCacheData
0181: .getFileContents();
0182: if (!(fileContents == null || fileContents
0183: .length() <= 0)) {
0184: OutputStream outputStream = res
0185: .getOutputStream();
0186: if (preserveContentType) {
0187: res.setContentType(fileCacheData
0188: .getContentType());
0189: } else {
0190: res
0191: .setContentType(props
0192: .getProperty("FileCache.DefaultContentType"));
0193: }
0194: //response.setContentLength(fileCacheData.getFileSize());
0195: res.addHeader("Content-Disposition",
0196: "filename=\""
0197: + fileCacheData
0198: .getFileName()
0199: + "\"");
0200: // Need to change this to handle StreamBuffer instead of InputStream
0201: String output = fileContents.toString();
0202: outputStream.write(output.getBytes());
0203: outputStream.flush();
0204: }
0205: }
0206: } else {
0207: res.sendError(res.SC_UNSUPPORTED_MEDIA_TYPE,
0208: "URL Protocol " + proto
0209: + " is not supported");
0210: }
0211: } else {
0212: res.sendError(res.SC_BAD_REQUEST,
0213: "File argument not specified.");
0214: }
0215: } else if (action.equals("reset")) {
0216: cachedFiles = new Hashtable();
0217: if (debug) {
0218: log("FileCache.doPost(): File Cache reset");
0219: }
0220: res.setStatus(res.SC_OK);
0221: }
0222: }
0223:
0224: /**
0225: * <P>
0226: *
0227: * Gets the inputEncoding to be used by content. This method returns the
0228: * inputEncoding which would be used in encoding the scraped content.
0229: *
0230: *@return String value
0231: *@see com.sun.portal.providers.ProviderException
0232: */
0233: public String getInputEncoding() {
0234: //BY Hardcode for now, but should return Character encoding from properties file
0235: //return getPropertyValue("inputEncoding");
0236: return props.getProperty("FileCache.inputEncoding");
0237: }
0238:
0239: /**
0240: * Gets the charset from content <p>
0241: *
0242: * This method determines the charset based on meta tag in content
0243: *
0244: *@param contentBytes Bytes from the scraped content
0245: *@return String charset or null if charset cannot be determined
0246: */
0247: protected String getContentEncodingFromContentBytes(
0248: byte[] contentBytes) {
0249: String charset = null;
0250: /*
0251: * The character encoding info was not found in the contentType
0252: * header. We have to parse through the content portion to
0253: * figure it out. It may be specified in the html <meta> tag
0254: * as the following;
0255: *
0256: * <html><head>
0257: * ...
0258: * <meta content="text/html; charset=gb2312">
0259: * ...
0260: * </head>
0261: * ...
0262: */
0263: String contentString = new String(contentBytes);
0264:
0265: String str = contentString.toLowerCase();
0266: int idxMetaTag;
0267: int idxCloseArrowBracket;
0268: int idxCharset;
0269: int startIdx = 0;
0270:
0271: while (true) {
0272: idxMetaTag = str.indexOf("<meta", startIdx);
0273: if (idxMetaTag == -1) {
0274: break;
0275: }
0276:
0277: idxCloseArrowBracket = str.indexOf(">", idxMetaTag);
0278:
0279: if (idxCloseArrowBracket == -1) {
0280: break;
0281: }
0282:
0283: String headerstr = contentString.substring(idxMetaTag,
0284: idxCloseArrowBracket);
0285: String header = headerstr.toLowerCase();
0286:
0287: idxCharset = header.indexOf("charset=");
0288: if (idxCharset == -1) {
0289: startIdx = idxCloseArrowBracket + 1;
0290: continue;
0291: }
0292: /*
0293: * We found one charset within a <meta> tag
0294: */
0295: int startCE = idxCharset + 8;
0296: char chquotes = header.charAt(startCE);
0297: if (chquotes == '\"') {
0298: startCE = startCE + 1;
0299: }
0300: int endCE = startCE;
0301: char ch = header.charAt(endCE);
0302:
0303: /*
0304: * the charset value can only contain letter, digit,
0305: * charcter '-' or '_'
0306: */
0307: while (Character.isLetterOrDigit(ch) || (ch == '-')
0308: || (ch == '_')) {
0309: endCE++;
0310: ch = header.charAt(endCE);
0311: }
0312: if (endCE > startCE) {
0313: charset = headerstr.substring(startCE, endCE);
0314: return charset;
0315: }
0316:
0317: break;
0318: }
0319: return charset;
0320: }
0321:
0322: /**
0323: * Gets the charset <p>
0324: *
0325: * This method determines the charset based on the contentType header if it
0326: * is available (only applies to http(s) urls), or from the inputEncoding
0327: * property if it is non-blank, or from the meta tag in content, e.g. meta
0328: * tag in html, xml or wml header if they are available (only applies to
0329: * HTML, XML, WML).
0330: *
0331: *@param contentType If http(s) urls, null otherwise
0332: *@param bytes Bytes from the scraped content
0333: *@param MIMEType MIMEType for the content
0334: *@return String charset or null if charset cannot be determined
0335: *@see com.sun.portal.providers.ProviderException
0336: */
0337: protected String getContentEncoding(String contentType,
0338: byte[] bytes, String MIMEType) {
0339: String charset = null;
0340: if (contentType != null) {
0341: // http(s) url
0342: int charsetidx = contentType.indexOf("charset=");
0343: if (charsetidx != -1) {
0344: charset = (contentType.substring(charsetidx + 8))
0345: .trim();
0346: if (charset != null && charset.length() != 0) {
0347: //strip the quotes
0348: if (charset.startsWith("\"")
0349: && charset.endsWith("\"")) {
0350: charset = charset.substring(1,
0351: charset.length() - 1);
0352: }
0353: return charset;
0354: }
0355: }
0356: }
0357: //get Encoding from profile
0358: charset = getInputEncoding();
0359:
0360: if (charset != null && charset.length() != 0) {
0361: return charset;
0362: }
0363: //Look for charset in meta tag if html , xml , wml
0364: if ((MIMEType != null)
0365: && (MIMEType.equalsIgnoreCase("text/html")
0366: || MIMEType.equalsIgnoreCase("text/xml") || MIMEType
0367: .equalsIgnoreCase("application/xml"))
0368: || MIMEType.equalsIgnoreCase("text/vnd.wap.wml")) {
0369: //get charset from meta tag if avaialble
0370: charset = getContentEncodingFromContentBytes(bytes);
0371: }
0372: return charset;
0373: }
0374:
0375: /**
0376: * Checks to see if the cache has expired
0377: *
0378: *@param timeStamp Description of the Parameter
0379: *@return true if cache has expired false if it has not.
0380: */
0381: private boolean localCacheExpired(long timeStamp) {
0382: // Get current time
0383: Calendar cal = Calendar.getInstance();
0384: long currentTime = cal.getTime().getTime();
0385: long difference = currentTime - timeStamp;
0386: Long convertToMin = new Long((difference / 1000) / 60);
0387: int minutes = convertToMin.intValue();
0388:
0389: if (minutes > timeoutValue) {
0390: return true;
0391: } else {
0392: return false;
0393: }
0394: }
0395:
0396: /**
0397: * <P>
0398: *
0399: * <P>
0400: *
0401: * Gets the file as a string buffer. <P>
0402: *
0403: * This method calls the <code>getHttpContent</code> method to get the XML
0404: * file content as a StringBuffer if the XML URL specified is a http or https
0405: * url.
0406: *
0407: *@param fileLocation Location of the file
0408: *@param req Description of the Parameter
0409: *@param res Description of the Parameter
0410: *@return the file contents as a StringBuffer
0411: */
0412: protected StringBuffer getRemoteFile(HttpServletRequest req,
0413: HttpServletResponse res, String fileLocation)
0414: throws IOException {
0415:
0416: StringBuffer content = new StringBuffer();
0417: String proto = null;
0418: try {
0419: proto = fileLocation
0420: .substring(0, fileLocation.indexOf(':'));
0421: } catch (IndexOutOfBoundsException iobe) {
0422: }
0423: if (proto != null && proto.equalsIgnoreCase("file")) {
0424: String PathName = null;
0425: try {
0426: PathName = fileLocation.substring(fileLocation
0427: .indexOf('/'));
0428: } catch (IndexOutOfBoundsException iobe) {
0429: log("FileCache.getXML(): Unsupported URL type : "
0430: + fileLocation, iobe);
0431: res.sendError(res.SC_BAD_REQUEST,
0432: "FileCache.getXML(): Unsupported URL type : "
0433: + fileLocation + iobe.getMessage());
0434: }
0435: try {
0436: content = getFileAsBuffer(PathName);
0437: } catch (UnsupportedEncodingException ue) {
0438: log(
0439: "FileCache.getContent(): UnsupportedEncoding specified : "
0440: + PathName, ue);
0441: res.sendError(res.SC_BAD_REQUEST,
0442: "FileCache.getContent(): UnsupportedEncoding specified : "
0443: + PathName + ue.getMessage());
0444: } catch (IOException ioe) {
0445: log("FileCache.getContent():IOException received : "
0446: + PathName, ioe);
0447: res.sendError(res.SC_BAD_REQUEST,
0448: "FileCache.getContent():IOException received : "
0449: + PathName + ioe.getMessage());
0450: }
0451: } else {
0452:
0453: try {
0454: content = getHttpContent(req, res, fileLocation);
0455: } catch (InterruptedException ie) {
0456: log("FileCache.getXML(): "
0457: + "fetcher did not finish : " + fileLocation,
0458: ie);
0459: res.sendError(res.SC_REQUEST_TIMEOUT,
0460: "FileCache.getXML(): fetcher did not finish."
0461: + ie.getMessage());
0462: } catch (MalformedURLException mue) {
0463: log("FileCache.getXML(). Malformed URL : "
0464: + fileLocation, mue);
0465: res.sendError(res.SC_BAD_REQUEST,
0466: "FileCache.getXML(). Malformed URL : "
0467: + fileLocation + mue.getMessage());
0468: }
0469: }
0470: return content;
0471: }
0472:
0473: /**
0474: * This method is called by <code>getContent()</code> if the url returned by
0475: * <code>getURL()</code> is a file url.
0476: *
0477: *@param pathname Description of the Parameter
0478: *@return File Object specified by the pathName or null if the file
0479: * does not exists or cannot be read.
0480: */
0481: protected File getFile(String pathname) {
0482:
0483: File returnFile = null;
0484:
0485: try {
0486: returnFile = new File(pathname);
0487:
0488: if ((returnFile.exists()) && (returnFile.isFile())
0489: && (returnFile.canRead())) {
0490: return returnFile;
0491: } else {
0492: return null;
0493: }
0494: } catch (SecurityException se) {
0495: log("FileCache.getFile(): ", se);
0496: return null;
0497: } catch (NullPointerException npe) {
0498: log("FileCache.getFile(): Null path name ", npe);
0499: return null;
0500: }
0501: }
0502:
0503: /**
0504: * Gets the mIMEType attribute of the FileCache object
0505: *
0506: *@param filename Description of the Parameter
0507: *@return The mIMEType value
0508: */
0509: private String getMIMEType(String filename) {
0510: String MIMEType = null;
0511: if (filename != null) {
0512: try {
0513: FileNameMap fnm = URLConnection.getFileNameMap();
0514: if (fnm.getContentTypeFor(filename) != null) {
0515: MIMEType = fnm.getContentTypeFor(filename);
0516: }
0517: } catch (NullPointerException npe) {
0518: }
0519: // happens if URLConnection.fileNameMap is not set
0520: if (MIMEType == null) {
0521: String fname = filename.toLowerCase();
0522: for (int i = 0; i < typeTable.length
0523: && MIMEType == null; i++) {
0524: if (fname.endsWith(typeTable[i][0])) {
0525: MIMEType = typeTable[i][1];
0526: }
0527: }
0528: }
0529: }
0530: return MIMEType;
0531: }
0532:
0533: /**
0534: * Gets the specified file as StringBuffer
0535: *
0536: *@param pathName Description of the Parameter
0537: *@return StringBuffer containing the data from the specified
0538: * file or null if file does not exist or cannot be read.
0539: *@exception IOException
0540: *@see com.sun.portal.providers.ProviderException
0541: */
0542: protected StringBuffer getFileAsBuffer(String pathName)
0543: throws IOException {
0544:
0545: StringBuffer result = null;
0546: File f = null;
0547: try {
0548: f = getFile(pathName);
0549: if (f != null) {
0550: FileInputStream fin = new FileInputStream(f);
0551: byte[] bytes = readContent(fin, -1);
0552: String MIMEType = getMIMEType(f.getName());
0553: String charset = getContentEncoding(null, bytes,
0554: MIMEType);
0555: if (fin != null) {
0556: fin.close();
0557: }
0558: if (charset != null && charset.length() != 0) {
0559: result = new StringBuffer(
0560: new String(bytes, charset));
0561: } else {
0562: result = new StringBuffer(new String(bytes));
0563: }
0564: try {
0565: ContentFilter conFilter = ContentFilterImpl
0566: .getInstance(MIMEType);
0567: result = conFilter.filter(result);
0568: } catch (ContentFilterException cfe) {
0569: log("FileCache.getFileAsBuffer():"
0570: + "ContentFilterImpl.getInstance failed returned null");
0571: }
0572: } else {
0573: log("FileCache.getFileAsBuffer(): getFile() returned null");
0574: return null;
0575: }
0576:
0577: } catch (NegativeArraySizeException nas) {
0578: log("FileCache.getFileAsBuffer():", nas);
0579: return null;
0580: } catch (FileNotFoundException fe) {
0581: log("FileCache.getFileAsBuffer():", fe);
0582: return null;
0583: }
0584: return result;
0585: }
0586:
0587: /**
0588: * This method will get the file if it exists locally and the disk cache
0589: * timeout value has not expired
0590: *
0591: *@param req Description of the Parameter
0592: *@param res Description of the Parameter
0593: *@param fileName Description of the Parameter
0594: *@return the contents of the file
0595: */
0596: protected FileCacheData getFileCache(HttpServletRequest req,
0597: HttpServletResponse res, String fileName) {
0598: StringBuffer fileContents = null;
0599:
0600: if (debug) {
0601: log("FileCache.getFileCache(): entered:");
0602: }
0603: FileCacheData fileData = null;
0604: // Check to see if file is available in cache
0605: if (!((cachedFiles == null) || (cachedFiles.size() == 0))) {
0606: if (debug) {
0607: log("FileCache.getFileCache(): attempting to retrieve cached file");
0608: }
0609: fileData = (FileCacheData) cachedFiles.get(fileName);
0610: }
0611: if (fileData == null) {
0612: try {
0613: fileContents = getRemoteFile(req, res, fileName);
0614: } catch (IOException ioe) {
0615: log("FileCache.getFileCache(): Unable to generate Error Page.");
0616: }
0617: if (fileContents == null) {
0618: log("fileContents = null");
0619: } else {
0620: log("fileContents = " + fileContents.toString());
0621: }
0622:
0623: if ((fileContents != null) && (fileContents.length() > 0)) {
0624: fileData = new FileCacheData();
0625: fileData.setFileName(fileName);
0626: fileData.setFileContents(fileContents);
0627: fileData.setContentType(getMIMEType(fileName));
0628: Date currentTime = new Date();
0629: fileData.setFileLoadTime(currentTime.getTime());
0630: cachedFiles.remove(fileName);
0631: cachedFiles.put(fileName, fileData);
0632: if (debug) {
0633: log("FileCache.getFileCache(): cached file not found.");
0634: log("FileCache.getFileCache(): Creating FileCache Data object"
0635: + "\nFilename = "
0636: + fileName
0637: + "\nMIME-TYPE = "
0638: + getMIMEType(fileName)
0639: + "\nTimestamp = " + currentTime.getTime());
0640: }
0641: return fileData;
0642: } else {
0643: return null;
0644: }
0645: } else {
0646: //File is found. Check if expired and retrieve if cache expired.
0647: if (localCacheExpired(fileData.getFileLoadTime())) {
0648: // Cache expired. Load new copy
0649: try {
0650: fileContents = getRemoteFile(req, res, fileName);
0651: } catch (IOException ioe) {
0652: log("FileCache.getFileCache(): Unable to generate Error Page.");
0653: }
0654: if (fileContents != null
0655: || !(fileContents.length() > 0)) {
0656: fileData.resetAll();
0657: fileData.setFileName(fileName);
0658: fileData.setFileContents(fileContents);
0659: fileData.setContentType(getMIMEType(fileName));
0660: Date currentTime = new Date();
0661: fileData.setFileLoadTime(currentTime.getTime());
0662: if (debug) {
0663: log("FileCache.getFileCache(): file found, but cache timeout has occurred.");
0664: log("FileCache.getFileCache(): Creating FileCache Data object"
0665: + "\nFilename = "
0666: + fileName
0667: + "\nMIME-TYPE = "
0668: + getMIMEType(fileName)
0669: + "\nTimestamp = "
0670: + currentTime.getTime());
0671: }
0672: return fileData;
0673: } else {
0674: return null;
0675: }
0676: } else {
0677: // Cache current. Return cached copy
0678: if (debug) {
0679: log("FileCache.getFileCache(): Returning cached file");
0680: }
0681: return fileData;
0682: }
0683:
0684: }
0685: }
0686:
0687: /**
0688: * <P>
0689: *
0690: * Gets the urlScraperRulesetID to be used by rewriter.
0691: *
0692: *@return String value
0693: *@exception ProviderException if there is an error getting the
0694: * urlScrapperRulesetID.
0695: *@see com.sun.portal.providers.ProviderException
0696: */
0697: protected String getRuleSetID() {
0698: String ruleset = (String) props
0699: .getProperty("FileCache.RewriterRuleset");
0700: if (ruleset == null || ruleset.equals("")) {
0701: return "";
0702: }
0703: return ruleset;
0704: }
0705:
0706: /**
0707: * Gets the timeout property for the provider.
0708: *
0709: *@return timeout value
0710: *@exception ProviderException if there is an error getting the timeout
0711: * property.
0712: *@see com.sun.portal.providers.ProviderException
0713: */
0714: protected int getTimeout() {
0715: return timeoutValue;
0716: }
0717:
0718: /**
0719: * <P>
0720: *
0721: * Get the provider's content by retrieving content from the specified http
0722: * or https URL. <P>
0723: *
0724: * This method does not handle file URLs. It only handles http or https urls.
0725: * The content scraped from the specified url is rewritten if a rewriter is
0726: * available using the ruleset returned by <code>getRuleSetID()</code> <P>
0727: *
0728: * This method throws exceptions for certain exceptional conditions instead
0729: * of returning an error message in the returned <code>StringBuffer</code>
0730: *
0731: *@param req An HttpServletRequest that contains
0732: * information related to this request for content.
0733: *@param res An HttpServletResponse that allows the
0734: * provider to influence the overall response for the desktop page
0735: * (besides generating the content).
0736: *@param url http or https url string
0737: *@return Scraped content
0738: *@exception InterruptedException if there is a timeout while trying to get
0739: * the scraped content
0740: *@exception MalformedURLException if the url passed in is not a valid http
0741: * or https url.
0742: *@see com.sun.portal.providers.ProviderException
0743: *@see #getRuleSetID
0744: */
0745: protected StringBuffer getHttpContent(HttpServletRequest req,
0746: HttpServletResponse res, String url)
0747: throws InterruptedException, MalformedURLException {
0748: StringBuffer content = new StringBuffer();
0749:
0750: //
0751: // fetch the content
0752: //
0753: String rulesetID = getRuleSetID();
0754: Fetcher fetcher = null;
0755: try {
0756: fetcher = new Fetcher(url, this , req, res, rulesetID);
0757: } catch (MalformedURLException mue) {
0758: log("Error constructing fetcher", mue);
0759: }
0760: fetcher.run();
0761:
0762: //
0763: // wait for fetcher to get content
0764: //
0765: try {
0766: String fetcherTimeoutString = props
0767: .getProperty("FileCache.fetcherTimeoutMillis");
0768: if (fetcherTimeoutString == null
0769: || fetcherTimeoutString.length() < 1) {
0770: fetcherTimeoutString = "1000";
0771: }
0772: int fetcherTimeout = Integer.parseInt(fetcherTimeoutString);
0773:
0774: fetcher.join(getTimeout() * fetcherTimeout);
0775: if (!fetcher.isFinished()) {
0776: throw new InterruptedException(
0777: "FileCache.getFetcherContent(): "
0778: + "fetcher did not finish!");
0779: } else {
0780: content = fetcher.getContent();
0781: if (content != null) {
0782: String contentType = fetcher.getContentType();
0783: ContentFilter conFilter = null;
0784: try {
0785: conFilter = ContentFilterImpl
0786: .getInstance(contentType);
0787: } catch (ContentFilterException cfe) {
0788: log(
0789: "ERROR: FileCache.getHttpContent threw an exception",
0790: cfe);
0791: }
0792: log("FileCache.getContent():ContentFilterClassName="
0793: + conFilter.getClass().getName());
0794: content = conFilter.filter(content);
0795: }
0796: }
0797: } finally {
0798: //
0799: // interrupt thread if it hung
0800: //
0801: if (fetcher.isAlive()) {
0802: fetcher.terminate();
0803: }
0804: fetcher = null;
0805: }
0806: return content;
0807: }
0808:
0809: /**
0810: * <P>
0811: *
0812: * This method returns true if allCookies property is true otherwise checks
0813: * if the cookie name exists in the cookiesToForward list and returns true if
0814: * it does or false if it doesn't.
0815: *
0816: *@param allCookies allCookies property value from display profile
0817: *@param cookiesToForwardList cookiesToForwardList property value from
0818: * display profile
0819: *@param cookieName Description of the Parameter
0820: *@return boolean value
0821: */
0822: public boolean forward(String cookieName, boolean allCookies,
0823: List cookiesToForwardList) {
0824: if (debug) {
0825: StringBuffer sb = new StringBuffer();
0826: if (cookiesToForwardList != null) {
0827: Iterator iter = cookiesToForwardList.iterator();
0828: while (iter.hasNext()) {
0829: sb.append((String) iter.next() + ", ");
0830: }
0831: }
0832: log("FileCache.forward(): cookieName = " + cookieName
0833: + "\n allCookies = " + String.valueOf(allCookies)
0834: + "\n cookiesToForwardList = " + sb.toString());
0835: } else {
0836: log("FileCache.forward(): debug is false");
0837: }
0838:
0839: if (allCookies) {
0840: if (debug) {
0841: return true;
0842: }
0843: } else {
0844: if (cookiesToForwardList.contains(cookieName)) {
0845: return true;
0846: }
0847: }
0848: return false;
0849: }
0850:
0851: /**
0852: * Description of the Method
0853: *
0854: *@param in Description of the Parameter
0855: *@param contentLength Description of the Parameter
0856: *@return Description of the Return Value
0857: */
0858: public byte[] readContent(InputStream in, int contentLength)
0859: throws IOException {
0860: byte[] contentbytes = new byte[CONTENT_BUFFER_SIZE];
0861: if (contentLength != -1) {
0862: contentbytes = new byte[contentLength];
0863: } else {
0864: contentbytes = new byte[CONTENT_BUFFER_SIZE];
0865: }
0866: ByteArrayOutputStream baos = new ByteArrayOutputStream();
0867: int count = 0;
0868: while ((count = in.read(contentbytes)) > 0) {
0869: baos.write(contentbytes, 0, count);
0870: }
0871: byte[] content = baos.toByteArray();
0872: if (baos != null) {
0873: baos.close();
0874: }
0875: return content;
0876: }
0877:
/**
 * One cache entry: the name, URL, content type, load time and contents of
 * a cached file.
 *
 *@author byork
 *@created February 3, 2004
 */
class FileCacheData {
    /** URL the file was loaded from; null until set. */
    protected String fileURL = null;
    /** Load time of the file; -1 until set. */
    protected long fileLoadTime = -1;
    /** Contents of the file; null until set. */
    protected StringBuffer fileContents = null;
    /** Content type of the file; null until set. */
    protected String contentType = null;
    /** Name of the file; null until set. */
    protected String fileName = null;

    /** Creates an empty entry; every field keeps its initial value. */
    public FileCacheData() {
    }

    /**
     * Creates an entry with the given values; the content type stays null.
     *
     *@param fn file name
     *@param fu file URL
     *@param flt file load time
     *@param fc file contents
     */
    public FileCacheData(String fn, String fu, long flt,
            StringBuffer fc) {
        this.fileName = fn;
        this.fileURL = fu;
        this.fileLoadTime = flt;
        this.fileContents = fc;
    }

    /**
     * Resets URL, load time, contents and content type to their initial
     * values. The file name is left as it is.
     */
    public void resetAll() {
        this.contentType = null;
        this.fileContents = null;
        this.fileLoadTime = -1;
        this.fileURL = null;
    }

    /**
     *@return the file URL
     */
    public String getFileURL() {
        return this.fileURL;
    }

    /**
     *@return the file load time
     */
    public long getFileLoadTime() {
        return this.fileLoadTime;
    }

    /**
     *@return the file contents
     */
    public StringBuffer getFileContents() {
        return this.fileContents;
    }

    /**
     *@return the content type
     */
    public String getContentType() {
        return this.contentType;
    }

    /**
     * Same as {@link #getContentType()}; the argument is not used.
     *
     *@param filename ignored
     *@return the content type
     */
    public String getContentType(String filename) {
        return this.contentType;
    }

    /**
     *@return the file name
     */
    public String getFileName() {
        return this.fileName;
    }

    /**
     *@param fu the new file URL
     */
    public void setFileURL(String fu) {
        this.fileURL = fu;
    }

    /**
     *@param time the new file load time
     */
    public void setFileLoadTime(long time) {
        this.fileLoadTime = time;
    }

    /**
     *@param content the new file contents
     */
    public void setFileContents(StringBuffer content) {
        this.fileContents = content;
    }

    /**
     *@param type the new content type
     */
    public void setContentType(String type) {
        this.contentType = type;
    }

    /**
     *@param fn the new file name
     */
    public void setFileName(String fn) {
        this.fileName = fn;
    }
}
1024: }
|