001: /* Copyright (c) 2006-2007, Vladimir Nikic
002: All rights reserved.
003:
004: Redistribution and use of this software in source and binary forms,
005: with or without modification, are permitted provided that the following
006: conditions are met:
007:
008: * Redistributions of source code must retain the above
009: copyright notice, this list of conditions and the
010: following disclaimer.
011:
012: * Redistributions in binary form must reproduce the above
013: copyright notice, this list of conditions and the
014: following disclaimer in the documentation and/or other
015: materials provided with the distribution.
016:
017: * The name of Web-Harvest may not be used to endorse or promote
018: products derived from this software without specific prior
019: written permission.
020:
021: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
022: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
023: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
024: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
025: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
026: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
027: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
028: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
029: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
030: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
031: POSSIBILITY OF SUCH DAMAGE.
032:
033: You can contact Vladimir Nikic by sending e-mail to
034: nikic_vladimir@yahoo.com. Please include the word "Web-Harvest" in the
035: subject line.
036: */
037: package org.webharvest.runtime.processors;
038:
039: import org.webharvest.definition.IncludeDef;
040: import org.webharvest.definition.ScraperConfiguration;
041: import org.webharvest.runtime.Scraper;
042: import org.webharvest.runtime.ScraperContext;
043: import org.webharvest.runtime.templaters.BaseTemplater;
044: import org.webharvest.runtime.variables.EmptyVariable;
045: import org.webharvest.runtime.variables.IVariable;
046: import org.webharvest.utils.CommonUtil;
047: import org.webharvest.exception.FileException;
048:
049: import java.io.*;
050: import java.net.URL;
051: import java.net.MalformedURLException;
052:
053: /**
054: * Include processor.
055: */
056: public class IncludeProcessor extends BaseProcessor {
057:
058: private IncludeDef includeDef;
059:
060: public IncludeProcessor(IncludeDef includeDef) {
061: super (includeDef);
062: this .includeDef = includeDef;
063: }
064:
065: public IVariable execute(Scraper scraper, ScraperContext context) {
066: boolean isUrl = false;
067:
068: String path = BaseTemplater.execute(includeDef.getPath(),
069: scraper.getScriptEngine());
070: path = CommonUtil.adaptFilename(path);
071: String fullPath = path;
072:
073: ScraperConfiguration configuration = scraper.getConfiguration();
074: File originalFile = configuration.getSourceFile();
075: String originalUrl = configuration.getUrl();
076: if (originalFile != null) {
077: String originalPath = CommonUtil.adaptFilename(originalFile
078: .getAbsolutePath());
079: int index = originalPath.lastIndexOf('/');
080: if (index > 0) {
081: String workingPath = originalPath.substring(0, index);
082: fullPath = CommonUtil.getAbsoluteFilename(workingPath,
083: path);
084: }
085: } else if (originalUrl != null) {
086: fullPath = CommonUtil.fullUrl(originalUrl, path);
087: isUrl = true;
088: }
089:
090: ScraperConfiguration includedConfig;
091: try {
092: includedConfig = isUrl ? new ScraperConfiguration(new URL(
093: fullPath)) : new ScraperConfiguration(fullPath);
094: scraper.execute(includedConfig.getOperations());
095: return new EmptyVariable();
096: } catch (FileNotFoundException e) {
097: throw new FileException(
098: "Cannot include configuration file " + fullPath, e);
099: } catch (MalformedURLException e) {
100: throw new FileException(
101: "Cannot include configuration file " + fullPath, e);
102: } catch (IOException e) {
103: throw new FileException(
104: "Cannot include configuration file " + fullPath, e);
105: }
106: }
107:
108: }
|