001: /* Copyright (c) 2006-2007, Vladimir Nikic
002: All rights reserved.
003:
004: Redistribution and use of this software in source and binary forms,
005: with or without modification, are permitted provided that the following
006: conditions are met:
007:
008: * Redistributions of source code must retain the above
009: copyright notice, this list of conditions and the
010: following disclaimer.
011:
012: * Redistributions in binary form must reproduce the above
013: copyright notice, this list of conditions and the
014: following disclaimer in the documentation and/or other
015: materials provided with the distribution.
016:
017: * The name of Web-Harvest may not be used to endorse or promote
018: products derived from this software without specific prior
019: written permission.
020:
021: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
022: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
023: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
024: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
025: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
026: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
027: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
028: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
029: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
030: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
031: POSSIBILITY OF SUCH DAMAGE.
032:
033: You can contact Vladimir Nikic by sending e-mail to
034: nikic_vladimir@yahoo.com. Please include the word "Web-Harvest" in the
035: subject line.
036: */
037: package org.webharvest.runtime.processors;
038:
039: import java.io.*;
040:
041: import org.webharvest.definition.FileDef;
042: import org.webharvest.exception.FileException;
043: import org.webharvest.runtime.Scraper;
044: import org.webharvest.runtime.ScraperContext;
045: import org.webharvest.runtime.scripting.ScriptEngine;
046: import org.webharvest.runtime.templaters.BaseTemplater;
047: import org.webharvest.runtime.variables.*;
048: import org.webharvest.utils.CommonUtil;
049:
050: /**
051: * File processor.
052: */
053: public class FileProcessor extends BaseProcessor {
054:
055: private FileDef fileDef;
056:
057: public FileProcessor(FileDef fileDef) {
058: super (fileDef);
059: this .fileDef = fileDef;
060: }
061:
062: public IVariable execute(Scraper scraper, ScraperContext context) {
063: String workingDir = scraper.getWorkingDir();
064:
065: ScriptEngine scriptEngine = scraper.getScriptEngine();
066: String action = BaseTemplater.execute(fileDef.getAction(),
067: scriptEngine);
068: String filePath = BaseTemplater.execute(fileDef.getPath(),
069: scriptEngine);
070: String type = BaseTemplater.execute(fileDef.getType(),
071: scriptEngine);
072: String charset = BaseTemplater.execute(fileDef.getCharset(),
073: scriptEngine);
074: if (charset == null) {
075: charset = scraper.getConfiguration().getCharset();
076: }
077:
078: String fullPath = CommonUtil.getAbsoluteFilename(workingDir,
079: filePath);
080:
081: // depending on file acton calls appropriate method
082: if ("write".equalsIgnoreCase(action)) {
083: return executeFileWrite(false, scraper, context, fullPath,
084: type, charset);
085: } else if ("append".equalsIgnoreCase(action)) {
086: return executeFileWrite(true, scraper, context, fullPath,
087: type, charset);
088: } else {
089: return executeFileRead(fullPath, type, charset);
090: }
091: }
092:
093: /**
094: * Writing content to the specified file.
095: * If parameter "append" is true, then append content, otherwise write
096: */
097: private IVariable executeFileWrite(boolean append, Scraper scraper,
098: ScraperContext context, String fullPath, String type,
099: String charset) {
100: IVariable result;
101:
102: try {
103: // ensure that target directory exists
104: new File(CommonUtil.getDirectoryFromPath(fullPath))
105: .mkdirs();
106:
107: FileOutputStream out = new FileOutputStream(fullPath,
108: append);
109: byte[] data;
110:
111: if (Types.TYPE_BINARY.equalsIgnoreCase(type)) {
112: IVariable body = getBodyBinaryContent(fileDef, scraper,
113: context);
114: data = body.toBinary();
115: result = new NodeVariable(data);
116: } else {
117: IVariable body = getBodyTextContent(fileDef, scraper,
118: context);
119: String content = body.toString();
120: data = content.getBytes(charset);
121: result = new NodeVariable(content);
122: }
123:
124: out.write(data);
125: out.flush();
126: out.close();
127:
128: return result;
129: } catch (IOException e) {
130: throw new FileException("Error writing data to file: "
131: + fullPath, e);
132: }
133: }
134:
135: /**
136: * Reading the specified file.
137: */
138: private IVariable executeFileRead(String fullPath, String type,
139: String charset) {
140: if (Types.TYPE_BINARY.equalsIgnoreCase(type)) {
141: try {
142: byte[] data = CommonUtil.readBytesFromFile(new File(
143: fullPath));
144: log.info("Binary file read processor: " + data.length
145: + " bytes read.");
146: return new NodeVariable(data);
147: } catch (IOException e) {
148: throw new FileException("Error reading file: "
149: + fullPath, e);
150: }
151: } else {
152: try {
153: String content = CommonUtil.readStringFromFile(
154: new File(fullPath), charset);
155: log.info("Text file read processor: "
156: + (content == null ? 0 : content.length())
157: + " characters read.");
158: return new NodeVariable(content);
159: } catch (IOException e) {
160: throw new FileException("Error reading the file: "
161: + fullPath, e);
162: }
163: }
164: }
165:
166: }
|