001: /* Copyright (c) 2006-2007, Vladimir Nikic
002: All rights reserved.
003:
004: Redistribution and use of this software in source and binary forms,
005: with or without modification, are permitted provided that the following
006: conditions are met:
007:
008: * Redistributions of source code must retain the above
009: copyright notice, this list of conditions and the
010: following disclaimer.
011:
012: * Redistributions in binary form must reproduce the above
013: copyright notice, this list of conditions and the
014: following disclaimer in the documentation and/or other
015: materials provided with the distribution.
016:
017: * The name of Web-Harvest may not be used to endorse or promote
018: products derived from this software without specific prior
019: written permission.
020:
021: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
022: AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
023: IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
024: ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
025: LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
026: CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
027: SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
028: INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
029: CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
030: ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
031: POSSIBILITY OF SUCH DAMAGE.
032:
033: You can contact Vladimir Nikic by sending e-mail to
034: nikic_vladimir@yahoo.com. Please include the word "Web-Harvest" in the
035: subject line.
036: */
037: package org.webharvest.runtime.processors;
038:
039: import org.webharvest.definition.BaseElementDef;
040: import org.webharvest.definition.LoopDef;
041: import org.webharvest.runtime.Scraper;
042: import org.webharvest.runtime.ScraperContext;
043: import org.webharvest.runtime.scripting.ScriptEngine;
044: import org.webharvest.runtime.templaters.BaseTemplater;
045: import org.webharvest.runtime.variables.*;
046: import org.webharvest.utils.CommonUtil;
047: import org.webharvest.utils.Constants;
048:
049: import java.util.*;
050: import java.util.regex.Matcher;
051: import java.util.regex.Pattern;
052:
053: /**
054: * Loop list processor.
055: */
056: public class LoopProcessor extends BaseProcessor {
057:
058: private LoopDef loopDef;
059:
060: public LoopProcessor(LoopDef loopDef) {
061: super (loopDef);
062: this .loopDef = loopDef;
063: }
064:
065: public IVariable execute(Scraper scraper, ScraperContext context) {
066: ScriptEngine scriptEngine = scraper.getScriptEngine();
067: String item = BaseTemplater.execute(loopDef.getItem(),
068: scriptEngine);
069: String index = BaseTemplater.execute(loopDef.getIndex(),
070: scriptEngine);
071: String maxLoopsString = BaseTemplater.execute(loopDef
072: .getMaxloops(), scriptEngine);
073: String filter = BaseTemplater.execute(loopDef.getFilter(),
074: scriptEngine);
075:
076: double maxLoops = Constants.DEFAULT_MAX_LOOPS;
077: if (maxLoopsString != null && !"".equals(maxLoopsString.trim())) {
078: maxLoops = Double.parseDouble(maxLoopsString);
079: }
080:
081: BaseElementDef loopValueDef = loopDef.getLoopValueDef();
082: IVariable loopValue = getBodyListContent(loopValueDef, scraper,
083: context);
084: debug(loopValueDef, scraper, loopValue);
085:
086: List resultList = new ArrayList();
087:
088: List list = loopValue != null ? loopValue.toList() : null;
089: if (list != null) {
090: IVariable itemBeforeLoop = (IVariable) context.get(item);
091: IVariable indexBeforeLoop = (IVariable) context.get(index);
092:
093: List filteredList = filter != null ? createFilteredList(
094: list, filter) : list;
095: Iterator it = filteredList.iterator();
096:
097: for (int i = 1; it.hasNext() && i <= maxLoops; i++) {
098: IVariable currElement = (IVariable) it.next();
099:
100: // define current value of item variable
101: if (item != null && !"".equals(item)) {
102: context.put(item, currElement);
103: }
104:
105: // define current value of index variable
106: if (index != null && !"".equals(index)) {
107: context.put(index, new NodeVariable(String
108: .valueOf(i)));
109: }
110:
111: // execute the loop body
112: BaseElementDef bodyDef = loopDef.getLoopBodyDef();
113: IVariable loopResult = bodyDef != null ? getBodyListContent(
114: bodyDef, scraper, context)
115: : new EmptyVariable();
116: debug(bodyDef, scraper, loopResult);
117: resultList.addAll(loopResult.toList());
118: }
119:
120: // restores previous value of item variable
121: if (item != null && itemBeforeLoop != null) {
122: context.put(item, itemBeforeLoop);
123: }
124:
125: // restores previous value of index variable
126: if (index != null && indexBeforeLoop != null) {
127: context.put(index, indexBeforeLoop);
128: }
129: }
130:
131: return new ListVariable(resultList);
132: }
133:
134: /**
135: * Create filtered list based on specified list and filterStr
136: * @param list
137: * @param filterStr
138: * @return Filtered list
139: */
140: private List createFilteredList(List list, String filterStr) {
141: List result = new ArrayList();
142: Set stringSet = new HashSet();
143:
144: Filter filter = new Filter(filterStr, list.size());
145:
146: Iterator it = list.iterator();
147: int index = 1;
148: while (it.hasNext()) {
149: IVariable curr = (IVariable) it.next();
150:
151: if (filter.isInFilter(index)) {
152: if (filter.isUnique) {
153: String currStr = curr.toString();
154: if (!stringSet.contains(curr.toString())) {
155: result.add(curr);
156: stringSet.add(currStr);
157: }
158: } else {
159: result.add(curr);
160: }
161: }
162:
163: index++;
164: }
165:
166: return result;
167: }
168:
169: /**
170: * x - starting index in range
171: * y - ending index in range
172: */
173: private static class IntRange extends CommonUtil.IntPair {
174:
175: // checks if strins is in form [n][-][m]
176: static boolean isValid(String s) {
177: Pattern pattern = Pattern.compile("(\\d*)(-?)(\\d*?)");
178: Matcher matcher = pattern.matcher(s);
179: return matcher.matches();
180: }
181:
182: private IntRange(int x, int y) {
183: super (x, y);
184: }
185:
186: public IntRange(String s, int size) {
187: defineFromString(s, '-', size);
188: }
189:
190: public boolean isInRange(int index) {
191: return index >= x && index <= y;
192: }
193:
194: }
195:
196: /**
197: * x - starting index
198: * y - index skip - x is first, x+y second, x+2y third, end so on.
199: */
200: private static class IntSublist extends CommonUtil.IntPair {
201:
202: // checks if strins is in form [n][:][m]
203: static boolean isValid(String s) {
204: Pattern pattern = Pattern.compile("(\\d*)(:?)(\\d*?)");
205: Matcher matcher = pattern.matcher(s);
206: return matcher.matches();
207: }
208:
209: private IntSublist(int x, int y) {
210: super (x, y);
211: }
212:
213: public IntSublist(String s, int size) {
214: defineFromString(s, ':', size);
215: }
216:
217: public boolean isInSublist(int index) {
218: return (index - x) % y == 0;
219: }
220:
221: }
222:
223: /**
224: * Class that represents filter for list filtering. It is created based on filter string.
225: * Filter string is comma separated list of filter tokens. Valid filter tokens are:
226: * m - specific integer m
227: * m-n - integers in specified range, if m is ommited it's vaue is 1, if n is
228: * ommited it's value is specified size of list to be filtered
229: * m:n - all integerers starting from m and all subsequent with step n,
230: * m, m+1*n , m+2*n, ...
231: * odd - the same as 1:2
232: * even - the same as 2:2
233: * unique - tells that list must contain unique values (no duplicates)
234: */
235: private static class Filter {
236:
237: private boolean isUnique = false;
238: private List filterList;
239:
240: private Filter(String filterStr, int size) {
241: StringTokenizer tokenizer = new StringTokenizer(filterStr,
242: ",");
243: filterList = new ArrayList();
244:
245: while (tokenizer.hasMoreTokens()) {
246: String token = tokenizer.nextToken().trim();
247:
248: if ("unique".equals(token)) {
249: isUnique = true;
250: } else if ("odd".equals(token)) {
251: filterList.add(new IntSublist(1, 2));
252: } else if ("even".equals(token)) {
253: filterList.add(new IntSublist(2, 2));
254: } else if (IntRange.isValid(token)) {
255: filterList.add(new IntRange(token, size));
256: } else if (IntSublist.isValid(token)) {
257: filterList.add(new IntSublist(token, size));
258: }
259: }
260: }
261:
262: /**
263: * Checks if specified integer passes the filter
264: */
265: private boolean isInFilter(int num) {
266: int listSize = filterList.size();
267:
268: if (listSize == 0) {
269: return true;
270: }
271:
272: for (int i = 0; i < listSize; i++) {
273: CommonUtil.IntPair curr = (CommonUtil.IntPair) filterList
274: .get(i);
275: if (curr instanceof IntRange
276: && ((IntRange) curr).isInRange(num)) {
277: return true;
278: } else if (curr instanceof IntSublist
279: && ((IntSublist) curr).isInSublist(num)) {
280: return true;
281: }
282: }
283:
284: return false;
285: }
286:
287: }
288:
289: }
|