01: package net.javacoding.jspider.mod.rule;
02:
03: import net.javacoding.jspider.core.rule.impl.BaseRuleImpl;
04: import net.javacoding.jspider.core.util.config.PropertySet;
05: import net.javacoding.jspider.core.logging.LogFactory;
06: import net.javacoding.jspider.core.logging.Log;
07: import net.javacoding.jspider.core.SpiderContext;
08: import net.javacoding.jspider.core.model.DecisionInternal;
09: import net.javacoding.jspider.api.model.Decision;
10: import net.javacoding.jspider.api.model.Site;
11:
12: import java.net.URL;
13:
14: /**
15: * $Id: MaxNumberOfURLParamsRule.java,v 1.1 2003/04/07 15:51:00 vanrogu Exp $
16: */
17: public class MaxNumberOfURLParamsRule extends BaseRuleImpl {
18:
19: public static final String MAX = "max";
20:
21: protected int max;
22:
23: public MaxNumberOfURLParamsRule(PropertySet config) {
24: Log log = LogFactory.getLog(MaxNumberOfURLParamsRule.class);
25: max = config.getInteger(MaxNumberOfURLParamsRule.MAX, 0);
26: log.info("max set to " + max);
27: }
28:
29: public Decision apply(SpiderContext context, Site currentSite,
30: URL url) {
31: Decision decision = null;
32:
33: String query = url.getQuery();
34: int params;
35:
36: if (query == null || query.length() < 2) {
37: params = 0;
38: } else {
39: int amps = 0;
40: int pos = query.indexOf('&');
41: while (pos != -1) {
42: amps++;
43: pos = query.indexOf('&', pos + 1);
44: }
45: params = amps + 1;
46: }
47:
48: if (params > max) {
49: decision = new DecisionInternal(Decision.RULE_IGNORE,
50: "params = " + params + ", max = " + max
51: + ", url ingored");
52: } else {
53: decision = new DecisionInternal(Decision.RULE_ACCEPT,
54: "params = " + params + ", max = " + max
55: + ", url accepted");
56: }
57:
58: return decision;
59: }
60:
61: }
|