001: /*
002: JSPWiki - a JSP-based WikiWiki clone.
003:
004: Copyright (C) 2001-2005 Janne Jalkanen (Janne.Jalkanen@iki.fi)
005:
006: This program is free software; you can redistribute it and/or modify
007: it under the terms of the GNU Lesser General Public License as published by
008: the Free Software Foundation; either version 2.1 of the License, or
009: (at your option) any later version.
010:
011: This program is distributed in the hope that it will be useful,
012: but WITHOUT ANY WARRANTY; without even the implied warranty of
013: MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
014: GNU Lesser General Public License for more details.
015:
016: You should have received a copy of the GNU Lesser General Public License
017: along with this program; if not, write to the Free Software
018: Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
019: */
020: package com.ecyrd.jspwiki.filters;
021:
022: import java.util.*;
023:
024: import javax.servlet.http.HttpServletRequest;
025:
026: import com.ecyrd.jspwiki.*;
027:
028: import org.apache.oro.text.regex.*;
029:
030: import org.apache.log4j.Logger;
031:
032: /**
033: * A regular expression-based spamfilter that can also do choke modifications.
034: *
035: * Parameters:
036: * <ul>
037: * <li>wordlist - Page name where the regexps are found. Use [{SET spamwords='regexp list separated with spaces'}] on
038: * that page. Default is "SpamFilterWordList".
039: * <li>errorpage - The page to which the user is redirected. Has a special variable $msg which states the reason. Default is "RejectedMessage".
040: * <li>pagechangesinminute - How many page changes are allowed/minute. Default is 5.
041: * <li>bantime - How long an IP address stays on the temporary ban list (default is 60 for 60 minutes).
042: * </ul>
043: * @since 2.1.112
044: * @author Janne Jalkanen
045: */
046: public class SpamFilter extends BasicPageFilter {
047: private String m_forbiddenWordsPage = "SpamFilterWordList";
048: private String m_errorPage = "RejectedMessage";
049:
050: private static final String LISTVAR = "spamwords";
051: private PatternMatcher m_matcher = new Perl5Matcher();
052: private PatternCompiler m_compiler = new Perl5Compiler();
053:
054: private Collection m_spamPatterns = null;
055:
056: private Date m_lastRebuild = new Date(0L);
057:
058: static Logger log = Logger.getLogger(SpamFilter.class);
059:
060: public static final String PROP_WORDLIST = "wordlist";
061: public static final String PROP_ERRORPAGE = "errorpage";
062: public static final String PROP_PAGECHANGES = "pagechangesinminute";
063: public static final String PROP_BANTIME = "bantime";
064:
065: private Vector m_temporaryBanList = new Vector();
066:
067: private int m_banTime = 60; // minutes
068:
069: private Vector m_lastModifications = new Vector();
070:
071: /**
072: * How many times a single IP address can change a page per minute?
073: */
074: private int m_limitSinglePageChanges = 5;
075:
076: public void initialize(Properties properties) {
077: m_forbiddenWordsPage = properties.getProperty(PROP_WORDLIST,
078: m_forbiddenWordsPage);
079: m_errorPage = properties.getProperty(PROP_ERRORPAGE,
080: m_errorPage);
081:
082: m_limitSinglePageChanges = TextUtil.getIntegerProperty(
083: properties, PROP_PAGECHANGES, m_limitSinglePageChanges);
084:
085: m_banTime = TextUtil.getIntegerProperty(properties,
086: PROP_BANTIME, m_banTime);
087:
088: log.info("Spam filter initialized. Temporary ban time "
089: + m_banTime + " mins, max page changes/minute: "
090: + m_limitSinglePageChanges);
091: }
092:
093: private Collection parseWordList(WikiPage source, String list) {
094: ArrayList compiledpatterns = new ArrayList();
095:
096: if (list != null) {
097: StringTokenizer tok = new StringTokenizer(list, " \t\n");
098:
099: while (tok.hasMoreTokens()) {
100: String pattern = tok.nextToken();
101:
102: try {
103: compiledpatterns.add(m_compiler.compile(pattern));
104: } catch (MalformedPatternException e) {
105: log.debug("Malformed spam filter pattern "
106: + pattern);
107:
108: source.setAttribute("error",
109: "Malformed spam filter pattern " + pattern);
110: }
111: }
112: }
113:
114: return compiledpatterns;
115: }
116:
117: private synchronized void checkSinglePageChange(WikiContext context)
118: throws RedirectException {
119: HttpServletRequest req = context.getHttpRequest();
120:
121: if (req != null) {
122: String addr = req.getRemoteAddr();
123: int counter = 0;
124:
125: long time = System.currentTimeMillis() - 60 * 1000L; // 1 minute
126:
127: for (Iterator i = m_lastModifications.iterator(); i
128: .hasNext();) {
129: Host host = (Host) i.next();
130:
131: //
132: // Check if this item is invalid
133: //
134: if (host.getAddedTime() < time) {
135: log.debug("Removed host " + host.getAddress()
136: + " from modification queue (expired)");
137: i.remove();
138: continue;
139: }
140:
141: if (host.getAddress().equals(addr)) {
142: counter++;
143: }
144: }
145:
146: if (counter >= m_limitSinglePageChanges) {
147: Host host = new Host(addr);
148:
149: m_temporaryBanList.add(host);
150:
151: log
152: .info("Added host "
153: + addr
154: + " to temporary ban list for doing too many modifications/minute");
155: throw new RedirectException(
156: "Too many modifications/minute",
157: //context.getViewURL( m_errorPage ) );
158: context.getEngine().getBaseURL()
159: + "Wiki.jsp?page=" + m_errorPage);
160: }
161:
162: m_lastModifications.add(new Host(addr));
163: }
164: }
165:
166: private synchronized void cleanBanList() {
167: long now = System.currentTimeMillis();
168:
169: for (Iterator i = m_temporaryBanList.iterator(); i.hasNext();) {
170: Host host = (Host) i.next();
171:
172: if (host.getReleaseTime() < now) {
173: log.debug("Removed host " + host.getAddress()
174: + " from temporary ban list (expired)");
175: i.remove();
176: }
177: }
178: }
179:
180: private void checkBanList(WikiContext context)
181: throws RedirectException {
182: HttpServletRequest req = context.getHttpRequest();
183:
184: if (req != null) {
185: String remote = req.getRemoteAddr();
186:
187: long now = System.currentTimeMillis();
188:
189: for (Iterator i = m_temporaryBanList.iterator(); i
190: .hasNext();) {
191: Host host = (Host) i.next();
192:
193: if (host.getAddress().equals(remote)) {
194: long timeleft = (host.getReleaseTime() - now) / 1000L;
195: throw new RedirectException(
196: "You have been temporarily banned from modifying this wiki. ("
197: + timeleft
198: + " seconds of ban left)",
199: //context.getViewURL( m_errorPage ) );
200: context.getEngine().getBaseURL()
201: + "Wiki.jsp?page=" + m_errorPage);
202: }
203: }
204: }
205:
206: }
207:
208: public String preSave(WikiContext context, String content)
209: throws RedirectException {
210: cleanBanList();
211: checkBanList(context);
212: checkSinglePageChange(context);
213:
214: WikiPage source = context.getEngine().getPage(
215: m_forbiddenWordsPage);
216:
217: if (source != null) {
218: if (m_spamPatterns == null || m_spamPatterns.isEmpty()
219: || source.getLastModified().after(m_lastRebuild)) {
220: m_lastRebuild = source.getLastModified();
221:
222: m_spamPatterns = parseWordList(source, (String) source
223: .getAttribute(LISTVAR));
224:
225: log
226: .info("Spam filter reloaded - recognizing "
227: + m_spamPatterns.size()
228: + " patterns from page "
229: + m_forbiddenWordsPage);
230: }
231: }
232:
233: //
234: // If we have no spam patterns defined, or we're trying to save
235: // the page containing the patterns, just return.
236: //
237: if (m_spamPatterns == null
238: || context.getPage().getName().equals(
239: m_forbiddenWordsPage)) {
240: return content;
241: }
242:
243: for (Iterator i = m_spamPatterns.iterator(); i.hasNext();) {
244: Pattern p = (Pattern) i.next();
245:
246: log.debug("Attempting to match page contents with "
247: + p.getPattern());
248:
249: if (m_matcher.contains(content, p)) {
250: //
251: // Spam filter has a match.
252: //
253:
254: throw new RedirectException(
255: "Content matches the spam filter '"
256: + p.getPattern() + "'", context.getURL(
257: WikiContext.VIEW, m_errorPage));
258: }
259: }
260:
261: return content;
262: }
263:
264: /**
265: * A local class for storing host information.
266: *
267: * @author jalkanen
268: *
269: * @since
270: */
271: public class Host {
272: private long m_addedTime = System.currentTimeMillis();
273: private long m_releaseTime;
274: private String m_address;
275:
276: public String getAddress() {
277: return m_address;
278: }
279:
280: public long getReleaseTime() {
281: return m_releaseTime;
282: }
283:
284: public long getAddedTime() {
285: return m_addedTime;
286: }
287:
288: public Host(String ipaddress) {
289: m_address = ipaddress;
290:
291: m_releaseTime = System.currentTimeMillis() + m_banTime * 60
292: * 1000L;
293: }
294: }
295: }
|