001: /****************************************************************
002: * Licensed to the Apache Software Foundation (ASF) under one *
003: * or more contributor license agreements. See the NOTICE file *
004: * distributed with this work for additional information *
005: * regarding copyright ownership. The ASF licenses this file *
006: * to you under the Apache License, Version 2.0 (the *
007: * "License"); you may not use this file except in compliance *
008: * with the License. You may obtain a copy of the License at *
009: * *
010: * http://www.apache.org/licenses/LICENSE-2.0 *
011: * *
012: * Unless required by applicable law or agreed to in writing, *
013: * software distributed under the License is distributed on an *
014: * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
015: * KIND, either express or implied. See the License for the *
016: * specific language governing permissions and limitations *
017: * under the License. *
018: ****************************************************************/package org.apache.james.jspf.parser;
019:
020: import org.apache.james.jspf.core.Logger;
021: import org.apache.james.jspf.core.SPF1Constants;
022: import org.apache.james.jspf.core.SPF1Record;
023: import org.apache.james.jspf.core.SPFRecordParser;
024: import org.apache.james.jspf.core.exceptions.NeutralException;
025: import org.apache.james.jspf.core.exceptions.NoneException;
026: import org.apache.james.jspf.core.exceptions.PermErrorException;
027: import org.apache.james.jspf.terms.Configuration;
028: import org.apache.james.jspf.terms.Directive;
029: import org.apache.james.jspf.terms.Mechanism;
030: import org.apache.james.jspf.terms.Modifier;
031:
032: import java.util.ArrayList;
033: import java.util.Collection;
034: import java.util.Collections;
035: import java.util.Iterator;
036: import java.util.List;
037: import java.util.regex.Matcher;
038: import java.util.regex.Pattern;
039:
040: /**
041: * This class is used to parse SPF1-Records from their textual form to an
042: * SPF1Record object that is composed by 2 collections: directives and
043: * modifiers.
044: *
045: * The parsing is modular and get informations from Mechanism and Modifiers
046: * classes declared in the org/apache/james/jspf/parser/jspf.default.terms file.
047: *
048: * Each term implementation provide its own REGEX in the REGEX static public
049: * field. This parser simply join all the regexp in a single "alternative"
050: * pattern and count the number of catch groups (brackets) assigned to each
051: * regex fragment.
052: *
053: * SO it creates a big regex and an array where it store what term is associated
054: * to each catch group of the big regex.
055: *
056: * If the regex matches the input vspf1 record then it start looking for the
057: * matched group (not null) and lookup the term that created that part of the
058: * regex.
059: *
060: * With this informations it creates a new instance of the term and, if the term
061: * is ConfigurationEnabled it calls the config() method passing to it only the specific
062: * subset of the MatchResult (using the MatchResultSubset).
063: *
064: * TODO doubts about the specification - redirect or exp with no domain-spec are
065: * evaluated as an unknown-modifiers according to the current spec (it does not
066: * make too much sense) - top-label is defined differently in various specs.
067: * We'll have to review the code. -
068: * http://data.iana.org/TLD/tlds-alpha-by-domain.txt (we should probably beeter
069: * use and alpha sequence being at least 2 chars - Somewhere is defined as "."
070: * TLD [ "." ] - Otherwise defined as ( *alphanum ALPHA *alphanum ) / (
071: * 1*alphanum "-" *( * alphanum / "-" ) alphanum )
072: *
073: * @see org.apache.james.jspf.core.SPF1Record
074: *
075: */
076: public class RFC4408SPF1Parser implements SPFRecordParser {
077:
078: /**
079: * Regex based on http://www.ietf.org/rfc/rfc4408.txt.
080: * This will be the next official SPF-Spec
081: */
082:
083: // Changed this because C, T and R MACRO_LETTERS are not available
084: // in record parsing and must return a PermError.
085: // private static final String MACRO_LETTER_PATTERN = "[lsodipvhcrtLSODIPVHCRT]";
086: /**
087: * ABNF: qualifier = "+" / "-" / "?" / "~"
088: */
089: private static final String QUALIFIER_PATTERN = "[" + "\\"
090: + SPF1Constants.PASS + "\\" + SPF1Constants.FAIL + "\\"
091: + SPF1Constants.NEUTRAL + "\\" + SPF1Constants.SOFTFAIL
092: + "]";
093:
094: private Pattern termsSeparatorPattern = null;
095:
096: private Pattern termPattern = null;
097:
098: private int TERM_STEP_REGEX_QUALIFIER_POS;
099:
100: private int TERM_STEP_REGEX_MECHANISM_POS;
101:
102: private int TERM_STEP_REGEX_MODIFIER_POS;
103:
104: private List matchResultPositions;
105:
106: private Logger log;
107:
108: private TermsFactory termsFactory;
109:
110: /**
111: * Constructor. Creates all the values needed to run the parsing
112: *
113: * @param logger the logger to use
114: * @param termsFactory the TermsFactory implementation
115: */
116: public RFC4408SPF1Parser(Logger logger, TermsFactory termsFactory) {
117: this .log = logger;
118: this .termsFactory = termsFactory;
119:
120: /**
121: * ABNF: mechanism = ( all / include / A / MX / PTR / IP4 / IP6 / exists )
122: */
123: String MECHANISM_REGEX = createRegex(termsFactory
124: .getMechanismsCollection());
125:
126: /**
127: * ABNF: modifier = redirect / explanation / unknown-modifier
128: */
129: String MODIFIER_REGEX = "("
130: + createRegex(termsFactory.getModifiersCollection())
131: + ")";
132:
133: /**
134: * ABNF: directive = [ qualifier ] mechanism
135: */
136: String DIRECTIVE_REGEX = "(" + QUALIFIER_PATTERN + "?)("
137: + MECHANISM_REGEX + ")";
138:
139: /**
140: * ABNF: ( directive / modifier )
141: */
142: String TERM_REGEX = "(?:" + MODIFIER_REGEX + "|"
143: + DIRECTIVE_REGEX + ")";
144:
145: /**
146: * ABNF: 1*SP
147: */
148: String TERMS_SEPARATOR_REGEX = "[ ]+";
149:
150: termsSeparatorPattern = Pattern.compile(TERMS_SEPARATOR_REGEX);
151: termPattern = Pattern.compile(TERM_REGEX);
152:
153: initializePositions();
154: }
155:
156: /**
157: * Fill in the matchResultPositions ArrayList. This array simply map each
158: * regex matchgroup to the Term class that originated that part of the
159: * regex.
160: */
161: private void initializePositions() {
162: ArrayList matchResultPositions = new ArrayList();
163:
164: // FULL MATCH
165: int posIndex = 0;
166: matchResultPositions.ensureCapacity(posIndex + 1);
167: matchResultPositions.add(posIndex, null);
168:
169: Iterator i;
170:
171: TERM_STEP_REGEX_MODIFIER_POS = ++posIndex;
172: matchResultPositions.ensureCapacity(posIndex + 1);
173: matchResultPositions.add(TERM_STEP_REGEX_MODIFIER_POS, null);
174: i = termsFactory.getModifiersCollection().iterator();
175: while (i.hasNext()) {
176: TermDefinition td = (TermDefinition) i.next();
177: int size = td.getMatchSize() + 1;
178: for (int k = 0; k < size; k++) {
179: posIndex++;
180: matchResultPositions.ensureCapacity(posIndex + 1);
181: matchResultPositions.add(posIndex, td);
182: }
183: }
184:
185: TERM_STEP_REGEX_QUALIFIER_POS = ++posIndex;
186: matchResultPositions.ensureCapacity(posIndex + 1);
187: matchResultPositions.add(posIndex, null);
188:
189: TERM_STEP_REGEX_MECHANISM_POS = ++posIndex;
190: matchResultPositions.ensureCapacity(posIndex + 1);
191: matchResultPositions.add(TERM_STEP_REGEX_MECHANISM_POS, null);
192: i = termsFactory.getMechanismsCollection().iterator();
193: while (i.hasNext()) {
194: TermDefinition td = (TermDefinition) i.next();
195: int size = td.getMatchSize() + 1;
196: for (int k = 0; k < size; k++) {
197: posIndex++;
198: matchResultPositions.ensureCapacity(posIndex + 1);
199: matchResultPositions.add(posIndex, td);
200: }
201: }
202:
203: if (log.isDebugEnabled()) {
204: log.debug("Parsing catch group positions: Modifiers["
205: + TERM_STEP_REGEX_MODIFIER_POS + "] Qualifier["
206: + TERM_STEP_REGEX_QUALIFIER_POS + "] Mechanism["
207: + TERM_STEP_REGEX_MECHANISM_POS + "]");
208: for (int k = 0; k < matchResultPositions.size(); k++) {
209: log
210: .debug(k
211: + ") "
212: + (matchResultPositions.get(k) != null ? ((TermDefinition) matchResultPositions
213: .get(k)).getPattern().pattern()
214: : null));
215: }
216: }
217:
218: this .matchResultPositions = Collections
219: .synchronizedList(matchResultPositions);
220: }
221:
222: /**
223: * Loop the classes searching for a String static field named
224: * staticFieldName and create an OR regeex like this:
225: * (?:FIELD1|FIELD2|FIELD3)
226: *
227: * @param classes
228: * classes to analyze
229: * @param staticFieldName
230: * static field to concatenate
231: * @return regex The regex
232: */
233: private String createRegex(Collection commandMap) {
234: StringBuffer modifierRegex = new StringBuffer();
235: Iterator i = commandMap.iterator();
236: boolean first = true;
237: while (i.hasNext()) {
238: if (first) {
239: modifierRegex.append("(?:(");
240: first = false;
241: } else {
242: modifierRegex.append(")|(");
243: }
244: Pattern pattern = ((TermDefinition) i.next()).getPattern();
245: modifierRegex.append(pattern.pattern());
246: }
247: modifierRegex.append("))");
248: return modifierRegex.toString();
249: }
250:
251: /**
252: * @see org.apache.james.jspf.core.SPFRecordParser#parse(java.lang.String)
253: */
254: public SPF1Record parse(String spfRecord)
255: throws PermErrorException, NoneException, NeutralException {
256:
257: log.debug("Start parsing SPF-Record: " + spfRecord);
258:
259: SPF1Record result = new SPF1Record();
260:
261: // check the version "header"
262: if (spfRecord.toLowerCase().startsWith(
263: SPF1Constants.SPF_VERSION1 + " ")
264: || spfRecord
265: .equalsIgnoreCase(SPF1Constants.SPF_VERSION1)) {
266: if (!spfRecord.toLowerCase().startsWith(
267: SPF1Constants.SPF_VERSION1 + " "))
268: throw new NeutralException("Empty SPF Record");
269: } else {
270: throw new NoneException("No valid SPF Record: " + spfRecord);
271: }
272:
273: // extract terms
274: String[] terms = termsSeparatorPattern.split(spfRecord
275: .replaceFirst(SPF1Constants.SPF_VERSION1, ""));
276:
277: // cycle terms
278: for (int i = 0; i < terms.length; i++) {
279: if (terms[i].length() > 0) {
280: Matcher termMatcher = termPattern.matcher(terms[i]);
281: if (!termMatcher.matches()) {
282: throw new PermErrorException("Term [" + terms[i]
283: + "] is not syntactically valid: "
284: + termPattern.pattern());
285: }
286:
287: // true if we matched a modifier, false if we matched a
288: // directive
289: String modifierString = termMatcher
290: .group(TERM_STEP_REGEX_MODIFIER_POS);
291:
292: if (modifierString != null) {
293: // MODIFIER
294: Modifier mod = (Modifier) lookupAndCreateTerm(
295: termMatcher, TERM_STEP_REGEX_MODIFIER_POS);
296:
297: if (mod.enforceSingleInstance()) {
298: Iterator it = result.getModifiers().iterator();
299: while (it.hasNext()) {
300: if (it.next().getClass().equals(
301: mod.getClass())) {
302: throw new PermErrorException(
303: "More than one "
304: + modifierString
305: + " found in SPF-Record");
306: }
307: }
308: }
309:
310: result.getModifiers().add(mod);
311:
312: } else {
313: // DIRECTIVE
314: String qualifier = termMatcher
315: .group(TERM_STEP_REGEX_QUALIFIER_POS);
316:
317: Object mech = lookupAndCreateTerm(termMatcher,
318: TERM_STEP_REGEX_MECHANISM_POS);
319:
320: result.getDirectives().add(
321: new Directive(qualifier, (Mechanism) mech,
322: log.getChildLogger(qualifier
323: + "directive")));
324:
325: }
326:
327: }
328: }
329:
330: return result;
331: }
332:
333: /**
334: * @param res
335: * the MatchResult
336: * @param start
337: * the position where the terms starts
338: * @return
339: * @throws PermErrorException
340: */
341: private Object lookupAndCreateTerm(Matcher res, int start)
342: throws PermErrorException {
343: for (int k = start + 1; k < res.groupCount(); k++) {
344: if (res.group(k) != null
345: && k != TERM_STEP_REGEX_QUALIFIER_POS) {
346: TermDefinition c = (TermDefinition) matchResultPositions
347: .get(k);
348: Configuration subres = new MatcherBasedConfiguration(
349: res, k, c.getMatchSize());
350: try {
351: return termsFactory.createTerm(c.getTermDef(),
352: subres);
353: } catch (InstantiationException e) {
354: e.printStackTrace();
355: // TODO is it ok to use a Runtime for this? Or should we use a PermError here?
356: throw new IllegalStateException(
357: "Unexpected error creating term: "
358: + e.getMessage());
359: }
360:
361: }
362: }
363: return null;
364: }
365:
366: }
|