01: package org.apache.lucene.xmlparser.builders;
02:
03: import java.io.IOException;
04: import java.io.StringReader;
05:
06: import org.apache.lucene.analysis.Analyzer;
07: import org.apache.lucene.analysis.Token;
08: import org.apache.lucene.analysis.TokenStream;
09: import org.apache.lucene.index.Term;
10: import org.apache.lucene.search.Filter;
11: import org.apache.lucene.search.TermsFilter;
12: import org.apache.lucene.xmlparser.DOMUtils;
13: import org.apache.lucene.xmlparser.FilterBuilder;
14: import org.apache.lucene.xmlparser.ParserException;
15: import org.w3c.dom.Element;
16:
17: /**
18: * Licensed to the Apache Software Foundation (ASF) under one or more
19: * contributor license agreements. See the NOTICE file distributed with
20: * this work for additional information regarding copyright ownership.
21: * The ASF licenses this file to You under the Apache License, Version 2.0
22: * (the "License"); you may not use this file except in compliance with
23: * the License. You may obtain a copy of the License at
24: *
25: * http://www.apache.org/licenses/LICENSE-2.0
26: *
27: * Unless required by applicable law or agreed to in writing, software
28: * distributed under the License is distributed on an "AS IS" BASIS,
29: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
30: * See the License for the specific language governing permissions and
31: * limitations under the License.
32: */
33: /**
34: * @author maharwood
35: */
36: public class TermsFilterBuilder implements FilterBuilder {
37: Analyzer analyzer;
38:
39: /**
40: * @param analyzer
41: */
42: public TermsFilterBuilder(Analyzer analyzer) {
43: this .analyzer = analyzer;
44: }
45:
46: /*
47: * (non-Javadoc)
48: *
49: * @see org.apache.lucene.xmlparser.FilterBuilder#process(org.w3c.dom.Element)
50: */
51: public Filter getFilter(Element e) throws ParserException {
52: TermsFilter tf = new TermsFilter();
53: String text = DOMUtils.getNonBlankTextOrFail(e);
54: String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(
55: e, "fieldName");
56: TokenStream ts = analyzer.tokenStream(fieldName,
57: new StringReader(text));
58:
59: try {
60: Token token = ts.next();
61: Term term = null;
62: while (token != null) {
63: if (term == null) {
64: term = new Term(fieldName, token.termText());
65: } else {
66: // create from previous to save fieldName.intern overhead
67: term = term.createTerm(token.termText());
68: }
69: tf.addTerm(term);
70: token = ts.next();
71: }
72: } catch (IOException ioe) {
73: throw new RuntimeException(
74: "Error constructing terms from index:" + ioe);
75: }
76: return tf;
77: }
78: }
|