001: /*
002: * Copyright 2004-2006 the original author or authors.
003: *
004: * Licensed under the Apache License, Version 2.0 (the "License");
005: * you may not use this file except in compliance with the License.
006: * You may obtain a copy of the License at
007: *
008: * http://www.apache.org/licenses/LICENSE-2.0
009: *
010: * Unless required by applicable law or agreed to in writing, software
011: * distributed under the License is distributed on an "AS IS" BASIS,
012: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013: * See the License for the specific language governing permissions and
014: * limitations under the License.
015: */
016:
017: package org.compass.core.lucene.engine.all;
018:
019: import java.io.IOException;
020: import java.io.Reader;
021: import java.io.StringReader;
022: import java.util.ArrayList;
023: import java.util.Iterator;
024:
025: import org.apache.lucene.analysis.Analyzer;
026: import org.apache.lucene.analysis.Token;
027: import org.apache.lucene.analysis.TokenStream;
028: import org.apache.lucene.index.Payload;
029: import org.compass.core.Property;
030: import org.compass.core.engine.SearchEngineException;
031: import org.compass.core.lucene.engine.LuceneSearchEngine;
032: import org.compass.core.mapping.AllMapping;
033: import org.compass.core.mapping.ResourceMapping;
034: import org.compass.core.mapping.ResourcePropertyMapping;
035: import org.compass.core.spi.InternalProperty;
036: import org.compass.core.spi.InternalResource;
037:
038: /**
039: * The All Analyzer is a specific analyzer that is used to wrap the analyzer passed when adding
040: * a document. It will gather all the tokens that the actual analyzer generates for fields that
041: * are included in All and allow to get them using {@link #createAllTokenStream()} (which will
042: * be used to create the all field with).
043: *
044: * <p>Un-tokenized fields (which will not go through the analysis process) are identified when this
045: * analyzer is constructed and are added to the all field if they are supposed to be included.
046: * There are two options with the untokenized fields, either add them as is (un tokenized), or
047: * analyze them just for the all properties.
048: *
049: * @author kimchy
050: */
051: public class AllAnalyzer extends Analyzer {
052:
053: private Analyzer analyzer;
054:
055: private InternalResource resource;
056:
057: private ResourceMapping resourceMapping;
058:
059: private AllMapping allMapping;
060:
061: private LuceneSearchEngine searchEngine;
062:
063: private ArrayList<Token> tokens = new ArrayList<Token>();
064:
065: private AllTokenStreamCollector allTokenStreamCollector = new AllTokenStreamCollector();
066:
067: private boolean boostSupport;
068:
069: public AllAnalyzer(Analyzer analyzer, InternalResource resource,
070: LuceneSearchEngine searchEngine) {
071: this .analyzer = analyzer;
072: this .resource = resource;
073: this .resourceMapping = resource.resourceKey()
074: .getResourceMapping();
075: this .searchEngine = searchEngine;
076: this .allMapping = resourceMapping.getAllMapping();
077: this .boostSupport = searchEngine.getSearchEngineFactory()
078: .getLuceneSettings().isAllPropertyBoostSupport();
079:
080: if (!allMapping.isSupported()) {
081: return;
082: }
083:
084: if (!allMapping.isExcludeAlias()) {
085: // add the alias to all prpoerty (lowecased, so finding it will be simple)
086: tokens.add(new Token(resource.getAlias().toLowerCase(), 0,
087: resource.getAlias().length()));
088: // add the extended property
089: Property[] properties = resource.getProperties(searchEngine
090: .getSearchEngineFactory()
091: .getExtendedAliasProperty());
092: if (properties != null) {
093: for (Property property : properties) {
094: tokens.add(new Token(property.getStringValue()
095: .toLowerCase(), 0, property
096: .getStringValue().length()));
097: }
098: }
099: }
100:
101: // go over all the un tokenized properties and add them as tokens (if required)
102: // they are added since they will never get analyzed thus tokenStream will never
103: // be called on them
104: for (Property property : resource.getProperties()) {
105: ResourcePropertyMapping resourcePropertyMapping = ((InternalProperty) property)
106: .getPropertyMapping();
107: // if not found within the property, try and get it based on the name from the resource mapping
108: if (resourcePropertyMapping == null) {
109: resourcePropertyMapping = resourceMapping
110: .getResourcePropertyMapping(property.getName());
111: }
112: if (resourcePropertyMapping == null) {
113: continue;
114: }
115: if (resourcePropertyMapping.isInternal()) {
116: continue;
117: }
118: if (resourcePropertyMapping.getExcludeFromAll() == ResourcePropertyMapping.ExcludeFromAllType.YES) {
119: continue;
120: }
121: if (resourcePropertyMapping.getIndex() == Property.Index.UN_TOKENIZED) {
122: Payload payload = null;
123: if (boostSupport) {
124: if (resourcePropertyMapping.getBoost() != -1) {
125: payload = AllBoostUtils
126: .writeFloat(resourcePropertyMapping
127: .getBoost());
128: } else if (resource.getBoost() != -1) {
129: // we get the boost from the resource thus taking into account any resource property mapping
130: // and/or resource mapping boost level
131: payload = AllBoostUtils.writeFloat(resource
132: .getBoost());
133: }
134: }
135: String value = property.getStringValue();
136: if (value != null) {
137: // if NO exclude from all, just add it
138: // if NO_ANALYZED, will analyze it as well
139: if (resourcePropertyMapping.getExcludeFromAll() == ResourcePropertyMapping.ExcludeFromAllType.NO) {
140: Token t = new Token(value, 0, value.length());
141: t.setPayload(payload);
142: tokens.add(t);
143: } else if (resourcePropertyMapping
144: .getExcludeFromAll() == ResourcePropertyMapping.ExcludeFromAllType.NO_ANALYZED) {
145: Analyzer propAnalyzer;
146: if (resourcePropertyMapping.getAnalyzer() != null) {
147: propAnalyzer = searchEngine
148: .getSearchEngineFactory()
149: .getAnalyzerManager()
150: .getAnalyzerMustExist(
151: resourcePropertyMapping
152: .getAnalyzer());
153: } else {
154: propAnalyzer = searchEngine
155: .getSearchEngineFactory()
156: .getAnalyzerManager()
157: .getAnalyzerByResource(resource);
158: }
159: TokenStream ts = propAnalyzer.tokenStream(
160: property.getName(), new StringReader(
161: value));
162: try {
163: Token token = ts.next();
164: while (token != null) {
165: token.setPayload(payload);
166: tokens.add(token);
167: token = ts.next();
168: }
169: } catch (IOException e) {
170: throw new SearchEngineException(
171: "Failed to analyzer " + property, e);
172: }
173: }
174: }
175: }
176: }
177: }
178:
179: public TokenStream tokenStream(String fieldName, Reader reader) {
180: TokenStream retVal = analyzer.tokenStream(fieldName, reader);
181: return wrapTokenStreamIfNeeded(fieldName, retVal);
182: }
183:
184: public TokenStream reusableTokenStream(String fieldName,
185: Reader reader) throws IOException {
186: TokenStream retVal = analyzer.reusableTokenStream(fieldName,
187: reader);
188: return wrapTokenStreamIfNeeded(fieldName, retVal);
189: }
190:
191: public int getPositionIncrementGap(String fieldName) {
192: return analyzer.getPositionIncrementGap(fieldName);
193: }
194:
195: public TokenStream createAllTokenStream() {
196: return new AllTokenStream();
197: }
198:
199: private TokenStream wrapTokenStreamIfNeeded(String fieldName,
200: TokenStream retVal) {
201: if (!allMapping.isSupported()) {
202: return retVal;
203: }
204: ResourcePropertyMapping resourcePropertyMapping = resourceMapping
205: .getResourcePropertyMapping(fieldName);
206: if (resourcePropertyMapping == null) {
207: if (!searchEngine.getSearchEngineFactory()
208: .getPropertyNamingStrategy().isInternal(fieldName)) {
209: if (allMapping.isIncludePropertiesWithNoMappings()) {
210: allTokenStreamCollector.setTokenStream(retVal);
211: allTokenStreamCollector.updateMapping(resource,
212: resourcePropertyMapping);
213: retVal = allTokenStreamCollector;
214: }
215: }
216: } else if (!(resourcePropertyMapping.getExcludeFromAll() == ResourcePropertyMapping.ExcludeFromAllType.YES)
217: && !resourcePropertyMapping.isInternal()) {
218: allTokenStreamCollector.setTokenStream(retVal);
219: allTokenStreamCollector.updateMapping(resource,
220: resourcePropertyMapping);
221: retVal = allTokenStreamCollector;
222: }
223: return retVal;
224: }
225:
226: /**
227: * The all token stream. To be used with the all property as its token stream. This stream will
228: * return all the tokens created and collected by this analyzer.
229: */
230: private class AllTokenStream extends TokenStream {
231:
232: private Iterator<Token> tokenIt;
233:
234: private int offset = 0;
235:
236: private AllTokenStream() {
237: }
238:
239: /**
240: * Override the next with token so no unneeded token will be created. Also,
241: * no need to use the result, just return the token we saved where we just
242: * change offests.
243: */
244: public Token next(Token result) throws IOException {
245: if (tokenIt == null) {
246: tokenIt = tokens.iterator();
247: }
248: if (tokenIt.hasNext()) {
249: Token token = tokenIt.next();
250: int delta = token.endOffset() - token.startOffset();
251: token.setStartOffset(offset);
252: offset += delta;
253: token.setEndOffset(offset);
254: return token;
255: }
256:
257: tokens.clear();
258: return null;
259: }
260:
261: public String toString() {
262: return "all-stream";
263: }
264: }
265:
266: /**
267: * A token stream that wraps the actual token stream and collects all the
268: * tokens it produces.
269: */
270: private class AllTokenStreamCollector extends TokenStream {
271:
272: private TokenStream tokenStream;
273:
274: private Payload payload;
275:
276: private Token lastToken;
277:
278: public AllTokenStreamCollector() {
279:
280: }
281:
282: public void updateMapping(InternalResource resource,
283: ResourcePropertyMapping resourcePropertyMapping) {
284: if (lastToken != null && payload != null) {
285: lastToken.setPayload(payload);
286: lastToken = null;
287: }
288: if (boostSupport) {
289: if (resourcePropertyMapping != null
290: && resourcePropertyMapping.getBoost() != 1.0f) {
291: payload = AllBoostUtils
292: .writeFloat(resourcePropertyMapping
293: .getBoost());
294: } else if (resource.getBoost() != 1.0f) {
295: // we get the boost from the resource thus taking into account any resource property mapping
296: // and/or resource mapping boost level
297: payload = AllBoostUtils.writeFloat(resource
298: .getBoost());
299: } else {
300: payload = null;
301: }
302: }
303: }
304:
305: public void setTokenStream(TokenStream tokenStream) {
306: this .tokenStream = tokenStream;
307: }
308:
309: public Token next() throws IOException {
310: // we put the payload on the last token. It has already been indexed
311: // and it will be used on the all property later on
312: if (lastToken != null && payload != null) {
313: lastToken.setPayload(payload);
314: }
315: lastToken = tokenStream.next();
316: if (lastToken != null) {
317: tokens.add(lastToken);
318: }
319: return lastToken;
320: }
321:
322: public void reset() throws IOException {
323: tokenStream.reset();
324: }
325:
326: public void close() throws IOException {
327: if (lastToken != null && payload != null) {
328: lastToken.setPayload(payload);
329: }
330: tokenStream.close();
331: }
332: }
333: }
|