001: /*
002: * regain - A file search engine providing plenty of formats
003: * Copyright (C) 2004 Til Schneider
004: *
005: * This library is free software; you can redistribute it and/or
006: * modify it under the terms of the GNU Lesser General Public
007: * License as published by the Free Software Foundation; either
008: * version 2.1 of the License, or (at your option) any later version.
009: *
010: * This library is distributed in the hope that it will be useful,
011: * but WITHOUT ANY WARRANTY; without even the implied warranty of
012: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
013: * Lesser General Public License for more details.
014: *
015: * You should have received a copy of the GNU Lesser General Public
016: * License along with this library; if not, write to the Free Software
017: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
018: *
019: * Contact: Til Schneider, info@murfman.de
020: *
021: * CVS information:
022: * $RCSfile$
023: * $Source$
024: * $Date: 2007-10-20 15:40:39 +0200 (Sa, 20 Okt 2007) $
025: * $Author: til132 $
026: * $Revision: 244 $
027: */
028: package net.sf.regain.search.results;
029:
030: import java.io.IOException;
031:
032: import net.sf.regain.RegainException;
033: import net.sf.regain.search.IndexSearcherManager;
034: import net.sf.regain.search.config.IndexConfig;
035:
036: import org.apache.lucene.analysis.Analyzer;
037: import org.apache.lucene.document.Document;
038: import org.apache.lucene.index.Term;
039: import org.apache.lucene.queryParser.ParseException;
040: import org.apache.lucene.queryParser.QueryParser;
041: import org.apache.lucene.search.BooleanQuery;
042: import org.apache.lucene.search.Hits;
043: import org.apache.lucene.search.Query;
044: import org.apache.lucene.search.TermQuery;
045: import org.apache.lucene.search.BooleanClause.Occur;
046: import org.apache.regexp.RE;
047: import org.apache.regexp.RESyntaxException;
048:
049: /**
050: * Holds the results of a search on a single index.
051: *
052: * @author Til Schneider, www.murfman.de
053: */
054: public class SingleSearchResults implements SearchResults {
055:
056: /** The configuration for the index. */
057: private IndexConfig mIndexConfig;
058:
059: /** The Query text. */
060: private String mQueryText;
061:
062: /** The hits of this search. */
063: private Hits mHits;
064:
065: /** The time the search took. */
066: private int mSearchTime;
067:
068: /**
069: * Der Reguläre Ausdruck, zu dem eine URL passen muss, damit sie in einem
070: * neuen Fenster geöffnet wird.
071: */
072: private RE mOpenInNewWindowRegex;
073:
074: /**
075: * Creates a new instance of SearchContext.
076: *
077: * @param indexConfig The configuration for the index.
078: * @param queryText The query text to search for.
079: * @param groupArr The groups the searching user has reading rights for.
080: * See {@link net.sf.regain.search.access.SearchAccessController}.
081: * Is <code>null</code>, if no access control should be used.
082: *
083: * @throws RegainException If searching failed.
084: */
085: public SingleSearchResults(IndexConfig indexConfig,
086: String queryText, String[] groupArr) throws RegainException {
087: long startTime = System.currentTimeMillis();
088:
089: mIndexConfig = indexConfig;
090: mQueryText = queryText;
091:
092: if (queryText != null) {
093: IndexSearcherManager manager = IndexSearcherManager
094: .getInstance(indexConfig.getDirectory());
095:
096: // Get the Analyzer
097: Analyzer analyzer = manager.getAnalyzer();
098:
099: BooleanQuery query;
100: try {
101: query = new BooleanQuery();
102:
103: String[] searchFieldArr = indexConfig
104: .getSearchFieldList();
105: for (int i = 0; i < searchFieldArr.length; i++) {
106: QueryParser parser = new QueryParser(
107: searchFieldArr[i], analyzer);
108: parser.setDefaultOperator(QueryParser.AND_OPERATOR);
109: Query fieldQuery = parser.parse(queryText);
110:
111: // Add as OR
112: query.add(fieldQuery, Occur.SHOULD);
113: }
114: } catch (ParseException exc) {
115: throw new RegainException(
116: "Error while parsing search pattern '"
117: + queryText + "': " + exc.getMessage(),
118: exc);
119: }
120:
121: // Check whether access control is used
122: if (groupArr != null) {
123: // Create a query that matches any group
124: BooleanQuery groupQuery = new BooleanQuery();
125: for (int i = 0; i < groupArr.length; i++) {
126: // Add as OR
127: groupQuery.add(new TermQuery(new Term("groups",
128: groupArr[i])), Occur.SHOULD);
129: }
130:
131: // Create a main query that contains the group query and the search query
132: // combined with AND
133: BooleanQuery mainQuery = new BooleanQuery();
134: mainQuery.add(query, Occur.MUST);
135: mainQuery.add(groupQuery, Occur.MUST);
136:
137: // Set the main query as query to use
138: query = mainQuery;
139: }
140:
141: // System.out.println("Query: '" + queryText + "' -> '" + query.toString() + "'");
142:
143: try {
144: mHits = manager.search(query);
145: } catch (RegainException exc) {
146: throw new RegainException(
147: "Error while searching pattern: " + queryText,
148: exc);
149: }
150: }
151:
152: mSearchTime = (int) (System.currentTimeMillis() - startTime);
153:
154: String openInNewWindowRegex = indexConfig
155: .getOpenInNewWindowRegex();
156: if (openInNewWindowRegex != null) {
157: try {
158: mOpenInNewWindowRegex = new RE(openInNewWindowRegex);
159: } catch (RESyntaxException exc) {
160: throw new RegainException(
161: "Syntax error in openInNewWindowRegex: '"
162: + openInNewWindowRegex + "'", exc);
163: }
164: }
165: }
166:
167: /**
168: * Gets the query text of the search.
169: *
170: * @return The query text.
171: */
172: public String getQueryText() {
173: return mQueryText;
174: }
175:
176: /**
177: * Gets the search hits.
178: *
179: * @return The search hits.
180: */
181: Hits getHits() {
182: return mHits;
183: }
184:
185: /**
186: * Gets the number of hits the search had.
187: *
188: * @return the number of hits the search had.
189: */
190: public int getHitCount() {
191: if (mHits == null) {
192: return 0;
193: }
194:
195: return mHits.length();
196: }
197:
198: /**
199: * Gets the document of one hit.
200: *
201: * @param index The index of the hit.
202: * @return the document of one hit.
203: *
204: * @throws RegainException If getting the document failed.
205: * @see Document
206: */
207: public Document getHitDocument(int index) throws RegainException {
208: try {
209: return mHits.doc(index);
210: } catch (IOException exc) {
211: throw new RegainException(
212: "Error while getting document of search hit #"
213: + index, exc);
214: }
215: }
216:
217: /**
218: * Gets the score of one hit.
219: *
220: * @param index The index of the hit.
221: * @return the score of one hit.
222: *
223: * @throws RegainException If getting the score failed.
224: * @see Hits#score(int)
225: */
226: public float getHitScore(int index) throws RegainException {
227: try {
228: return mHits.score(index);
229: } catch (IOException exc) {
230: throw new RegainException(
231: "Error while getting score of search hit #" + index,
232: exc);
233: }
234: }
235:
236: /**
237: * Gets the time the search took in milliseconds.
238: *
239: * @return The search time.
240: */
241: public int getSearchTime() {
242: return mSearchTime;
243: }
244:
245: /**
246: * Gets whether a hit should be opened in a new window.
247: *
248: * @param index The index of the hit.
249: * @return Whether the hit should be opened in a new window.
250: * @throws RegainException If getting the URL failed.
251: */
252: public synchronized boolean getOpenHitInNewWindow(int index)
253: throws RegainException {
254: String url = getHitUrl(index);
255:
256: if (mOpenInNewWindowRegex == null) {
257: return false;
258: } else {
259: if (mOpenInNewWindowRegex.match(url)) {
260: return true;
261: } else {
262: return false;
263: }
264: }
265: }
266:
267: /**
268: * Gets whether the file-to-http-bridge should be used for a certain hit.
269: * <p>
270: * Mozilla browsers have a security mechanism that blocks loading file-URLs
271: * from pages loaded via http. To be able to load files from the search
272: * results, regain offers the file-to-http-bridge that provides all files that
273: * are listed in the index via http.
274: *
275: * @param index The index of the hit.
276: * @return Whether the file-to-http-bridge should be used.
277: */
278: public boolean getUseFileToHttpBridgeForHit(int index) {
279: return mIndexConfig.getUseFileToHttpBridge();
280: }
281:
282: /**
283: * Gets the url from a hit and rewrites it according to the rewrite rules
284: * specified in the index config.
285: *
286: * @param index The index of the hit to get the URL for.
287: * @return The url of the wanted hit.
288: * @throws RegainException If getting the hit document failed.
289: */
290: public String getHitUrl(int index) throws RegainException {
291: String url = getHitDocument(index).get("url");
292: if (url == null) {
293: return null;
294: }
295:
296: // Get the rules
297: String[][] rewriteRules = mIndexConfig.getRewriteRules();
298: if (rewriteRules != null) {
299: for (int i = 0; i < rewriteRules.length; i++) {
300: String[] rule = rewriteRules[i];
301: String prefix = rule[0];
302: if (url.startsWith(prefix)) {
303: String replacement = rule[1];
304: return replacement + url.substring(prefix.length());
305: }
306: }
307: }
308:
309: // The URL does not match any rewrite rule -> Don't change it
310: return url;
311: }
312:
313: /**
314: * Gets the name of the index a hit comes from.
315: *
316: * @param index The index of the hit to get the index name for.
317: * @return The name of the index a hit comes from.
318: * @throws RegainException If getting the index name failed.
319: */
320: public String getHitIndexName(int index) throws RegainException {
321: return mIndexConfig.getName();
322: }
323:
324: }
|