001: /*
002: * Copyright 2001-2006 C:1 Financial Services GmbH
003: *
004: * This software is free software; you can redistribute it and/or
005: * modify it under the terms of the GNU Lesser General Public
006: * License Version 2.1, as published by the Free Software Foundation.
007: *
008: * This software is distributed in the hope that it will be useful,
009: * but WITHOUT ANY WARRANTY; without even the implied warranty of
010: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
011: * Lesser General Public License for more details.
012: *
013: * You should have received a copy of the GNU Lesser General Public
014: * License along with this library; if not, write to the Free Software
015: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
016: */
017:
018: package de.finix.contelligent.search;
019:
020: import org.apache.lucene.document.Document;
021: import org.apache.lucene.index.IndexReader;
022: import org.apache.lucene.index.Term;
023: import org.apache.lucene.search.Query;
024: import org.apache.lucene.search.TermQuery;
025:
026: import de.finix.contelligent.CallData;
027: import de.finix.contelligent.ComponentManager;
028: import de.finix.contelligent.Contelligent;
029: import de.finix.contelligent.Session;
030: import de.finix.contelligent.core.ContelligentImpl;
031: import de.finix.contelligent.search.engine.LuceneIndexIterator;
032: import de.finix.contelligent.search.engine.SearchEngine;
033: import de.finix.contelligent.test.ContelligentTestCase;
034:
035: public class SearchEngineTests extends ContelligentTestCase {
036:
037: private SearchEngine searchEngine;
038:
039: public SearchEngineTests(String s) {
040: super (s);
041: searchEngine = ContelligentImpl.getInstance().getSearchEngine();
042: }
043:
044: public void testStartCrawler() throws Exception {
045: searchEngine.startCrawler("testIndex");
046:
047: LuceneIndexIterator iterator = new LuceneIndexIterator(
048: IndexReader.open(ContelligentImpl.getInstance()
049: .getContelligentDir(
050: Contelligent.DIR_INDEX + "testIndex")));
051: int i = 0;
052:
053: while (iterator.hasNext()) {
054: Document document = (Document) iterator.next();
055: System.out.println(document.getField("path"));
056: i++;
057: }
058: System.out.println(i);
059: iterator.close();
060:
061: SearchResult hits = searchEngine.search("doof", "testIndex");
062:
063: assertEquals(4, hits.getLength());
064:
065: // search for stopword
066: hits = searchEngine.search("ist", "testIndex");
067: assertEquals(0, hits.getLength());
068: // search for terms in pdf
069: hits = searchEngine.search("cvs OR cederqvist", "testIndex");
070: assertEquals(1, hits.getLength());
071: // be careful: aaa is a value for the test category so if the category
072: // values are added to URLs
073: // this will change the search result => use aaaa
074: // search for word rendered only if categoryvalue config works
075: hits = searchEngine.search("aaaa", "testIndex");
076: assertEquals(1, hits.getLength());
077: hits = searchEngine.search("AAAA", "testIndex");
078: assertEquals(1, hits.getLength());
079: hits = searchEngine.search("AaAA", "testIndex");
080: assertEquals(1, hits.getLength());
081: // search for word rendered only if categoryvalue config does not work
082: hits = searchEngine.search("bbb", "testIndex");
083: assertEquals(0, hits.getLength());
084: // bug 305
085: hits = searchEngine.search("\"ist doof\"", "testIndex");
086: assertEquals(4, hits.getLength());
087: }
088:
089: public void testStartRawCrawler() throws Exception {
090: searchEngine.startCrawler("testRaw");
091: SearchResult hits = searchEngine.search("doof", "testRaw");
092:
093: assertEquals(2, hits.getLength());
094:
095: hits = searchEngine.search("doof*", "testRaw");
096:
097: assertEquals(4, hits.getLength());
098: hits = searchEngine.search("ist", "testRaw");
099: assertEquals(4, hits.getLength());
100: // search for terms in pdf
101: hits = searchEngine.search("cvs OR cederqvist", "testRaw");
102: assertEquals(0, hits.getLength());
103:
104: hits = searchEngine.search("aaaa", "testRaw");
105: assertEquals(1, hits.getLength());
106: hits = searchEngine.search("AAAA", "testRaw");
107: assertEquals(1, hits.getLength());
108: hits = searchEngine.search("AaAA", "testRaw");
109: assertEquals(1, hits.getLength());
110: hits = searchEngine.search("bbb", "testRaw");
111: assertEquals(1, hits.getLength());
112: }
113:
114: public void testFilterAccessRights() throws Throwable {
115: searchEngine.startCrawler("newIndex");
116:
117: Session session = beginSession("test", "test-users", "test");
118: CallData callData = createCallData(session);
119: ComponentManager root = getContelligent()
120: .getRootComponentManager();
121:
122: beginTx(callData);
123: try {
124: // search for stopword
125: SearchResult hits = searchEngine.search("ist", "testIndex");
126: assertEquals(0, hits.getLength());
127:
128: Query query = new TermQuery(new Term("type",
129: "contelligent.test.crawlerPage"));
130: hits = searchEngine.search(query, "testIndex", 0, -1, null);
131: assertEquals(1, hits.getLength());
132: query = new TermQuery(new Term("path",
133: "/contelligent/test/crawler/index"));
134: hits = searchEngine.search(query, "testIndex", 0, -1, null);
135: assertEquals(1, hits.getLength());
136: commitTx();
137: } catch (Throwable e) {
138: rollbackTx();
139: throw e;
140: } finally {
141: endSession(session);
142: }
143: }
144:
145: public void testDateSearch() throws Exception {
146: searchEngine.startCrawler("testIndex");
147: long now = System.currentTimeMillis();
148: long from = 0;
149:
150: LuceneIndexIterator iterator = new LuceneIndexIterator(
151: IndexReader.open(ContelligentImpl.getInstance()
152: .getContelligentDir(
153: Contelligent.DIR_INDEX + "testIndex")));
154: int i = 0;
155:
156: while (iterator.hasNext()) {
157: Document document = (Document) iterator.next();
158: System.out.println(document.getField("path"));
159: i++;
160: }
161: System.out.println(i);
162: iterator.close();
163:
164: SearchResult hits = searchEngine.search("doof", "testIndex",
165: from, now, 0, 100);
166:
167: assertEquals(4, hits.getLength());
168:
169: // search for stopword
170: hits = searchEngine.search("ist", "testIndex", from, now, 0,
171: 100);
172: assertEquals(0, hits.getLength());
173: // search for terms in pdf
174: hits = searchEngine.search("cvs OR cederqvist", "testIndex",
175: from, now, 0, 100);
176: assertEquals(1, hits.getLength());
177: // be careful: aaa is a value for the test category so if the category
178: // values are added to URLs
179: // this will change the search result => use aaaa
180: // search for word rendered only if categoryvalue config works
181: hits = searchEngine.search("aaaa", "testIndex", from, now, 0,
182: 100);
183: assertEquals(1, hits.getLength());
184: hits = searchEngine.search("AAAA", "testIndex", from, now, 0,
185: 100);
186: assertEquals(1, hits.getLength());
187: hits = searchEngine.search("AaAA", "testIndex", from, now, 0,
188: 100);
189: assertEquals(1, hits.getLength());
190: // search for word rendered only if categoryvalue config does not work
191: hits = searchEngine.search("bbb", "testIndex", from, now, 0,
192: 100);
193: assertEquals(0, hits.getLength());
194: // bug 305
195: hits = searchEngine.search("\"ist doof\"", "testIndex", from,
196: now, 0, 100);
197: assertEquals(4, hits.getLength());
198: }
199:
200: }
|