001: package prefuse.data.search;
002:
003: import java.io.IOException;
004: import java.util.logging.Logger;
005:
006: import org.apache.lucene.document.Document;
007: import org.apache.lucene.document.Field;
008: import org.apache.lucene.queryParser.ParseException;
009: import org.apache.lucene.search.Hits;
010:
011: import prefuse.data.Tuple;
012: import prefuse.util.StringLib;
013: import prefuse.util.collections.IntObjectHashMap;
014:
015: /**
016: * <p>
017: * SearchTupleSet implementation that performs text searches on indexed Tuple
018: * data using the Lucene search engine.
019: * <a href="http://lucene.apache.org/">Lucene</a> is an open source search
020: * engine supporting full text indexing and keyword search. Please refer to
021: * the Lucene web page for more information. Note that for this class to be
022: * used by prefuse applications, the Lucene classes must be included on the
023: * application classpath.
024: * </p>
025: *
026: * @version 1.0
027: * @author <a href="http://jheer.org">jeffrey heer</a>
028: * @see prefuse.data.query.SearchQueryBinding
029: */
030: public class KeywordSearchTupleSet extends SearchTupleSet {
031:
032: private static final Logger s_logger = Logger
033: .getLogger(KeywordSearchTupleSet.class.getName());
034:
035: protected IntObjectHashMap m_map = new IntObjectHashMap();
036: protected String m_query = "";
037:
038: protected LuceneSearcher m_lucene = null;
039: protected boolean m_storeTermVectors = false;
040:
041: protected int m_id = 1;
042:
043: /**
044: * Creates a new KeywordSearchFocusSet using an in-memory search index.
045: */
046: public KeywordSearchTupleSet() {
047: m_lucene = new LuceneSearcher();
048: }
049:
050: /**
051: * Creates a new TextSearchFocusSet with the given LuceneSearcher.
052: * @param searcher the {@link LuceneSearcher} to use.
053: */
054: public KeywordSearchTupleSet(LuceneSearcher searcher) {
055: m_lucene = searcher;
056: }
057:
058: /**
059: * Returns the current search query, if any.
060: * @return the currently active search query
061: */
062: public String getQuery() {
063: return m_query;
064: }
065:
066: /**
067: * Searches the indexed Tuple fields for matching keywords, using
068: * the Lucene search engine. Matching Tuples are available as the
069: * members of this TupleSet.
070: * @param query the query string to search for
071: */
072: public void search(String query) {
073: if (query == null)
074: query = "";
075:
076: if (query.equals(m_query))
077: return; // no change
078:
079: Tuple[] rem = clearInternal();
080: m_query = query;
081:
082: query.trim();
083: if (query.length() == 0) {
084: this .fireTupleEvent(null, DELETE);
085: return;
086: }
087:
088: m_lucene.setReadMode(true);
089: try {
090: Hits hits = m_lucene.search(query);
091: for (int i = 0; i < hits.length(); i++) {
092: Tuple t = getMatchingTuple(hits.doc(i));
093: addInternal(t);
094: }
095: Tuple[] add = getTupleCount() > 0 ? toArray() : null;
096: fireTupleEvent(add, rem);
097: } catch (ParseException e) {
098: s_logger.warning("Lucene query parse exception.\n"
099: + StringLib.getStackTrace(e));
100: } catch (IOException e) {
101: s_logger.warning("Lucene IO exception.\n"
102: + StringLib.getStackTrace(e));
103: }
104:
105: }
106:
107: /**
108: * Return the Tuple matching the given Lucene Document, if any.
109: * @param d the Document to lookup.
110: * @return the matching Tuple, or null if none.
111: */
112: protected Tuple getMatchingTuple(Document d) {
113: int id = Integer.parseInt(d.get(LuceneSearcher.ID));
114: return (Tuple) m_map.get(id);
115: }
116:
117: /**
118: * @see prefuse.data.search.SearchTupleSet#index(prefuse.data.Tuple, java.lang.String)
119: */
120: public void index(Tuple t, String field) {
121: m_lucene.setReadMode(false);
122: String s;
123: if ((s = t.getString(field)) == null)
124: return;
125:
126: int id = m_id++;
127: m_lucene.addDocument(getDocument(id, s));
128: m_map.put(id, t);
129: }
130:
131: /**
132: * Returns false, as unindexing values is not currently supported.
133: * @see prefuse.data.search.SearchTupleSet#isUnindexSupported()
134: */
135: public boolean isUnindexSupported() {
136: return false;
137: }
138:
139: /**
140: * This method throws an exception, as unidexing is not supported.
141: * @see prefuse.data.search.SearchTupleSet#unindex(prefuse.data.Tuple, java.lang.String)
142: * @throws UnsupportedOperationException
143: */
144: public void unindex(Tuple t, String attrName) {
145: throw new UnsupportedOperationException();
146: }
147:
148: /**
149: * Create a Lucene Document instance with the given document ID and text.
150: * @param id the document ID
151: * @param text the text the Document should contain
152: * @return a new Lucene Document instance
153: */
154: protected Document getDocument(int id, String text) {
155: Document d = new Document();
156: d.add(Field
157: .Text(LuceneSearcher.FIELD, text, m_storeTermVectors));
158: d.add(Field.Keyword(LuceneSearcher.ID, String.valueOf(id)));
159: return d;
160: }
161:
162: /**
163: * Get the {@link LuceneSearcher} instance used by this class.
164: * @return returns the backing lucene searcher.
165: */
166: public LuceneSearcher getLuceneSearcher() {
167: return m_lucene;
168: }
169:
170: /**
171: * Returns a copy of the mapping from Lucene document IDs to prefuse Tuple instances.
172: * @return a copy of the map from lucene doc IDs to prefuse Tuples.
173: */
174: public IntObjectHashMap getTupleMap() {
175: return (IntObjectHashMap) m_map.clone();
176: }
177:
178: /**
179: * Removes all search hits and clears out the index.
180: * @see prefuse.data.tuple.TupleSet#clear()
181: */
182: public void clear() {
183: m_lucene = new LuceneSearcher();
184: super .clear();
185: }
186:
187: } // end of class KeywordSearchTupleSet
|