001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.cocoon.components.search.components.impl;
017:
018: import java.io.IOException;
019:
020: import org.apache.avalon.excalibur.pool.Recyclable;
021: import org.apache.avalon.framework.logger.AbstractLogEnabled;
022: import org.apache.cocoon.components.search.IndexException;
023: import org.apache.cocoon.components.search.components.Indexer;
024: import org.apache.lucene.analysis.Analyzer;
025: import org.apache.lucene.document.Document;
026: import org.apache.lucene.index.IndexReader;
027: import org.apache.lucene.index.IndexWriter;
028: import org.apache.lucene.index.Term;
029: import org.apache.lucene.store.Directory;
030:
031: /**
032: * Abstract Indexer
033: *
034: * @author Nicolas Maisonneuve
035: */
036: public abstract class AbstractIndexer extends AbstractLogEnabled
037: implements Indexer, Recyclable {
038:
039: /**
040: * the lucene Analyzer (see lucene doc)
041: */
042: protected Analyzer analyzer;
043:
044: /**
045: * lucene Directory (see lucene doc)
046: */
047: protected Directory dir;
048:
049: /**
050: * MergeFactor (see lucene doc)
051: */
052: protected int mergeFactor;
053:
054: /**
055: * clear mode (if true the index will be cleared)
056: */
057: protected boolean clear_mode;
058:
059: // runtime variables: lucene indexwriter and indexreader
060: protected IndexReader delete_reader;
061:
062: protected IndexWriter add_writer;
063:
064: /*
065: * (non-Javadoc)
066: *
067: * @see org.apache.cocoon.components.search.components.Indexer#setMergeFactor(int)
068: */
069: public void setMergeFactor(int value) {
070: mergeFactor = value;
071: }
072:
073: /*
074: * (non-Javadoc)
075: *
076: * @see org.apache.cocoon.components.search.components.Indexer#getMergeFactor()
077: */
078: public int getMergeFactor() {
079: return mergeFactor;
080: }
081:
082: /*
083: * (non-Javadoc)
084: *
085: * @see org.apache.cocoon.components.search.components.Indexer#getIndex()
086: */
087: public Directory getIndex() {
088: return this .dir;
089: }
090:
091: /*
092: * (non-Javadoc)
093: *
094: * @see org.apache.cocoon.components.search.components.Indexer#setIndex(org.apache.lucene.store.Directory)
095: */
096: public void setIndex(Directory dir) throws IndexException {
097: if (dir == null) {
098: throw new IllegalArgumentException("set a null directory");
099: }
100: this .dir = dir;
101: clear_mode = false;
102: try {
103: IndexReader reader = IndexReader.open(dir);
104: reader.close();
105:
106: } catch (IOException ioe) {
107: // couldn't open the index - so recreate it
108: if (getLogger().isWarnEnabled()) {
109: getLogger().warn(
110: "couldn't open the index - so recreate it");
111: }
112: this .clearIndex();
113: }
114: }
115:
116: /*
117: * (non-Javadoc)
118: *
119: * @see org.apache.cocoon.components.search.components.Indexer#setAnalyzer(org.apache.lucene.analysis.Analyzer)
120: */
121: public void setAnalyzer(Analyzer analyzer) {
122: if (analyzer == null) {
123: throw new IllegalArgumentException("set a null analyzer");
124: }
125: this .analyzer = analyzer;
126:
127: if (this .getLogger().isDebugEnabled()) {
128: this .getLogger().debug(
129: "set the analyzer "
130: + this .analyzer.getClass().getName());
131: }
132: }
133:
134: /*
135: * (non-Javadoc)
136: *
137: * @see org.apache.cocoon.components.search.components.Indexer#getAnalyzer()
138: */
139: public Analyzer getAnalyzer() {
140: return analyzer;
141: }
142:
143: protected abstract void updateDocument(Document doc)
144: throws IndexException;
145:
146: protected abstract void addDocument(Document doc)
147: throws IndexException;
148:
149: /*
150: * (non-Javadoc)
151: *
152: * @see org.apache.cocoon.components.search.components.Indexer#index(org.apache.lucene.document.Document)
153: */
154: public void index(Document doc) throws IndexException {
155: if (this .clear_mode) {
156: // As we know the index is empty , we just add the document
157: addDocument(doc);
158: } else {
159: updateDocument(doc);
160: }
161: }
162:
163: /*
164: * (non-Javadoc)
165: *
166: * @see org.apache.cocoon.components.search.components.Indexer#del(java.lang.String)
167: */
168: public int del(String uid) throws IndexException {
169: switchToDEL_MODE();
170: return deleteDocument(delete_reader, uid);
171: }
172:
173: /**
174: * Delete document
175: *
176: * @param deleter
177: * the lucene indexreader to delete document
178: * @param uid
179: * the uid of the doucment to be deleted
180: * @return the number of deleted documents
181: * @throws IndexException
182: */
183: final protected int deleteDocument(IndexReader deleter, String uid)
184: throws IndexException {
185: int r = 0;
186: try {
187: r = deleter.delete(new Term(DOCUMENT_UID_FIELD, uid));
188: } catch (IOException ex) {
189: handleError("delete document (uid:" + uid + ") error", ex);
190: }
191: if (this .getLogger().isDebugEnabled()) {
192: this .getLogger()
193: .debug("document deleted (uid:" + uid + ")");
194: }
195: return r;
196: }
197:
198: /**
199: * add document to the index
200: *
201: * @param writer
202: * the lucene indexwriter
203: * @param document
204: * the document to be indexed
205: * @throws IndexException
206: */
207: final protected void addDocument(IndexWriter writer,
208: Document document) throws IndexException {
209: try {
210: writer.addDocument(document, analyzer);
211: } catch (IOException ex) {
212: handleError("add document (uid:"
213: + document.get(DOCUMENT_UID_FIELD) + ") error", ex);
214: }
215: if (this .getLogger().isDebugEnabled()) {
216: this .getLogger().debug(
217: "document added (uid:"
218: + document.get(DOCUMENT_UID_FIELD) + ")");
219: }
220: }
221:
222: /*
223: * (non-Javadoc)
224: *
225: * @see org.apache.cocoon.components.search.components.Indexer#optimize()
226: */
227: public void optimize() throws IndexException {
228: // optimize index
229: try {
230: this .switchToADD_MODE(false);
231: add_writer.optimize();
232: } catch (IOException ex) {
233: throw new IndexException("optimization error", ex);
234: }
235: }
236:
237: /*
238: * (non-Javadoc)
239: *
240: * @see org.apache.cocoon.components.search.components.Indexer#clearIndex()
241: */
242: public void clearIndex() throws IndexException {
243: this .clear_mode = true;
244: this .switchToADD_MODE(true);
245: }
246:
247: /**
248: * releasing resources
249: *
250: * @throws IndexException
251: */
252: protected void release() throws IndexException {
253: this .closeWriter();
254: this .closeReader();
255: // set default value
256: dir = null;
257: analyzer = null;
258: mergeFactor = 10;
259: }
260:
261: /**
262: * recylcle this object
263: */
264: public void recycle() {
265: try {
266: release();
267: } catch (IndexException ex) {
268: this .getLogger().error("recycle error", ex);
269: }
270: }
271:
272: /**
273: * Switch to write mode (close read, open writer ) if it's not already done
274: *
275: * @param clear
276: * clear index
277: * @throws IndexException
278: */
279: final protected void switchToADD_MODE(boolean clear)
280: throws IndexException {
281: if (add_writer == null) {
282: closeReader();
283: openIndexWriter(clear);
284: }
285: }
286:
287: /**
288: * Switch to del mode (close writer, open reader ) if it's not already done
289: *
290: * @throws IndexException
291: */
292: final protected void switchToDEL_MODE() throws IndexException {
293: if (delete_reader == null) {
294: closeWriter();
295: openIndexReader();
296: }
297: }
298:
299: /**
300: * Open the index Writer
301: *
302: * @param create
303: * clear index or not
304: * @throws IndexException
305: */
306: final protected void openIndexWriter(boolean create)
307: throws IndexException {
308:
309: // now open writer
310: try {
311: add_writer = new IndexWriter(dir, analyzer, create);
312: // add_writer.setUseCompoundFile(true);
313: } catch (IOException e) {
314: throw new IndexException("open writer error", e);
315: }
316:
317: if (mergeFactor > add_writer.mergeFactor) {
318: add_writer.minMergeDocs = mergeFactor * 2;
319: add_writer.mergeFactor = mergeFactor;
320: }
321:
322: if (getLogger().isDebugEnabled()) {
323: getLogger().debug("writer is opened");
324: }
325: }
326:
327: /**
328: * Open Index Reader
329: *
330: * @throws IndexException
331: */
332: final protected void openIndexReader() throws IndexException {
333: try {
334: this .delete_reader = IndexReader.open(dir);
335: } catch (IOException e) {
336: throw new IndexException("open reader error", e);
337: }
338: if (getLogger().isDebugEnabled()) {
339: getLogger().debug("reader is opened");
340: }
341:
342: }
343:
344: /**
345: * Close writer
346: *
347: * @throws IndexException
348: */
349: final protected void closeWriter() throws IndexException {
350: if (add_writer != null) {
351: try {
352: add_writer.close();
353: } catch (IOException ex) {
354: throw new IndexException("close writer error", ex);
355: } finally {
356: add_writer = null;
357: }
358: if (getLogger().isDebugEnabled()) {
359: getLogger().debug("writer is closed");
360: }
361: }
362: }
363:
364: /**
365: * Close reader
366: *
367: * @throws IndexException
368: */
369: final protected void closeReader() throws IndexException {
370: if (this .delete_reader != null) {
371: try {
372: delete_reader.close();
373: } catch (IOException ex) {
374: handleError("close reader error", ex);
375: } finally {
376: delete_reader = null;
377: }
378: if (getLogger().isDebugEnabled()) {
379: getLogger().debug("reader is closed");
380: }
381: }
382: }
383:
384: /**
385: * Handle error (close writer, reader,etc.. )
386: *
387: * @param message
388: * @param exception
389: * @throws IndexException
390: */
391: private void handleError(String message, Exception exception)
392: throws IndexException {
393: try {
394: release();
395: } catch (IndexException e) {
396: }
397: throw new IndexException(message, exception);
398: }
399:
400: }
|