001: /**
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */package org.apache.cocoon.components.search.components.impl;
017:
018: import java.io.IOException;
019: import java.util.ArrayList;
020: import java.util.Iterator;
021: import java.util.List;
022:
023: import org.apache.avalon.framework.configuration.Configurable;
024: import org.apache.avalon.framework.configuration.Configuration;
025: import org.apache.avalon.framework.configuration.ConfigurationException;
026: import org.apache.cocoon.components.search.IndexException;
027: import org.apache.lucene.document.Document;
028:
029: /**
030: *
031: * @author Nicolas Maisonneuve
032: */
033: public class DefaultIndexerImpl extends AbstractIndexer implements
034: Configurable {
035:
036: /**
037: * Buffer size is element
038: */
039: static public final String DOCUMENT_BUFFERED_NUM_ELEMENT = "buffer_size";
040:
041: /**
042: * the default size of the buffer
043: */
044: private int defaultMaxBufDocs = 100;
045:
046: /**
047: * Buffer Size: the number of the maximum documents buffered, before to
048: * flush and index this documents (the buffer is used in the update mode)
049: */
050: private int bufferSize;
051:
052: /**
053: * the buffer: the List where are stored the documents
054: */
055: private List buffer = new ArrayList();
056:
057: /*
058: * (non-Javadoc)
059: *
060: * @see org.apache.avalon.framework.configuration.Configurable#configure(org.apache.avalon.framework.configuration.Configuration)
061: */
062: public void configure(Configuration conf)
063: throws ConfigurationException {
064: defaultMaxBufDocs = conf
065: .getChild(DOCUMENT_BUFFERED_NUM_ELEMENT)
066: .getValueAsInteger(100);
067: if (this .getLogger().isDebugEnabled()) {
068: this .getLogger().debug(
069: "default max buffered documents: "
070: + defaultMaxBufDocs);
071: }
072: }
073:
074: /**
075: * Set the maximum number of buffered documents to avoid to open and close
076: * the IndexWriter a lot of times
077: *
078: * @param value
079: * int number (default 100)
080: */
081: public void setBufferSize(int value) {
082: bufferSize = value;
083: }
084:
085: /*
086: * (non-Javadoc)
087: *
088: * @see org.apache.cocoon.components.search.components.impl.AbstractIndexer#release()
089: */
090: final protected void release() throws IndexException {
091: // flush the last documents to update
092: if (buffer.size() > 0) {
093: flushBufferedDocs();
094: }
095: bufferSize = defaultMaxBufDocs;
096: this .optimize();
097: super .release();
098: }
099:
100: /*
101: * (non-Javadoc)
102: *
103: * @see org.apache.cocoon.components.search.components.impl.AbstractIndexer#addDocument(org.apache.lucene.document.Document)
104: */
105: final protected void addDocument(Document doc)
106: throws IndexException {
107: switchToADD_MODE(false);
108: addDocument(add_writer, doc);
109: }
110:
111: /*
112: * (non-Javadoc)
113: *
114: * @see org.apache.cocoon.components.search.components.impl.AbstractIndexer#updateDocument(org.apache.lucene.document.Document)
115: */
116: final protected void updateDocument(Document doc)
117: throws IndexException {
118: // first delete the old document
119: del(doc.get(DOCUMENT_UID_FIELD));
120:
121: // then store in the index queue
122: buffer.add(doc);
123:
124: // flush the queue if it's necessary
125: if (buffer.size() == bufferSize) {
126: flushBufferedDocs();
127: }
128: }
129:
130: /**
131: * Index the list of documents to update
132: *
133: * @throws IOException
134: */
135: private void flushBufferedDocs() throws IndexException {
136: this .switchToADD_MODE(false);
137: Iterator iter = buffer.iterator();
138: while (iter.hasNext()) {
139: addDocument(add_writer, (Document) iter.next());
140: }
141: buffer.clear();
142: }
143: }
|