001: /*
002: * File : $Source: /usr/local/cvs/opencms/src/org/opencms/search/CmsIndexingThreadManager.java,v $
003: * Date : $Date: 2008-02-27 12:05:38 $
004: * Version: $Revision: 1.30 $
005: *
006: * This library is part of OpenCms -
007: * the Open Source Content Management System
008: *
009: * Copyright (c) 2002 - 2008 Alkacon Software GmbH (http://www.alkacon.com)
010: *
011: * This library is free software; you can redistribute it and/or
012: * modify it under the terms of the GNU Lesser General Public
013: * License as published by the Free Software Foundation; either
014: * version 2.1 of the License, or (at your option) any later version.
015: *
016: * This library is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019: * Lesser General Public License for more details.
020: *
021: * For further information about Alkacon Software GmbH, please see the
022: * company website: http://www.alkacon.com
023: *
024: * For further information about OpenCms, please see the
025: * project website: http://www.opencms.org
026: *
027: * You should have received a copy of the GNU Lesser General Public
028: * License along with this library; if not, write to the Free Software
029: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
030: */
031:
032: package org.opencms.search;
033:
034: import org.opencms.file.CmsObject;
035: import org.opencms.file.CmsPropertyDefinition;
036: import org.opencms.file.CmsResource;
037: import org.opencms.i18n.CmsMessageContainer;
038: import org.opencms.main.CmsException;
039: import org.opencms.main.CmsLog;
040: import org.opencms.main.OpenCms;
041: import org.opencms.report.CmsLogReport;
042: import org.opencms.report.I_CmsReport;
043: import org.opencms.search.documents.I_CmsDocumentFactory;
044:
045: import java.util.Collections;
046: import java.util.List;
047: import java.util.Locale;
048:
049: import org.apache.commons.logging.Log;
050: import org.apache.lucene.index.IndexWriter;
051:
052: /**
053: * Implements the management of indexing threads.<p>
054: *
055: * @author Carsten Weinholz
056: * @author Alexander Kandzior
057: *
058: * @version $Revision: 1.30 $
059: *
060: * @since 6.0.0
061: */
062: public class CmsIndexingThreadManager {
063:
064: /** The log object for this class. */
065: private static final Log LOG = CmsLog
066: .getLog(CmsIndexingThreadManager.class);
067:
068: /** Number of threads abandoned. */
069: private int m_abandonedCounter;
070:
071: /** The time the last error was written to the log. */
072: private long m_lastLogErrorTime;
073:
074: /** The time the last warning was written to the log. */
075: private long m_lastLogWarnTime;
076:
077: /** Number of thread returned. */
078: private int m_returnedCounter;
079:
080: /** Overall number of threads started. */
081: private int m_startedCounter;
082:
083: /** Timeout for abandoning threads. */
084: private long m_timeout;
085:
086: /**
087: * Creates and starts a thread manager for indexing threads.<p>
088: *
089: * @param timeout timeout after a thread is abandoned
090: */
091: public CmsIndexingThreadManager(long timeout) {
092:
093: m_timeout = timeout;
094: }
095:
096: /**
097: * Creates and starts a new indexing thread for a resource.<p>
098: *
099: * After an indexing thread was started, the manager suspends itself
100: * and waits for an amount of time specified by the <code>timeout</code>
101: * value. If the timeout value is reached, the indexing thread is
102: * aborted by an interrupt signal.<p>
103: *
104: * @param cms the cms object
105: * @param writer the write to write the index
106: * @param res the resource
107: * @param index the index
108: * @param report the report to write the indexing progress to
109: */
110: public void createIndexingThread(CmsObject cms, IndexWriter writer,
111: CmsResource res, CmsSearchIndex index, I_CmsReport report) {
112:
113: // check if this resource should be excluded from the index, if so skip it
114: boolean excludeFromIndex = false;
115: try {
116: // do property lookup with folder search
117: excludeFromIndex = Boolean
118: .valueOf(
119: cms
120: .readPropertyObject(
121: res,
122: CmsPropertyDefinition.PROPERTY_SEARCH_EXCLUDE,
123: true).getValue())
124: .booleanValue();
125: } catch (CmsException e) {
126: if (LOG.isDebugEnabled()) {
127: LOG.debug(Messages.get().getBundle().key(
128: Messages.LOG_UNABLE_TO_READ_PROPERTY_1,
129: res.getRootPath()));
130: }
131: }
132:
133: if (!excludeFromIndex) {
134: // check if any resource default locale has a match with the index locale, if not skip resource
135: List locales = OpenCms.getLocaleManager()
136: .getDefaultLocales(cms, res);
137: Locale match = OpenCms.getLocaleManager()
138: .getFirstMatchingLocale(
139: Collections
140: .singletonList(index.getLocale()),
141: locales);
142: excludeFromIndex = (match == null);
143: }
144:
145: I_CmsDocumentFactory documentType = null;
146: if (!excludeFromIndex) {
147: // don't get document type if excluded from index, this will lead to exclusion of resource
148: documentType = index.getDocumentFactory(res);
149: }
150: if (documentType == null) {
151: // this resource is not contained in the given search index
152: m_startedCounter++;
153: m_returnedCounter++;
154: if (report != null) {
155: report
156: .println(
157: org.opencms.report.Messages
158: .get()
159: .container(
160: org.opencms.report.Messages.RPT_SKIPPED_0),
161: I_CmsReport.FORMAT_NOTE);
162: }
163: if (LOG.isDebugEnabled()) {
164: LOG.debug(Messages.get().getBundle().key(
165: Messages.LOG_SKIPPED_1, res.getRootPath()));
166: }
167:
168: // no need to continue
169: return;
170: }
171:
172: // extract the content from the resource in a separate Thread
173: CmsIndexingThread thread = new CmsIndexingThread(cms, writer,
174: res, documentType, index, report);
175: m_startedCounter++;
176: thread.start();
177: try {
178: thread.join(m_timeout);
179: } catch (InterruptedException e) {
180: // ignore
181: }
182: if (thread.isAlive()) {
183: if (LOG.isWarnEnabled()) {
184: LOG.warn(Messages.get().getBundle().key(
185: Messages.LOG_INDEXING_TIMEOUT_1,
186: res.getRootPath()));
187: }
188: if (report != null) {
189: report.println();
190: report
191: .print(
192: org.opencms.report.Messages
193: .get()
194: .container(
195: org.opencms.report.Messages.RPT_FAILED_0),
196: I_CmsReport.FORMAT_WARNING);
197: report.println(Messages.get().container(
198: Messages.RPT_SEARCH_INDEXING_TIMEOUT_1,
199: res.getRootPath()), I_CmsReport.FORMAT_WARNING);
200: }
201: m_abandonedCounter++;
202: thread.interrupt();
203: } else {
204: m_returnedCounter++;
205: }
206: }
207:
208: /**
209: * Gets the current thread (file) count.<p>
210: *
211: * @return the current thread count
212: */
213: public int getCounter() {
214:
215: return m_startedCounter;
216: }
217:
218: /**
219: * Returns if the indexing manager still have indexing threads.<p>
220: *
221: * @return true if the indexing manager still have indexing threads
222: */
223: public boolean isRunning() {
224:
225: if (m_lastLogErrorTime <= 0) {
226: m_lastLogErrorTime = System.currentTimeMillis();
227: m_lastLogWarnTime = m_lastLogErrorTime;
228: } else {
229: long currentTime = System.currentTimeMillis();
230: if ((currentTime - m_lastLogWarnTime) > 30000) {
231: // write warning to log after 30 seconds
232: if (LOG.isWarnEnabled()) {
233: LOG
234: .warn(Messages
235: .get()
236: .getBundle()
237: .key(
238: Messages.LOG_WAITING_ABANDONED_THREADS_2,
239: new Integer(
240: m_abandonedCounter),
241: new Integer(
242: (m_startedCounter - m_returnedCounter))));
243: }
244: m_lastLogWarnTime = currentTime;
245: }
246: if ((currentTime - m_lastLogErrorTime) > 600000) {
247: // write error to log after 10 minutes
248: LOG
249: .error(Messages
250: .get()
251: .getBundle()
252: .key(
253: Messages.LOG_WAITING_ABANDONED_THREADS_2,
254: new Integer(m_abandonedCounter),
255: new Integer(
256: (m_startedCounter - m_returnedCounter))));
257: m_lastLogErrorTime = currentTime;
258: }
259: }
260:
261: boolean result = (m_returnedCounter + m_abandonedCounter < m_startedCounter);
262: if (result && LOG.isInfoEnabled()) {
263: // write a note to the log that all threads have finished
264: LOG.info(Messages.get().getBundle().key(
265: Messages.LOG_THREADS_FINISHED_0));
266: }
267: return result;
268: }
269:
270: /**
271: * Writes statistical information to the report.<p>
272: *
273: * The method reports the total number of threads started
274: * (equals to the number of indexed files), the number of returned
275: * threads (equals to the number of successfully indexed files),
276: * and the number of abandoned threads (hanging threads reaching the timeout).
277: *
278: * @param report the report to write the statistics to
279: */
280: public void reportStatistics(I_CmsReport report) {
281:
282: if (report != null) {
283: CmsMessageContainer message = Messages.get().container(
284: Messages.RPT_SEARCH_INDEXING_STATS_4,
285: new Object[] { new Integer(m_startedCounter),
286: new Integer(m_returnedCounter),
287: new Integer(m_abandonedCounter),
288: report.formatRuntime() });
289:
290: report.println(message);
291: if (!(report instanceof CmsLogReport)
292: && LOG.isInfoEnabled()) {
293: // only write to the log if report is not already a log report
294: LOG.info(message.key());
295: }
296: }
297: }
298:
299: /**
300: * Starts the thread manager to look for non-terminated threads<p>
301: * The thread manager looks all 10 minutes if threads are not returned
302: * and reports the number to the log file.
303: *
304: * @see java.lang.Runnable#run()
305: */
306: public void run() {
307:
308: int max = 20;
309:
310: try {
311: // wait 30 seconds for the initial indexing
312: Thread.sleep(30000);
313: while ((m_startedCounter > m_returnedCounter)
314: && (max-- > 0)) {
315: Thread.sleep(30000);
316: // wait 30 seconds before we start checking for "dead" index threads
317: if (LOG.isWarnEnabled()) {
318: LOG
319: .warn(Messages
320: .get()
321: .getBundle()
322: .key(
323: Messages.LOG_WAITING_ABANDONED_THREADS_2,
324: new Integer(
325: m_abandonedCounter),
326: new Integer(
327: (m_startedCounter - m_returnedCounter))));
328: }
329: }
330: } catch (Exception exc) {
331: // ignore
332: }
333:
334: if (max > 0) {
335: if (LOG.isInfoEnabled()) {
336: LOG.info(Messages.get().getBundle().key(
337: Messages.LOG_THREADS_FINISHED_0));
338: }
339: } else {
340: LOG.error(Messages.get().getBundle().key(
341: Messages.LOG_THREADS_FINISHED_0,
342: new Integer(m_startedCounter - m_returnedCounter)));
343: }
344: }
345: }
|