0001: /*
0002: * File : $Source: /usr/local/cvs/opencms/src/org/opencms/search/CmsSearchManager.java,v $
0003: * Date : $Date: 2008-02-27 12:05:38 $
0004: * Version: $Revision: 1.64 $
0005: *
0006: * This library is part of OpenCms -
0007: * the Open Source Content Management System
0008: *
0009: * Copyright (c) 2002 - 2008 Alkacon Software GmbH (http://www.alkacon.com)
0010: *
0011: * This library is free software; you can redistribute it and/or
0012: * modify it under the terms of the GNU Lesser General Public
0013: * License as published by the Free Software Foundation; either
0014: * version 2.1 of the License, or (at your option) any later version.
0015: *
0016: * This library is distributed in the hope that it will be useful,
0017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
0018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
0019: * Lesser General Public License for more details.
0020: *
0021: * For further information about Alkacon Software GmbH, please see the
0022: * company website: http://www.alkacon.com
0023: *
0024: * For further information about OpenCms, please see the
0025: * project website: http://www.opencms.org
0026: *
0027: * You should have received a copy of the GNU Lesser General Public
0028: * License along with this library; if not, write to the Free Software
0029: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
0030: */
0031:
0032: package org.opencms.search;
0033:
0034: import org.opencms.db.CmsPublishedResource;
0035: import org.opencms.file.CmsObject;
0036: import org.opencms.file.CmsResource;
0037: import org.opencms.file.CmsResourceFilter;
0038: import org.opencms.i18n.CmsMessageContainer;
0039: import org.opencms.loader.CmsLoaderException;
0040: import org.opencms.loader.CmsResourceManager;
0041: import org.opencms.main.CmsEvent;
0042: import org.opencms.main.CmsException;
0043: import org.opencms.main.CmsIllegalArgumentException;
0044: import org.opencms.main.CmsIllegalStateException;
0045: import org.opencms.main.CmsLog;
0046: import org.opencms.main.I_CmsEventListener;
0047: import org.opencms.main.OpenCms;
0048: import org.opencms.report.CmsLogReport;
0049: import org.opencms.report.I_CmsReport;
0050: import org.opencms.scheduler.I_CmsScheduledJob;
0051: import org.opencms.search.documents.A_CmsVfsDocument;
0052: import org.opencms.search.documents.CmsExtractionResultCache;
0053: import org.opencms.search.documents.I_CmsDocumentFactory;
0054: import org.opencms.search.documents.I_CmsTermHighlighter;
0055: import org.opencms.search.fields.CmsSearchField;
0056: import org.opencms.search.fields.CmsSearchFieldConfiguration;
0057: import org.opencms.search.fields.CmsSearchFieldMapping;
0058: import org.opencms.security.CmsRole;
0059: import org.opencms.security.CmsRoleViolationException;
0060: import org.opencms.util.A_CmsModeStringEnumeration;
0061: import org.opencms.util.CmsStringUtil;
0062: import org.opencms.util.CmsUUID;
0063:
0064: import java.io.File;
0065: import java.io.IOException;
0066: import java.util.ArrayList;
0067: import java.util.Collections;
0068: import java.util.HashMap;
0069: import java.util.Iterator;
0070: import java.util.LinkedList;
0071: import java.util.List;
0072: import java.util.Locale;
0073: import java.util.Map;
0074: import java.util.TreeMap;
0075:
0076: import org.apache.commons.logging.Log;
0077: import org.apache.lucene.analysis.Analyzer;
0078: import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
0079: import org.apache.lucene.analysis.WhitespaceAnalyzer;
0080: import org.apache.lucene.index.IndexReader;
0081: import org.apache.lucene.index.IndexWriter;
0082: import org.apache.lucene.search.Similarity;
0083: import org.apache.lucene.store.FSDirectory;
0084:
0085: /**
0086: * Implements the general management and configuration of the search and
0087: * indexing facilities in OpenCms.<p>
0088: *
0089: * @author Alexander Kandzior
0090: * @author Carsten Weinholz
0091: *
0092: * @version $Revision: 1.64 $
0093: *
0094: * @since 6.0.0
0095: */
0096: public class CmsSearchManager implements I_CmsScheduledJob,
0097: I_CmsEventListener {
0098:
0099: /**
0100: * Enumeration class for force unlock types.<p>
0101: */
0102: public static final class CmsSearchForceUnlockMode extends
0103: A_CmsModeStringEnumeration {
0104:
0105: /** Force unlock type always. */
0106: public static final CmsSearchForceUnlockMode ALWAYS = new CmsSearchForceUnlockMode(
0107: "always");
0108:
0109: /** Force unlock type never. */
0110: public static final CmsSearchForceUnlockMode NEVER = new CmsSearchForceUnlockMode(
0111: "never");
0112:
0113: /** Force unlock tyoe only full. */
0114: public static final CmsSearchForceUnlockMode ONLYFULL = new CmsSearchForceUnlockMode(
0115: "onlyfull");
0116:
0117: /** serializable version id. */
0118: private static final long serialVersionUID = 74746076708908673L;
0119:
0120: /**
0121: * Creates a new force unlock type with the given name.<p>
0122: *
0123: * @param mode the mode id to use
0124: */
0125: protected CmsSearchForceUnlockMode(String mode) {
0126:
0127: super (mode);
0128: }
0129:
0130: /**
0131: * Returns the lock type for the given type value.<p>
0132: *
0133: * @param type the type value to get the lock type for
0134: *
0135: * @return the lock type for the given type value
0136: */
0137: public static CmsSearchForceUnlockMode valueOf(String type) {
0138:
0139: if (type.equals(ALWAYS.toString())) {
0140: return ALWAYS;
0141: } else if (type.equals(NEVER.toString())) {
0142: return NEVER;
0143: } else {
0144: return ONLYFULL;
0145: }
0146: }
0147: }
0148:
/** The default value used for generating search result excerpts (1024 chars). */
public static final int DEFAULT_EXCERPT_LENGTH = 1024;

/** The default value used for keeping the extraction results in the cache (672 hours = 4 weeks). */
public static final float DEFAULT_EXTRACTION_CACHE_MAX_AGE = 672.0f;

/** The default timeout value used for generating a document for the search index (60000 msec = 1 min). */
public static final int DEFAULT_TIMEOUT = 60000;

/** Scheduler parameter: Update only a specified list of indexes. */
public static final String JOB_PARAM_INDEXLIST = "indexList";

/** Scheduler parameter: Write the output of the update to the logfile. */
public static final String JOB_PARAM_WRITELOG = "writeLog";

/** The log object for this class. */
private static final Log LOG = CmsLog
        .getLog(CmsSearchManager.class);

/** The Admin cms object to index Cms resources. */
private CmsObject m_adminCms;

/** Configured analyzers for languages using &lt;analyzer&gt;, keyed by the locale of the analyzer. */
private HashMap m_analyzers;

/** A list of document type configurations ({@link CmsSearchDocumentType} instances). */
private List m_documentTypeConfigs;

/** A map of document factories keyed by their matching Cms resource types and/or mimetypes. */
private Map m_documentTypes;

/** The max age for extraction results to remain in the cache. */
private float m_extractionCacheMaxAge;

/** The cache for the extraction results. */
private CmsExtractionResultCache m_extractionResultCache;

/** Contains the available field configurations, keyed by the name of the configuration. */
private Map m_fieldConfigurations;

/** The force unlock type. */
private CmsSearchForceUnlockMode m_forceUnlockMode;

/** The class used to highlight the search terms in the excerpt of a search result. */
private I_CmsTermHighlighter m_highlighter;

/** A list of search indexes. */
private List m_indexes;

/** Seconds to wait for an index lock (initial value is 10). */
private int m_indexLockMaxWaitSeconds = 10;

/** Configured index sources, keyed by the name of the index source. */
private Map m_indexSources;

/** The max. char. length of the excerpt in the search result. */
private int m_maxExcerptLength;

/** Path to index files below WEB-INF/. */
private String m_path;

/** Timeout for abandoning indexing thread. */
private long m_timeout;
0212:
/**
 * Creates a search manager with empty configuration containers and default settings.<p>
 *
 * This no-argument constructor is also the one used when the class is called as cron job.<p>
 */
public CmsSearchManager() {

    // default settings
    m_maxExcerptLength = DEFAULT_EXCERPT_LENGTH;
    m_extractionCacheMaxAge = DEFAULT_EXTRACTION_CACHE_MAX_AGE;

    // configuration containers, all empty to begin with
    m_analyzers = new HashMap();
    m_documentTypes = new HashMap();
    m_documentTypeConfigs = new ArrayList();
    m_indexSources = new TreeMap();
    m_indexes = new ArrayList();
    m_fieldConfigurations = new HashMap();

    // the "standard" field configuration must always be available
    addFieldConfiguration(CmsSearchFieldConfiguration.DEFAULT_STANDARD);

    if (CmsLog.INIT.isInfoEnabled()) {
        String startMessage = Messages.get().getBundle().key(Messages.INIT_START_SEARCH_CONFIG_0);
        CmsLog.INIT.info(startMessage);
    }
}
0235:
0236: /**
0237: * Adds an analyzer.<p>
0238: *
0239: * @param analyzer an analyzer
0240: */
0241: public void addAnalyzer(CmsSearchAnalyzer analyzer) {
0242:
0243: m_analyzers.put(analyzer.getLocale(), analyzer);
0244:
0245: if (CmsLog.INIT.isInfoEnabled()) {
0246: CmsLog.INIT.info(Messages.get().getBundle().key(
0247: Messages.INIT_ADD_ANALYZER_2, analyzer.getLocale(),
0248: analyzer.getClassName()));
0249: }
0250: }
0251:
0252: /**
0253: * Adds a document type.<p>
0254: *
0255: * @param documentType a document type
0256: */
0257: public void addDocumentTypeConfig(CmsSearchDocumentType documentType) {
0258:
0259: m_documentTypeConfigs.add(documentType);
0260:
0261: if (CmsLog.INIT.isInfoEnabled()) {
0262: CmsLog.INIT.info(Messages.get().getBundle()
0263: .key(Messages.INIT_SEARCH_DOC_TYPES_2,
0264: documentType.getName(),
0265: documentType.getClassName()));
0266: }
0267: }
0268:
0269: /**
0270: * Adds a search field configuration to the search manager.<p>
0271: *
0272: * @param fieldConfiguration the search field configuration to add
0273: */
0274: public void addFieldConfiguration(
0275: CmsSearchFieldConfiguration fieldConfiguration) {
0276:
0277: m_fieldConfigurations.put(fieldConfiguration.getName(),
0278: fieldConfiguration);
0279: }
0280:
0281: /**
0282: * Adds a search index to the configuration.<p>
0283: *
0284: * @param searchIndex the search index to add
0285: */
0286: public void addSearchIndex(CmsSearchIndex searchIndex) {
0287:
0288: if ((searchIndex.getSources() == null)
0289: || (searchIndex.getPath() == null)) {
0290: if (OpenCms.getRunLevel() > OpenCms.RUNLEVEL_2_INITIALIZING) {
0291: try {
0292: searchIndex.initialize();
0293: } catch (CmsSearchException e) {
0294: // should never happen
0295: }
0296: }
0297: }
0298:
0299: // name: not null or emtpy and unique
0300: String name = searchIndex.getName();
0301: if (CmsStringUtil.isEmptyOrWhitespaceOnly(name)) {
0302: throw new CmsIllegalArgumentException(
0303: Messages
0304: .get()
0305: .container(
0306: Messages.ERR_SEARCHINDEX_CREATE_MISSING_NAME_0));
0307: }
0308: if (m_indexSources.keySet().contains(name)) {
0309: throw new CmsIllegalArgumentException(
0310: Messages
0311: .get()
0312: .container(
0313: Messages.ERR_SEARCHINDEX_CREATE_INVALID_NAME_1,
0314: name));
0315: }
0316:
0317: m_indexes.add(searchIndex);
0318:
0319: if (CmsLog.INIT.isInfoEnabled()) {
0320: CmsLog.INIT.info(Messages.get().getBundle().key(
0321: Messages.INIT_ADD_SEARCH_INDEX_2,
0322: searchIndex.getName(), searchIndex.getProject()));
0323: }
0324: }
0325:
0326: /**
0327: * Adds a search index source configuration.<p>
0328: *
0329: * @param searchIndexSource a search index source configuration
0330: */
0331: public void addSearchIndexSource(
0332: CmsSearchIndexSource searchIndexSource) {
0333:
0334: m_indexSources.put(searchIndexSource.getName(),
0335: searchIndexSource);
0336:
0337: if (CmsLog.INIT.isInfoEnabled()) {
0338: CmsLog.INIT.info(Messages.get().getBundle().key(
0339: Messages.INIT_SEARCH_INDEX_SOURCE_2,
0340: searchIndexSource.getName(),
0341: searchIndexSource.getIndexerClassName()));
0342: }
0343: }
0344:
0345: /**
0346: * Implements the event listener of this class.<p>
0347: *
0348: * @see org.opencms.main.I_CmsEventListener#cmsEvent(org.opencms.main.CmsEvent)
0349: */
0350: public void cmsEvent(CmsEvent event) {
0351:
0352: switch (event.getType()) {
0353: case I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES:
0354: List indexNames = null;
0355: if ((event.getData() != null)
0356: && CmsStringUtil
0357: .isNotEmptyOrWhitespaceOnly((String) event
0358: .getData()
0359: .get(
0360: I_CmsEventListener.KEY_INDEX_NAMES))) {
0361: indexNames = CmsStringUtil.splitAsList((String) event
0362: .getData().get(
0363: I_CmsEventListener.KEY_INDEX_NAMES),
0364: ",", true);
0365: }
0366: try {
0367: if (LOG.isDebugEnabled()) {
0368: LOG.debug(Messages.get().getBundle().key(
0369: Messages.LOG_EVENT_REBUILD_SEARCHINDEX_1,
0370: indexNames == null ? ""
0371: : CmsStringUtil.collectionAsString(
0372: indexNames, ",")));
0373: }
0374: if (indexNames == null) {
0375: rebuildAllIndexes(getEventReport(event));
0376: } else {
0377: rebuildIndexes(indexNames, getEventReport(event));
0378: }
0379: } catch (CmsException e) {
0380: if (LOG.isErrorEnabled()) {
0381: LOG.error(Messages.get().getBundle().key(
0382: Messages.ERR_EVENT_REBUILD_SEARCHINDEX_1,
0383: indexNames == null ? ""
0384: : CmsStringUtil.collectionAsString(
0385: indexNames, ",")), e);
0386: }
0387: }
0388: break;
0389: case I_CmsEventListener.EVENT_CLEAR_CACHES:
0390: if (LOG.isDebugEnabled()) {
0391: LOG.debug(Messages.get().getBundle().key(
0392: Messages.LOG_EVENT_CLEAR_CACHES_0));
0393: }
0394: break;
0395: case I_CmsEventListener.EVENT_PUBLISH_PROJECT:
0396: // event data contains a list of the published resources
0397: CmsUUID publishHistoryId = new CmsUUID((String) event
0398: .getData().get(I_CmsEventListener.KEY_PUBLISHID));
0399: if (LOG.isDebugEnabled()) {
0400: LOG.debug(Messages.get().getBundle().key(
0401: Messages.LOG_EVENT_PUBLISH_PROJECT_1,
0402: publishHistoryId));
0403: }
0404: updateAllIndexes(m_adminCms, publishHistoryId,
0405: getEventReport(event));
0406: if (LOG.isDebugEnabled()) {
0407: LOG.debug(Messages.get().getBundle().key(
0408: Messages.LOG_EVENT_PUBLISH_PROJECT_FINISHED_1,
0409: publishHistoryId));
0410: }
0411: break;
0412: default:
0413: // no operation
0414: }
0415: }
0416:
0417: /**
0418: * Returns an unmodifiable view of the map that contains the {@link CmsSearchAnalyzer} list.<p>
0419: *
0420: * The keys in the map are {@link Locale} objects, and the values are {@link CmsSearchAnalyzer} objects.
0421: *
0422: * @return an unmodifiable view of the Analyzers Map
0423: */
0424: public Map getAnalyzers() {
0425:
0426: return Collections.unmodifiableMap(m_analyzers);
0427: }
0428:
0429: /**
0430: * Returns the search analyzer for the given locale.<p>
0431: *
0432: * @param locale the locale to get the analyzer for
0433: *
0434: * @return the search analyzer for the given locale
0435: */
0436: public CmsSearchAnalyzer getCmsSearchAnalyzer(Locale locale) {
0437:
0438: return (CmsSearchAnalyzer) m_analyzers.get(locale);
0439: }
0440:
0441: /**
0442: * Returns the name of the directory below WEB-INF/ where the search indexes are stored.<p>
0443: *
0444: * @return the name of the directory below WEB-INF/ where the search indexes are stored
0445: */
0446: public String getDirectory() {
0447:
0448: return m_path;
0449: }
0450:
0451: /**
0452: * Returns a document type config.<p>
0453: *
0454: * @param name the name of the document type config
0455: * @return the document type config.
0456: */
0457: public CmsSearchDocumentType getDocumentTypeConfig(String name) {
0458:
0459: // this is really used only for the search manager GUI,
0460: // so performance is not an issue and no lookup map is generated
0461: for (int i = 0; i < m_documentTypeConfigs.size(); i++) {
0462: CmsSearchDocumentType type = (CmsSearchDocumentType) m_documentTypeConfigs
0463: .get(i);
0464: if (type.getName().equals(name)) {
0465: return type;
0466: }
0467: }
0468: return null;
0469: }
0470:
0471: /**
0472: * Returns an unmodifiable view (read-only) of the DocumentTypeConfigs Map.<p>
0473: *
0474: * @return an unmodifiable view (read-only) of the DocumentTypeConfigs Map
0475: */
0476: public List getDocumentTypeConfigs() {
0477:
0478: return Collections.unmodifiableList(m_documentTypeConfigs);
0479: }
0480:
0481: /**
0482: * Returns the maximum age a text extraction result is kept in the cache (in hours).<p>
0483: *
0484: * @return the maximum age a text extraction result is kept in the cache (in hours)
0485: */
0486: public float getExtractionCacheMaxAge() {
0487:
0488: return m_extractionCacheMaxAge;
0489: }
0490:
0491: /**
0492: * Returns the search field configuration with the given name.<p>
0493: *
0494: * In case no configuration is available with the given name, <code>null</code> is returned.<p>
0495: *
0496: * @param name the name to get the search field configuration for
0497: *
0498: * @return the search field configuration with the given name
0499: */
0500: public CmsSearchFieldConfiguration getFieldConfiguration(String name) {
0501:
0502: return (CmsSearchFieldConfiguration) m_fieldConfigurations
0503: .get(name);
0504: }
0505:
0506: /**
0507: * Returns the unmodifieable List of configured {@link CmsSearchFieldConfiguration} entries.<p>
0508: *
0509: * @return the unmodifieable List of configured {@link CmsSearchFieldConfiguration} entries
0510: */
0511: public List getFieldConfigurations() {
0512:
0513: List result = new ArrayList(m_fieldConfigurations.values());
0514: Collections.sort(result);
0515: return Collections.unmodifiableList(result);
0516: }
0517:
0518: /**
0519: * Returns the force unlock mode during indexing.<p>
0520: *
0521: * @return the force unlock mode during indexing
0522: */
0523: public CmsSearchForceUnlockMode getForceunlock() {
0524:
0525: return m_forceUnlockMode;
0526: }
0527:
0528: /**
0529: * Returns the highlighter.<p>
0530: *
0531: * @return the highlighter
0532: */
0533: public I_CmsTermHighlighter getHighlighter() {
0534:
0535: return m_highlighter;
0536: }
0537:
0538: /**
0539: * Returns the index belonging to the passed name.<p>
0540: * The index must exist already.
0541: *
0542: * @param indexName then name of the index
0543: * @return an object representing the desired index
0544: */
0545: public CmsSearchIndex getIndex(String indexName) {
0546:
0547: for (int i = 0, n = m_indexes.size(); i < n; i++) {
0548: CmsSearchIndex searchIndex = (CmsSearchIndex) m_indexes
0549: .get(i);
0550:
0551: if (indexName.equalsIgnoreCase(searchIndex.getName())) {
0552: return searchIndex;
0553: }
0554: }
0555:
0556: return null;
0557: }
0558:
0559: /**
0560: * Returns the seconds to wait for an index lock during an update operation.<p>
0561: *
0562: * @return the seconds to wait for an index lock during an update operation
0563: */
0564: public int getIndexLockMaxWaitSeconds() {
0565:
0566: return m_indexLockMaxWaitSeconds;
0567: }
0568:
0569: /**
0570: * Returns the names of all configured indexes.<p>
0571: *
0572: * @return list of names
0573: */
0574: public List getIndexNames() {
0575:
0576: List indexNames = new ArrayList();
0577: for (int i = 0, n = m_indexes.size(); i < n; i++) {
0578: indexNames.add(((CmsSearchIndex) m_indexes.get(i))
0579: .getName());
0580: }
0581:
0582: return indexNames;
0583: }
0584:
0585: /**
0586: * Returns a search index source for a specified source name.<p>
0587: *
0588: * @param sourceName the name of the index source
0589: * @return a search index source
0590: */
0591: public CmsSearchIndexSource getIndexSource(String sourceName) {
0592:
0593: return (CmsSearchIndexSource) m_indexSources.get(sourceName);
0594: }
0595:
0596: /**
0597: * Returns the max. excerpt length.<p>
0598: *
0599: * @return the max excerpt length
0600: */
0601: public int getMaxExcerptLength() {
0602:
0603: return m_maxExcerptLength;
0604: }
0605:
0606: /**
0607: * Returns an unmodifiable list of all configured <code>{@link CmsSearchIndex}</code> instances.<p>
0608: *
0609: * @return an unmodifiable list of all configured <code>{@link CmsSearchIndex}</code> instances
0610: */
0611: public List getSearchIndexes() {
0612:
0613: return Collections.unmodifiableList(m_indexes);
0614: }
0615:
0616: /**
0617: * Returns an unmodifiable view (read-only) of the SearchIndexSources Map.<p>
0618: *
0619: * @return an unmodifiable view (read-only) of the SearchIndexSources Map
0620: */
0621: public Map getSearchIndexSources() {
0622:
0623: return Collections.unmodifiableMap(m_indexSources);
0624: }
0625:
0626: /**
0627: * Returns the timeout to abandon threads indexing a resource.<p>
0628: *
0629: * @return the timeout to abandon threads indexing a resource
0630: */
0631: public long getTimeout() {
0632:
0633: return m_timeout;
0634: }
0635:
0636: /**
0637: * Initializes the search manager.<p>
0638: *
0639: * @param cms the cms object
0640: *
0641: * @throws CmsRoleViolationException in case the given opencms object does not have <code>{@link CmsRole#WORKPLACE_MANAGER}</code> permissions
0642: */
0643: public void initialize(CmsObject cms)
0644: throws CmsRoleViolationException {
0645:
0646: OpenCms.getRoleManager().checkRole(cms,
0647: CmsRole.WORKPLACE_MANAGER);
0648: try {
0649: // store the Admin cms to index Cms resources
0650: m_adminCms = OpenCms.initCmsObject(cms);
0651: } catch (CmsException e) {
0652: // this should never happen
0653: }
0654: // make sure the site root is the root site
0655: m_adminCms.getRequestContext().setSiteRoot("/");
0656:
0657: // create the extraction result cache
0658: m_extractionResultCache = new CmsExtractionResultCache(OpenCms
0659: .getSystemInfo().getAbsoluteRfsPathRelativeToWebInf(
0660: getDirectory()), "/extractCache");
0661:
0662: initializeIndexes();
0663:
0664: // register the modified default similarity implementation
0665: Similarity.setDefault(new CmsSearchSimilarity());
0666:
0667: // register this object as event listener
0668: OpenCms.addCmsEventListener(this , new int[] {
0669: I_CmsEventListener.EVENT_CLEAR_CACHES,
0670: I_CmsEventListener.EVENT_PUBLISH_PROJECT,
0671: I_CmsEventListener.EVENT_REBUILD_SEARCHINDEXES });
0672: }
0673:
0674: /**
0675: * Initializes all configured document types and search indexes.<p>
0676: *
0677: * This methods needs to be called if after a change in the index configuration has been made.
0678: */
0679: public void initializeIndexes() {
0680:
0681: initAvailableDocumentTypes();
0682: initSearchIndexes();
0683: }
0684:
0685: /**
0686: * Updates the indexes from as a scheduled job.<p>
0687: *
0688: * @param cms the OpenCms user context to use when reading resources from the VFS
0689: * @param parameters the parameters for the scheduled job
0690: *
0691: * @throws Exception if something goes wrong
0692: *
0693: * @return the String to write in the scheduler log
0694: *
0695: * @see org.opencms.scheduler.I_CmsScheduledJob#launch(org.opencms.file.CmsObject, java.util.Map)
0696: */
0697: public String launch(CmsObject cms, Map parameters)
0698: throws Exception {
0699:
0700: CmsSearchManager manager = OpenCms.getSearchManager();
0701:
0702: I_CmsReport report = null;
0703: boolean writeLog = Boolean.valueOf(
0704: (String) parameters.get(JOB_PARAM_WRITELOG))
0705: .booleanValue();
0706:
0707: if (writeLog) {
0708: report = new CmsLogReport(cms.getRequestContext()
0709: .getLocale(), CmsSearchManager.class);
0710: }
0711:
0712: List updateList = null;
0713: String indexList = (String) parameters.get(JOB_PARAM_INDEXLIST);
0714: if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(indexList)) {
0715: // index list has been provided as job parameter
0716: updateList = new ArrayList();
0717: String[] indexNames = CmsStringUtil.splitAsArray(indexList,
0718: '|');
0719: for (int i = 0; i < indexNames.length; i++) {
0720: // check if the index actually exists
0721: if (manager.getIndex(indexNames[i]) != null) {
0722: updateList.add(indexNames[i]);
0723: } else {
0724: if (LOG.isWarnEnabled()) {
0725: LOG.warn(Messages.get().getBundle().key(
0726: Messages.LOG_NO_INDEX_WITH_NAME_1,
0727: indexNames[i]));
0728: }
0729: }
0730: }
0731: }
0732:
0733: long startTime = System.currentTimeMillis();
0734:
0735: if (updateList == null) {
0736: // all indexes need to be updated
0737: manager.rebuildAllIndexes(report);
0738: } else {
0739: // rebuild only the selected indexes
0740: manager.rebuildIndexes(updateList, report);
0741: }
0742:
0743: long runTime = System.currentTimeMillis() - startTime;
0744:
0745: String finishMessage = Messages.get().getBundle().key(
0746: Messages.LOG_REBUILD_INDEXES_FINISHED_1,
0747: CmsStringUtil.formatRuntime(runTime));
0748:
0749: if (LOG.isInfoEnabled()) {
0750: LOG.info(finishMessage);
0751: }
0752: return finishMessage;
0753: }
0754:
0755: /**
0756: * Rebuilds (if required creates) all configured indexes.<p>
0757: *
0758: * @param report the report object to write messages (or <code>null</code>)
0759: *
0760: * @throws CmsException if something goes wrong
0761: */
0762: public synchronized void rebuildAllIndexes(I_CmsReport report)
0763: throws CmsException {
0764:
0765: CmsMessageContainer container = null;
0766: for (int i = 0, n = m_indexes.size(); i < n; i++) {
0767: // iterate all configured search indexes
0768: CmsSearchIndex searchIndex = (CmsSearchIndex) m_indexes
0769: .get(i);
0770: try {
0771: // update the index
0772: updateIndex(searchIndex, report, null);
0773: } catch (CmsException e) {
0774: container = new CmsMessageContainer(Messages.get(),
0775: Messages.ERR_INDEX_REBUILD_ALL_1,
0776: new Object[] { searchIndex.getName() });
0777: LOG.error(Messages.get().getBundle().key(
0778: Messages.ERR_INDEX_REBUILD_ALL_1,
0779: searchIndex.getName()), e);
0780: }
0781: }
0782: // clean up the extraction result cache
0783: m_extractionResultCache.cleanCache(m_extractionCacheMaxAge);
0784: if (container != null) {
0785: // throw stored exception
0786: throw new CmsSearchException(container);
0787: }
0788: }
0789:
0790: /**
0791: * Rebuilds (if required creates) the index with the given name.<p>
0792: *
0793: * @param indexName the name of the index to rebuild
0794: * @param report the report object to write messages (or <code>null</code>)
0795: *
0796: * @throws CmsException if something goes wrong
0797: */
0798: public synchronized void rebuildIndex(String indexName,
0799: I_CmsReport report) throws CmsException {
0800:
0801: // get the search index by name
0802: CmsSearchIndex index = getIndex(indexName);
0803: // update the index
0804: updateIndex(index, report, null);
0805: // clean up the extraction result cache
0806: m_extractionResultCache.cleanCache(m_extractionCacheMaxAge);
0807: }
0808:
0809: /**
0810: * Rebuilds (if required creates) the List of indexes with the given name.<p>
0811: *
0812: * @param indexNames the names (String) of the index to rebuild
0813: * @param report the report object to write messages (or <code>null</code>)
0814: *
0815: * @throws CmsException if something goes wrong
0816: */
0817: public synchronized void rebuildIndexes(List indexNames,
0818: I_CmsReport report) throws CmsException {
0819:
0820: Iterator i = indexNames.iterator();
0821: while (i.hasNext()) {
0822: String indexName = (String) i.next();
0823: // get the search index by name
0824: CmsSearchIndex index = getIndex(indexName);
0825: if (index != null) {
0826: // update the index
0827: updateIndex(index, report, null);
0828: } else {
0829: if (LOG.isWarnEnabled()) {
0830: LOG.warn(Messages.get().getBundle().key(
0831: Messages.LOG_NO_INDEX_WITH_NAME_1,
0832: indexName));
0833: }
0834: }
0835: }
0836: // clean up the extraction result cache
0837: m_extractionResultCache.cleanCache(m_extractionCacheMaxAge);
0838: }
0839:
0840: /**
0841: * Removes this fieldconfiguration from the OpenCms configuration (if it is not used any more).<p>
0842: *
0843: * @param fieldConfiguration the fieldconfiguration to remove from the configuration
0844: *
0845: * @return true if remove was successful, false if preconditions for removal are ok but the given
0846: * field configuration was unknown to the manager.
0847: *
0848: * @throws CmsIllegalStateException if the given field configuration is still used by at least one
0849: * <code>{@link CmsSearchIndex}</code>.
0850: *
0851: */
0852: public boolean removeSearchFieldConfiguration(
0853: CmsSearchFieldConfiguration fieldConfiguration)
0854: throws CmsIllegalStateException {
0855:
0856: // never remove the standard field configuration
0857: if (fieldConfiguration.getName().equals(
0858: CmsSearchFieldConfiguration.STR_STANDARD)) {
0859: throw new CmsIllegalStateException(
0860: Messages
0861: .get()
0862: .container(
0863: Messages.ERR_INDEX_CONFIGURATION_DELETE_STANDARD_1,
0864: fieldConfiguration.getName()));
0865: }
0866: // validation if removal will be granted
0867: Iterator itIndexes = m_indexes.iterator();
0868: CmsSearchIndex idx;
0869: // the list for collecting indexes that use the given fieldconfiguration
0870: List referrers = new LinkedList();
0871: CmsSearchFieldConfiguration refFieldConfig;
0872: while (itIndexes.hasNext()) {
0873: idx = (CmsSearchIndex) itIndexes.next();
0874: refFieldConfig = idx.getFieldConfiguration();
0875: if (refFieldConfig.equals(fieldConfiguration)) {
0876: referrers.add(idx);
0877: }
0878: }
0879: if (referrers.size() > 0) {
0880: throw new CmsIllegalStateException(Messages.get()
0881: .container(
0882: Messages.ERR_INDEX_CONFIGURATION_DELETE_2,
0883: fieldConfiguration.getName(),
0884: referrers.toString()));
0885: }
0886:
0887: // remove operation (no exception)
0888: return m_fieldConfigurations.remove(fieldConfiguration
0889: .getName()) != null;
0890:
0891: }
0892:
0893: /**
0894: * Removes a search field from the field configuration.<p>
0895: *
0896: * @param fieldConfiguration the field configuration
0897: * @param field field to remove from the field configuration
0898: *
0899: * @return true if remove was successful, false if preconditions for removal are ok but the given
0900: * field was unknown.
0901: *
0902: * @throws CmsIllegalStateException if the given field is the last field inside the given field configuration.
0903: */
0904: public boolean removeSearchFieldConfigurationField(
0905: CmsSearchFieldConfiguration fieldConfiguration,
0906: CmsSearchField field) throws CmsIllegalStateException {
0907:
0908: if (fieldConfiguration.getFields().size() < 2) {
0909: throw new CmsIllegalStateException(Messages.get()
0910: .container(
0911: Messages.ERR_CONFIGURATION_FIELD_DELETE_2,
0912: field.getName(),
0913: fieldConfiguration.getName()));
0914: } else {
0915:
0916: if (LOG.isInfoEnabled()) {
0917: LOG
0918: .info(Messages
0919: .get()
0920: .getBundle()
0921: .key(
0922: Messages.LOG_REMOVE_FIELDCONFIGURATION_FIELD_INDEX_2,
0923: field.getName(),
0924: fieldConfiguration.getName()));
0925: }
0926:
0927: return fieldConfiguration.getFields().remove(field);
0928: }
0929: }
0930:
0931: /**
0932: * Removes a search field mapping from the given field.<p>
0933: *
0934: * @param field the field
0935: * @param mapping mapping to remove from the field
0936: *
0937: * @return true if remove was successful, false if preconditions for removal are ok but the given
0938: * mapping was unknown.
0939: *
0940: * @throws CmsIllegalStateException if the given mapping is the last mapping inside the given field.
0941: */
0942: public boolean removeSearchFieldMapping(CmsSearchField field,
0943: CmsSearchFieldMapping mapping)
0944: throws CmsIllegalStateException {
0945:
0946: if (field.getMappings().size() < 2) {
0947: throw new CmsIllegalStateException(Messages.get()
0948: .container(Messages.ERR_FIELD_MAPPING_DELETE_2,
0949: mapping.getType().toString(),
0950: field.getName()));
0951: } else {
0952:
0953: if (LOG.isInfoEnabled()) {
0954: LOG.info(Messages.get().getBundle().key(
0955: Messages.LOG_REMOVE_FIELD_MAPPING_INDEX_2,
0956: mapping.toString(), field.getName()));
0957: }
0958: return field.getMappings().remove(mapping);
0959: }
0960: }
0961:
0962: /**
0963: * Removes a search index from the configuration.<p>
0964: *
0965: * @param searchIndex the search index to remove
0966: */
0967: public void removeSearchIndex(CmsSearchIndex searchIndex) {
0968:
0969: m_indexes.remove(searchIndex);
0970:
0971: if (LOG.isInfoEnabled()) {
0972: LOG.info(Messages.get().getBundle().key(
0973: Messages.LOG_REMOVE_SEARCH_INDEX_2,
0974: searchIndex.getName(), searchIndex.getProject()));
0975: }
0976: }
0977:
0978: /**
0979: * Removes all indexes whose names are contained in the given list.<p>
0980: *
0981: * @param indexNames the names of the indexes to remove
0982: */
0983: public void removeSearchIndexes(List indexNames) {
0984:
0985: Iterator i = indexNames.iterator();
0986: while (i.hasNext()) {
0987: String indexName = (String) i.next();
0988: // get the search index by name
0989: CmsSearchIndex index = getIndex(indexName);
0990: if (index != null) {
0991: // remove the index
0992: removeSearchIndex(index);
0993: } else {
0994: if (LOG.isWarnEnabled()) {
0995: LOG.warn(Messages.get().getBundle().key(
0996: Messages.LOG_NO_INDEX_WITH_NAME_1,
0997: indexName));
0998: }
0999: }
1000: }
1001: }
1002:
1003: /**
1004: * Removes this indexsource from the OpenCms configuration (if it is not used any more).<p>
1005: *
1006: * @param indexsource the indexsource to remove from the configuration
1007: *
1008: * @return true if remove was successful, false if preconditions for removal are ok but the given
1009: * indexsource was unknown to the manager.
1010: *
1011: * @throws CmsIllegalStateException if the given indexsource is still used by at least one
1012: * <code>{@link CmsSearchIndex}</code>.
1013: *
1014: */
1015: public boolean removeSearchIndexSource(
1016: CmsSearchIndexSource indexsource)
1017: throws CmsIllegalStateException {
1018:
1019: // validation if removal will be granted
1020: Iterator itIndexes = m_indexes.iterator();
1021: CmsSearchIndex idx;
1022: // the list for collecting indexes that use the given indexdsource
1023: List referrers = new LinkedList();
1024: // the current list of referred indexsources of the iterated index
1025: List refsources;
1026: while (itIndexes.hasNext()) {
1027: idx = (CmsSearchIndex) itIndexes.next();
1028: refsources = idx.getSources();
1029: if (refsources != null) {
1030: if (refsources.contains(indexsource)) {
1031: referrers.add(idx);
1032: }
1033: }
1034: }
1035: if (referrers.size() > 0) {
1036: throw new CmsIllegalStateException(
1037: Messages.get()
1038: .container(
1039: Messages.ERR_INDEX_SOURCE_DELETE_2,
1040: indexsource.getName(),
1041: referrers.toString()));
1042: }
1043:
1044: // remove operation (no exception)
1045: return m_indexSources.remove(indexsource.getName()) != null;
1046:
1047: }
1048:
1049: /**
1050: * Sets the name of the directory below WEB-INF/ where the search indexes are stored.<p>
1051: *
1052: * @param value the name of the directory below WEB-INF/ where the search indexes are stored
1053: */
1054: public void setDirectory(String value) {
1055:
1056: m_path = value;
1057: }
1058:
1059: /**
1060: * Sets the maximum age a text extraction result is kept in the cache (in hours).<p>
1061: *
1062: * @param extractionCacheMaxAge the maximum age for a text extraction result to set
1063: */
1064: public void setExtractionCacheMaxAge(float extractionCacheMaxAge) {
1065:
1066: m_extractionCacheMaxAge = extractionCacheMaxAge;
1067: }
1068:
1069: /**
1070: * Sets the maximum age a text extraction result is kept in the cache (in hours) as a String.<p>
1071: *
1072: * @param extractionCacheMaxAge the maximum age for a text extraction result to set
1073: */
1074: public void setExtractionCacheMaxAge(String extractionCacheMaxAge) {
1075:
1076: try {
1077: setExtractionCacheMaxAge(Float
1078: .parseFloat(extractionCacheMaxAge));
1079: } catch (NumberFormatException e) {
1080: LOG.error(Messages.get().getBundle().key(
1081: Messages.LOG_PARSE_EXTRACTION_CACHE_AGE_FAILED_2,
1082: extractionCacheMaxAge,
1083: new Float(DEFAULT_EXTRACTION_CACHE_MAX_AGE)), e);
1084: setExtractionCacheMaxAge(DEFAULT_EXTRACTION_CACHE_MAX_AGE);
1085: }
1086: }
1087:
1088: /**
1089: * Sets the unlock mode during indexing.<p>
1090: *
1091: * @param value the value
1092: */
1093: public void setForceunlock(String value) {
1094:
1095: m_forceUnlockMode = CmsSearchForceUnlockMode.valueOf(value);
1096: }
1097:
1098: /**
1099: * Sets the highlighter.<p>
1100: *
1101: * A highlighter is a class implementing org.opencms.search.documents.I_CmsTermHighlighter.<p>
1102: *
1103: * @param highlighter the package/class name of the highlighter
1104: */
1105: public void setHighlighter(String highlighter) {
1106:
1107: try {
1108: m_highlighter = (I_CmsTermHighlighter) Class.forName(
1109: highlighter).newInstance();
1110: } catch (Exception exc) {
1111: m_highlighter = null;
1112: }
1113: }
1114:
1115: /**
1116: * Sets the seconds to wait for an index lock during an update operation.<p>
1117: *
1118: * @param value the seconds to wait for an index lock during an update operation
1119: */
1120: public void setIndexLockMaxWaitSeconds(int value) {
1121:
1122: m_indexLockMaxWaitSeconds = value;
1123: }
1124:
1125: /**
1126: * Sets the max. excerpt length.<p>
1127: *
1128: * @param maxExcerptLength the max. excerpt length to set
1129: */
1130: public void setMaxExcerptLength(int maxExcerptLength) {
1131:
1132: m_maxExcerptLength = maxExcerptLength;
1133: }
1134:
1135: /**
1136: * Sets the max. excerpt length as a String.<p>
1137: *
1138: * @param maxExcerptLength the max. excerpt length to set
1139: */
1140: public void setMaxExcerptLength(String maxExcerptLength) {
1141:
1142: try {
1143: setMaxExcerptLength(Integer.parseInt(maxExcerptLength));
1144: } catch (Exception e) {
1145: LOG.error(Messages.get().getBundle().key(
1146: Messages.LOG_PARSE_EXCERPT_LENGTH_FAILED_2,
1147: maxExcerptLength,
1148: new Integer(DEFAULT_EXCERPT_LENGTH)), e);
1149: setMaxExcerptLength(DEFAULT_EXCERPT_LENGTH);
1150: }
1151: }
1152:
1153: /**
1154: * Sets the timeout to abandon threads indexing a resource.<p>
1155: *
1156: * @param value the timeout in milliseconds
1157: */
1158: public void setTimeout(long value) {
1159:
1160: m_timeout = value;
1161: }
1162:
1163: /**
1164: * Sets the timeout to abandon threads indexing a resource as a String.<p>
1165: *
1166: * @param value the timeout in milliseconds
1167: */
1168: public void setTimeout(String value) {
1169:
1170: try {
1171: setTimeout(Long.parseLong(value));
1172: } catch (Exception e) {
1173: LOG.error(Messages.get().getBundle().key(
1174: Messages.LOG_PARSE_TIMEOUT_FAILED_2, value,
1175: new Long(DEFAULT_TIMEOUT)), e);
1176: setTimeout(DEFAULT_TIMEOUT);
1177: }
1178: }
1179:
1180: /**
1181: * Unlocks the given index, depending on the setting of <code>m_forceUnlockMode</code> and the given mode.<p>
1182: *
1183: * @param index the index to check the lock for
1184: * @param report the report to write error messages on
1185: * @param mode the mode of the index process: if <code>true</code> the index is updated, otherwise it is rebuilt completely
1186: *
1187: * @throws CmsIndexException if unlocking of the index is impossible for some reasons
1188: */
1189: protected void forceIndexUnlock(CmsSearchIndex index,
1190: I_CmsReport report, boolean mode) throws CmsIndexException {
1191:
1192: File indexPath = new File(index.getPath());
1193: boolean indexLocked = true;
1194: // check if the target index path already exists
1195: if (indexPath.exists()) {
1196: // get the lock state of the given index
1197: try {
1198: indexLocked = IndexReader.isLocked(index.getPath());
1199: } catch (Exception e) {
1200: LOG.error(Messages.get().getBundle().key(
1201: Messages.LOG_IO_INDEX_READER_OPEN_2,
1202: index.getPath(), index.getName()), e);
1203: }
1204:
1205: // if index is unlocked do nothing
1206: if (indexLocked) {
1207: if ((m_forceUnlockMode != null)
1208: && m_forceUnlockMode
1209: .equals(CmsSearchForceUnlockMode.ALWAYS)) {
1210: try {
1211: // try to force unlock on the index
1212: IndexReader.unlock(FSDirectory
1213: .getDirectory(index.getPath()));
1214: } catch (Exception e) {
1215: // unable to force unlock of Lucene index, we can't continue this way
1216: CmsMessageContainer msg = Messages
1217: .get()
1218: .container(
1219: Messages.ERR_INDEX_LOCK_FAILED_1,
1220: index.getName());
1221: report.println(msg, I_CmsReport.FORMAT_ERROR);
1222: throw new CmsIndexException(msg, e);
1223: }
1224: } else if ((m_forceUnlockMode != null)
1225: && m_forceUnlockMode
1226: .equals(CmsSearchForceUnlockMode.NEVER)) {
1227: // wait if index will be unlocked during waiting
1228: indexLocked = waitIndexLock(index, report,
1229: indexLocked);
1230: // if index is still locked throw an exception
1231: if (indexLocked) {
1232: CmsMessageContainer msg = Messages
1233: .get()
1234: .container(
1235: Messages.ERR_INDEX_LOCK_FAILED_1,
1236: index.getName());
1237: report.println(msg, I_CmsReport.FORMAT_ERROR);
1238: throw new CmsIndexException(msg);
1239: }
1240: } else {
1241: if (mode) {
1242: // if index has to be updated wait if index will be unlocked during waiting
1243: indexLocked = waitIndexLock(index, report,
1244: indexLocked);
1245: }
1246: // check if the index is locked
1247: if (indexLocked) {
1248: // mode equals update throw exception
1249: if (mode) {
1250: // unable to lock the index for updating
1251: CmsMessageContainer msg = Messages
1252: .get()
1253: .container(
1254: Messages.ERR_INDEX_LOCK_FAILED_1,
1255: index.getName());
1256: report.println(msg,
1257: I_CmsReport.FORMAT_ERROR);
1258: throw new CmsIndexException(msg);
1259: } else {
1260: try {
1261: // try to force unlock on the index
1262: IndexReader.unlock(FSDirectory
1263: .getDirectory(index.getPath()));
1264: } catch (Exception e) {
1265: // unable to force unlock of Lucene index, we can't continue this way
1266: CmsMessageContainer msg = Messages
1267: .get()
1268: .container(
1269: Messages.ERR_INDEX_LOCK_FAILED_1,
1270: index.getName());
1271: report.println(msg,
1272: I_CmsReport.FORMAT_ERROR);
1273: throw new CmsIndexException(msg, e);
1274: }
1275: }
1276: }
1277: }
1278: }
1279: }
1280: }
1281:
1282: /**
1283: * Returns an analyzer for the given language.<p>
1284: *
1285: * The analyzer is selected according to the analyzer configuration.<p>
1286: *
1287: * @param locale the locale to get the analyzer for
1288: * @return the appropriate lucene analyzer
1289: * @throws CmsIndexException if something goes wrong
1290: */
1291: protected Analyzer getAnalyzer(Locale locale)
1292: throws CmsIndexException {
1293:
1294: Analyzer analyzer = null;
1295: String className = null;
1296:
1297: CmsSearchAnalyzer analyzerConf = (CmsSearchAnalyzer) m_analyzers
1298: .get(locale);
1299: if (analyzerConf == null) {
1300: throw new CmsIndexException(Messages.get().container(
1301: Messages.ERR_ANALYZER_NOT_FOUND_1, locale));
1302: }
1303:
1304: try {
1305: className = analyzerConf.getClassName();
1306: Class analyzerClass = Class.forName(className);
1307:
1308: // added param for snowball analyzer
1309: String stemmerAlgorithm = analyzerConf
1310: .getStemmerAlgorithm();
1311: if (stemmerAlgorithm != null) {
1312: analyzer = (Analyzer) analyzerClass
1313: .getDeclaredConstructor(
1314: new Class[] { String.class })
1315: .newInstance(new Object[] { stemmerAlgorithm });
1316: } else {
1317: analyzer = (Analyzer) analyzerClass.newInstance();
1318: }
1319:
1320: } catch (Exception e) {
1321: throw new CmsIndexException(Messages.get().container(
1322: Messages.ERR_LOAD_ANALYZER_1, className), e);
1323: }
1324:
1325: // change analyzer for root path field to Whitespace analyzer
1326: PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
1327: analyzer);
1328: wrapper.addAnalyzer(CmsSearchField.FIELD_ROOT,
1329: new WhitespaceAnalyzer());
1330:
1331: return wrapper;
1332: }
1333:
1334: /**
1335: * Returns a lucene document factory for given resource.<p>
1336: *
1337: * The type of the document factory is selected by the type of the resource
1338: * and the MIME type of the resource content, according to the configuration in <code>opencms-search.xml</code>.<p>
1339: *
1340: * @param resource a cms resource
1341: * @return a lucene document factory or null
1342: */
1343: protected I_CmsDocumentFactory getDocumentFactory(
1344: CmsResource resource) {
1345:
1346: // first get the MIME type of the resource
1347: String mimeType = OpenCms.getResourceManager().getMimeType(
1348: resource.getRootPath(), null,
1349: CmsResourceManager.MIMETYPE_TEXT);
1350: I_CmsDocumentFactory result = null;
1351: String typeName = null;
1352: try {
1353: typeName = OpenCms.getResourceManager().getResourceType(
1354: resource.getTypeId()).getTypeName();
1355: } catch (CmsLoaderException e) {
1356: // ignore, unknown resource type, resource can not be indexed
1357: }
1358: if (typeName != null) {
1359: // create the factory lookup key for the document
1360: String documentTypeKey = A_CmsVfsDocument.getDocumentKey(
1361: typeName, mimeType);
1362: // check if a setting is available for this specific MIME type
1363: result = (I_CmsDocumentFactory) m_documentTypes
1364: .get(documentTypeKey);
1365: if (result == null) {
1366: // no setting is available, try to use a generic setting without MIME type
1367: result = (I_CmsDocumentFactory) m_documentTypes
1368: .get(A_CmsVfsDocument.getDocumentKey(typeName,
1369: null));
1370: // please note: the result may still be null
1371: }
1372: }
1373: return result;
1374: }
1375:
1376: /**
1377: * Returns the list of names of all configured documenttypes.<p>
1378: *
1379: * @return the list of names of all configured documenttypes
1380: */
1381: protected List getDocumentTypes() {
1382:
1383: List names = new ArrayList();
1384: for (Iterator i = m_documentTypes.values().iterator(); i
1385: .hasNext();) {
1386: I_CmsDocumentFactory factory = (I_CmsDocumentFactory) i
1387: .next();
1388: names.add(factory.getName());
1389: }
1390:
1391: return names;
1392: }
1393:
1394: /**
1395: * Initializes the available Cms resource types to be indexed.<p>
1396: *
1397: * A map stores document factories keyed by a string representing
1398: * a colon separated list of Cms resource types and/or mimetypes.<p>
1399: *
1400: * The keys of this map are used to trigger a document factory to convert
1401: * a Cms resource into a Lucene index document.<p>
1402: *
1403: * A document factory is a class implementing the interface
1404: * {@link org.opencms.search.documents.I_CmsDocumentFactory}.<p>
1405: */
1406: protected void initAvailableDocumentTypes() {
1407:
1408: CmsSearchDocumentType documenttype = null;
1409: String className = null;
1410: String name = null;
1411: I_CmsDocumentFactory documentFactory = null;
1412: List resourceTypes = null;
1413: List mimeTypes = null;
1414: Class c = null;
1415:
1416: m_documentTypes = new HashMap();
1417:
1418: for (int i = 0, n = m_documentTypeConfigs.size(); i < n; i++) {
1419:
1420: documenttype = (CmsSearchDocumentType) m_documentTypeConfigs
1421: .get(i);
1422: name = documenttype.getName();
1423:
1424: try {
1425: className = documenttype.getClassName();
1426: resourceTypes = documenttype.getResourceTypes();
1427: mimeTypes = documenttype.getMimeTypes();
1428:
1429: if (name == null) {
1430: throw new CmsIndexException(Messages.get()
1431: .container(Messages.ERR_DOCTYPE_NO_NAME_0));
1432: }
1433: if (className == null) {
1434: throw new CmsIndexException(
1435: Messages
1436: .get()
1437: .container(
1438: Messages.ERR_DOCTYPE_NO_CLASS_DEF_0));
1439: }
1440: if (resourceTypes.size() == 0) {
1441: throw new CmsIndexException(
1442: Messages
1443: .get()
1444: .container(
1445: Messages.ERR_DOCTYPE_NO_RESOURCETYPE_DEF_0));
1446: }
1447:
1448: try {
1449: c = Class.forName(className);
1450: documentFactory = (I_CmsDocumentFactory) c
1451: .getConstructor(
1452: new Class[] { String.class })
1453: .newInstance(new Object[] { name });
1454: } catch (ClassNotFoundException exc) {
1455: throw new CmsIndexException(Messages.get()
1456: .container(
1457: Messages.ERR_DOCCLASS_NOT_FOUND_1,
1458: className), exc);
1459: } catch (Exception exc) {
1460: throw new CmsIndexException(Messages.get()
1461: .container(Messages.ERR_DOCCLASS_INIT_1,
1462: className), exc);
1463: }
1464:
1465: if (documentFactory.isUsingCache()) {
1466: // init cache if used by the factory
1467: documentFactory.setCache(m_extractionResultCache);
1468: }
1469:
1470: for (Iterator key = documentFactory.getDocumentKeys(
1471: resourceTypes, mimeTypes).iterator(); key
1472: .hasNext();) {
1473: m_documentTypes.put(key.next(), documentFactory);
1474: }
1475:
1476: } catch (CmsException e) {
1477: if (LOG.isWarnEnabled()) {
1478: LOG
1479: .warn(
1480: Messages
1481: .get()
1482: .getBundle()
1483: .key(
1484: Messages.LOG_DOCTYPE_CONFIG_FAILED_1,
1485: name), e);
1486: }
1487: }
1488: }
1489: }
1490:
1491: /**
1492: * Initializes the configured search indexes.<p>
1493: *
1494: * This initializes also the list of Cms resources types
1495: * to be indexed by an index source.<p>
1496: */
1497: protected void initSearchIndexes() {
1498:
1499: CmsSearchIndex index = null;
1500: for (int i = 0, n = m_indexes.size(); i < n; i++) {
1501: index = (CmsSearchIndex) m_indexes.get(i);
1502: // reset disabled flag
1503: index.setEnabled(true);
1504: // check if the index has been configured correctly
1505: if (index.checkConfiguration(m_adminCms)) {
1506: // the index is configured correctly
1507: try {
1508: index.initialize();
1509: } catch (CmsException e) {
1510: // in this case the index will be disabled
1511: if (CmsLog.INIT.isInfoEnabled()) {
1512: CmsLog.INIT
1513: .info(
1514: Messages
1515: .get()
1516: .getBundle()
1517: .key(
1518: Messages.INIT_SEARCH_INIT_FAILED_1,
1519: index.getName()),
1520: e);
1521: }
1522: }
1523: }
1524: if (CmsLog.INIT.isInfoEnabled()) {
1525: // output a log message if the index was successfully configured or not
1526: if (index.isEnabled()) {
1527: CmsLog.INIT.info(Messages.get().getBundle().key(
1528: Messages.INIT_INDEX_CONFIGURED_2,
1529: index.getName(), index.getProject()));
1530: } else {
1531: CmsLog.INIT.info(Messages.get().getBundle().key(
1532: Messages.INIT_INDEX_NOT_CONFIGURED_2,
1533: index.getName(), index.getProject()));
1534: }
1535: }
1536: }
1537: }
1538:
1539: /**
1540: * Incrementally updates all indexes that have their rebuild mode set to <code>"auto"</code>
1541: * after resources have been published.<p>
1542: *
1543: * @param adminCms an OpenCms user context with Admin permissions
1544: * @param publishHistoryId the history ID of the published project
1545: * @param report the report to write the output to
1546: */
1547: protected synchronized void updateAllIndexes(CmsObject adminCms,
1548: CmsUUID publishHistoryId, I_CmsReport report) {
1549:
1550: List publishedResources;
1551: try {
1552: // read the list of all published resources
1553: publishedResources = adminCms
1554: .readPublishedResources(publishHistoryId);
1555: } catch (CmsException e) {
1556: LOG.error(Messages.get().getBundle().key(
1557: Messages.LOG_READING_CHANGED_RESOURCES_FAILED_1,
1558: publishHistoryId), e);
1559: return;
1560: }
1561:
1562: List updateResources = new ArrayList();
1563: Iterator itPubRes = publishedResources.iterator();
1564: while (itPubRes.hasNext()) {
1565: CmsPublishedResource res = (CmsPublishedResource) itPubRes
1566: .next();
1567: if (res.isFolder() || res.getState().isUnchanged()) {
1568: // folders and unchanged resources don't need to be indexed after publish
1569: continue;
1570: }
1571: if (res.getState().isDeleted() || res.getState().isNew()
1572: || res.getState().isChanged()) {
1573: if (updateResources.contains(res)) {
1574: // resource may have been added as a sibling of another resource
1575: // in this case we make sure to use the value from the publih list because of the "deleted" flag
1576: updateResources.remove(res);
1577: // "equals()" implementation of published resource only checks for path,
1578: // so the removed value may have a different "deleted" or "modified" status value
1579: updateResources.add(res);
1580: } else {
1581: // resource not yet contained in the list
1582: updateResources.add(res);
1583: // check for the siblings (not for deleted resources, these are already gone)
1584: if (!res.getState().isDeleted()
1585: && (res.getSiblingCount() > 1)) {
1586: // this resource has siblings
1587: try {
1588: // read siblings from the online project
1589: List siblings = adminCms.readSiblings(res
1590: .getRootPath(),
1591: CmsResourceFilter.ALL);
1592: Iterator itSib = siblings.iterator();
1593: while (itSib.hasNext()) {
1594: // check all siblings
1595: CmsResource sibling = (CmsResource) itSib
1596: .next();
1597: CmsPublishedResource sib = new CmsPublishedResource(
1598: sibling);
1599: if (!updateResources.contains(sib)) {
1600: // ensure sibling is added only once
1601: updateResources.add(sib);
1602: }
1603: }
1604: } catch (CmsException e) {
1605: // ignore, just use the original resource
1606: if (LOG.isWarnEnabled()) {
1607: LOG
1608: .warn(
1609: Messages
1610: .get()
1611: .getBundle()
1612: .key(
1613: Messages.LOG_UNABLE_TO_READ_SIBLINGS_1,
1614: res
1615: .getRootPath()),
1616: e);
1617: }
1618: }
1619: }
1620: }
1621: }
1622: }
1623:
1624: if (!updateResources.isEmpty()) {
1625: // sort the resource to update
1626: Collections.sort(updateResources);
1627: // only update the indexes if the list of remaining published resources is not empty
1628: Iterator i = m_indexes.iterator();
1629: while (i.hasNext()) {
1630: CmsSearchIndex index = (CmsSearchIndex) i.next();
1631: if (CmsSearchIndex.REBUILD_MODE_AUTO.equals(index
1632: .getRebuildMode())) {
1633: // only update indexes which have the rebuild mode set to "auto"
1634: try {
1635: updateIndex(index, report, updateResources);
1636: } catch (CmsException e) {
1637: LOG.error(Messages.get().getBundle().key(
1638: Messages.LOG_UPDATE_INDEX_FAILED_1,
1639: index.getName()), e);
1640: }
1641: }
1642: }
1643: }
1644: // clean up the extraction result cache
1645: m_extractionResultCache.cleanCache(m_extractionCacheMaxAge);
1646: }
1647:
1648: /**
1649: * Returns the report in the given event data, if <code>null</code>
1650: * a new log report is used.<p>
1651: *
1652: * @param event the event to get the report for
1653: *
1654: * @return the report
1655: */
1656: private I_CmsReport getEventReport(CmsEvent event) {
1657:
1658: I_CmsReport report = null;
1659: if (event.getData() != null) {
1660: report = (I_CmsReport) event.getData().get(
1661: I_CmsEventListener.KEY_REPORT);
1662: }
1663: if (report == null) {
1664: report = new CmsLogReport(Locale.ENGLISH, getClass());
1665: }
1666: return report;
1667: }
1668:
1669: /**
1670: * Updates (if required creates) the index with the given name.<p>
1671: *
1672: * If the optional List of <code>{@link CmsPublishedResource}</code> instances is provided, the index will be
1673: * incrementally updated for these resources only. If this List is <code>null</code> or empty,
1674: * the index will be fully rebuilt.<p>
1675: *
1676: * @param index the index to update or rebuild
1677: * @param report the report to write output messages to
1678: * @param resourcesToIndex an (optional) list of <code>{@link CmsPublishedResource}</code> objects to update in the index
1679: *
1680: * @throws CmsException if something goes wrong
1681: */
1682: private void updateIndex(CmsSearchIndex index, I_CmsReport report,
1683: List resourcesToIndex) throws CmsException {
1684:
1685: // copy the stored admin context for the indexing
1686: CmsObject cms = OpenCms.initCmsObject(m_adminCms);
1687: // make sure a report is available
1688: if (report == null) {
1689: report = new CmsLogReport(cms.getRequestContext()
1690: .getLocale(), CmsSearchManager.class);
1691: }
1692:
1693: // check if the index has been configured correctly
1694: if (!index.checkConfiguration(cms)) {
1695: // the index is disabled
1696: return;
1697: }
1698:
1699: // set site root and project for this index
1700: cms.getRequestContext().setSiteRoot("/");
1701: // switch to the index project
1702: cms.getRequestContext().setCurrentProject(
1703: cms.readProject(index.getProject()));
1704:
1705: if ((resourcesToIndex == null) || resourcesToIndex.isEmpty()) {
1706: // rebuild the complete index
1707:
1708: forceIndexUnlock(index, report, false);
1709: // create a new thread manager for the indexing threads
1710: CmsIndexingThreadManager threadManager = new CmsIndexingThreadManager(
1711: m_timeout);
1712:
1713: IndexWriter writer = null;
1714: try {
1715: // create a new index writer
1716: writer = index.getIndexWriter(true);
1717:
1718: // output start information on the report
1719: report.println(Messages.get().container(
1720: Messages.RPT_SEARCH_INDEXING_REBUILD_BEGIN_1,
1721: index.getName()), I_CmsReport.FORMAT_HEADLINE);
1722:
1723: // iterate all configured index sources of this index
1724: Iterator sources = index.getSources().iterator();
1725: while (sources.hasNext()) {
1726: // get the next index source
1727: CmsSearchIndexSource source = (CmsSearchIndexSource) sources
1728: .next();
1729: // create the indexer
1730: I_CmsIndexer indexer = source.getIndexer()
1731: .newInstance(cms, report, index);
1732: // new index creation, use all resources from the index source
1733: indexer.rebuildIndex(writer, threadManager, source);
1734: }
1735:
1736: // wait for indexing threads to finish
1737: while (threadManager.isRunning()) {
1738: try {
1739: wait(1000);
1740: } catch (InterruptedException e) {
1741: // just continue with the loop after interruption
1742: }
1743: }
1744: // optimize the generated index
1745: try {
1746: writer.optimize();
1747: } catch (IOException e) {
1748: if (LOG.isWarnEnabled()) {
1749: LOG
1750: .warn(
1751: Messages
1752: .get()
1753: .getBundle()
1754: .key(
1755: Messages.LOG_IO_INDEX_WRITER_OPTIMIZE_1,
1756: index.getPath(),
1757: index.getName()),
1758: e);
1759: }
1760: }
1761:
1762: // output finish information on the report
1763: report.println(Messages.get().container(
1764: Messages.RPT_SEARCH_INDEXING_REBUILD_END_1,
1765: index.getName()), I_CmsReport.FORMAT_HEADLINE);
1766:
1767: } finally {
1768: if (writer != null) {
1769: try {
1770: writer.close();
1771: } catch (IOException e) {
1772: if (LOG.isWarnEnabled()) {
1773: LOG
1774: .warn(
1775: Messages
1776: .get()
1777: .getBundle()
1778: .key(
1779: Messages.LOG_IO_INDEX_WRITER_CLOSE_2,
1780: index
1781: .getPath(),
1782: index
1783: .getName()),
1784: e);
1785: }
1786: }
1787: }
1788: }
1789:
1790: // show information about indexing runtime
1791: threadManager.reportStatistics(report);
1792:
1793: } else {
1794: // update the existing index
1795: List updateCollections = new ArrayList();
1796:
1797: boolean hasResourcesToDelete = false;
1798: boolean hasResourcesToUpdate = false;
1799:
1800: // iterate all configured index sources of this index
1801: Iterator sources = index.getSources().iterator();
1802: while (sources.hasNext()) {
1803: // get the next index source
1804: CmsSearchIndexSource source = (CmsSearchIndexSource) sources
1805: .next();
1806: // create the indexer
1807: I_CmsIndexer indexer = source.getIndexer().newInstance(
1808: cms, report, index);
1809: // collect the resources to update
1810: CmsSearchIndexUpdateData updateData = indexer
1811: .getUpdateData(source, resourcesToIndex);
1812: if (!updateData.isEmpty()) {
1813: // add the update collection to the internal pipeline
1814: updateCollections.add(updateData);
1815: hasResourcesToDelete = hasResourcesToDelete
1816: | updateData.hasResourcesToDelete();
1817: hasResourcesToUpdate = hasResourcesToUpdate
1818: | updateData.hasResourceToUpdate();
1819: }
1820: }
1821:
1822: if (hasResourcesToDelete || hasResourcesToUpdate) {
1823: // output start information on the report
1824: report.println(Messages.get().container(
1825: Messages.RPT_SEARCH_INDEXING_UPDATE_BEGIN_1,
1826: index.getName()), I_CmsReport.FORMAT_HEADLINE);
1827: }
1828:
1829: forceIndexUnlock(index, report, true);
1830:
1831: if (hasResourcesToDelete) {
1832: // delete the resource from the index
1833: IndexReader reader = null;
1834: try {
1835: reader = IndexReader.open(index.getPath());
1836: } catch (IOException e) {
1837: LOG.error(Messages.get().getBundle().key(
1838: Messages.LOG_IO_INDEX_READER_OPEN_2,
1839: index.getPath(), index.getName()), e);
1840: }
1841: if (reader != null) {
1842: try {
1843: Iterator i = updateCollections.iterator();
1844: while (i.hasNext()) {
1845: CmsSearchIndexUpdateData updateCollection = (CmsSearchIndexUpdateData) i
1846: .next();
1847: if (updateCollection.hasResourcesToDelete()) {
1848: updateCollection
1849: .getIndexer()
1850: .deleteResources(
1851: reader,
1852: updateCollection
1853: .getResourcesToDelete());
1854: }
1855: }
1856: } finally {
1857: try {
1858: // close the reader after all resources have been deleted
1859: reader.close();
1860: } catch (IOException e) {
1861: LOG
1862: .error(
1863: Messages
1864: .get()
1865: .getBundle()
1866: .key(
1867: Messages.LOG_IO_INDEX_READER_CLOSE_2,
1868: index
1869: .getPath(),
1870: index
1871: .getName()),
1872: e);
1873: }
1874: }
1875: }
1876: }
1877:
1878: if (hasResourcesToUpdate) {
1879: // create a new thread manager
1880: CmsIndexingThreadManager threadManager = new CmsIndexingThreadManager(
1881: m_timeout);
1882:
1883: IndexWriter writer = null;
1884: try {
1885: // create an index writer that updates the current index
1886: writer = index.getIndexWriter(false);
1887:
1888: Iterator i = updateCollections.iterator();
1889: while (i.hasNext()) {
1890: CmsSearchIndexUpdateData updateCollection = (CmsSearchIndexUpdateData) i
1891: .next();
1892: if (updateCollection.hasResourceToUpdate()) {
1893: updateCollection
1894: .getIndexer()
1895: .updateResources(
1896: writer,
1897: threadManager,
1898: updateCollection
1899: .getResourcesToUpdate());
1900: }
1901: }
1902:
1903: // wait for indexing threads to finish
1904: while (threadManager.isRunning()) {
1905: try {
1906: wait(1000);
1907: } catch (InterruptedException e) {
1908: // just continue with the loop after interruption
1909: }
1910: }
1911:
1912: } finally {
1913: if (writer != null) {
1914: try {
1915: writer.close();
1916: } catch (IOException e) {
1917: LOG
1918: .error(
1919: Messages
1920: .get()
1921: .getBundle()
1922: .key(
1923: Messages.LOG_IO_INDEX_WRITER_CLOSE_2,
1924: index
1925: .getPath(),
1926: index
1927: .getName()),
1928: e);
1929: }
1930: }
1931: }
1932: }
1933:
1934: if (hasResourcesToDelete || hasResourcesToUpdate) {
1935: // output finish information on the report
1936: report.println(Messages.get().container(
1937: Messages.RPT_SEARCH_INDEXING_UPDATE_END_1,
1938: index.getName()), I_CmsReport.FORMAT_HEADLINE);
1939: }
1940: }
1941: }
1942:
1943: /**
1944: * Checks if a given index is locked; if so, waits for a number of seconds and checks again,
1945: * until either the index is unlocked or a limit of seconds set by <code>{@link #setIndexLockMaxWaitSeconds(int)}</code>
1946: * is reached and returns the lock state of the index.<p>
1947: *
1948: * @param index the index to check the lock for
1949: * @param report the report to write error messages on
1950: * @param indexLocked the boolean value if the index is locked
1951: *
1952: * @return the lock state of the index
1953: */
1954: private boolean waitIndexLock(CmsSearchIndex index,
1955: I_CmsReport report, boolean indexLocked) {
1956:
1957: try {
1958: int lockSecs = 0;
1959: while (indexLocked
1960: && (lockSecs < m_indexLockMaxWaitSeconds)) {
1961: indexLocked = IndexReader.isLocked(index.getPath());
1962: if (indexLocked) {
1963: // index is still locked, wait one second
1964: report.println(Messages.get().container(
1965: Messages.RPT_SEARCH_INDEXING_LOCK_WAIT_2,
1966: index.getName(),
1967: new Integer(m_indexLockMaxWaitSeconds
1968: - lockSecs)),
1969: I_CmsReport.FORMAT_ERROR);
1970: // sleep one second
1971: Thread.sleep(1000);
1972: lockSecs++;
1973: }
1974: }
1975: } catch (Exception e) {
1976: LOG.error(Messages.get().getBundle().key(
1977: Messages.LOG_IO_INDEX_READER_OPEN_2,
1978: index.getPath(), index.getName()), e);
1979: }
1980: return indexLocked;
1981: }
1982: }
|