001: /*
002: * File : $Source: /usr/local/cvs/opencms/src/org/opencms/search/CmsSearch.java,v $
003: * Date : $Date: 2008-02-27 12:05:38 $
004: * Version: $Revision: 1.44 $
005: *
006: * This library is part of OpenCms -
007: * the Open Source Content Management System
008: *
009: * Copyright (c) 2002 - 2008 Alkacon Software GmbH (http://www.alkacon.com)
010: *
011: * This library is free software; you can redistribute it and/or
012: * modify it under the terms of the GNU Lesser General Public
013: * License as published by the Free Software Foundation; either
014: * version 2.1 of the License, or (at your option) any later version.
015: *
016: * This library is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019: * Lesser General Public License for more details.
020: *
021: * For further information about Alkacon Software GmbH, please see the
022: * company website: http://www.alkacon.com
023: *
024: * For further information about OpenCms, please see the
025: * project website: http://www.opencms.org
026: *
027: * You should have received a copy of the GNU Lesser General Public
028: * License along with this library; if not, write to the Free Software
029: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
030: */
031:
032: package org.opencms.search;
033:
034: import org.opencms.file.CmsObject;
035: import org.opencms.i18n.CmsEncoder;
036: import org.opencms.main.CmsException;
037: import org.opencms.main.CmsIllegalArgumentException;
038: import org.opencms.main.CmsLog;
039: import org.opencms.main.OpenCms;
040: import org.opencms.util.CmsStringUtil;
041:
042: import java.util.Arrays;
043: import java.util.Collections;
044: import java.util.Iterator;
045: import java.util.LinkedList;
046: import java.util.List;
047: import java.util.Map;
048: import java.util.TreeMap;
049:
050: import org.apache.commons.logging.Log;
051: import org.apache.lucene.search.Sort;
052:
053: /**
054: * Helper class to access the search facility within a jsp.<p>
055: *
056: * Typically, the following fields are available for searching:
057: * <ul>
058: * <li>title - the title of a resource</li>
059: * <li>keywords - the keywords of a resource</li>
060: * <li>description - the description of a resource</li>
061: * <li>content - the aggregated content of a resource</li>
062: * <li>created - the creation date of a resource</li>
063: * <li>lastmodified - the date of the last modification of a resource</li>
064: * <li>path - the path to display the resource</li>
065: * <li>channel - the channel of a resource</li>
066: * <li>contentdefinition - the name of the content definition class of a resource</li>
067: * </ul>
068: *
069: * @author Carsten Weinholz
070: * @author Thomas Weckert
071: *
072: * @version $Revision: 1.44 $
073: *
074: * @since 6.0.0
075: */
076: public class CmsSearch {
077:
078: /** The log object for this class. */
079: private static final Log LOG = CmsLog.getLog(CmsSearch.class);
080:
081: /** The result categories of a search. */
082: protected Map m_categoriesFound;
083:
084: /** The cms object. */
085: protected transient CmsObject m_cms;
086:
087: /** The latest exception. */
088: protected Exception m_lastException;
089:
090: /** The URL which leads to the next result page. */
091: protected String m_nextUrl;
092:
093: /** The number of pages for the result list. */
094: protected int m_pageCount;
095:
096: /** The restriction for the search parameters, used for "search in seach result". */
097: protected CmsSearchParameters m_parameterRestriction;
098:
099: /** The search parameters used for searching, build out of the given individual parameter values. */
100: protected CmsSearchParameters m_parameters;
101:
102: /** The URL which leads to the previous result page. */
103: protected String m_prevUrl;
104:
105: /** The current search result. */
106: protected List m_result;
107:
108: /** The search parameter String. */
109: protected String m_searchParameters;
110:
111: /** The total number of search results matching the query. */
112: protected int m_searchResultCount;
113:
114: /**
115: * Default constructor, used to instanciate the search facility as a bean.<p>
116: */
117: public CmsSearch() {
118:
119: super ();
120:
121: m_parameters = new CmsSearchParameters();
122: m_parameters.setSearchRoots("");
123: m_parameters.setSearchPage(1);
124: m_searchResultCount = 0;
125: m_parameters.setSort(CmsSearchParameters.SORT_DEFAULT);
126: m_parameters.setFields(Arrays
127: .asList(CmsSearchIndex.DOC_META_FIELDS));
128: }
129:
130: /**
131: * Returns <code>true</code> if a category overview should be shown as part of the result.<p>
132: *
133: * <b>Please note:</b> The calculation of the category count slows down the search time by an order
134: * of magnitude. Make sure that you only use this feature if it's really required!
135: * Be especially careful if your search result list can become large (> 1000 documents), since in this case
136: * overall system performance will certainly be impacted considerably when calculating the categories.<p>
137: *
138: * @return <code>true</code> if a category overview should be shown as part of the result
139: */
140: public boolean getCalculateCategories() {
141:
142: return m_parameters.getCalculateCategories();
143: }
144:
145: /**
146: * Returns the search categories.<p>
147: *
148: * @return the search categories
149: */
150: public String[] getCategories() {
151:
152: List l = m_parameters.getCategories();
153: return (String[]) l.toArray(new String[l.size()]);
154: }
155:
156: /**
157: * Returns the maximum number of pages which should be shown.<p>
158: *
159: * @return the maximum number of pages which should be shown
160: */
161: public int getDisplayPages() {
162:
163: return m_parameters.getDisplayPages();
164: }
165:
166: /**
167: * Gets the current fields list.<p>
168: *
169: * @return the fields to search
170: */
171: public String getFields() {
172:
173: if (m_parameters.getFields() == null) {
174: return "";
175: }
176: StringBuffer result = new StringBuffer();
177: Iterator it = m_parameters.getFields().iterator();
178: while (it.hasNext()) {
179: result.append(it.next());
180: result.append(" ");
181: }
182: return result.toString();
183: }
184:
185: /**
186: * Gets the name of the current search index.<p>
187: *
188: * @return the name of the index
189: */
190: public String getIndex() {
191:
192: return m_parameters.getSearchIndex().getName();
193: }
194:
195: /**
196: * Gets the last exception after a search operation.<p>
197: *
198: * @return the exception occured in a search operation or null
199: */
200: public Exception getLastException() {
201:
202: return m_lastException;
203: }
204:
205: /**
206: * Gets the number of matches displayed on each page.<p>
207: *
208: * @return matches per result page
209: */
210: public int getMatchesPerPage() {
211:
212: return m_parameters.getMatchesPerPage();
213: }
214:
215: /**
216: * Gets the URL for the link to the next result page.<p>
217: *
218: * @return the URL to the next result page
219: */
220: public String getNextUrl() {
221:
222: return m_nextUrl;
223: }
224:
225: /**
226: * Creates a sorted map of URLs to link to other search result pages.<p>
227: *
228: * The key values are Integers representing the page number, the entry
229: * holds the corresponding link.<p>
230: *
231: * @return a map with String URLs
232: */
233: public Map getPageLinks() {
234:
235: Map links = new TreeMap();
236: if (m_pageCount <= 1) {
237: return links;
238: }
239: int startIndex, endIndex;
240: String link = m_cms.getRequestContext().getUri()
241: + getSearchParameters() + "&searchPage=";
242: if (getDisplayPages() < 1) {
243: // number of displayed pages not limited, build a map with all available page links
244: startIndex = 1;
245: endIndex = m_pageCount;
246: } else {
247: // limited number of displayed pages, calculate page range
248: int currentPage = getSearchPage();
249: int countBeforeCurrent = getDisplayPages() / 2;
250: int countAfterCurrent;
251: if ((currentPage - countBeforeCurrent) < 1) {
252: // set count before to number of available pages
253: countBeforeCurrent = currentPage - 1;
254: }
255: // set count after to number of remaining pages (- 1 for current page)
256: countAfterCurrent = getDisplayPages() - countBeforeCurrent
257: - 1;
258: // calculate start and end index
259: startIndex = currentPage - countBeforeCurrent;
260: endIndex = currentPage + countAfterCurrent;
261: // check end index
262: if (endIndex > m_pageCount) {
263: int delta = endIndex - m_pageCount;
264: // decrease start index with delta to get the right number of displayed pages
265: startIndex -= delta;
266: // check start index to avoid values < 1
267: if (startIndex < 1) {
268: startIndex = 1;
269: }
270: endIndex = m_pageCount;
271: }
272: }
273:
274: // build the sorted tree map of page links
275: for (int i = startIndex; i <= endIndex; i++) {
276: links.put(new Integer(i), (link + i));
277: }
278: return links;
279: }
280:
281: /**
282: * Returns the search parameters used for searching, build out of the given individual parameter values.<p>
283: *
284: * @return the search parameters used for searching, build out of the given individual parameter values
285: */
286: public CmsSearchParameters getParameters() {
287:
288: if (m_parameterRestriction != null) {
289: m_parameters = m_parameters
290: .restrict(m_parameterRestriction);
291: }
292: return m_parameters;
293:
294: }
295:
296: /**
297: * Gets the URL for the link to the previous result page.<p>
298: *
299: * @return the URL to the previous result page
300: */
301: public String getPreviousUrl() {
302:
303: return m_prevUrl;
304: }
305:
306: /**
307: * Gets the current search query.<p>
308: *
309: * @return the current query string or null if no query was set before
310: */
311: public String getQuery() {
312:
313: return m_parameters.getQuery();
314: }
315:
316: /**
317: * Gets the minimum search query length.<p>
318: *
319: * @return the minimum search query length
320: */
321: public int getQueryLength() {
322:
323: return m_parameters.getQueryLength();
324: }
325:
326: /**
327: * Gets the current result page.<p>
328: *
329: * @return the current result page
330: */
331: public int getSearchPage() {
332:
333: return m_parameters.getSearchPage();
334: }
335:
336: /**
337: * Creates a String with the necessary search parameters for page links.<p>
338: *
339: * @return String with search parameters
340: *
341: * @deprecated use {@link CmsSearchParameters#toQueryString()} instead
342: *
343: * @see #getParameters()
344: */
345: public String getSearchParameters() {
346:
347: return m_parameters.toQueryString();
348: }
349:
350: /**
351: * Returns the search result for the current query, as a list of <code>{@link CmsSearchResult}</code> objects.<p>
352: *
353: * @return the search result (may be empty) or null if no index or query was set before
354: */
355: public List getSearchResult() {
356:
357: if ((m_cms != null) && (m_result == null)
358: && (m_parameters.getIndex() != null)
359: && CmsStringUtil.isNotEmpty(m_parameters.getQuery())) {
360:
361: if ((getQueryLength() > 0)
362: && (m_parameters.getQuery().trim().length() < getQueryLength())) {
363:
364: m_lastException = new CmsSearchException(Messages.get()
365: .container(Messages.ERR_QUERY_TOO_SHORT_1,
366: new Integer(getQueryLength())));
367:
368: return m_result;
369: }
370:
371: try {
372:
373: CmsSearchResultList result = m_parameters
374: .getSearchIndex()
375: .search(m_cms, getParameters());
376:
377: if (result.size() > 0) {
378:
379: m_result = result;
380: m_searchResultCount = result.getHitCount();
381: m_categoriesFound = result.getCategories();
382:
383: // re-caluclate the number of pages for this search result
384: m_pageCount = m_searchResultCount
385: / m_parameters.getMatchesPerPage();
386: if ((m_searchResultCount % m_parameters
387: .getMatchesPerPage()) != 0) {
388: m_pageCount++;
389: }
390:
391: // re-calculate the URLs to browse forward and backward in the search result
392: String url = m_cms.getRequestContext().getUri()
393: + m_parameters.toQueryString()
394: + "&searchPage=";
395: if (m_parameters.getSearchPage() > 1) {
396: m_prevUrl = url
397: + (m_parameters.getSearchPage() - 1);
398: }
399: if (m_parameters.getSearchPage() < m_pageCount) {
400: m_nextUrl = url
401: + (m_parameters.getSearchPage() + 1);
402: }
403: } else {
404: m_result = Collections.EMPTY_LIST;
405: m_searchResultCount = 0;
406: m_categoriesFound = null;
407: m_pageCount = 0;
408: m_prevUrl = null;
409: m_nextUrl = null;
410: }
411: } catch (Exception exc) {
412:
413: if (LOG.isDebugEnabled()) {
414: LOG.debug(Messages.get().getBundle().key(
415: Messages.LOG_SEARCHING_FAILED_0), exc);
416: }
417:
418: m_result = null;
419: m_searchResultCount = 0;
420: m_pageCount = 0;
421:
422: m_lastException = exc;
423: }
424: }
425:
426: return m_result;
427: }
428:
429: /**
430: * Returns a map of categories (Strings) for the last search result, mapped to the hit count (Integer) of
431: * the documents in this category, or <code>null</code> if the categories have not been calculated.<p>
432: *
433: * @return a map of categories for the last search result
434: *
435: * @see CmsSearch#getCalculateCategories()
436: * @see CmsSearch#setCalculateCategories(boolean)
437: */
438: public Map getSearchResultCategories() {
439:
440: return m_categoriesFound;
441: }
442:
443: /**
444: * Returns the total number of search results matching the query.<p>
445: *
446: * @return the total number of search results matching the query
447: */
448: public int getSearchResultCount() {
449:
450: return m_searchResultCount;
451: }
452:
453: /**
454: * Returns the search roots.<p>
455: *
456: * Only resources that are sub-resources of one of the search roots
457: * are included in the search result.<p>
458: *
459: * The search roots are used <i>in addition to</i> the current site root
460: * of the user performing the search.<p>
461: *
462: * By default, the search roots contain only one entry with an empty string.<p>
463: *
464: * @return the search roots
465: */
466: public String[] getSearchRoots() {
467:
468: List l = m_parameters.getRoots();
469: return (String[]) l.toArray(new String[l.size()]);
470: }
471:
472: /**
473: * Returns the sort order used for sorting the results of s search.<p>
474: *
475: * @return the sort order used for sorting the results of s search
476: */
477: public Sort getSortOrder() {
478:
479: return m_parameters.getSort();
480: }
481:
482: /**
483: * Initializes the bean with the cms object.<p>
484: *
485: * @param cms the cms object
486: */
487: public void init(CmsObject cms) {
488:
489: m_cms = cms;
490: m_result = null;
491: m_lastException = null;
492: m_pageCount = 0;
493: m_nextUrl = null;
494: m_prevUrl = null;
495: }
496:
497: /**
498: * Sets the flag that controls calculation of result categories for the next search,
499: * use this only if it's really required since the search can become very slow using this option.<p>
500: *
501: * <b>Please note:</b> The calculation of the category count slows down the search time by an order
502: * of magnitude. Make sure that you only use this feature if it's really required!
503: * Be especially careful if your search result list can become large (> 1000 documents), since in this case
504: * overall system performance will certainly be impacted considerably when calculating the categories.<p>
505: *
506: * @param calculateCategories if <code>true</code>, the category count will be calculated for the next search
507: */
508: public void setCalculateCategories(boolean calculateCategories) {
509:
510: m_parameters.setCalculateCategories(calculateCategories);
511: }
512:
513: /**
514: * Sets the search categories, all search results must be in one of the categories,
515: * the category set must match the indexed category exactly.<p>
516: *
517: * All categories will automatically be trimmed and lowercased, since search categories
518: * are also stored this way in the index.<p>
519: *
520: * @param categories the categories to set
521: */
522: public void setCategories(String[] categories) {
523:
524: List setCategories = new LinkedList();
525: if (categories != null) {
526: if (categories.length != 0) {
527: // ensure all categories are not null, trimmed, not-empty and lowercased
528: String cat;
529: for (int i = 0; i < categories.length; i++) {
530: cat = categories[i];
531: if (CmsStringUtil.isNotEmptyOrWhitespaceOnly(cat)) {
532: // all categories must internally be lower case,
533: // since the index keywords are lowercased as well
534: cat = cat.trim().toLowerCase();
535: setCategories.add(cat);
536: }
537: }
538: }
539: }
540: m_parameters.setCategories(setCategories);
541: resetLastResult();
542: }
543:
544: /**
545: * Sets the maximum number of pages which should be shown.<p>
546: *
547: * Enter an odd value to achieve a nice, "symmetric" output.<p>
548: *
549: * @param value the maximum number of pages which should be shown
550: */
551: public void setDisplayPages(int value) {
552:
553: m_parameters.setDisplayPages(value);
554: }
555:
556: /**
557: * Controls if the excerpt from a field is generated only for searched fields, or for all fields (the default).<p>
558: *
559: * The default setting is <code>false</code>, which means all text fields configured for the excerpt will
560: * be used to gernerate the excerpt, regardless if they have been searched in or not.<p>
561: *
562: * Please note: A field will only be included in the excerpt if it has been configured as <code>excerpt="true"</code>
563: * in <code>opencms-search.xml</code>. This method controls if so configured fields are used depending on the
564: * fields searched, see {@link #setField(String[])}.<p>
565: *
566: * @param value if <code>true</code>, the excerpt is generated only from the fields actually searched in
567: */
568: public void setExcerptOnlySearchedFields(boolean value) {
569:
570: m_parameters.setExcerptOnlySearchedFields(value);
571: resetLastResult();
572: }
573:
574: /**
575: * Sets the fields to search.<p>
576: *
577: * If the fields are set to <code>null</code>,
578: * or not set at all, the default fields "content" and "meta" are used.<p>
579: *
580: * For a list of valid field names, see the Interface constants of
581: * <code>{@link org.opencms.search.documents.I_CmsDocumentFactory}</code>.
582: *
583: * @param fields the fields to search
584: */
585: public void setField(String[] fields) {
586:
587: m_parameters.setFields(Arrays.asList(fields));
588: resetLastResult();
589: }
590:
591: /**
592: * Set the name of the index to search.<p>
593: *
594: * A former search result will be deleted.<p>
595: *
596: * @param indexName the name of the index
597: */
598: public void setIndex(String indexName) {
599:
600: resetLastResult();
601: CmsSearchIndex index;
602: if (CmsStringUtil.isNotEmpty(indexName)) {
603: try {
604: index = OpenCms.getSearchManager().getIndex(indexName);
605: if (index == null) {
606: throw new CmsException(Messages.get().container(
607: Messages.ERR_INDEX_NOT_FOUND_1, indexName));
608: }
609: m_parameters.setSearchIndex(index);
610: } catch (Exception exc) {
611: if (LOG.isDebugEnabled()) {
612: LOG.debug(Messages.get().getBundle().key(
613: Messages.LOG_INDEX_ACCESS_FAILED_1,
614: indexName), exc);
615: }
616: m_lastException = exc;
617: }
618: }
619: }
620:
621: /**
622: * Sets the number of matches per page.<p>
623: *
624: * @param matches the number of matches per page
625: */
626: public void setMatchesPerPage(int matches) {
627:
628: m_parameters.setMatchesPerPage(matches);
629: resetLastResult();
630: }
631:
632: /**
633: * Set the parameters to use if a non null instance is provided. <p>
634: *
635: * @param parameters the parameters to use for the search if a non null instance is provided
636: *
637: */
638: public void setParameters(CmsSearchParameters parameters) {
639:
640: if (parameters != null) {
641: m_parameters = parameters;
642: }
643: }
644:
645: /**
646: * Sets the search query.<p>
647: *
648: * The syntax of the query depends on the search engine used.
649: * A former search result will be deleted.<p>
650: *
651: * @param query the search query (escaped format)
652: */
653: public void setQuery(String query) {
654:
655: try {
656: m_parameters.setQuery(CmsEncoder.decodeParameter(query));
657: } catch (CmsIllegalArgumentException iae) {
658: m_lastException = iae;
659: }
660: resetLastResult();
661: }
662:
663: /**
664: * Sets the minimum length of the search query.<p>
665: *
666: * @param length the minimum search query length
667: */
668: public void setQueryLength(int length) {
669:
670: m_parameters.setQueryLength(length);
671: }
672:
673: /**
674: * Restrict the result of the next search to the results of the last search,
675: * restricted with the provided parameters.<p>
676: *
677: * Use this for "seach in search result" functions.<p>
678: *
679: * @param restriction the restriction to use
680: *
681: * @see CmsSearchParameters#restrict(CmsSearchParameters)
682: */
683: public void setResultRestriction(CmsSearchParameters restriction) {
684:
685: resetLastResult();
686: m_parameterRestriction = restriction;
687: }
688:
689: /**
690: * Sets the current result page.<p>
691: *
692: * Works with jsp bean mechanism for request parameter "searchPage"
693: * that is generated here for page links.<p>
694: *
695: * @param page the current result page
696: */
697: public void setSearchPage(int page) {
698:
699: m_parameters.setSearchPage(page);
700: resetLastResult();
701: }
702:
703: /**
704: * Convenience method to set exactly one search root.<p>
705: *
706: * @param searchRoot the search root to set
707: *
708: * @see #setSearchRoots(String[])
709: */
710: public void setSearchRoot(String searchRoot) {
711:
712: setSearchRoots(CmsStringUtil.splitAsArray(searchRoot, ","));
713: }
714:
715: /**
716: * Sets the search root list.<p>
717: *
718: * Only resources that are sub-resources of one of the search roots
719: * are included in the search result.<p>
720: *
721: * The search roots set here are used <i>in addition to</i> the current site root
722: * of the user performing the search.<p>
723: *
724: * By default, the search roots contain only one entry with an empty string.<p>
725: *
726: * @param searchRoots the search roots to set
727: */
728: public void setSearchRoots(String[] searchRoots) {
729:
730: List l = new LinkedList(Arrays.asList(searchRoots));
731: m_parameters.setRoots(l);
732: resetLastResult();
733: }
734:
735: /**
736: * Sets the sort order used for sorting the results of s search.<p>
737: *
738: * @param sortOrder the sort order to set
739: */
740: public void setSortOrder(Sort sortOrder) {
741:
742: m_parameters.setSort(sortOrder);
743: resetLastResult();
744: }
745:
746: /**
747: * Resets the last seach result.<p>
748: */
749: private void resetLastResult() {
750:
751: m_result = null;
752: m_lastException = null;
753: m_categoriesFound = null;
754: m_parameterRestriction = null;
755: }
756: }
|