001: /*
002: * File : $Source: /usr/local/cvs/opencms/src/org/opencms/search/CmsSearchCategoryCollector.java,v $
003: * Date : $Date: 2008-02-27 12:05:38 $
004: * Version: $Revision: 1.10 $
005: *
006: * This library is part of OpenCms -
007: * the Open Source Content Management System
008: *
009: * Copyright (c) 2002 - 2008 Alkacon Software GmbH (http://www.alkacon.com)
010: *
011: * This library is free software; you can redistribute it and/or
012: * modify it under the terms of the GNU Lesser General Public
013: * License as published by the Free Software Foundation; either
014: * version 2.1 of the License, or (at your option) any later version.
015: *
016: * This library is distributed in the hope that it will be useful,
017: * but WITHOUT ANY WARRANTY; without even the implied warranty of
018: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
019: * Lesser General Public License for more details.
020: *
021: * For further information about Alkacon Software GmbH, please see the
022: * company website: http://www.alkacon.com
023: *
024: * For further information about OpenCms, please see the
025: * project website: http://www.opencms.org
026: *
027: * You should have received a copy of the GNU Lesser General Public
028: * License along with this library; if not, write to the Free Software
029: * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
030: */
031:
032: package org.opencms.search;
033:
034: import org.opencms.main.CmsLog;
035: import org.opencms.search.fields.CmsSearchField;
036: import org.opencms.util.CmsStringUtil;
037:
038: import java.io.IOException;
039: import java.util.HashMap;
040: import java.util.Iterator;
041: import java.util.Map;
042: import java.util.TreeMap;
043:
044: import org.apache.commons.logging.Log;
045: import org.apache.lucene.document.Document;
046: import org.apache.lucene.search.HitCollector;
047: import org.apache.lucene.search.IndexSearcher;
048:
049: /**
050: * Collects category information during a search process.<p>
051: *
052: * <b>Please note:</b> The calculation of the category count slows down the search time by an order
053: * of magnitude. Make sure that you only use this feature if it's really required!
054: * Be especially careful if your search result list can become large (> 1000 documents), since in this case
055: * overall system performance will certainly be impacted considerably when calculating the categories.<p>
056: *
057: * @author Alexander Kandzior
058: *
059: * @version $Revision: 1.10 $
060: *
061: * @since 6.0.0
062: */
063: public class CmsSearchCategoryCollector extends HitCollector {
064:
065: /**
066: * Class with an increasable counter to avoid multiple look ups and
067: * object creations when dealing with the category count.<p>
068: */
069: private static class CmsCategroyCount {
070:
071: /** The category count. */
072: int m_count;
073:
074: /**
075: * Creates a new instance with a initial count of 1.<p>
076: */
077: CmsCategroyCount() {
078:
079: m_count = 1;
080: }
081:
082: /**
083: * Returns the count.<p>
084: *
085: * @return the count
086: */
087: int getCount() {
088:
089: return m_count;
090: }
091:
092: /**
093: * Increases the count by one.<p>
094: */
095: void inc() {
096:
097: m_count++;
098: }
099:
100: /**
101: * Creates an Integer for this count.<p>
102: *
103: * @return an Integer for this count
104: */
105: Integer toInteger() {
106:
107: return new Integer(m_count);
108: }
109: }
110:
111: /** Category used in case the document belongs to no category. */
112: public static final String UNKNOWN_CATEGORY = "unknown";
113:
114: /** The log object for this class. */
115: private static final Log LOG = CmsLog
116: .getLog(CmsSearchCategoryCollector.class);
117:
118: /** The internal map of the categories found. */
119: private Map m_categories;
120:
121: /** The indes searcher used. */
122: private IndexSearcher m_searcher;
123:
124: /**
125: * Creates a new category search collector instance.<p>
126: *
127: * @param searcher the index searcher used
128: */
129: public CmsSearchCategoryCollector(IndexSearcher searcher) {
130:
131: super ();
132: m_searcher = searcher;
133: m_categories = new HashMap();
134: }
135:
136: /**
137: * Convenience method to format a map of categories in a nice 2 column list, for example
138: * for display of debugging output.<p>
139: *
140: * @param categories the map to format
141: * @return the formatted category map
142: */
143: public static final String formatCategoryMap(Map categories) {
144:
145: StringBuffer result = new StringBuffer(256);
146: result.append("Total categories: ");
147: result.append(categories.size());
148: result.append('\n');
149: Iterator i = categories.entrySet().iterator();
150: while (i.hasNext()) {
151: Map.Entry entry = (Map.Entry) i.next();
152: String category = (String) entry.getKey();
153: Integer count = (Integer) entry.getValue();
154: result.append(CmsStringUtil.padRight(category, 30));
155: result.append(count.intValue());
156: result.append('\n');
157: }
158: return result.toString();
159: }
160:
161: /**
162: * @see org.apache.lucene.search.HitCollector#collect(int, float)
163: */
164: public void collect(int id, float score) {
165:
166: String category = null;
167: try {
168: Document doc = m_searcher.doc(id);
169: category = doc.get(CmsSearchField.FIELD_CATEGORY);
170: } catch (IOException e) {
171: // category will be null
172: if (LOG.isDebugEnabled()) {
173: LOG.debug(Messages.get().getBundle().key(
174: Messages.LOG_READ_CATEGORY_FAILED_1,
175: new Integer(id)), e);
176: }
177:
178: }
179: if (category == null) {
180: category = UNKNOWN_CATEGORY;
181: }
182: CmsCategroyCount count = (CmsCategroyCount) m_categories
183: .get(category);
184: if (count != null) {
185: count.inc();
186: } else {
187: count = new CmsCategroyCount();
188: m_categories.put(category, count);
189: }
190: }
191:
192: /**
193: * Returns the category count result, the returned map
194: * contains Strings (category names) mapped to an Integer (the count).<p>
195: *
196: * @return the category count result
197: */
198: public Map getCategoryCountResult() {
199:
200: Map result = new TreeMap();
201: Iterator i = m_categories.keySet().iterator();
202: while (i.hasNext()) {
203: String category = (String) i.next();
204: CmsCategroyCount count = (CmsCategroyCount) m_categories
205: .get(category);
206: result.put(category, count.toInteger());
207: }
208: return result;
209: }
210:
211: /**
212: * @see java.lang.Object#toString()
213: */
214: public String toString() {
215:
216: return formatCategoryMap(getCategoryCountResult());
217: }
218: }
|