001: package org.contineo.core.searchengine.util;
002:
003: import java.util.ArrayList;
004: import java.util.Collection;
005: import java.util.Enumeration;
006: import java.util.Hashtable;
007: import java.util.Iterator;
008:
009: import org.contineo.core.document.Term;
010: import org.contineo.core.document.dao.TermDAO;
011: import org.contineo.core.searchengine.search.Result;
012: import org.contineo.util.Context;
013:
014: /**
015: * Created on 15.11.2004
016: */
017: public class TermSelector {
018: private Hashtable<String, TermEntry> terms;
019:
020: /**
021: * Creates a new TermSelector.
022: */
023: public TermSelector() {
024: terms = new Hashtable<String, TermEntry>();
025: } // end ctor TermSelector
026:
027: /**
028: * Selects n terms the mostly occured in all document being in a given search result.
029: * @param searchResult
030: * @param count Count of terms to be returned.
031: * @return
032: */
033: public Collection getTopTerms(Collection searchResult, int count) {
034: Iterator iter = searchResult.iterator();
035: TermDAO termDao = (TermDAO) Context.getInstance().getBean(
036: TermDAO.class);
037:
038: while (iter.hasNext()) {
039: Result rs = (Result) iter.next();
040: Collection termcoll = termDao.findByMenuId(rs.getMenuId());
041: Iterator termiter = termcoll.iterator();
042:
043: while (termiter.hasNext()) {
044: Term term = (Term) termiter.next();
045:
046: if (terms.containsKey(term.getStem())) {
047: TermEntry entry = (TermEntry) terms.get(term
048: .getStem());
049:
050: //entry.setValue(entry.getValue() + (term.getValue() / term.getWordCount()));
051: //entry.setWordCount(entry.getWordCount() + term.getWordCount());
052: entry.setValue(entry.getValue() + term.getValue());
053:
054: if (term.getOriginWord().length() < entry
055: .getOriginWord().length()) {
056: entry.setOriginWord(term.getOriginWord());
057: }
058:
059: double value = term.getValue();
060: Edge edge = new Edge();
061:
062: if (value > 30.0) {
063: edge.setThickness(3);
064: } else if (value > 10.0) {
065: edge.setThickness(2);
066: } else {
067: edge.setThickness(1);
068: }
069:
070: edge.setId(term.getMenuId());
071: entry.addDocument(edge);
072: } else {
073: TermEntry entry = new TermEntry();
074: entry.setName(term.getStem());
075:
076: //entry.setWordCount(term.getWordCount());
077: //entry.setValue(term.getValue() * term.getWordCount());
078: entry.setValue(term.getValue());
079: entry.setOriginWord(term.getOriginWord());
080:
081: double value = term.getValue();
082: Edge edge = new Edge();
083:
084: if (value > 30.0) {
085: edge.setThickness(3);
086: } else if (value > 10.0) {
087: edge.setThickness(2);
088: } else {
089: edge.setThickness(1);
090: }
091:
092: edge.setId(term.getMenuId());
093: entry.addDocument(edge);
094: terms.put(term.getStem(), entry);
095: }
096: }
097: }
098:
099: Collection<TermEntry> coll = new ArrayList<TermEntry>(count);
100:
101: if (terms.size() > 0) {
102: for (int i = 0; i < count; i++) {
103: TermEntry e = getTopWord();
104: coll.add(e);
105: }
106: }
107:
108: return coll;
109: } // end method getTopTerms
110:
111: protected TermEntry getTopWord() {
112: TermEntry entry = new TermEntry();
113: Enumeration enum1 = terms.keys();
114: String topterm = "";
115: double topvalue = 0d;
116:
117: while (enum1.hasMoreElements()) {
118: String term = (String) enum1.nextElement();
119: TermEntry te = (TermEntry) terms.get(term);
120:
121: //double val = (double)te.getWordCount() / te.getValue();
122: double val = te.getValue();
123:
124: if (val > topvalue) {
125: topvalue = val;
126: topterm = term;
127: entry = te;
128: }
129: }
130:
131: terms.remove(topterm);
132: return entry;
133: } // end method getTopWord
134: } // end class TermSelector
|