01: package org.contineo.core.text.analyze;
02:
03: import java.lang.reflect.InvocationTargetException;
04: import java.lang.reflect.Method;
05: import java.util.Locale;
06:
07: import net.sf.snowball.SnowballProgram;
08:
09: import org.apache.commons.logging.Log;
10: import org.apache.commons.logging.LogFactory;
11:
12: /**
13: * @author Michael Scholz
14: * @author Alessandro Gasparini
15: */
16: class Stemmer {
17:
18: protected static Log log = LogFactory.getLog(Stemmer.class);
19:
20: // Wrapped snowball stemmer
21: private SnowballProgram stemmer;
22:
23: private Method stemMethod;
24:
25: public Stemmer(String lang) throws SecurityException,
26: NoSuchMethodException {
27: String language = new Locale(lang)
28: .getDisplayLanguage(Locale.ENGLISH);
29: try {
30: Class stemClass = Class.forName("net.sf.snowball.ext."
31: + language + "Stemmer");
32: stemmer = (SnowballProgram) stemClass.newInstance();
33: stemMethod = stemmer.getClass().getMethod("stem",
34: new Class[0]);
35: } catch (Exception e) {
36: log.info("Error instantiating stemmer for language:"
37: + language);
38: log.info("Trying with English Stemmer.");
39: // Default with English stemmer
40: stemmer = new net.sf.snowball.ext.EnglishStemmer();
41: stemMethod = stemmer.getClass().getMethod("stem",
42: new Class[0]);
43: }
44: }
45:
46: /**
47: * Stemms the given term to a unique <tt>discriminator</tt>.
48: *
49: * @param term java.lang.String The term that should be stemmed
50: * @return java.lang.String Discriminator for <tt>term</tt>
51: */
52: public String stem(String term) throws IllegalArgumentException,
53: IllegalAccessException, InvocationTargetException {
54: if (term == null)
55: return null;
56: stemmer.setCurrent(term.toLowerCase());
57: stemMethod.invoke(stemmer, new Object[0]);
58: return stemmer.getCurrent();
59: }
60: }
|