001: package org.apache.lucene.analysis.ngram;
002:
003: /**
004: * Licensed to the Apache Software Foundation (ASF) under one or more
005: * contributor license agreements. See the NOTICE file distributed with
006: * this work for additional information regarding copyright ownership.
007: * The ASF licenses this file to You under the Apache License, Version 2.0
008: * (the "License"); you may not use this file except in compliance with
009: * the License. You may obtain a copy of the License at
010: *
011: * http://www.apache.org/licenses/LICENSE-2.0
012: *
013: * Unless required by applicable law or agreed to in writing, software
014: * distributed under the License is distributed on an "AS IS" BASIS,
015: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
016: * See the License for the specific language governing permissions and
017: * limitations under the License.
018: */
019:
020: import org.apache.lucene.analysis.Token;
021: import org.apache.lucene.analysis.TokenStream;
022: import org.apache.lucene.analysis.WhitespaceTokenizer;
023:
024: import java.io.StringReader;
025: import java.util.ArrayList;
026:
027: import junit.framework.TestCase;
028:
029: /**
030: * Tests {@link NGramTokenFilter} for correctness.
031: * @author Otis Gospodnetic
032: */
033: public class NGramTokenFilterTest extends TestCase {
034: private TokenStream input;
035: private ArrayList tokens = new ArrayList();
036:
037: public void setUp() {
038: input = new WhitespaceTokenizer(new StringReader("abcde"));
039: }
040:
041: public void testInvalidInput() throws Exception {
042: boolean gotException = false;
043: try {
044: new NGramTokenFilter(input, 2, 1);
045: } catch (IllegalArgumentException e) {
046: gotException = true;
047: }
048: assertTrue(gotException);
049: }
050:
051: public void testInvalidInput2() throws Exception {
052: boolean gotException = false;
053: try {
054: new NGramTokenFilter(input, 0, 1);
055: } catch (IllegalArgumentException e) {
056: gotException = true;
057: }
058: assertTrue(gotException);
059: }
060:
061: public void testUnigrams() throws Exception {
062: NGramTokenFilter filter = new NGramTokenFilter(input, 1, 1);
063:
064: Token token = null;
065: do {
066: token = filter.next();
067: if (token != null) {
068: tokens.add(token.toString());
069: // System.out.println(token.termText());
070: // System.out.println(token);
071: // Thread.sleep(1000);
072: }
073: } while (token != null);
074:
075: assertEquals(5, tokens.size());
076: ArrayList exp = new ArrayList();
077: exp.add("(a,0,1)");
078: exp.add("(b,1,2)");
079: exp.add("(c,2,3)");
080: exp.add("(d,3,4)");
081: exp.add("(e,4,5)");
082: assertEquals(exp, tokens);
083: }
084:
085: public void testBigrams() throws Exception {
086: NGramTokenFilter filter = new NGramTokenFilter(input, 2, 2);
087:
088: Token token = null;
089: do {
090: token = filter.next();
091: if (token != null) {
092: tokens.add(token.toString());
093: // System.out.println(token.termText());
094: // System.out.println(token);
095: // Thread.sleep(1000);
096: }
097: } while (token != null);
098:
099: assertEquals(4, tokens.size());
100: ArrayList exp = new ArrayList();
101: exp.add("(ab,0,2)");
102: exp.add("(bc,1,3)");
103: exp.add("(cd,2,4)");
104: exp.add("(de,3,5)");
105: assertEquals(exp, tokens);
106: }
107:
108: public void testNgrams() throws Exception {
109: NGramTokenFilter filter = new NGramTokenFilter(input, 1, 3);
110:
111: Token token = null;
112: do {
113: token = filter.next();
114: if (token != null) {
115: tokens.add(token.toString());
116: // System.out.println(token.termText());
117: // System.out.println(token);
118: // Thread.sleep(1000);
119: }
120: } while (token != null);
121:
122: assertEquals(12, tokens.size());
123: ArrayList exp = new ArrayList();
124: exp.add("(a,0,1)");
125: exp.add("(b,1,2)");
126: exp.add("(c,2,3)");
127: exp.add("(d,3,4)");
128: exp.add("(e,4,5)");
129: exp.add("(ab,0,2)");
130: exp.add("(bc,1,3)");
131: exp.add("(cd,2,4)");
132: exp.add("(de,3,5)");
133: exp.add("(abc,0,3)");
134: exp.add("(bcd,1,4)");
135: exp.add("(cde,2,5)");
136: assertEquals(exp, tokens);
137: }
138:
139: public void testOversizedNgrams() throws Exception {
140: NGramTokenFilter filter = new NGramTokenFilter(input, 6, 7);
141:
142: Token token = null;
143: do {
144: token = filter.next();
145: if (token != null) {
146: tokens.add(token.toString());
147: // System.out.println(token.termText());
148: // System.out.println(token);
149: // Thread.sleep(1000);
150: }
151: } while (token != null);
152:
153: assertTrue(tokens.isEmpty());
154: }
155: }
|