001: /*
002: * Copyright 2003-2004 Michael Franken, Zilverline.
003: *
004: * The contents of this file, or the files included with this file, are subject to
005: * the current version of ZILVERLINE Collaborative Source License for the
006: * Zilverline Search Engine (the "License"); You may not use this file except in
007: * compliance with the License.
008: *
009: * You may obtain a copy of the License at
010: *
011: * http://www.zilverline.org.
012: *
013: * See the License for the rights, obligations and
014: * limitations governing use of the contents of the file.
015: *
016: * The Original and Upgraded Code is the Zilverline Search Engine. The developer of
017: * the Original and Upgraded Code is Michael Franken. Michael Franken owns the
018: * copyrights in the portions it created. All Rights Reserved.
019: *
020: */
021:
022: package org.zilverline.core;
023:
024: import java.io.File;
025: import java.util.Map;
026: import java.util.Properties;
027:
028: import junit.framework.TestCase;
029:
030: import org.apache.commons.logging.Log;
031: import org.apache.commons.logging.LogFactory;
032:
033: /**
034: * Test class for Collection, self documenting.
035: *
036: * @author Michael Franken
037: *
038: * @see org.zilverline.core.FileSystemCollection
039: */
040: public class TestExtractorFactory extends TestCase {
041: /** logger for Commons logging. */
042: private static Log log = LogFactory
043: .getLog(TestExtractorFactory.class);
044:
045: public void testMappings() {
046: Properties props = new Properties();
047:
048: ExtractorFactory ef = new ExtractorFactory();
049:
050: // add handler for zip
051: props.put("PDF", "org.zilverline.extractors.PDFExtractor");
052: props.put("doc", "org.zilverline.extractors.WordExtractor");
053: props.put("html", "org.zilverline.extractors.HTMLExtractor");
054: props.put("htm", "org.zilverline.extractors.HTMLExtractor");
055: props.put("txt", "org.zilverline.extractors.TextExtractor");
056:
057: ef.setCaseSensitive(false);
058: ef.setMappings(props);
059:
060: Map mappings = ef.getMappings();
061:
062: assertTrue("Testing whether keys are stored in lowercase",
063: mappings.containsKey("pdf"));
064: assertEquals((String) mappings.get("pdf"),
065: "org.zilverline.extractors.PDFExtractor");
066:
067: ef.setCaseSensitive(true);
068: ef.setMappings(props);
069: mappings = ef.getMappings();
070: assertFalse("Testing whether keys are not stored in lowercase",
071: mappings.containsKey("pdf"));
072: assertTrue("Testing whether keys are stored in original case",
073: mappings.containsKey("PDF"));
074: assertEquals((String) mappings.get("PDF"),
075: "org.zilverline.extractors.PDFExtractor");
076: }
077:
078: public void testFactory() {
079: Properties props = new Properties();
080:
081: ExtractorFactory ef = new ExtractorFactory();
082:
083: ef.setCaseSensitive(false);
084:
085: // add handler for zip
086: props.put("pdf", "org.zilverline.extractors.PDFExtractor");
087: props.put("doc", "org.zilverline.extractors.WordExtractor");
088: props.put("rtf", "org.zilverline.extractors.RTFExtractor");
089: props.put("html", "org.zilverline.extractors.HTMLExtractor");
090: props.put("htm", "org.zilverline.extractors.HTMLExtractor");
091: props.put("txt", "org.zilverline.extractors.TextExtractor");
092: props.put("", "org.zilverline.extractors.TextExtractor");
093: ef.setMappings(props);
094:
095: File pdf = new File("test\\data\\test.pdf");
096:
097: assertTrue(pdf.isFile());
098:
099: Extractor ex = ef.createExtractor(pdf);
100:
101: log.debug(ex.getClass().getName());
102:
103: ParsedFileInfo file = ex.extractInfo(pdf);
104:
105: log.debug("###########################################"
106: + file.getType());
107:
108: File txt = new File(
109: "test\\data\\some text file with a very nasty long filename.txt");
110:
111: assertTrue(txt.isFile());
112: ex = ef.createExtractor(txt);
113: log.debug(ex.getClass().getName());
114: file = ex.extractInfo(txt);
115: log.debug("###########################################"
116: + file.getType());
117:
118: txt = new File("test\\data\\readme");
119: assertTrue(txt.isFile());
120: ex = ef.createExtractor(txt);
121: log.debug(ex.getClass().getName());
122: file = ex.extractInfo(txt);
123: log.debug("###########################################"
124: + file.getType());
125:
126: File word = new File("test\\data\\test.doc");
127:
128: assertTrue(word.isFile());
129: ex = ef.createExtractor(word);
130: log.debug(ex.getClass().getName());
131: file = ex.extractInfo(word);
132: log.debug("###########################################"
133: + file.getType());
134:
135: File rtf = new File("test\\data\\test.rtf");
136:
137: assertTrue(rtf.isFile());
138: ex = ef.createExtractor(rtf);
139: assertEquals("org.zilverline.extractors.RTFExtractor", ex
140: .getClass().getName());
141: file = ex.extractInfo(rtf);
142: log.debug("###########################################"
143: + file.getType());
144:
145: File html = new File("test\\data\\test.html");
146:
147: assertTrue(html.isFile());
148: ex = ef.createExtractor(html);
149: assertEquals("org.zilverline.extractors.HTMLExtractor", ex
150: .getClass().getName());
151: file = ex.extractInfo(html);
152: log.debug("###########################################"
153: + file.getType());
154: }
155:
156: public void testGetMimeType() {
157: File dir = new File("test\\data");
158: File[] allFiles = dir.listFiles();
159: if (allFiles != null) {
160: for (int i = 0; i < allFiles.length; i++) {
161: File file = allFiles[i];
162: if (file.isFile()) {
163: log.debug("File type of " + file.getName()
164: + " is: "
165: + ExtractorFactory.getMimeType(file));
166: }
167: }
168: }
169: }
170:
171: }
|