001: /**
002: * Copyright (c) 2003-2004, www.pdfbox.org
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * 1. Redistributions of source code must retain the above copyright notice,
009: * this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: * 3. Neither the name of pdfbox; nor the names of its
014: * contributors may be used to endorse or promote products derived from this
015: * software without specific prior written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
021: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: *
028: * http://www.pdfbox.org
029: */package test.pdfbox.util;
030:
031: import java.io.File;
032: import java.io.FilenameFilter;
033: import java.io.FileOutputStream;
034: import java.io.OutputStream;
035: import java.io.OutputStreamWriter;
036: import java.io.Writer;
037:
038: import junit.framework.Test;
039: import junit.framework.TestCase;
040: import junit.framework.TestSuite;
041:
042: import org.pdfbox.pdmodel.PDDocument;
043:
044: import org.pdfbox.util.PDFTextStripper;
045:
046: /**
047: * Test the performance of the PDF text stripper utility.
048: *
049: * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
050: * @version $Revision: 1.4 $
051: */
052: public class TestTextStripperPerformance extends TestCase {
053: /**
054: * Test class constructor.
055: *
056: * @param name The name of the test class.
057: */
058: public TestTextStripperPerformance(String name) {
059: super (name);
060: }
061:
062: /**
063: * Test suite setup.
064: */
065: public void setUp() {
066: }
067:
068: /**
069: * Validate text extraction on a single file.
070: *
071: * @param file The file to validate
072: * @param bLogResult Whether to log the extracted text
073: * @throws Exception when there is an exception
074: */
075: public void doTestFile(File file, boolean bLogResult)
076: throws Exception {
077:
078: PDFTextStripper stripper = new PDFTextStripper();
079: OutputStream os = null;
080: Writer writer = null;
081: PDDocument document = null;
082: try {
083: document = PDDocument.load(file);
084:
085: File outFile = new File(file.getParentFile()
086: .getParentFile(), "output/" + file.getName()
087: + ".txt");
088: os = new FileOutputStream(outFile);
089: writer = new OutputStreamWriter(os);
090:
091: stripper.writeText(document, writer);
092: } finally {
093: if (writer != null) {
094: writer.close();
095: }
096: if (os != null) {
097: os.close();
098: }
099: if (document != null) {
100: document.close();
101: }
102: }
103: }
104:
105: /**
106: * Test to validate text extraction of file set.
107: *
108: * @throws Exception when there is an exception
109: */
110: public void testExtract() throws Exception {
111: String filename = System
112: .getProperty("test.pdfbox.util.TextStripper.file");
113: File testDir = new File("test/input");
114:
115: if ((filename == null) || (filename.length() == 0)) {
116: File[] testFiles = testDir.listFiles(new FilenameFilter() {
117: public boolean accept(File dir, String name) {
118: return (name.endsWith(".pdf"));
119: }
120: });
121:
122: for (int n = 0; n < testFiles.length; n++) {
123: doTestFile(testFiles[n], false);
124: }
125: } else {
126: //doTestFile(new File(testDir, filename), true);
127: }
128: }
129:
130: /**
131: * Set the tests in the suite for this test class.
132: *
133: * @return the Suite.
134: */
135: public static Test suite() {
136: return new TestSuite(TestTextStripperPerformance.class);
137: }
138:
139: /**
140: * Command line execution.
141: *
142: * @param args Command line arguments.
143: */
144: public static void main(String[] args) {
145: String[] arg = { TestTextStripperPerformance.class.getName() };
146: junit.textui.TestRunner.main(arg);
147: }
148: }
|