01: /**
02: * Copyright (c) 2003, www.pdfbox.org
03: * All rights reserved.
04: *
05: * Redistribution and use in source and binary forms, with or without
06: * modification, are permitted provided that the following conditions are met:
07: *
08: * 1. Redistributions of source code must retain the above copyright notice,
09: * this list of conditions and the following disclaimer.
10: * 2. Redistributions in binary form must reproduce the above copyright notice,
11: * this list of conditions and the following disclaimer in the documentation
12: * and/or other materials provided with the distribution.
13: * 3. Neither the name of pdfbox; nor the names of its
14: * contributors may be used to endorse or promote products derived from this
15: * software without specific prior written permission.
16: *
17: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20: * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
21: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
22: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
23: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
24: * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27: *
28: * http://www.pdfbox.org
29: *
30: */package org.pdfbox.ant;
31:
32: import java.io.File;
33:
34: import java.util.ArrayList;
35: import java.util.Iterator;
36: import java.util.List;
37:
38: import org.apache.tools.ant.DirectoryScanner;
39: import org.apache.tools.ant.Task;
40:
41: import org.apache.tools.ant.types.FileSet;
42:
43: /**
 * This is an Ant task that allows PDF documents to be converted to text files
 * from an Ant build.
46: *
47: * @author <a href="ben@benlitchfield.com">Ben Litchfield</a>
48: * @version $Revision: 1.8 $
49: */
50: public class PDFToTextTask extends Task {
51: private List fileSets = new ArrayList();
52:
53: /**
54: * Adds a set of files (nested fileset attribute).
55: *
56: * @param set Another fileset to add.
57: */
58: public void addFileset(FileSet set) {
59: fileSets.add(set);
60: }
61:
62: /**
63: * This will perform the execution.
64: */
65: public void execute() {
66: log("PDFToTextTask executing");
67: Iterator fileSetIter = fileSets.iterator();
68: while (fileSetIter.hasNext()) {
69: FileSet next = (FileSet) fileSetIter.next();
70: DirectoryScanner dirScanner = next
71: .getDirectoryScanner(getProject());
72: dirScanner.scan();
73: String[] files = dirScanner.getIncludedFiles();
74: for (int i = 0; i < files.length; i++) {
75: File f = new File(dirScanner.getBasedir(), files[i]);
76: log("processing: " + f.getAbsolutePath());
77: String pdfFile = f.getAbsolutePath();
78: if (pdfFile.toUpperCase().endsWith(".PDF")) {
79: String textFile = pdfFile.substring(0, pdfFile
80: .length() - 3);
81: textFile = textFile + "txt";
82: try {
83: org.pdfbox.ExtractText.main(new String[] {
84: pdfFile, textFile });
85: } catch (Exception e) {
86: log("Error processing " + pdfFile
87: + e.getMessage());
88: }
89: }
90: }
91:
92: }
93: }
94: }
|