001: /**
002: * Copyright (c) 2003-2004, www.pdfbox.org
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * 1. Redistributions of source code must retain the above copyright notice,
009: * this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: * 3. Neither the name of pdfbox; nor the names of its
014: * contributors may be used to endorse or promote products derived from this
015: * software without specific prior written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
021: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: *
028: * http://www.pdfbox.org
029: *
030: */package org.pdfbox;
031:
032: import java.io.File;
033: import java.io.IOException;
034: import java.util.Iterator;
035: import java.util.List;
036: import java.util.Map;
037:
038: import org.pdfbox.pdmodel.PDDocument;
039: import org.pdfbox.pdmodel.PDPage;
040: import org.pdfbox.pdmodel.PDResources;
041: import org.pdfbox.pdmodel.encryption.AccessPermission;
042: import org.pdfbox.pdmodel.encryption.StandardDecryptionMaterial;
043: import org.pdfbox.pdmodel.graphics.xobject.PDXObjectImage;
044:
045: /**
046: * This will read a read pdf and extract images. <br/><br/>
047: *
048: * usage: java org.pdfbox.ExtractImages <pdffile> <password> [imageprefix]
049: *
050: * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
051: * @version $Revision: 1.7 $
052: */
053: public class ExtractImages {
054: private int imageCounter = 1;
055:
056: private static final String PASSWORD = "-password";
057: private static final String PREFIX = "-prefix";
058:
059: /**
060: * This is the entry point for the application.
061: *
062: * @param args The command-line arguments.
063: *
064: * @throws Exception If there is an error decrypting the document.
065: */
066: public static void main(String[] args) throws Exception {
067: ExtractImages extractor = new ExtractImages();
068: extractor.extractImages(args);
069: }
070:
071: private void extractImages(String[] args) throws Exception {
072: if (args.length < 1 || args.length > 3) {
073: usage();
074: } else {
075: String pdfFile = null;
076: String password = "";
077: String prefix = null;
078: for (int i = 0; i < args.length; i++) {
079: if (args[i].equals(PASSWORD)) {
080: i++;
081: if (i >= args.length) {
082: usage();
083: }
084: password = args[i];
085: } else if (args[i].equals(PREFIX)) {
086: i++;
087: if (i >= args.length) {
088: usage();
089: }
090: prefix = args[i];
091: } else {
092: if (pdfFile == null) {
093: pdfFile = args[i];
094: }
095: }
096: }
097: if (pdfFile == null) {
098: usage();
099: } else {
100: if (prefix == null && pdfFile.length() > 4) {
101: prefix = pdfFile.substring(0, pdfFile.length() - 4);
102: }
103:
104: PDDocument document = null;
105:
106: try {
107: document = PDDocument.load(pdfFile);
108:
109: if (document.isEncrypted()) {
110:
111: StandardDecryptionMaterial spm = new StandardDecryptionMaterial(
112: password);
113: document.openProtection(spm);
114: AccessPermission ap = document
115: .getCurrentAccessPermission();
116:
117: if (!ap.canExtractContent()) {
118: throw new IOException(
119: "Error: You do not have permission to extract images.");
120: }
121: }
122:
123: List pages = document.getDocumentCatalog()
124: .getAllPages();
125: Iterator iter = pages.iterator();
126: while (iter.hasNext()) {
127: PDPage page = (PDPage) iter.next();
128: PDResources resources = page.getResources();
129: Map images = resources.getImages();
130: if (images != null) {
131: Iterator imageIter = images.keySet()
132: .iterator();
133: while (imageIter.hasNext()) {
134: String key = (String) imageIter.next();
135: PDXObjectImage image = (PDXObjectImage) images
136: .get(key);
137: String name = getUniqueFileName(key,
138: image.getSuffix());
139: System.out.println("Writing image:"
140: + name);
141: image.write2file(name);
142: }
143: }
144: }
145: } finally {
146: if (document != null) {
147: document.close();
148: }
149: }
150: }
151: }
152: }
153:
154: private String getUniqueFileName(String prefix, String suffix) {
155: String uniqueName = null;
156: File f = null;
157: while (f == null || f.exists()) {
158: uniqueName = prefix + "-" + imageCounter;
159: f = new File(uniqueName + "." + suffix);
160: imageCounter++;
161: }
162: return uniqueName;
163: }
164:
165: /**
166: * This will print the usage requirements and exit.
167: */
168: private static void usage() {
169: System.err
170: .println("Usage: java org.pdfbox.ExtractImages [OPTIONS] <PDF file>\n"
171: + " -password <password> Password to decrypt document\n"
172: + " -prefix <image-prefix> Image prefix(default to pdf name)\n"
173: + " <PDF file> The PDF document to use\n");
174: System.exit(1);
175: }
176:
177: }
|