001: /**
002: * Copyright (c) 2003-2006, www.pdfbox.org
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * 1. Redistributions of source code must retain the above copyright notice,
009: * this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: * 3. Neither the name of pdfbox; nor the names of its
014: * contributors may be used to endorse or promote products derived from this
015: * software without specific prior written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
021: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: *
028: * http://www.pdfbox.org
029: *
030: */package org.pdfbox.cos;
031:
032: import java.io.BufferedInputStream;
033: import java.io.BufferedOutputStream;
034: import java.io.ByteArrayInputStream;
035: import java.io.InputStream;
036: import java.io.IOException;
037: import java.io.OutputStream;
038:
039: import java.util.List;
040:
041: import org.pdfbox.filter.Filter;
042: import org.pdfbox.filter.FilterManager;
043:
044: import org.pdfbox.pdfparser.PDFStreamParser;
045:
046: import org.pdfbox.exceptions.COSVisitorException;
047:
048: import org.pdfbox.io.RandomAccess;
049: import org.pdfbox.io.RandomAccessFileInputStream;
050: import org.pdfbox.io.RandomAccessFileOutputStream;
051:
052: /**
053: * This class represents a stream object in a PDF document.
054: *
055: * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
056: * @version $Revision: 1.39 $
057: */
058: public class COSStream extends COSDictionary {
059: private static final int BUFFER_SIZE = 16384;
060:
061: private RandomAccess file;
062: /**
063: * The stream with all of the filters applied.
064: */
065: private RandomAccessFileOutputStream filteredStream;
066:
067: /**
068: * The stream with no filters, this contains the useful data.
069: */
070: private RandomAccessFileOutputStream unFilteredStream;
071:
072: /**
073: * Constructor. Creates a new stream with an empty dictionary.
074: *
075: * @param storage The intermediate storage for the stream.
076: */
077: public COSStream(RandomAccess storage) {
078: super ();
079: file = storage;
080: }
081:
082: /**
083: * Constructor.
084: *
085: * @param dictionary The dictionary that is associated with this stream.
086: * @param storage The intermediate storage for the stream.
087: */
088: public COSStream(COSDictionary dictionary, RandomAccess storage) {
089: super (dictionary);
090: file = storage;
091: }
092:
093: /**
094: * This will replace this object with the data from the new object. This
095: * is used to easily maintain referential integrity when changing references
096: * to new objects.
097: *
098: * @param stream The stream that have the new values in it.
099: */
100: public void replaceWithStream(COSStream stream) {
101: this .clear();
102: this .addAll(stream);
103: file = stream.file;
104: filteredStream = stream.filteredStream;
105: unFilteredStream = stream.unFilteredStream;
106: }
107:
108: /**
109: * This will get the scratch file associated with this stream.
110: *
111: * @return The scratch file where this stream is being stored.
112: */
113: public RandomAccess getScratchFile() {
114: return file;
115: }
116:
117: /**
118: * This will get all the tokens in the stream.
119: *
120: * @return All of the tokens in the stream.
121: *
122: * @throws IOException If there is an error parsing the stream.
123: */
124: public List getStreamTokens() throws IOException {
125: PDFStreamParser parser = new PDFStreamParser(this );
126: parser.parse();
127: return parser.getTokens();
128: }
129:
130: /**
131: * This will get the stream with all of the filters applied.
132: *
133: * @return the bytes of the physical (endoced) stream
134: *
135: * @throws IOException when encoding/decoding causes an exception
136: */
137: public InputStream getFilteredStream() throws IOException {
138: if (filteredStream == null) {
139: doEncode();
140: }
141: long position = filteredStream.getPosition();
142: long length = filteredStream.getLength();
143:
144: RandomAccessFileInputStream input = new RandomAccessFileInputStream(
145: file, position, length);
146: return new BufferedInputStream(input, BUFFER_SIZE);
147: }
148:
149: /**
150: * This will get the logical content stream with none of the filters.
151: *
152: * @return the bytes of the logical (decoded) stream
153: *
154: * @throws IOException when encoding/decoding causes an exception
155: */
156: public InputStream getUnfilteredStream() throws IOException {
157: InputStream retval = null;
158: if (unFilteredStream == null) {
159: doDecode();
160: }
161:
162: //if unFilteredStream is still null then this stream has not been
163: //created yet, so we should return null.
164: if (unFilteredStream != null) {
165: long position = unFilteredStream.getPosition();
166: long length = unFilteredStream.getLength();
167: RandomAccessFileInputStream input = new RandomAccessFileInputStream(
168: file, position, length);
169: retval = new BufferedInputStream(input, BUFFER_SIZE);
170: } else {
171: // We should check if the COSStream contains data, maybe it
172: // has been created with a RandomAccessFile - which is not
173: // necessary empty.
174: // In this case, the creation was been done as an input, this should
175: // be the unfiltered file, since no filter has been applied yet.
176: // if ( (file != null) &&
177: // (file.length() > 0) )
178: // {
179: // retval = new RandomAccessFileInputStream( file,
180: // 0,
181: // file.length() );
182: // }
183: // else
184: // {
185: //if there is no stream data then simply return an empty stream.
186: retval = new ByteArrayInputStream(new byte[0]);
187: // }
188: }
189: return retval;
190: }
191:
192: /**
193: * visitor pattern double dispatch method.
194: *
195: * @param visitor The object to notify when visiting this object.
196: * @return any object, depending on the visitor implementation, or null
197: * @throws COSVisitorException If an error occurs while visiting this object.
198: */
199: public Object accept(ICOSVisitor visitor)
200: throws COSVisitorException {
201: return visitor.visitFromStream(this );
202: }
203:
204: /**
205: * This will decode the physical byte stream applying all of the filters to the stream.
206: *
207: * @throws IOException If there is an error applying a filter to the stream.
208: */
209: private void doDecode() throws IOException {
210: // FIXME: We shouldn't keep the same reference?
211: unFilteredStream = filteredStream;
212:
213: COSBase filters = getFilters();
214: if (filters == null) {
215: //then do nothing
216: } else if (filters instanceof COSName) {
217: doDecode((COSName) filters);
218: } else if (filters instanceof COSArray) {
219: COSArray filterArray = (COSArray) filters;
220: for (int i = 0; i < filterArray.size(); i++) {
221: COSName filterName = (COSName) filterArray.get(i);
222: doDecode(filterName);
223: }
224: } else {
225: throw new IOException("Error: Unknown filter type:"
226: + filters);
227: }
228: }
229:
230: /**
231: * This will decode applying a single filter on the stream.
232: *
233: * @param filterName The name of the filter.
234: *
235: * @throws IOException If there is an error parsing the stream.
236: */
237: private void doDecode(COSName filterName) throws IOException {
238: FilterManager manager = getFilterManager();
239: Filter filter = manager.getFilter(filterName);
240: InputStream input;
241:
242: boolean done = false;
243: IOException exception = null;
244: long position = unFilteredStream.getPosition();
245: long length = unFilteredStream.getLength();
246:
247: if (length == 0) {
248: //if the length is zero then don't bother trying to decode
249: //some filters don't work when attempting to decode
250: //with a zero length stream. See zlib_error_01.pdf
251: unFilteredStream = new RandomAccessFileOutputStream(file);
252: done = true;
253: } else {
254: //ok this is a simple hack, sometimes we read a couple extra
255: //bytes that shouldn't be there, so we encounter an error we will just
256: //try again with one less byte.
257: for (int tryCount = 0; !done && tryCount < 5; tryCount++) {
258: try {
259: input = new BufferedInputStream(
260: new RandomAccessFileInputStream(file,
261: position, length), BUFFER_SIZE);
262: unFilteredStream = new RandomAccessFileOutputStream(
263: file);
264: filter.decode(input, unFilteredStream, this );
265: done = true;
266: } catch (IOException io) {
267: length--;
268: exception = io;
269: }
270: }
271: }
272: if (!done) {
273: throw exception;
274: }
275: }
276:
277: /**
278: * This will encode the logical byte stream applying all of the filters to the stream.
279: *
280: * @throws IOException If there is an error applying a filter to the stream.
281: */
282: private void doEncode() throws IOException {
283: filteredStream = unFilteredStream;
284:
285: COSBase filters = getFilters();
286: if (filters == null) {
287: //there is no filter to apply
288: } else if (filters instanceof COSName) {
289: doEncode((COSName) filters);
290: } else if (filters instanceof COSArray) {
291: // apply filters in reverse order
292: COSArray filterArray = (COSArray) filters;
293: for (int i = filterArray.size() - 1; i >= 0; i--) {
294: COSName filterName = (COSName) filterArray.get(i);
295: doEncode(filterName);
296: }
297: }
298: }
299:
300: /**
301: * This will encode applying a single filter on the stream.
302: *
303: * @param filterName The name of the filter.
304: *
305: * @throws IOException If there is an error parsing the stream.
306: */
307: private void doEncode(COSName filterName) throws IOException {
308: FilterManager manager = getFilterManager();
309: Filter filter = manager.getFilter(filterName);
310: InputStream input;
311:
312: input = new BufferedInputStream(
313: new RandomAccessFileInputStream(file, filteredStream
314: .getPosition(), filteredStream.getLength()),
315: BUFFER_SIZE);
316: filteredStream = new RandomAccessFileOutputStream(file);
317: filter.encode(input, filteredStream, this );
318: }
319:
320: /**
321: * This will return the filters to apply to the byte stream.
322: * The method will return
323: * - null if no filters are to be applied
324: * - a COSName if one filter is to be applied
325: * - a COSArray containing COSNames if multiple filters are to be applied
326: *
327: * @return the COSBase object representing the filters
328: */
329: public COSBase getFilters() {
330: return getDictionaryObject(COSName.FILTER);
331: }
332:
333: /**
334: * This will create a new stream for which filtered byte should be
335: * written to. You probably don't want this but want to use the
336: * createUnfilteredStream, which is used to write raw bytes to.
337: *
338: * @return A stream that can be written to.
339: *
340: * @throws IOException If there is an error creating the stream.
341: */
342: public OutputStream createFilteredStream() throws IOException {
343: filteredStream = new RandomAccessFileOutputStream(file);
344: unFilteredStream = null;
345: return new BufferedOutputStream(filteredStream, BUFFER_SIZE);
346: }
347:
348: /**
349: * This will create a new stream for which filtered byte should be
350: * written to. You probably don't want this but want to use the
351: * createUnfilteredStream, which is used to write raw bytes to.
352: *
353: * @param expectedLength An entry where a length is expected.
354: *
355: * @return A stream that can be written to.
356: *
357: * @throws IOException If there is an error creating the stream.
358: */
359: public OutputStream createFilteredStream(COSBase expectedLength)
360: throws IOException {
361: filteredStream = new RandomAccessFileOutputStream(file);
362: filteredStream.setExpectedLength(expectedLength);
363: unFilteredStream = null;
364: return new BufferedOutputStream(filteredStream, BUFFER_SIZE);
365: }
366:
367: /**
368: * set the filters to be applied to the stream.
369: *
370: * @param filters The filters to set on this stream.
371: *
372: * @throws IOException If there is an error clearing the old filters.
373: */
374: public void setFilters(COSBase filters) throws IOException {
375: setItem(COSName.FILTER, filters);
376: // kill cached filtered streams
377: filteredStream = null;
378: }
379:
380: /**
381: * This will create an output stream that can be written to.
382: *
383: * @return An output stream which raw data bytes should be written to.
384: *
385: * @throws IOException If there is an error creating the stream.
386: */
387: public OutputStream createUnfilteredStream() throws IOException {
388: unFilteredStream = new RandomAccessFileOutputStream(file);
389: filteredStream = null;
390: return new BufferedOutputStream(unFilteredStream, BUFFER_SIZE);
391: }
392: }
|