001: /**
002: * Copyright (c) 2003-2005, www.pdfbox.org
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * 1. Redistributions of source code must retain the above copyright notice,
009: * this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: * 3. Neither the name of pdfbox; nor the names of its
014: * contributors may be used to endorse or promote products derived from this
015: * software without specific prior written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
021: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: *
028: * http://www.pdfbox.org
029: *
030: */package org.pdfbox.filter;
031:
032: import java.io.ByteArrayOutputStream;
033: import java.io.IOException;
034: import java.io.InputStream;
035: import java.io.OutputStream;
036: import java.io.PushbackInputStream;
037: import java.io.StreamCorruptedException;
038:
039: import org.pdfbox.cos.COSDictionary;
040:
041: import org.pdfbox.io.NBitInputStream;
042: import org.pdfbox.io.NBitOutputStream;
043:
044: /**
045: * This is the filter used for the LZWDecode filter.
046: *
047: * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
048: * @version $Revision: 1.14 $
049: */
050: public class LZWFilter implements Filter {
051:
052: /**
053: * The LZW clear table code.
054: */
055: public static final long CLEAR_TABLE = 256;
056: /**
057: * The LZW end of data code.
058: */
059: public static final long EOD = 257;
060:
061: /**
062: * This will decode some compressed data.
063: *
064: * @param compressedData The compressed byte stream.
065: * @param result The place to write the uncompressed byte stream.
066: * @param options The options to use to encode the data.
067: *
068: * @throws IOException If there is an error decompressing the stream.
069: */
070: public void decode(InputStream compressedData, OutputStream result,
071: COSDictionary options) throws IOException {
072: //log.debug("decode( )");
073: NBitInputStream in = null;
074: in = new NBitInputStream(compressedData);
075: in.setBitsInChunk(9);
076: LZWDictionary dic = new LZWDictionary();
077: byte firstByte = 0;
078: long nextCommand = 0;
079: while ((nextCommand = in.read()) != EOD) {
080: // log.debug( "decode - nextCommand=" + nextCommand + ", bitsInChunk: " + in.getBitsInChunk());
081:
082: if (nextCommand == CLEAR_TABLE) {
083: in.setBitsInChunk(9);
084: dic = new LZWDictionary();
085: } else {
086: byte[] data = dic.getData(nextCommand);
087: if (data == null) {
088: dic.visit(firstByte);
089: data = dic.getData(nextCommand);
090: dic.clear();
091: }
092: if (data == null) {
093: throw new StreamCorruptedException(
094: "Error: data is null");
095: }
096: dic.visit(data);
097:
098: //log.debug( "decode - dic.getNextCode(): " + dic.getNextCode());
099:
100: if (dic.getNextCode() >= 2047) {
101: in.setBitsInChunk(12);
102: } else if (dic.getNextCode() >= 1023) {
103: in.setBitsInChunk(11);
104: } else if (dic.getNextCode() >= 511) {
105: in.setBitsInChunk(10);
106: } else {
107: in.setBitsInChunk(9);
108: }
109: /**
110: if( in.getBitsInChunk() != dic.getCodeSize() )
111: {
112: in.unread( nextCommand );
113: in.setBitsInChunk( dic.getCodeSize() );
114: System.out.print( "Switching " + nextCommand + " to " );
115: nextCommand = in.read();
116: System.out.println( "" + nextCommand );
117: data = dic.getData( nextCommand );
118: }**/
119: firstByte = data[0];
120: result.write(data);
121: }
122: }
123: result.flush();
124: }
125:
126: /**
127: * This will encode some data.
128: *
129: * @param rawData The raw data to encode.
130: * @param result The place to write to encoded results to.
131: * @param options The options to use to encode the data.
132: *
133: * @throws IOException If there is an error compressing the stream.
134: */
135: public void encode(InputStream rawData, OutputStream result,
136: COSDictionary options) throws IOException {
137: //log.debug("encode( )");
138: PushbackInputStream input = new PushbackInputStream(rawData,
139: 4096);
140: LZWDictionary dic = new LZWDictionary();
141: NBitOutputStream out = new NBitOutputStream(result);
142: out.setBitsInChunk(9); //initially nine
143: out.write(CLEAR_TABLE);
144: ByteArrayOutputStream buffer = new ByteArrayOutputStream();
145: int byteRead = 0;
146: for (int i = 0; (byteRead = input.read()) != -1; i++) {
147: //log.debug( "byteRead = '" + (char)byteRead + "' (0x" + Integer.toHexString(byteRead) + "), i=" + i);
148: buffer.write(byteRead);
149: dic.visit((byte) byteRead);
150: out.setBitsInChunk(dic.getCodeSize());
151:
152: //log.debug( "Getting node '" + new String( buffer.toByteArray() ) + "', buffer.size = " + buffer.size() );
153: LZWNode node = dic.getNode(buffer.toByteArray());
154: int nextByte = input.read();
155: if (nextByte != -1) {
156: //log.debug( "nextByte = '" + (char)nextByte + "' (0x" + Integer.toHexString(nextByte) + ")");
157: LZWNode next = node.getNode((byte) nextByte);
158: if (next == null) {
159: //log.debug("encode - No next node, writing node and resetting buffer (" +
160: // " node.getCode: " + node.getCode() + ")" +
161: // " bitsInChunk: " + out.getBitsInChunk() +
162: // ")");
163: out.write(node.getCode());
164: buffer.reset();
165: }
166:
167: input.unread(nextByte);
168: } else {
169: //log.debug("encode - EOF on lookahead: writing node, resetting buffer, and terminating read loop (" +
170: // " node.getCode: " + node.getCode() + ")" +
171: // " bitsInChunk: " + out.getBitsInChunk() +
172: // ")");
173: out.write(node.getCode());
174: buffer.reset();
175: break;
176: }
177:
178: if (dic.getNextCode() == 4096) {
179: //log.debug("encode - Clearing dictionary and unreading pending buffer data (" +
180: // " bitsInChunk: " + out.getBitsInChunk() +
181: // ")");
182: out.write(CLEAR_TABLE);
183: dic = new LZWDictionary();
184: input.unread(buffer.toByteArray());
185: buffer.reset();
186: }
187: }
188:
189: // Fix the code size based on the fact that we are writing the EOD
190: //
191: if (dic.getNextCode() >= 2047) {
192: out.setBitsInChunk(12);
193: } else if (dic.getNextCode() >= 1023) {
194: out.setBitsInChunk(11);
195: } else if (dic.getNextCode() >= 511) {
196: out.setBitsInChunk(10);
197: } else {
198: out.setBitsInChunk(9);
199: }
200:
201: //log.debug("encode - Writing EOD (" +
202: // " bitsInChunk: " + out.getBitsInChunk() +
203: // ")");
204: out.write(EOD);
205: out.close();
206: result.flush();
207: }
208: }
|