001: /*
002: * $Id: CCITTFaxDecode.java,v 1.3 2007/12/20 18:33:33 rbair Exp $
003: *
004: * Copyright 2004 Sun Microsystems, Inc., 4150 Network Circle,
005: * Santa Clara, California 95054, U.S.A. All rights reserved.
006: *
007: * This library is free software; you can redistribute it and/or
008: * modify it under the terms of the GNU Lesser General Public
009: * License as published by the Free Software Foundation; either
010: * version 2.1 of the License, or (at your option) any later version.
011: *
012: * This library is distributed in the hope that it will be useful,
013: * but WITHOUT ANY WARRANTY; without even the implied warranty of
014: * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
015: * Lesser General Public License for more details.
016: *
017: * You should have received a copy of the GNU Lesser General Public
018: * License along with this library; if not, write to the Free Software
019: * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
020: */
021:
022: package com.sun.pdfview.decode;
023:
024: import java.io.BufferedReader;
025: import java.io.ByteArrayOutputStream;
026: import java.io.IOException;
027: import java.io.InputStream;
028: import java.io.InputStreamReader;
029: import java.nio.ByteBuffer;
030:
031: import com.sun.pdfview.PDFObject;
032: import com.sun.pdfview.PDFParseException;
033:
034: /**
035: * Decode CCITT Group 4 format coding into a byte array
036: * @author Mike Wessler
037: */
038: public class CCITTFaxDecode {
039: // Group-3 coding:
040: // 1-d coding: each code word represents a segment of white or black
041: // white and black runs alternate. white run to start.
042: // 1728 pixels across
043: // 0-63: 1 terminating code word
044: // 64-1728: makeup word+terminating word
045: // EOL: 0000 0000 0001 (some number of 0's >10, followed by a 1)
046: // FILL (between data and EOL: variable count of 0's
047: // ends with 6 consecutive EOLs.
048: // code words in file CCITTCodes
049:
050: class CCITTTreeNode {
051: }
052:
053: class CCITTTreeBranch extends CCITTTreeNode {
054: CCITTTreeNode zero;
055: CCITTTreeNode one;
056: }
057:
058: class CCITTTreeLeaf extends CCITTTreeNode {
059: int code;
060:
061: public CCITTTreeLeaf(int code) {
062: this .code = code;
063: }
064: }
065:
066: static CCITTTreeNode blackTree;
067: static CCITTTreeNode whiteTree;
068:
069: /**
070: * read in the file "CCITTCodes" to generate two decision trees.
071: */
072: private void createTrees() throws IOException {
073: InputStream is = getClass().getResourceAsStream("CCITTCodes");
074: BufferedReader br = new BufferedReader(
075: new InputStreamReader(is));
076: String line;
077: CCITTTreeNode base = null;
078: while ((line = br.readLine()) != null) {
079: // parse that line
080: if (line.startsWith("# BLACK")) {
081: base = blackTree = new CCITTTreeBranch();
082: } else if (line.startsWith("# WHITE")) {
083: base = whiteTree = new CCITTTreeBranch();
084: } else if (!line.startsWith("#") && line.length() > 0) {
085: // trace path of bits
086: int scanbit = 0;
087: char this Char = line.charAt(scanbit++);
088: char nextChar;
089: CCITTTreeBranch tn = (CCITTTreeBranch) base;
090: while ((nextChar = line.charAt(scanbit++)) != ' ') {
091: if (this Char == '0') {
092: if (tn.zero == null) {
093: tn.zero = new CCITTTreeBranch();
094: }
095: if (tn.zero instanceof CCITTTreeLeaf) {
096: throw new PDFParseException("Bad form: "
097: + line
098: + " has a leaf at bit number "
099: + (scanbit - 1));
100: }
101: tn = (CCITTTreeBranch) tn.zero;
102: } else {
103: if (tn.one == null) {
104: tn.one = new CCITTTreeBranch();
105: }
106: if (tn.one instanceof CCITTTreeLeaf) {
107: throw new PDFParseException("Bad form: "
108: + line
109: + " has a leaf at bit number "
110: + (scanbit - 1));
111: }
112: tn = (CCITTTreeBranch) tn.one;
113: }
114: this Char = nextChar;
115: }
116: int code = Integer.parseInt(line.substring(scanbit));
117: if (this Char == '0') {
118: if (tn.zero != null) {
119: throw new PDFParseException(
120: "Bad form: last char of "
121: + line
122: + " is already occupied in the tree");
123: }
124: tn.zero = new CCITTTreeLeaf(code);
125: } else {
126: if (tn.one != null) {
127: throw new PDFParseException(
128: "Bad form: last char of "
129: + line
130: + " is already occupied in the tree");
131: }
132: tn.one = new CCITTTreeLeaf(code);
133: }
134: }
135: }
136: }
137:
138: private ByteBuffer buf;
139: private int bytenum = 0;
140: private int bitnum = 8;
141: private byte bits;
142:
143: // color change boundaries. refline[0](=0) -> refline[1] = white
144: // even digits are start of white ranges, odds are start of black ranges
145: private int refline[]; // indices of color changes on reference line
146: private int reflen; // size of refline (may not need this)
147: private int refloc; // where the current spot is within refline
148: private int curline[]; // indices of color changes on current line
149: private int curlen; // length of curline
150: private int prevspan; // what color we're drawing (derive from curlen?)
151: private int nlines; // what line # we're on
152:
153: private int WHITEBIT = 0;
154:
155: private ByteArrayOutputStream baos;
156: private int destbyte; // the byte
157: private int bitsremaining = 8;
158:
159: /**
160: * initialize the decoder with a byte buffer in
161: * CCITT Group 4 Fax form
162: */
163: private CCITTFaxDecode(ByteBuffer buf) throws IOException {
164: if (blackTree == null) {
165: createTrees();
166: }
167:
168: // copy the data
169: // [ JK FIXME this could probably
170:
171: this .buf = buf;
172: bitnum = 0;
173: bits = buf.get(0);
174: bytenum = 1;
175: }
176:
177: private void invert() {
178: WHITEBIT = 1 - WHITEBIT;
179: }
180:
181: /**
182: * get the next bit from the stream.
183: * @return true if the next bit is 1, false if it's a 0
184: */
185: private boolean nextBit() {
186: if (bitnum == 8) {
187: bitnum = 0;
188: try {
189: bits = buf.get(bytenum++);
190: } catch (RuntimeException e) {
191: System.out.println("Error: bytenum=" + bytenum + " of "
192: + buf.limit());
193: throw e;
194: }
195: }
196: bitnum++;
197: boolean value = (bits & 0x80) != 0;//(bits&1)!=0;//
198: bits <<= 1;//bits>>=1;//
199: // System.out.print(value?"1":"0");
200: return value;
201: }
202:
203: /**
204: * get the next code word from the stream.
205: * @param base which tree to scan (black or white)
206: * @return the code word
207: */
208: private int nextCode(CCITTTreeNode base) throws PDFParseException {
209: while (!(base instanceof CCITTTreeLeaf)) {
210: if (nextBit()) {
211: base = ((CCITTTreeBranch) base).one;
212: } else {
213: base = ((CCITTTreeBranch) base).zero;
214: }
215: if (base == null) {
216: System.out.println(" bleah.");
217: throw new PDFParseException("Bad code word!");
218: }
219: }
220: // System.out.println(" ="+((CCITTTreeLeaf)base).code);
221: return ((CCITTTreeLeaf) base).code;
222: }
223:
224: /**
225: * get the next distance encoded in the stream. Distances can
226: * consist of one or two code words.
227: * @param base which tree to scan (black or white)
228: * @return the distance encoded
229: */
230: private int nextDist(CCITTTreeNode base) throws PDFParseException {
231: int tot, code;
232: tot = code = nextCode(base);
233: while (code >= 64) {
234: code = nextCode(base);
235: tot += code;
236: }
237: return tot;
238: }
239:
240: /**
241: * continue scanning bits until the end of an encoded line
242: * is reached. This method is no longer used.
243: */
244: private void skipToEOL() {
245: int bitcount = 0;
246: int totcount = 0;
247: while (true) {
248: if (!nextBit()) {
249: bitcount++;
250: } else {
251: if (bitcount > 10) {
252: break;
253: }
254: bitcount = 0;
255: }
256: totcount++;
257: if ((totcount & 7) == 0) {
258: System.out.print(" ");
259: }
260: if ((totcount & 63) == 0) {
261: System.out.println();
262: }
263: }
264: // System.out.println("\nSkipped "+(totcount/8)+" bytes. Bytenum is now "+bytenum);
265: }
266:
267: /**
268: * add a given number of pixels of a particular color to
269: * the current line. This is performed in intermediate
270: * form, recording only the locations of the color changes.
271: *
272: */
273: private void addColor(int color, int num) {
274: if (prevspan == color) {
275: // add the new length to the previous length
276: curline[curlen - 1] += num;
277: // System.out.println("Added "+num+" to current color ("+color+") for a total of "+curline[curlen-1]);
278: } else {
279: if (curlen == curline.length) {
280: int nline[] = new int[curline.length * 2];
281: System.arraycopy(curline, 0, nline, 0, curline.length);
282: curline = nline;
283: }
284: curline[curlen] = curline[curlen - 1] + num;
285: curlen++;
286: prevspan = color;
287: // System.out.println("New span, width="+num+" for color "+color+" for a total of "+curline[curlen-1]);
288: }
289: }
290:
291: /**
292: * Find the B1 location for the previous line, given the A0 color.
293: * See the description of CCITT codes for what this means.
294: * @param a0color BLACK or WHITE
295: * @return the B1 location
296: */
297: private int findB1(int a0color) {
298: // a0pos= curline[curlen-1]
299: // start search at refloc
300: int start = curlen == 1 ? -1 : curline[curlen - 1];
301: while (refline[refloc] <= start) {
302: refloc++;
303: }
304: int scan = refloc;
305: // match color: refloc&1==0 means white
306: if (((scan & 1) == 0) == (a0color == WHITE)) {
307: scan++;
308: }
309: return refline[scan] - curline[curlen - 1];
310: }
311:
312: /**
313: * Find the B2 location for the previous line, given the A0 color.
314: * See the description of CCITT codes for what this means.
315: * @param a0color BLACK or WHITE
316: * @return the B2 location
317: */
318: private int findB2(int a0color) {
319: // a0pos= curline[curlen-1]
320: // start search at refloc
321: int start = curlen == 1 ? -1 : curline[curlen - 1];
322: while (refline[refloc] <= start) {
323: refloc++;
324: }
325: int scan = refloc;
326: // match color: refloc&1==0 means white
327: if (((scan & 1) == 0) == (a0color == WHITE)) {
328: scan++;
329: }
330: return refline[scan + 1] - curline[curlen - 1];
331: }
332:
333: /**
334: * dump actual black bits into the output stream. Unlike
335: * addColor(), this actually puts bits into the stream.
336: *
337: * @param span the number of black pixels to add
338: */
339: private void stuffBlackBits(int span) {
340: int num = span;
341: int fillbits = (0xFF >> (8 - bitsremaining));
342: destbyte |= (byte) fillbits;
343: bitsremaining -= num;
344: while (bitsremaining <= 0) {
345: baos.write(destbyte);
346: destbyte = 0xFF;
347: bitsremaining += 8;
348: }
349: destbyte &= (0xFF << bitsremaining);
350: }
351:
352: /**
353: * dump actual white bits into the output stream. Unlike
354: * addColor(), this actually puts bits into the stream.
355: *
356: * @param span the number of white pixels to add
357: */
358: private void stuffWhiteBits(int span) {
359: // stuff blank bytes
360: int num = span;
361: bitsremaining -= num; // bitsremaining might go negative!
362: while (bitsremaining <= 0) {
363: baos.write(destbyte);
364: destbyte = 0;
365: bitsremaining += 8;
366: }
367: }
368:
369: /**
370: * interpret the intermediate span form to produce actual
371: * bits for the line.
372: * @param width unused (old error checking)
373: * @param linenum unused (old error checking)
374: */
375: private void processLine(int width, int linenum) {
376: for (int i = 1; i < reflen; i++) {
377: int len = refline[i] - refline[i - 1];
378: if (len > 0) {
379: if ((i & 1) == WHITEBIT) {
380: stuffWhiteBits(len);
381: } else {
382: stuffBlackBits(len);
383: }
384: }
385: }
386: stuffWhiteBits(bitsremaining % 8);
387: }
388:
389: public static final int TWOD = 2;
390: public static final int UNCOMPRESSED = 3;
391: public static final int PASS = 4;
392: public static final int VERTICAL = 5;
393: public static final int HORIZONTAL = 6;
394: public static final int BLACK = 1;
395: public static final int WHITE = 0;
396:
397: /**
398: * decode a line of output from the input
399: * @param totlen how long in pixels the line is expected to be
400: * @return how long the line actually was
401: */
402: private int decodeLine(int totlen) throws PDFParseException {
403: // white starts!
404: int linelen = 0;
405: int mode = TWOD;
406: int color = WHITE;
407: int prevlen = 0;
408: curline = new int[500];
409: curlen = 0;
410: curline[curlen++] = 0;
411: prevspan = BLACK;
412: refloc = 0;
413: while (curline[curlen - 1] < totlen) {
414: int len;
415: /*
416: if (mode==WHITE) {
417: len= nextDist(whiteTree);
418: if (len>0) {
419: // addColor(WHITE, len);
420: stuffWhiteBits(len);
421: mode= BLACK;
422: } else if (len==-2) {
423: mode= UNCOMPRESSED;
424: } else if (len<0) {
425: break;
426: }
427: } else if (mode==BLACK) {
428: len= nextDist(blackTree);
429: if (len>0) {
430: // addColor(BLACK, len);
431: stuffBlackBits(len);
432: linelen+= len;
433: mode= WHITE;
434: } else if (len==-2) {
435: mode= UNCOMPRESSED;
436: } else if (len<0) {
437: break;
438: }
439: } else
440: */
441: if (mode == UNCOMPRESSED) {
442: // code words are /0*1/ representing up to 5 zeros
443: // 1->1 01->01... 000001->00000
444: // stop words: 6 zeros-> nil, 7->0 ... 10->0000
445: // bit after the stop word maps 1=black, 0=white
446: int count = 0;
447: while (!nextBit()) {
448: count++;
449: }
450: // System.out.println(" (uncompressed)");
451: if (count <= 5) {
452: // System.out.println("Adding "+count+" white"+(count==5?"":" + 1 black"));
453: addColor(WHITE, count);
454: if (count < 5) {
455: addColor(BLACK, 1);
456: }
457: } else if (count <= 10) {
458: if (count > 6) {
459: addColor(WHITE, count - 6);
460: }
461: if (nextBit()) {
462: color = BLACK;
463: } else {
464: color = WHITE;
465: }
466: // System.out.println("Finishing with "+(count-6)+" white");
467: mode = TWOD;
468: // System.out.println(" new mode="+((mode==WHITE)?"white":"black"));
469: } else {
470: // end of line
471: break;
472: }
473: } else if (mode == TWOD) {
474: // w/ respect to reference line
475: int count = 0;
476: while (!nextBit()) {
477: count++;
478: }
479: if (count == 2) {
480: // System.out.println(" HORIZONTAL MODE");
481: // HORIZONTAL MODE
482: for (int i = 0; i < 2; i++) {
483: if (color == WHITE) {
484: len = nextDist(whiteTree);
485: addColor(WHITE, len);
486: color = BLACK;
487: } else {
488: len = nextDist(blackTree);
489: addColor(BLACK, len);
490: color = WHITE;
491: }
492: }
493: } else if (count == 3) {
494: // PASS MODE
495: // System.out.println(" PASS MODE");
496: len = findB2(color);
497: if (color == WHITE) {
498: addColor(WHITE, len);
499: } else {
500: addColor(BLACK, len);
501: }
502: } else if (count == 6) {
503: // EXTENSION
504: // read the next 3 bits
505: int type = ((nextBit() ? 4 : 0)
506: | (nextBit() ? 2 : 0) | (nextBit() ? 1 : 0));
507: // System.out.println(" EXTENSION <6>: "+type);
508: mode = UNCOMPRESSED;
509: } else if (count == 0) {
510: // VERTICAL MODE, Directly underneath
511: // System.out.println(" VERTICAL (0)");
512: len = findB1(color);
513: if (color == WHITE) {
514: addColor(WHITE, len);
515: color = BLACK;
516: } else {
517: addColor(BLACK, len);
518: color = WHITE;
519: }
520: } else if (count == 11) {
521: // EOL
522: break;
523: } else {
524: int right = nextBit() ? 1 : -1;
525: // distance is 1=1 4=2, 5=3;
526: if (count == 1) {
527: len = right;
528: } else if (count == 4) {
529: len = right * 2;
530: } else if (count == 5) {
531: len = right * 3;
532: } else {
533: throw new PDFParseException("Bad code word! ("
534: + count + "), char=" + bytenum
535: + ", line=" + nlines + ", insertion #"
536: + curlen);
537: }
538: // System.out.println(" VERTICAL MODE ("+len+")");
539: len += findB1(color);
540: if (color == WHITE) {
541: addColor(WHITE, len);
542: color = BLACK;
543: } else {
544: addColor(BLACK, len);
545: color = WHITE;
546: }
547: }
548: }
549: // System.out.println("Line length is now "+curline[curlen-1]+" at "+(curlen-1));
550: if (curline[curlen - 1] > totlen) {
551: throw new PDFParseException(
552: "Line went too long! (bytenum=" + bytenum
553: + ", len=" + (curline[curlen - 1])
554: + " of " + totlen + ", prev=" + prevlen
555: + ")");
556: }
557: prevlen = curline[curlen - 1];
558: }
559: addColor(WHITE, 0);
560: addColor(BLACK, 0);
561: addColor(WHITE, 0);
562: // curline becomes refline
563: refline = curline;
564: reflen = curlen;
565: return curline[curlen - 1];
566: }
567:
568: /**
569: * decode the output bitmap from the input array
570: * @param len the expected length of each line
571: * @return a byte array containing the packed pixels of the image
572: */
573: private ByteBuffer decode(int len, int rows)
574: throws PDFParseException {
575: long time = System.currentTimeMillis();
576: baos = new ByteArrayOutputStream();
577: refline = new int[3];
578: refline[0] = 0;
579: refline[1] = refline[2] = len;
580: reflen = 3;
581: nlines = 0;
582:
583: while (rows < 0 || nlines < rows) {
584: // System.out.println(bytenum);
585: int linelen = decodeLine(len);
586: // System.out.println("Line length= "+linelen);
587: if (linelen == 0) {
588: break;
589: }
590: processLine(len, nlines);
591: nlines++;
592: }
593: // PDFRenderContext.debug("Image was "+len+"x"+nlines, 2);
594: // PDFRenderContext.debug("Took "+((System.currentTimeMillis()-time)/1000.0)+" seconds, size="+baos.size(), 0);
595: return ByteBuffer.wrap(baos.toByteArray());
596: }
597:
598: /**
599: * decode a buffer bits to a bitmap image using the CCITT
600: * Group 4 fax encoding.
601: * @param buf the input byte buffer
602: * @param params the input parameters; must be a PDF dictionary
603: * that contains an entry for "Columns" describing how wide the
604: * image is in pixels.
605: * @return a byte buffer describing the bits of the image. Each
606: * line of the image will be padded to the next byte boundary.
607: */
608: protected static ByteBuffer decode(PDFObject dict, ByteBuffer buf,
609: PDFObject params) throws IOException {
610: // PDFRenderContext.debug(params.toString(), 0);
611: // PDFRenderContext.debug("K: "+params.getDictRef("K")+", cols="+params.getDictRef("Columns"), 0);
612: // PDFRenderContext.debug("Stream contains "+ary.length+" bytes", 0);
613: CCITTFaxDecode me = new CCITTFaxDecode(buf);
614: int len = 1728;
615: int rows = -1;
616: boolean invert = false;
617: PDFObject cols = params.getDictRef("Columns");
618: if (cols != null) {
619: len = cols.getIntValue();
620: }
621: PDFObject height = dict.getDictRef("Height");
622: if (height != null) {
623: rows = height.getIntValue();
624: }
625: PDFObject blackis1 = params.getDictRef("BlackIs1");
626: if (blackis1 != null) {
627: if (blackis1.getBooleanValue() == true) {
628: me.invert();
629: }
630: }
631: return me.decode(len, rows);
632: }
633: }
|