001: package com.etymon.pjx.util;
002:
003: import java.io.*;
004: import java.nio.*;
005: import java.util.*;
006: import com.etymon.pjx.*;
007:
008: /**
009: Appends multiple PDF documents together, forming a new PDF
010: document.
011: @author Nassib Nassar
012: */
013: public class PdfAppender {
014:
015: /**
016: A flag used to indicate whether file names should be
017: printed during the appending process. This is temporary
018: and will be superseded when logging is implemented.
019: */
020: protected boolean _printFileNames = false;
021:
022: protected static PdfObject renumber(PdfObject obj, int offset)
023: throws PdfFormatException {
024:
025: if (obj == null) {
026: return null;
027: }
028:
029: if (obj instanceof PdfArray) {
030: List list = ((PdfArray) obj).getList();
031: ArrayList nlist = new ArrayList(list.size());
032: for (Iterator t = list.iterator(); t.hasNext();) {
033: nlist.add(renumber((PdfObject) t.next(), offset));
034: }
035: return new PdfArray(nlist);
036: }
037:
038: if (obj instanceof PdfDictionary) {
039: Map map = ((PdfDictionary) obj).getMap();
040: HashMap nmap = new HashMap(map.size());
041: for (Iterator t = map.keySet().iterator(); t.hasNext();) {
042: PdfName key = (PdfName) t.next();
043: nmap.put(key,
044: renumber((PdfObject) map.get(key), offset));
045: }
046: return new PdfDictionary(nmap);
047: }
048:
049: if (obj instanceof PdfStream) {
050: PdfStream s = (PdfStream) obj;
051: ByteBuffer bb = s.getBuffer();
052: bb.position(0);
053: return new PdfStream((PdfDictionary) renumber(s
054: .getDictionary(), offset), bb);
055: }
056:
057: if (obj instanceof PdfReference) {
058: PdfReference r = (PdfReference) obj;
059: return new PdfReference(r.getObjectNumber() + offset, 0);
060: }
061:
062: return obj;
063:
064: }
065:
066: /**
067: The array of PDF managers.
068: */
069: protected PdfManager[] _m;
070:
071: /**
072: The current amount to offset (increase) all object numbers
073: by. The renumber(PdfObject, int) method renumbers
074: indirect references by adding this value to their object
075: number.
076: */
077: protected int _renumber_offset;
078:
079: /**
080: Stores whether the {@link #append() append()} method has
081: been used.
082: */
083: protected boolean _used;
084:
085: /**
086: The PDF writer.
087: */
088: protected PdfWriter _w;
089:
090: /**
091: The class is initialized to read a list of PDF documents
092: (<code>PdfManager</code> objects) in order and to write the
093: resultant document to a specified <code>PdfWriter</code>.
094: The <code>PdfWriter</code> should be newly created (i.e. it
095: should not have been previously used for anything); and
096: after {@link #append() append()} has been called, the
097: <code>PdfWriter</code> should be closed and discarded, and
098: this <code>PdfAppender</code> should be discarded.
099: @param managers the documents to read.
100: @param writer the document to write to.
101: @deprecated Use {@link #PdfAppender(List,
102: PdfWriter) PdfAppender(List, PdfWriter)}.
103: */
104: public PdfAppender(PdfManager[] managers, PdfWriter writer) {
105:
106: _m = new PdfManager[managers.length];
107: System.arraycopy(managers, 0, _m, 0, managers.length);
108:
109: _w = writer;
110:
111: _used = false;
112:
113: }
114:
115: /**
116: The class is initialized to read a list of PDF documents
117: (<code>PdfManager</code> objects) in order and to write the
118: resultant document to a specified <code>PdfWriter</code>.
119: The <code>PdfWriter</code> should be newly created (i.e. it
120: should not have been previously used for anything); and
121: after {@link #append() append()} has been called, the
122: <code>PdfWriter</code> should be closed and discarded, and
123: this <code>PdfAppender</code> should be discarded.
124: @param managers the documents to read. This must be a list
125: of <code>PdfManager</code> objects.
126: @param writer the document to write to.
127: @throws PdfFormatException
128: */
129: public PdfAppender(List managers, PdfWriter writer)
130: throws PdfFormatException {
131:
132: _m = new PdfManager[managers.size()];
133: int x = 0;
134: for (Iterator t = managers.iterator(); t.hasNext();) {
135: Object obj = t.next();
136: if (!(obj instanceof PdfManager)) {
137: throw new PdfFormatException(
138: "List element is not a PdfManager instance.");
139: }
140: _m[x++] = (PdfManager) obj;
141: }
142:
143: _w = writer;
144:
145: _used = false;
146:
147: }
148:
149: // needs to be synchronized on the managers
150: /**
151: Performs the append operation. This method can be called
152: only once per instance of this class.
153: @throws IOException
154: @throws PdfFormatException
155: */
156: public void append() throws IOException, PdfFormatException {
157:
158: if (_used) {
159: throw new PdfFormatException(
160: "PdfAppender.append() called more than once per instance.");
161: }
162:
163: _used = true;
164:
165: PdfManager[] ma = _m;
166: PdfWriter w = _w;
167:
168: if (ma.length == 0) {
169: return;
170: }
171:
172: int[] pageTreeRootId = new int[ma.length];
173: int[] pageTreeRootGen = new int[ma.length];
174: PdfDictionary[] pageTreeRoot = new PdfDictionary[ma.length];
175: List[] fieldsRef = new List[ma.length];
176: List[] fields = new List[ma.length];
177: Map newAcroFormMap = null;
178:
179: if (_printFileNames) {
180: System.out.println(ma[0].getReader().getInput().getName());
181: }
182:
183: // first copy ma[0] to the output
184: long pos = ma[0].writeDocument(w);
185:
186: if (ma.length == 1) {
187: return;
188: }
189:
190: long prev = ma[0].getStartxref();
191:
192: // get the page tree root object
193: PdfManager manager = ma[0];
194: PdfModifier modifier = new PdfModifier(manager);
195: PdfReference pageTreeRootRef = modifier
196: .getPageTreeRootReference();
197: pageTreeRootId[0] = pageTreeRootRef.getObjectNumber();
198: pageTreeRootGen[0] = pageTreeRootRef.getGenerationNumber();
199: pageTreeRoot[0] = modifier.getPageTreeRoot();
200:
201: // get the interactive form dictionary
202: PdfDictionary catalog = modifier.getCatalog();
203: PdfObject acroFormObj = (PdfObject) catalog.getMap().get(
204: new PdfName("AcroForm"));
205: PdfDictionary acroForm = (PdfDictionary) ma[0]
206: .getObjectIndirect(acroFormObj);
207: if (acroForm != null) {
208: Map acroFormMap = acroForm.getMap();
209: int acroFormMapSize = acroFormMap.size();
210: // we only copy the dictionary as a whole if
211: // we don't have one yet
212: if (newAcroFormMap == null) {
213: newAcroFormMap = new HashMap(acroFormMap);
214: }
215: // now add fields to our running list
216: PdfObject fieldsObj = (PdfObject) acroFormMap
217: .get(new PdfName("Fields"));
218: PdfArray fa = (PdfArray) ma[0].getObjectIndirect(fieldsObj);
219: List fr = new ArrayList();
220: List ff = new ArrayList();
221: if (fa != null) {
222: for (Iterator t = fa.getList().iterator(); t.hasNext();) {
223: PdfReference f = (PdfReference) t.next();
224: fr.add(f);
225: ff.add(ma[0].getObjectIndirect(f));
226: }
227: }
228: fieldsRef[0] = fr;
229: fields[0] = ff;
230:
231: }
232:
233: int pageCount;
234: PdfInteger countObj = (PdfInteger) manager
235: .getObjectIndirect((PdfObject) (pageTreeRoot[0]
236: .getMap().get(new PdfName("Count"))));
237: if (countObj != null) {
238: pageCount = countObj.getInt();
239: } else {
240: pageCount = 0;
241: }
242:
243: _renumber_offset = manager.getXrefTableSize();
244:
245: // next append the remaining documents
246: for (int mx = 1; mx < ma.length; mx++) {
247:
248: PdfManager m = ma[mx];
249:
250: if (_printFileNames) {
251: System.out.println(m.getReader().getInput().getName());
252: }
253:
254: // first extract needed information, before we
255: // renumber all of the objects. we need the
256: // object number of the root of the page tree.
257:
258: manager = m;
259: modifier = new PdfModifier(manager);
260: pageTreeRootRef = modifier.getPageTreeRootReference();
261: pageTreeRootId[mx] = pageTreeRootRef.getObjectNumber()
262: + _renumber_offset;
263: pageTreeRootGen[mx] = pageTreeRootRef.getGenerationNumber();
264: // we delay setting pageTreeRoot[rax] and
265: // using it to get the number of pages until
266: // later when we can get the renumbered
267: // version of the page tree root
268:
269: // get the interactive form dictionary
270: catalog = modifier.getCatalog();
271: acroFormObj = (PdfObject) catalog.getMap().get(
272: new PdfName("AcroForm"));
273: acroForm = (PdfDictionary) m.getObjectIndirect(acroFormObj);
274: if (acroForm != null) {
275: Map acroFormMap = acroForm.getMap();
276: int acroFormMapSize = acroFormMap.size();
277: // we only copy the dictionary as a whole if
278: // we don't have one yet
279: if (newAcroFormMap == null) {
280: newAcroFormMap = new HashMap(acroFormMapSize);
281: for (Iterator t = acroFormMap.keySet().iterator(); t
282: .hasNext();) {
283: PdfName key = (PdfName) t.next();
284: newAcroFormMap.put(key, renumber(
285: (PdfObject) acroFormMap.get(key),
286: _renumber_offset));
287: }
288: }
289: // now add fields to our running list
290: PdfObject fieldsObj = (PdfObject) acroFormMap
291: .get(new PdfName("Fields"));
292: PdfArray fa = (PdfArray) m.getObjectIndirect(fieldsObj);
293: List fr = new ArrayList();
294: List ff = new ArrayList();
295: if (fa != null) {
296: for (Iterator t = fa.getList().iterator(); t
297: .hasNext();) {
298: PdfReference f = (PdfReference) t.next();
299: fr.add(renumber(f, _renumber_offset));
300: ff.add(renumber(m.getObjectIndirect(f),
301: _renumber_offset));
302: }
303: }
304: fieldsRef[mx] = fr;
305: fields[mx] = ff;
306: }
307:
308: // next read all the objects, renumber them,
309: // and write them to the output
310:
311: int xtSize = m.getXrefTableSize();
312: int nxtSize = xtSize + _renumber_offset;
313:
314: long[] index = new long[nxtSize];
315: int[] generation = new int[nxtSize];
316: byte[] usage = new byte[nxtSize];
317: index[0] = XrefTable.ENTRY_FREE;
318: generation[0] = 65535;
319: usage[0] = XrefTable.ENTRY_FREE;
320:
321: for (int x = 1; x < xtSize; x++) {
322:
323: PdfObject obj = m.getObject(x);
324:
325: if (obj != null) {
326:
327: obj = renumber(obj, _renumber_offset);
328:
329: index[_renumber_offset + x] = pos;
330: generation[_renumber_offset + x] = 0;
331: usage[_renumber_offset + x] = XrefTable.ENTRY_IN_USE;
332:
333: pos += w.writeObjectIndirect(obj, x
334: + _renumber_offset, 0);
335:
336: if ((x + _renumber_offset) == pageTreeRootId[mx]) {
337: pageTreeRoot[mx] = (PdfDictionary) obj;
338:
339: // now we can get the number of pages
340: countObj = (PdfInteger) manager
341: .getObjectIndirect((PdfObject) (pageTreeRoot[mx]
342: .getMap().get(new PdfName(
343: "Count"))));
344: if (countObj != null) {
345: pageCount += countObj.getInt();
346: }
347:
348: }
349: } else {
350:
351: generation[_renumber_offset + x] = 0;
352: usage[_renumber_offset + x] = XrefTable.ENTRY_FREE;
353:
354: }
355:
356: }
357:
358: // finally, write the xref table and trailer
359:
360: PdfDictionary trailer = m.getTrailerDictionary();
361: Map trailerMap = trailer.getMap();
362:
363: HashMap ntrailerMap = new HashMap(trailerMap);
364:
365: ntrailerMap.put(new PdfName("Size"), new PdfInteger(
366: _renumber_offset + nxtSize));
367: ntrailerMap.put(new PdfName("Prev"), new PdfLong(prev));
368:
369: prev = pos;
370:
371: PdfDictionary ntrailer = new PdfDictionary(ntrailerMap);
372:
373: XrefTable nxt = new XrefTable(index, generation, usage,
374: ntrailer);
375:
376: pos += w.writeXrefTable(nxt, pos);
377:
378: _renumber_offset = nxtSize;
379:
380: }
381:
382: // write the old page tree roots and field
383: // dictionaries with new parent values
384:
385: int newPageTreeRootId = _renumber_offset;
386: int newFieldsId = _renumber_offset + 1;
387: int newCatalogId = _renumber_offset + 1 + fields.length;
388: int xtSize = _renumber_offset + 2 + fields.length;
389:
390: long[] index = new long[xtSize];
391: int[] generation = new int[xtSize];
392: byte[] usage = new byte[xtSize];
393: Arrays.fill(usage, XrefTable.ENTRY_UNDEFINED);
394: index[0] = 0;
395: generation[0] = 65535;
396: usage[0] = XrefTable.ENTRY_FREE;
397:
398: for (int x = 0; x < pageTreeRoot.length; x++) {
399: index[pageTreeRootId[x]] = pos;
400: generation[pageTreeRootId[x]] = pageTreeRootGen[x];
401: usage[pageTreeRootId[x]] = XrefTable.ENTRY_IN_USE;
402:
403: // update parent value
404: PdfDictionary d = pageTreeRoot[x];
405: Map map = d.getMap();
406: HashMap nmap = new HashMap(map);
407: nmap.put(new PdfName("Parent"), new PdfReference(
408: newPageTreeRootId, 0));
409:
410: pos += w.writeObjectIndirect(new PdfDictionary(nmap),
411: pageTreeRootId[x], pageTreeRootGen[x]);
412: }
413:
414: for (int y = 0; y < fields.length; y++) {
415: if (fields[y] != null) {
416: int fieldsSize = fields[y].size();
417: for (int x = 0; x < fieldsSize; x++) {
418: PdfReference ref = (PdfReference) fieldsRef[y]
419: .get(x);
420: int id = ref.getObjectNumber();
421: int gen = ref.getGenerationNumber();
422: index[id] = pos;
423: generation[id] = gen;
424: usage[id] = XrefTable.ENTRY_IN_USE;
425:
426: // update parent value
427: PdfDictionary d = (PdfDictionary) fields[y].get(x);
428: Map map = d.getMap();
429: HashMap nmap = new HashMap(map);
430: nmap.put(new PdfName("Parent"), new PdfReference(
431: newFieldsId + y, 0));
432:
433: pos += w.writeObjectIndirect(
434: new PdfDictionary(nmap), id, gen);
435: }
436: }
437: }
438:
439: // write the new page tree root, which contains the
440: // root from each document
441:
442: HashMap rootMap = new HashMap();
443: rootMap.put(new PdfName("Type"), new PdfName("Pages"));
444: rootMap.put(new PdfName("Count"), new PdfInteger(pageCount));
445: ArrayList kids = new ArrayList(pageTreeRoot.length);
446: for (int x = 0; x < pageTreeRoot.length; x++) {
447: kids.add(new PdfReference(pageTreeRootId[x],
448: pageTreeRootGen[x]));
449: }
450: rootMap.put(new PdfName("Kids"), new PdfArray(kids));
451:
452: index[newPageTreeRootId] = pos;
453: generation[newPageTreeRootId] = 0;
454: usage[newPageTreeRootId] = XrefTable.ENTRY_IN_USE;
455:
456: pos += w.writeObjectIndirect(new PdfDictionary(rootMap),
457: newPageTreeRootId, 0);
458:
459: // write the new fields roots, which contain all the
460: // fields
461:
462: List fieldRootList = new ArrayList(fields.length);
463:
464: for (int x = 0; x < fields.length; x++) {
465:
466: if (fields[x] != null) {
467:
468: rootMap = new HashMap();
469: kids = new ArrayList(fields[x].size());
470: for (Iterator t = fieldsRef[x].iterator(); t.hasNext();) {
471: PdfReference ref = (PdfReference) t.next();
472: kids.add(ref);
473: }
474: rootMap.put(new PdfName("Kids"), new PdfArray(kids));
475:
476: rootMap.put(new PdfName("T"), new PdfString("A" + x));
477:
478: int n = newFieldsId + x;
479: index[n] = pos;
480: generation[n] = 0;
481: usage[n] = XrefTable.ENTRY_IN_USE;
482:
483: pos += w.writeObjectIndirect(
484: new PdfDictionary(rootMap), n, 0);
485:
486: fieldRootList.add(new PdfReference(n, 0));
487: }
488:
489: }
490:
491: // build the interactive form dictionary for the new
492: // catalog
493:
494: Map buildAcroFormMap;
495: if (newAcroFormMap != null) {
496: buildAcroFormMap = new HashMap(newAcroFormMap);
497: buildAcroFormMap.put(new PdfName("Fields"), new PdfArray(
498: fieldRootList));
499: } else {
500: buildAcroFormMap = null;
501: }
502:
503: // write the new catalog
504:
505: HashMap catalogMap = new HashMap();
506: catalogMap.put(new PdfName("Type"), new PdfName("Catalog"));
507: catalogMap.put(new PdfName("Pages"), new PdfReference(
508: newPageTreeRootId, 0));
509: if (buildAcroFormMap != null) {
510: catalogMap.put(new PdfName("AcroForm"), new PdfDictionary(
511: buildAcroFormMap));
512: }
513:
514: index[newCatalogId] = pos;
515: generation[newCatalogId] = 0;
516: usage[newCatalogId] = XrefTable.ENTRY_IN_USE;
517:
518: pos += w.writeObjectIndirect(new PdfDictionary(catalogMap),
519: newCatalogId, 0);
520:
521: // write the final xref table and trailer
522:
523: HashMap ntrailerMap = new HashMap();
524:
525: ntrailerMap.put(new PdfName("Size"), new PdfInteger(xtSize));
526: ntrailerMap.put(new PdfName("Prev"), new PdfLong(prev));
527: ntrailerMap.put(new PdfName("Root"), new PdfReference(
528: newCatalogId, 0));
529:
530: PdfDictionary ntrailer = new PdfDictionary(ntrailerMap);
531:
532: XrefTable nxt = new XrefTable(index, generation, usage,
533: ntrailer);
534:
535: pos += w.writeXrefTable(nxt, pos);
536:
537: }
538:
539: /**
540: Appends multiple PDF documents together using this class.
541: The documents are specified with a list of file names; the
542: last indicating the output file and the others indicating
543: the input files. The input files are appended in the order
544: they are specified within the list.
545: @param args the list of file names. <b>Note that the last
546: file in this list (<code>args[args.length - 1]</code>) is
547: overwritten with the resultant PDF document.</b>
548: @throws IOException
549: @throws PdfFormatException
550: */
551: public static void main(String[] args) throws IOException,
552: PdfFormatException {
553:
554: if (args.length < 2) {
555: System.err
556: .println("Usage: java com.etymon.pjx.util.PdfAppender [input1.pdf] [input2.pdf] [...] [output.pdf]");
557: return;
558: }
559:
560: List m = new ArrayList(args.length - 1);
561:
562: for (int x = 0; x < args.length - 1; x++) {
563: try {
564: m.add(new PdfManager(new PdfReader(new PdfInputFile(
565: new File(args[x])))));
566: } catch (PdfFormatException e) {
567: throw new PdfFormatException(args[x] + ": "
568: + e.getMessage(), e.getOffset());
569: }
570: }
571:
572: PdfWriter w = new PdfWriter(new File(args[args.length - 1]));
573:
574: PdfAppender a = new PdfAppender(m, w);
575: a._printFileNames = true;
576: a.append();
577:
578: w.close();
579:
580: }
581:
582: }
|