Source Code Cross Referenced for Parser.java in » PDF » PDFClown-0.0.5 » it » stefanochizzolini » clown » tokens » Java Source Code / Java DocumentationJava Source Code and Java Documentation

Java Source Code / Java Documentation
1.	6.0 JDK Core
2.	6.0 JDK Modules
3.	6.0 JDK Modules com.sun
4.	6.0 JDK Modules com.sun.java
5.	6.0 JDK Modules sun
6.	6.0 JDK Platform
7.	Ajax
8.	Apache Harmony Java SE
9.	Aspect oriented
10.	Authentication Authorization
11.	Blogger System
12.	Build
13.	Byte Code
14.	Cache
15.	Chart
16.	Chat
17.	Code Analyzer
18.	Collaboration
19.	Content Management System
20.	Database Client
21.	Database DBMS
22.	Database JDBC Connection Pool
23.	Database ORM
24.	Development
25.	EJB Server geronimo
26.	EJB Server GlassFish
27.	EJB Server JBoss 4.2.1
28.	EJB Server resin 3.1.5
29.	ERP CRM Financial
30.	ESB
31.	Forum
32.	GIS
33.	Graphic Library
34.	Groupware
35.	HTML Parser
36.	IDE
37.	IDE Eclipse
38.	IDE Netbeans
39.	Installer
40.	Internationalization Localization
41.	Inversion of Control
42.	Issue Tracking
43.	J2EE
44.	JBoss
45.	JMS
46.	JMX
47.	Library
48.	Mail Clients
49.	Net
50.	Parser
51.	PDF
52.	Portal
53.	Profiler
54.	Project Management
55.	Report
56.	RSS RDF
57.	Rule Engine
58.	Science
59.	Scripting
60.	Search Engine
61.	Security
62.	Servlet Container
63.	Source Control
64.	Swing Library
65.	Template Engine
66.	Test Coverage
67.	Testing
68.	UML
69.	Web Crawler
70.	Web Framework
71.	Web Mail
72.	Web Server
73.	Web Services
74.	Web Services apache cxf 2.0.1
75.	Web Services AXIS2
76.	Wiki Engine
77.	Workflow Engines
78.	XML
79.	XML UI
Java
Java Tutorial
Illustrator Tutorials
GIMP Tutorials
C# / C Sharp
C# / CSharp Tutorial
C# / CSharp Open Source
SQL Server / T-SQL Tutorial
Oracle PL / SQL
Oracle PL/SQL Tutorial
Flash / Flex / ActionScript
VBA / Excel / Access / Word
XML
XML Tutorial
Microsoft Office PowerPoint 2007 Tutorial
Microsoft Office Excel 2007 Tutorial
Microsoft Office Word 2007 Tutorial
Java Source Code / Java Documentation » PDF » PDFClown 0.0.5 » it.stefanochizzolini.clown.tokens
Source Cross Referenced Class Diagram Java Document (Java Doc)
001:        /*
002:          Copyright © 2006,2007 Stefano Chizzolini. http://clown.stefanochizzolini.it
003:
004:          Contributors:
005:         * Stefano Chizzolini (original code developer, http://www.stefanochizzolini.it)
006:         * Haakan Aakerberg (bugfix contributor):
007:              - [FIX:0.0.4:1]
008:              - [FIX:0.0.4:4]
009:
010:          This file should be part of the source code distribution of "PDF Clown library"
011:          (the Program): see the accompanying README files for more info.
012:
013:          This Program is free software; you can redistribute it and/or modify it under
014:          the terms of the GNU General Public License as published by the Free Software
015:          Foundation; either version 2 of the License, or (at your option) any later version.
016:
017:          This Program is distributed in the hope that it will be useful, but WITHOUT ANY
018:          WARRANTY, either expressed or implied; without even the implied warranty of
019:          MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the License for more details.
020:
021:          You should have received a copy of the GNU General Public License along with this
022:          Program (see README files); if not, go to the GNU website (http://www.gnu.org/).
023:
024:          Redistribution and use, with or without modification, are permitted provided that such
025:          redistributions retain the above copyright notice, license and disclaimer, along with
026:          this list of conditions.
027:         */
028:
029:        package it.stefanochizzolini.clown.tokens;
030:
031:        import it.stefanochizzolini.clown.bytes.Buffer;
032:        import it.stefanochizzolini.clown.bytes.IInputStream;
033:        import it.stefanochizzolini.clown.files.File;
034:        import it.stefanochizzolini.clown.objects.PdfArray;
035:        import it.stefanochizzolini.clown.objects.PdfBoolean;
036:        import it.stefanochizzolini.clown.objects.PdfDataObject;
037:        import it.stefanochizzolini.clown.objects.PdfDate;
038:        import it.stefanochizzolini.clown.objects.PdfDictionary;
039:        import it.stefanochizzolini.clown.objects.PdfDirectObject;
040:        import it.stefanochizzolini.clown.objects.PdfHex;
041:        import it.stefanochizzolini.clown.objects.PdfInteger;
042:        import it.stefanochizzolini.clown.objects.PdfLiteral;
043:        import it.stefanochizzolini.clown.objects.PdfName;
044:        import it.stefanochizzolini.clown.objects.PdfNull;
045:        import it.stefanochizzolini.clown.objects.PdfReal;
046:        import it.stefanochizzolini.clown.objects.PdfReference;
047:        import it.stefanochizzolini.clown.objects.PdfStream;
048:        import java.io.EOFException;
049:        import java.util.Date;
050:
051:        /**
052:         Token parser.
053:         <h3>Contract</h3>
054:         <ul>
055:         <li>Preconditions:
056:         <ol>
057:         <li>(none).</li>
058:         </ol>
059:         </li>
060:         <li>Postconditions:
061:         <ol>
062:         <li>(none).</li>
063:         </ol>
064:         </li>
065:         <li>Invariants:
066:         <ol>
067:         <li>Stream data IS kept untouched.</li>
068:         </ol>
069:         </li>
070:         <li>Side-effects:
071:         <ol>
072:         <li>(none).</li>
073:         </ol>
074:         </li>
075:         </ul>
076:         */
077:        public class Parser {
078:            // <class>
079:            // <classes>
080:            public class Reference {
081:                // <class>
082:                // <fields>
083:                private final int generationNumber;
084:                private final int objectNumber;
085:
086:                // </fields>
087:
088:                // <constructors>
089:                private Reference(int objectNumber, int generationNumber) {
090:                    this .objectNumber = objectNumber;
091:                    this .generationNumber = generationNumber;
092:                }
093:
094:                // </constructors>
095:
096:                // <interface>
097:                // <public>
098:                public int getGenerationNumber() {
099:                    return generationNumber;
100:                }
101:
102:                public int getObjectNumber() {
103:                    return objectNumber;
104:                }
105:                // </public>
106:                // </interface>
107:                // </class>
108:            }
109:
110:            // </classes>
111:
112:            // <static>
113:            // <fields>
                // Leading characters of a PDF header line ("%PDF-").
                // NOTE(review): not referenced in the visible part of this class -- confirm where it is used.
114:            private static final String PdfHeader = "%PDF-";
115:
116:            // </fields>
117:
118:            // <interface>
119:            // <protected>
120:            /**
121:              Evaluates whether a character is a delimiter [PDF:1.6:3.1.1].
122:             */
123:            protected static boolean isDelimiter(int c) {
124:                return (c == '(' || c == ')' || c == '<' || c == '>'
125:                        || c == '[' || c == ']' || c == '/' || c == '%');
126:            }
127:
128:            /**
129:              Evaluates whether a character is an EOL marker [PDF:1.6:3.1.1].
130:             */
131:            protected static boolean isEOL(int c) {
132:                return (c == 12 || c == 15);
133:            }
134:
135:            /**
136:              Evaluates whether a character is a white-space [PDF:1.6:3.1.1].
137:             */
138:            protected static boolean isWhitespace(int c) {
139:                return (c == 0 || c == 9 || c == 10 || c == 12 || c == 13 || c == 32);
140:            }
141:
142:            // </protected>
143:            // </interface>
144:            // </static>
145:
146:            // <dynamic>
147:            // <fields>
                // File given to the constructor; handed over to the PdfReference objects built by parsePdfObject().
148:            private File file;
                // Input stream the tokens are read from.
149:            private IInputStream stream;
                // Value of the current token; null when the token carries no value (or no token was parsed yet).
150:            private Object token;
                // Type of the current token.
151:            private TokenTypeEnum tokenType;
152:
                // Set while moveNext() re-enters itself looking for the 'int int R' indirect-reference pattern;
                // reset to false before that look-ahead returns.
153:            private boolean multipleTokenParsing;
154:
155:            // </fields>
156:
157:            // <constructors>
158:            Parser(IInputStream stream, File file) {
159:                this .stream = stream;
160:                this .file = file;
161:            }
162:
163:            // </constructors>
164:
165:            // <interface>
166:            // <public>
167:            public long getLength() {
168:                return stream.getLength();
169:            }
170:
171:            public long getPosition() {
172:                return stream.getPosition();
173:            }
174:
175:            public IInputStream getStream() {
176:                return stream;
177:            }
178:
179:            /**
180:              Gets the currently-parsed token.
181:              @return The current token.
182:             */
183:            public Object getToken() {
184:                return token;
185:            }
186:
187:            /**
188:              Gets the currently-parsed token type.
189:              @return The current token type.
190:             */
191:            public TokenTypeEnum getTokenType() {
192:                return tokenType;
193:            }
194:
195:            public int hashCode() {
196:                return stream.hashCode();
197:            }
198:
199:            /**
200:              @param offset Number of tokens to be skipped before reaching the intended one.
201:             */
202:            public boolean moveNext(int offset) throws FileFormatException {
203:                for (int index = 0; index < offset; index++) {
204:                    if (!moveNext())
205:                        return false;
206:                }
207:
208:                return true;
209:            }
210:
211:            /**
212:              Parses the next token [PDF:1.6:3.1], storing its value and its type as the current token.
213:              <h3>Contract</h3>
214:              <ul>
215:               <li>Preconditions:
216:                <ol>
217:                 <li>To properly parse the current token, the pointer MUST be just before its start (leading white-space characters are ignored).</li>
218:                </ol>
219:               </li>
220:               <li>Postconditions:
221:                <ol>
222:                 <li id="moveNext_contract_post[0]">When this method terminates, the pointer IS at the last byte of the current token.</li>
223:                </ol>
224:               </li>
225:               <li>Invariants:
226:                <ol>
227:                 <li>The byte-level position of the pointer IS anytime (during token parsing) at the end of the current token (whereas the 'current token' represents the token-level position of the pointer).</li>
228:                </ol>
229:               </li>
230:               <li>Side-effects:
231:                <ol>
232:                 <li>See <a href="#moveNext_contract_post[0]">Postconditions</a>.</li>
233:                </ol>
234:               </li>
235:              </ul>
236:              @return Whether a new token was found (false when the stream ends before any non-white-space byte is read).
                  @throws FileFormatException If the stream ends inside a name, a number, a hex string,
                    a literal string or an angle-bracket pair, or if a closing angle bracket is not doubled.
237:             */
238:            public boolean moveNext() throws FileFormatException {
239:                /*
240:                  NOTE: It'd be interesting to evaluate an alternative regular-expression-based
241:                  implementation...
242:                 */
                    // Text accumulator: allocated only by the token types whose value is built from characters.
243:                StringBuilder buffer = null;
244:                token = null;
245:                int c = 0;
246:
247:                // Skip leading white-space characters [PDF:1.6:3.1.1].
248:                try {
249:                    do {
250:                        c = stream.readUnsignedByte();
251:                    } while (isWhitespace(c)); // Keep going while the byte read is a white-space character.
252:                } catch (EOFException e) {
253:                    return false; // End of stream: no further token (tokenType is left untouched).
254:                }
255:
256:                // Which character is it?
257:                switch (c) {
258:                case '/': // Name [PDF:1.6:3.2.4].
259:                    tokenType = TokenTypeEnum.Name;
260:
261:                    /*
262:                      NOTE: As name objects are atomic symbols uniquely defined by sequences of characters,
263:                      the bytes making up the name are never treated as text, so here they are just
264:                      passed through without unescaping.
265:                     */
266:                    buffer = new StringBuilder();
267:                    try {
268:                        while (true) {
269:                            c = stream.readUnsignedByte();
270:                            if (isDelimiter(c) || isWhitespace(c))
271:                                break;
272:
273:                            buffer.append((char) c);
274:                        }
275:                    } catch (EOFException e) {
276:                        throw new FileFormatException(
277:                                "Unexpected EOF (malformed name object).", e,
278:                                stream.getPosition());
279:                    }
280:
281:                    stream.skip(-1); // Recover the first byte after the current token.
282:                    break;
283:                case '0':
284:                case '1':
285:                case '2':
286:                case '3':
287:                case '4':
288:                case '5':
289:                case '6':
290:                case '7':
291:                case '8':
292:                case '9':
293:                case '.':
294:                case '-':
295:                case '+': // Number [PDF:1.6:3.2.2] | Indirect reference.
296:                    switch (c) {
297:                    case '.': // Decimal point.
298:                        tokenType = TokenTypeEnum.Real;
299:                        break;
300:                    case '-':
301:                    case '+': // Signum.
302:                        tokenType = TokenTypeEnum.Integer; // By default (it may be real).
303:                        break;
304:                    default: // Digit.
305:                        if (multipleTokenParsing) // Plain number (multiple token parsing -- see indirect reference search).
306:                        {
307:                            tokenType = TokenTypeEnum.Integer; // By default (it may be real).
308:                        } else // Maybe an indirect reference (postfix notation [PDF:1.6:3.2.9]).
309:                        {
310:                            /*
311:                              NOTE: We need to identify this pattern:
312:                              ref :=  { int int 'R' }
313:                             */
314:                            // Enable multiple token parsing!
315:                            // NOTE: This state MUST be disabled before returning.
                                // NOTE(review): the flag is reset on every return below, but not when one of the
                                // nested calls throws -- confirm whether callers reuse the parser after a failure.
316:                            multipleTokenParsing = true;
317:
318:                            // 1. Object number.
319:                            // Try the possible object number!
                                // The digit just read is pushed back so that the nested call re-reads it; with the flag set,
                                // that call takes the plain-number branch above instead of recursing again.
320:                            stream.skip(-1);
321:                            moveNext();
322:                            // Isn't it a valid object number?
323:                            if (tokenType != TokenTypeEnum.Integer) {
324:                                // Disable multiple token parsing!
325:                                multipleTokenParsing = false;
326:                                return true;
327:                            }
328:                            // Assign object number!
329:                            int objectNumber = (Integer) token;
330:                            // Backup the recovery position!
331:                            long oldOffset = stream.getPosition();
332:
333:                            // 2. Generation number.
334:                            // Try the possible generation number!
                                // NOTE(review): the result of this call is ignored. If the stream ends here the call returns
                                // before touching tokenType (still Integer) while token has been reset to null, so the
                                // unboxing of the generation number below would presumably fail -- confirm.
335:                            moveNext();
336:                            // Isn't it a valid generation number?
337:                            if (tokenType != TokenTypeEnum.Integer) {
338:                                // Rollback!
339:                                stream.seek(oldOffset);
340:                                token = objectNumber;
341:                                tokenType = TokenTypeEnum.Integer;
342:                                // Disable multiple token parsing!
343:                                multipleTokenParsing = false;
344:                                return true;
345:                            }
346:                            // Assign generation number!
347:                            int generationNumber = (Integer) token;
348:
349:                            // 3. Reference keyword.
350:                            // Try the possible reference keyword!
351:                            moveNext();
352:                            // Isn't it a valid reference keyword?
353:                            if (tokenType != TokenTypeEnum.Reference) {
354:                                // Rollback!
355:                                stream.seek(oldOffset);
356:                                token = objectNumber;
357:                                tokenType = TokenTypeEnum.Integer;
358:                                // Disable multiple token parsing!
359:                                multipleTokenParsing = false;
360:                                return true;
361:                            }
                                // Full 'int int R' pattern matched: tokenType is already Reference (set by the nested call).
362:                            token = new Reference(objectNumber,
363:                                    generationNumber);
364:                            // Disable multiple token parsing!
365:                            multipleTokenParsing = false;
366:                            return true;
367:                        }
368:                        break;
369:                    }
370:
371:                    // Building the number...
                        // The first character (digit, sign or point) is always appended; afterwards only digits and
                        // decimal points are accepted, a decimal point switching the token type to Real.
372:                    buffer = new StringBuilder();
373:                    try {
374:                        do {
375:                            buffer.append((char) c);
376:                            c = stream.readUnsignedByte();
377:                            if (c == '.')
378:                                tokenType = TokenTypeEnum.Real;
379:                            else if (c < '0' || c > '9')
380:                                break;
381:                        } while (true);
382:                    } catch (EOFException e) {
383:                        throw new FileFormatException(
384:                                "Unexpected EOF (malformed number object).", e,
385:                                stream.getPosition());
386:                    }
387:
388:                    stream.skip(-1); // Recover the first byte after the current token.
389:                    break;
390:                case '[': // Array (begin).
391:                    tokenType = TokenTypeEnum.ArrayBegin;
392:                    break;
393:                case ']': // Array (end).
394:                    tokenType = TokenTypeEnum.ArrayEnd;
395:                    break;
396:                case '<': // Dictionary (begin) | Hexadecimal string.
397:                    try {
398:                        c = stream.readUnsignedByte();
399:                    } catch (EOFException e) {
400:                        throw new FileFormatException(
401:                                "Unexpected EOF (isolated opening angle-bracket character).",
402:                                e, stream.getPosition());
403:                    }
404:                    // Is it a dictionary (2nd angle bracket [PDF:1.6:3.2.6])?
405:                    if (c == '<') {
406:                        tokenType = TokenTypeEnum.DictionaryBegin;
407:                        break;
408:                    }
409:
410:                    // Hexadecimal string (single angle bracket [PDF:1.6:3.2.3]).
411:                    tokenType = TokenTypeEnum.Hex;
412:
413:                    // [FIX:0.0.4:4] It skipped after the first hexadecimal character, missing it.
                        // NOTE(review): every byte before '>' is appended as-is, white-space included.
414:                    buffer = new StringBuilder();
415:                    try {
416:                        while (c != '>') // NOT string end.
417:                        {
418:                            buffer.append((char) c);
419:
420:                            c = stream.readUnsignedByte();
421:                        }
422:                    } catch (EOFException e) {
423:                        throw new FileFormatException(
424:                                "Unexpected EOF (malformed hex string).", e,
425:                                stream.getPosition());
426:                    }
427:
428:                    break;
429:                case '>': // Dictionary (end).
430:                    try {
431:                        c = stream.readUnsignedByte();
432:                    } catch (EOFException e) {
433:                        throw new FileFormatException(
434:                                "Unexpected EOF (malformed dictionary).", e,
435:                                stream.getPosition());
436:                    }
                        // A closing angle bracket is accepted here only as the first half of '>>'.
437:                    if (c != '>')
438:                        throw new FileFormatException("Malformed dictionary.",
439:                                stream.getPosition());
440:
441:                    tokenType = TokenTypeEnum.DictionaryEnd;
442:
443:                    break;
444:                case '%': // Comment [PDF:1.6:3.1.2].
445:                    tokenType = TokenTypeEnum.Comment;
446:                    // Skipping comment content...
                        // The content is discarded (no buffer is filled), so the token value stays null.
447:                    try {
448:                        do {
449:                            c = stream.readUnsignedByte();
450:                        } while (!isEOL(c));
451:                    } catch (EOFException e) {/* Let it go. */
452:                    }
453:
454:                    break;
455:                case '(': // Literal string [PDF:1.6:3.2.3].
456:                    tokenType = TokenTypeEnum.Literal;
457:
458:                    /*
459:                      NOTE: As literal objects are textual, their characters are unescaped when deserialized.
460:                     */
461:                    buffer = new StringBuilder();
                        // Nesting depth of unescaped parentheses; it drops to -1 on the parenthesis closing the string.
462:                    int level = 0;
463:                    try {
464:                        while (true) {
465:                            c = stream.readUnsignedByte();
466:                            if (c == '(')
467:                                level++;
468:                            else if (c == ')')
469:                                level--;
470:                            else if (c == '\\') {
                                    // Escape sequence: c is replaced by the character it stands for.
471:                                boolean lineBreak = false;
472:                                c = stream.readUnsignedByte();
473:                                switch (c) {
474:                                case 'n':
475:                                    c = '\n';
476:                                    break;
477:                                case 'r':
478:                                    c = '\r';
479:                                    break;
480:                                case 't':
481:                                    c = '\t';
482:                                    break;
483:                                case 'b':
484:                                    c = '\b';
485:                                    break;
486:                                case 'f':
487:                                    c = '\f';
488:                                    break;
489:                                case '(':
490:                                case ')':
491:                                case '\\':
                                    // Escaped parenthesis or backslash: kept literally, nesting level untouched.
492:                                    break;
493:                                case '\r':
                                    // Backslash followed by an end-of-line (CR or CR+LF): line continuation, nothing is appended.
494:                                    lineBreak = true;
495:                                    c = stream.readUnsignedByte();
496:                                    if (c != '\n')
497:                                        stream.skip(-1);
498:                                    break;
499:                                case '\n':
                                    // Backslash followed by LF: line continuation, nothing is appended.
500:                                    lineBreak = true;
501:                                    break;
502:                                default: {
503:                                    // Is it outside the octal encoding?
                                        // (In that case the backslash is dropped and the character is kept as it is.)
504:                                    if (c < '0' || c > '7')
505:                                        break;
506:
507:                                    // Octal.
                                        // Up to three octal digits are consumed; a non-octal byte ends the code early and is pushed back.
508:                                    int octal = c - '0';
509:                                    c = stream.readUnsignedByte();
510:                                    // Octal end?
511:                                    if (c < '0' || c > '7') {
512:                                        c = octal;
513:                                        stream.skip(-1);
514:                                        break;
515:                                    }
516:                                    octal = (octal << 3) + c - '0';
517:                                    c = stream.readUnsignedByte();
518:                                    // Octal end?
519:                                    if (c < '0' || c > '7') {
520:                                        c = octal;
521:                                        stream.skip(-1);
522:                                        break;
523:                                    }
524:                                    octal = (octal << 3) + c - '0';
525:                                    c = octal & 0xff; // Three digits may exceed one byte: only the low 8 bits are kept.
526:                                    break;
527:                                }
528:                                }
529:                                if (lineBreak)
530:                                    continue;
531:                            } else if (c == '\r') {
                                    // Unescaped end-of-line (CR or CR+LF) is normalized to a single LF.
532:                                c = stream.readUnsignedByte();
533:                                if (c != '\n') {
534:                                    c = '\n';
535:                                    stream.skip(-1);
536:                                }
537:                            }
                                // The parenthesis closing the string is not part of its value.
538:                            if (level == -1)
539:                                break;
540:
541:                            buffer.append((char) c);
542:                        }
543:                    } catch (EOFException e) {
544:                        throw new FileFormatException(
545:                                "Unexpected EOF (malformed literal string).",
546:                                e, stream.getPosition());
547:                    }
548:
549:                    break;
550:                case 'R': // Indirect reference.
                        // NOTE(review): only this single byte is consumed, and any token starting with 'R'
                        // takes this branch -- confirm no other keyword beginning with 'R' is expected here.
551:                    tokenType = TokenTypeEnum.Reference;
552:
553:                    break;
554:                default: // Keyword object.
555:                    tokenType = TokenTypeEnum.Keyword;
556:
557:                    buffer = new StringBuilder();
558:                    try {
559:                        do {
560:                            buffer.append((char) c);
561:                            c = stream.readUnsignedByte();
562:                        } while (!isDelimiter(c) && !isWhitespace(c));
563:                    } catch (EOFException e) {/* Let it go. */
564:                    }
                        // NOTE(review): this step back also runs when the loop ended on EOF rather than on a
                        // delimiter -- confirm the resulting pointer position is the intended one.
565:                    stream.skip(-1); // Recover the first byte after the current token.
566:
567:                    break;
568:                }
569:
                    // Only the token types that accumulated characters need their value to be built.
570:                if (buffer != null) {
571:                    /*
572:                      Here we prepare the current token state.
573:                     */
574:                    // Which token type?
575:                    switch (tokenType) {
576:                    case Keyword:
577:                        token = buffer.toString();
578:                        // Late recognition.
579:                        if (((String) token).equals("false")
580:                                || ((String) token).equals("true")) // Boolean.
581:                        {
582:                            tokenType = TokenTypeEnum.Boolean;
583:                            token = Boolean.parseBoolean((String) token);
584:                        } else if (((String) token).equals("null")) // Null.
585:                        {
586:                            tokenType = TokenTypeEnum.Null;
587:                            token = null;
588:                        }
589:                        break;
                        // NOTE(review): the comment branch above never allocates the buffer, so the Comment label
                        // looks unreachable from here.
590:                    case Comment:
591:                    case Hex:
592:                    case Name:
593:                        token = buffer.toString();
594:                        break;
595:                    case Literal:
596:                        token = buffer.toString();
597:                        // Late recognition.
598:                        if (((String) token).startsWith("D:")) // Date.
599:                        {
600:                            tokenType = TokenTypeEnum.Date;
601:                            token = PdfDate.toDate((String) token);
602:                        }
603:                        break;
604:                    case Integer:
                            // NOTE(review): Integer.parseInt throws the unchecked NumberFormatException on text it cannot
                            // parse (e.g. a lone '-' or a value beyond the int range); it is not converted to FileFormatException.
605:                        token = Integer.parseInt(buffer.toString());
606:                        break;
607:                    case Real:
608:                        token = Float.parseFloat(buffer.toString());
609:                        break;
610:                    }
611:                }
612:
613:                return true;
614:            }
615:
616:            /**
617:              Parse the current PDF object [PDF:1.6:3.2].
618:              <h3>Contract</h3>
619:              <ul>
620:               <li>Preconditions:
621:                <ol>
622:                 <li>When this method is invoked, the pointer MUST be at the first
623:                 token of the requested object.</li>
624:                </ol>
625:               </li>
626:               <li>Postconditions:
627:                <ol>
628:                 <li id="parsePdfObject_contract_post[0]">When this method terminates,
629:                 the pointer IS at the last token of the requested object.</li>
630:                </ol>
631:               </li>
632:               <li>Invariants:
633:                <ol>
634:                 <li>(none).</li>
635:                </ol>
636:               </li>
637:               <li>Side-effects:
638:                <ol>
639:                 <li>See <a href="#parsePdfObject_contract_post[0]">Postconditions</a>.</li>
640:                </ol>
641:               </li>
642:              </ul>
643:             */
644:            public PdfDataObject parsePdfObject() throws FileFormatException {
645:                /*
646:                  NOTE: Object parsing is intrinsically a sequential operation tied to the stream pointer.
647:                  Calls bound towards other classes are potentially disruptive for the predictability of
648:                  the position of the stream pointer, so we are forced to carefully keep track of our
649:                  current position in order to recover its proper state after any outbound call.
650:                 */
651:
652:                // Which token type?
653:                switch (tokenType) {
654:                case Integer:
655:                    return new PdfInteger((Integer) token);
656:                case Name:
657:                    return new PdfName((String) token, true);
658:                case Reference:
659:                    /*
660:                      NOTE: Curiously, PDF references are the only primitive objects that require
661:                      a file reference. That's because they deal with indirect objects, which are strongly
662:                      coupled with the current state of the file: so, PDF references are the fundamental
663:                      bridge between the token layer and the file layer.
664:                     */
665:                    return new PdfReference((Reference) token, file);
666:                case Literal:
667:                    return new PdfLiteral((String) token);
668:                case DictionaryBegin:
669:                    PdfDictionary dictionary = new PdfDictionary();
670:                    // Populate the dictionary.
671:                    while (true) {
672:                        // Key.
673:                        moveNext();
674:                        if (tokenType == TokenTypeEnum.DictionaryEnd)
675:                            break;
676:                        PdfName key = (PdfName) parsePdfObject();
677:
678:                        // Value.
679:                        moveNext();
680:                        PdfDirectObject value = (PdfDirectObject) parsePdfObject();
681:
682:                        // Add the current entry to the dictionary!
683:                        dictionary.put(key, value);
684:                    }
685:
686:                    int oldOffset = (int) stream.getPosition();
687:                    moveNext();
688:                    // Is this dictionary the header of a stream object [PDF:1.6:3.2.7]?
689:                    if ((tokenType == TokenTypeEnum.Keyword)
690:                            && token.equals("stream")) // Stream.
691:                    {
692:                        // Keep track of current position!
693:                        long position = stream.getPosition();
694:
695:                        // Get the stream length!
696:                        /*
697:                          NOTE: Indirect reference resolution is an outbound call (stream pointer hazard!),
698:                          so we need to recover our current position after it returns.
699:                         */
700:                        int length = ((PdfInteger) File.resolve(dictionary
701:                                .get(PdfName.Length))).getValue();
702:
703:                        // Come back to current position!
704:                        stream.seek(position);
705:
706:                        skipWhitespace();
707:
708:                        // Copy the stream data to the instance!
709:                        byte[] data = new byte[length];
710:                        try {
711:                            stream.read(data);
712:                        } catch (EOFException e) {
713:                            throw new FileFormatException(
714:                                    "Unexpected EOF (malformed stream object).",
715:                                    e, stream.getPosition());
716:                        }
717:
718:                        moveNext(); // Postcondition (last token should be 'endstream' keyword).
719:
720:                        return new PdfStream(dictionary, new Buffer(data));
721:                    } else // Simple dictionary.
722:                    {
723:                        stream.seek(oldOffset); // Restore postcondition (last token should be the dictionary end).
724:
725:                        return dictionary;
726:                    }
727:                case ArrayBegin:
728:                    PdfArray array = new PdfArray();
729:                    // Populate the array.
730:                    while (true) {
731:                        // Value.
732:                        moveNext();
733:                        if (tokenType == TokenTypeEnum.ArrayEnd)
734:                            break;
735:
736:                        // Add the current item to the array!
737:                        array.add((PdfDirectObject) parsePdfObject());
738:                    }
739:                    return array;
740:                case Real:
741:                    return new PdfReal((Float) token);
742:                case Boolean:
743:                    return new PdfBoolean((Boolean) token);
744:                case Date:
745:                    return new PdfDate((Date) token);
746:                case Hex:
747:                    return new PdfHex((String) token);
748:                case Null:
749:                    return PdfNull.Null;
750:                default:
751:                    return null;
752:                }
753:            }
754:
755:            /**
756:              Retrieves the PDF version of the file [PDF:1.6:3.4.1].
757:              <h3>Contract</h3>
758:              <ul>
759:               <li>Preconditions:
760:                <ol>
761:                 <li>(none).</li>
762:                </ol>
763:               </li>
764:               <li>Postconditions:
765:                <ol>
766:                 <li>(none).</li>
767:                </ol>
768:               </li>
769:               <li>Invariants:
770:                <ol>
771:                 <li>(none).</li>
772:                </ol>
773:               </li>
774:               <li>Side-effects:
775:                <ol>
776:                 <li>The pointer is released at an undefined location.</li>
777:                </ol>
778:               </li>
779:              </ul>
780:             */
781:            public String retrieveVersion() throws FileFormatException {
782:                stream.seek(0);
783:                String header;
784:                try {
785:                    header = stream.readString(10);
786:                } catch (EOFException e) {
787:                    throw new FileFormatException(
788:                            "Unexpected EOF (malformed version data).", e,
789:                            stream.getPosition());
790:                }
791:                if (!header.startsWith(PdfHeader))
792:                    throw new FileFormatException("PDF header not found.",
793:                            stream.getPosition());
794:
795:                return header.substring(PdfHeader.length(),
796:                        PdfHeader.length() + 3);
797:            }
798:
799:            /**
800:              Retrieves the starting position of the last xref-table section.
801:              @see retrieveXRefOffset(long)
802:             */
803:            public long retrieveXRefOffset() throws FileFormatException {
804:                return retrieveXRefOffset(stream.getLength());
805:            }
806:
807:            /**
808:              Retrieves the starting position of an xref-table section [PDF:1.6:3.4.4].
809:              <h3>Contract</h3>
810:              <ul>
811:               <li>Preconditions:
812:                <ol>
813:                 <li>(none).</li>
814:                </ol>
815:               </li>
816:               <li>Postconditions:
817:                <ol>
818:                 <li>(none).</li>
819:                </ol>
820:               </li>
821:               <li>Invariants:
822:                <ol>
823:                 <li>(none).</li>
824:                </ol>
825:               </li>
826:               <li>Side-effects:
827:                <ol>
828:                 <li>The pointer is released at an undefined location.</li>
829:                </ol>
830:               </li>
831:              </ul>
832:              @param offset Position of the EOF marker related to the section intended to be parsed.
833:             */
834:            public long retrieveXRefOffset(long offset)
835:                    throws FileFormatException {
836:                final int chunkSize = 1024; // [PDF:1.6:H.3.18].
837:
838:                // Move back before 'startxref' keyword!
839:                long position = offset - chunkSize;
840:                if (position < 0) {
841:                    position = 0;
842:                } // [FIX:0.0.4:1] It failed to deal with less-than-1024-byte-long PDF files.
843:                stream.seek(position);
844:
845:                // Get 'startxref' keyword position!
846:                int index;
847:                try {
848:                    index = stream.readString(chunkSize).lastIndexOf(
849:                            "startxref");
850:                } catch (EOFException e) {
851:                    throw new FileFormatException(
852:                            "Unexpected EOF (malformed 'startxref' tag).", e,
853:                            stream.getPosition());
854:                }
855:                if (index < 0)
856:                    throw new FileFormatException("PDF startxref not found.",
857:                            stream.getPosition());
858:                // Go past the 'startxref' keyword!
859:                stream.seek(position + index);
860:                moveNext();
861:
862:                // Get the xref offset!
863:                moveNext();
864:                if (tokenType != TokenTypeEnum.Integer)
865:                    throw new FileFormatException("PDF startxref malformed.",
866:                            stream.getPosition());
867:
868:                return (Integer) token;
869:            }
870:
871:            public void seek(long position) {
872:                stream.seek(position);
873:            }
874:
875:            public void skip(long offset) {
876:                stream.skip(offset);
877:            }
878:
879:            /**
880:              Moves to the last whitespace after the current position in order to let read
881:              the first non-whitespace.
882:             */
883:            public boolean skipWhitespace() {
884:                int b;
885:                try {
886:                    do {
887:                        b = stream.readUnsignedByte();
888:                    } while (isWhitespace(b)); // Keep goin' till there's a white-space character...
889:                } catch (EOFException e) {
890:                    return false;
891:                }
892:                stream.skip(-1); // Recover the last whitespace position.
893:
894:                return true;
895:            }
896:            // </public>
897:            // </interface>
898:            // </dynamic>
899:            // </class>
900:        }
www.java2java.com | Contact Us
All other trademarks are property of their respective owners.