001: /**
002: * Copyright (c) 2005, www.pdfbox.org
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * 1. Redistributions of source code must retain the above copyright notice,
009: * this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: * 3. Neither the name of pdfbox; nor the names of its
014: * contributors may be used to endorse or promote products derived from this
015: * software without specific prior written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
021: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: *
028: * http://www.pdfbox.org
029: *
030: */package org.pdfbox.examples.pdmodel;
031:
032: import java.awt.geom.Rectangle2D;
033: import java.util.List;
034:
035: import org.pdfbox.pdmodel.PDDocument;
036: import org.pdfbox.pdmodel.PDPage;
037:
038: import org.pdfbox.pdmodel.common.PDRectangle;
039: import org.pdfbox.pdmodel.interactive.action.type.PDAction;
040: import org.pdfbox.pdmodel.interactive.action.type.PDActionURI;
041: import org.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
042: import org.pdfbox.pdmodel.interactive.annotation.PDAnnotationLink;
043: import org.pdfbox.util.PDFTextStripperByArea;
044:
045: /**
046: * This is an example of how to access a URL in a PDF document.
047: *
048: * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
049: * @version $Revision: 1.3 $
050: */
051: public class PrintURLs {
052: /**
053: * Constructor.
054: */
055: private PrintURLs() {
056: //utility class
057: }
058:
059: /**
060: * This will create a hello world PDF document.
061: * <br />
062: * see usage() for commandline
063: *
064: * @param args Command line arguments.
065: *
066: * @throws Exception If there is an error extracting the URLs.
067: */
068: public static void main(String[] args) throws Exception {
069: PDDocument doc = null;
070: try {
071: if (args.length != 1) {
072: usage();
073: } else {
074: doc = PDDocument.load(args[0]);
075: List allPages = doc.getDocumentCatalog().getAllPages();
076: for (int i = 0; i < allPages.size(); i++) {
077: PDFTextStripperByArea stripper = new PDFTextStripperByArea();
078: PDPage page = (PDPage) allPages.get(i);
079: List annotations = page.getAnnotations();
080: //first setup text extraction regions
081: for (int j = 0; j < annotations.size(); j++) {
082: PDAnnotation annot = (PDAnnotation) annotations
083: .get(j);
084: if (annot instanceof PDAnnotationLink) {
085: PDAnnotationLink link = (PDAnnotationLink) annot;
086: PDRectangle rect = link.getRectangle();
087: //need to reposition link rectangle to match text space
088: float x = rect.getLowerLeftX();
089: float y = rect.getUpperRightY();
090: float width = rect.getWidth();
091: float height = rect.getHeight();
092: int rotation = page.findRotation();
093: if (rotation == 0) {
094: PDRectangle pageSize = page
095: .findMediaBox();
096: y = pageSize.getHeight() - y;
097: } else if (rotation == 90) {
098: //do nothing
099: }
100:
101: Rectangle2D.Float awtRect = new Rectangle2D.Float(
102: x, y, width, height);
103: stripper.addRegion("" + j, awtRect);
104: }
105: }
106:
107: stripper.extractRegions(page);
108:
109: for (int j = 0; j < annotations.size(); j++) {
110: PDAnnotation annot = (PDAnnotation) annotations
111: .get(j);
112: if (annot instanceof PDAnnotationLink) {
113: PDAnnotationLink link = (PDAnnotationLink) annot;
114: PDAction action = link.getAction();
115: String urlText = stripper
116: .getTextForRegion("" + j);
117: if (action instanceof PDActionURI) {
118: PDActionURI uri = (PDActionURI) action;
119: System.out.println("Page " + (i + 1)
120: + ":'" + urlText + "'="
121: + uri.getURI());
122: }
123: }
124: }
125: }
126: }
127: } finally {
128: if (doc != null) {
129: doc.close();
130: }
131: }
132: }
133:
134: /**
135: * This will print out a message telling how to use this example.
136: */
137: private static void usage() {
138: System.err.println("usage: " + PrintURLs.class.getName()
139: + " <input-file>");
140: }
141: }
|