001: /**
002: * Copyright (c) 2003-2006, www.pdfbox.org
003: * All rights reserved.
004: *
005: * Redistribution and use in source and binary forms, with or without
006: * modification, are permitted provided that the following conditions are met:
007: *
008: * 1. Redistributions of source code must retain the above copyright notice,
009: * this list of conditions and the following disclaimer.
010: * 2. Redistributions in binary form must reproduce the above copyright notice,
011: * this list of conditions and the following disclaimer in the documentation
012: * and/or other materials provided with the distribution.
013: * 3. Neither the name of pdfbox; nor the names of its
014: * contributors may be used to endorse or promote products derived from this
015: * software without specific prior written permission.
016: *
017: * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
018: * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
019: * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
020: * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
021: * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
022: * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
023: * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
024: * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
025: * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
026: * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
027: *
028: * http://www.pdfbox.org
029: *
030: */package org.pdfbox.cos;
031:
032: import java.io.ByteArrayOutputStream;
033: import java.io.IOException;
034: import java.io.OutputStream;
035: import java.io.UnsupportedEncodingException;
036:
037: import org.pdfbox.persistence.util.COSHEXTable;
038:
039: import org.pdfbox.exceptions.COSVisitorException;
040:
041: /**
042: * This represents a string object in a PDF document.
043: *
044: * @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
045: * @version $Revision: 1.29 $
046: */
047: public class COSString extends COSBase {
048: /**
049: * One of the open string tokens.
050: */
051: public static final byte[] STRING_OPEN = new byte[] { 40 }; //"(".getBytes();
052: /**
053: * One of the close string tokens.
054: */
055: public static final byte[] STRING_CLOSE = new byte[] { 41 }; //")".getBytes( "ISO-8859-1" );
056: /**
057: * One of the open string tokens.
058: */
059: public static final byte[] HEX_STRING_OPEN = new byte[] { 60 }; //"<".getBytes( "ISO-8859-1" );
060: /**
061: * One of the close string tokens.
062: */
063: public static final byte[] HEX_STRING_CLOSE = new byte[] { 62 }; //">".getBytes( "ISO-8859-1" );
064: /**
065: * the escape character in strings.
066: */
067: public static final byte[] ESCAPE = new byte[] { 92 }; //"\\".getBytes( "ISO-8859-1" );
068:
069: /**
070: * CR escape characters.
071: */
072: public static final byte[] CR_ESCAPE = new byte[] { 92, 114 }; //"\\r".getBytes( "ISO-8859-1" );
073: /**
074: * LF escape characters.
075: */
076: public static final byte[] LF_ESCAPE = new byte[] { 92, 110 }; //"\\n".getBytes( "ISO-8859-1" );
077: /**
078: * HT escape characters.
079: */
080: public static final byte[] HT_ESCAPE = new byte[] { 92, 116 }; //"\\t".getBytes( "ISO-8859-1" );
081: /**
082: * BS escape characters.
083: */
084: public static final byte[] BS_ESCAPE = new byte[] { 92, 98 }; //"\\b".getBytes( "ISO-8859-1" );
085: /**
086: * FF escape characters.
087: */
088: public static final byte[] FF_ESCAPE = new byte[] { 92, 102 }; //"\\f".getBytes( "ISO-8859-1" );
089:
090: private ByteArrayOutputStream out = new ByteArrayOutputStream();
091:
092: /**
093: * Forces the string to be serialized in literal form but not hexa form.
094: */
095: private boolean forceLiteralForm = false;
096:
097: /**
098: * Constructor.
099: */
100: public COSString() {
101: }
102:
103: /**
104: * Explicit constructor for ease of manual PDF construction.
105: *
106: * @param value The string value of the object.
107: */
108: public COSString(String value) {
109: try {
110: boolean unicode16 = false;
111: char[] chars = value.toCharArray();
112: for (int i = 0; i < chars.length; i++) {
113: if (chars[i] > 255) {
114: unicode16 = true;
115: }
116: }
117: if (unicode16) {
118: out.write(0xFE);
119: out.write(0xFF);
120: out.write(value.getBytes("UTF-16BE"));
121: } else {
122: out.write(value.getBytes("ISO-8859-1"));
123: }
124: } catch (IOException ignore) {
125: ignore.printStackTrace();
126: //should never happen
127: }
128: }
129:
130: /**
131: * Explicit constructor for ease of manual PDF construction.
132: *
133: * @param value The string value of the object.
134: */
135: public COSString(byte[] value) {
136: try {
137: out.write(value);
138: } catch (IOException ignore) {
139: ignore.printStackTrace();
140: //should never happen
141: }
142: }
143:
144: /**
145: * Forces the string to be written in literal form instead of hexadecimal form.
146: *
147: * @param v if v is true the string will be written in literal form, otherwise it will
148: * be written in hexa if necessary.
149: */
150:
151: public void setForceLiteralForm(boolean v) {
152: forceLiteralForm = v;
153: }
154:
155: /**
156: * This will create a COS string from a string of hex characters.
157: *
158: * @param hex A hex string.
159: * @return A cos string with the hex characters converted to their actual bytes.
160: * @throws IOException If there is an error with the hex string.
161: */
162: public static COSString createFromHexString(String hex)
163: throws IOException {
164: COSString retval = new COSString();
165: StringBuffer hexBuffer = new StringBuffer(hex.trim());
166: //if odd number then the last hex digit is assumed to be 0
167: if (hexBuffer.length() % 2 == 1) {
168: hexBuffer.append("0");
169: }
170: for (int i = 0; i < hexBuffer.length();) {
171: String hexChars = "" + hexBuffer.charAt(i++)
172: + hexBuffer.charAt(i++);
173: try {
174: retval.append(Integer.parseInt(hexChars, 16));
175: } catch (NumberFormatException e) {
176: throw new IOException(
177: "Error: Expected hex number, actual='"
178: + hexChars + "'");
179: }
180: }
181: return retval;
182: }
183:
184: /**
185: * This will take this string and create a hex representation of the bytes that make the string.
186: *
187: * @return A hex string representing the bytes in this string.
188: */
189: public String getHexString() {
190: StringBuffer retval = new StringBuffer(out.size() * 2);
191: byte[] data = getBytes();
192: for (int i = 0; i < data.length; i++) {
193: retval.append(COSHEXTable.HEX_TABLE[(data[i] + 256) % 256]);
194: }
195:
196: return retval.toString();
197: }
198:
199: /**
200: * This will get the string that this object wraps.
201: *
202: * @return The wrapped string.
203: */
204: public String getString() {
205: String retval;
206: String encoding = "ISO-8859-1";
207: byte[] data = getBytes();
208: int start = 0;
209: if (data.length > 2) {
210: if (data[0] == (byte) 0xFF && data[1] == (byte) 0xFE) {
211: encoding = "UTF-16LE";
212: start = 2;
213: } else if (data[0] == (byte) 0xFE && data[1] == (byte) 0xFF) {
214: encoding = "UTF-16BE";
215: start = 2;
216: }
217: }
218: try {
219: retval = new String(getBytes(), start, data.length - start,
220: encoding);
221: } catch (UnsupportedEncodingException e) {
222: //should never happen
223: e.printStackTrace();
224: retval = new String(getBytes());
225: }
226: return retval;
227: }
228:
229: /**
230: * This will append a byte[] to the string.
231: *
232: * @param data The byte[] to add to this string.
233: *
234: * @throws IOException If an IO error occurs while writing the byte.
235: */
236: public void append(byte[] data) throws IOException {
237: out.write(data);
238: }
239:
240: /**
241: * This will append a byte to the string.
242: *
243: * @param in The byte to add to this string.
244: *
245: * @throws IOException If an IO error occurs while writing the byte.
246: */
247: public void append(int in) throws IOException {
248: out.write(in);
249: }
250:
251: /**
252: * This will reset the internal buffer.
253: */
254: public void reset() {
255: out.reset();
256: }
257:
258: /**
259: * This will get the bytes of the string.
260: *
261: * @return A byte array that represents the string.
262: */
263: public byte[] getBytes() {
264: return out.toByteArray();
265: }
266:
267: /**
268: * {@inheritDoc}
269: */
270: public String toString() {
271: return "COSString{" + new String(getBytes()) + "}";
272: }
273:
274: /**
275: * This will output this string as a PDF object.
276: *
277: * @param output The stream to write to.
278: * @throws IOException If there is an error writing to the stream.
279: */
280: public void writePDF(OutputStream output) throws IOException {
281: boolean outsideASCII = false;
282: //Lets first check if we need to escape this string.
283: byte[] bytes = getBytes();
284: for (int i = 0; i < bytes.length && !outsideASCII; i++) {
285: //if the byte is negative then it is an eight bit byte and is
286: //outside the ASCII range.
287: outsideASCII = bytes[i] < 0;
288: }
289: if (!outsideASCII || forceLiteralForm) {
290: output.write(STRING_OPEN);
291: for (int i = 0; i < bytes.length; i++) {
292: int b = (bytes[i] + 256) % 256;
293: switch (b) {
294: case '(':
295: case ')':
296: case '\\': {
297: output.write(ESCAPE);
298: output.write(b);
299: break;
300: }
301: case 10: //LF
302: {
303: output.write(LF_ESCAPE);
304: break;
305: }
306: case 13: // CR
307: {
308: output.write(CR_ESCAPE);
309: break;
310: }
311: case '\t': {
312: output.write(HT_ESCAPE);
313: break;
314: }
315: case '\b': {
316: output.write(BS_ESCAPE);
317: break;
318: }
319: case '\f': {
320: output.write(FF_ESCAPE);
321: break;
322: }
323: default: {
324: output.write(b);
325: }
326: }
327: }
328: output.write(STRING_CLOSE);
329: } else {
330: output.write(HEX_STRING_OPEN);
331: for (int i = 0; i < bytes.length; i++) {
332: output.write(COSHEXTable.TABLE[(bytes[i] + 256) % 256]);
333: }
334: output.write(HEX_STRING_CLOSE);
335: }
336: }
337:
338: /**
339: * visitor pattern double dispatch method.
340: *
341: * @param visitor The object to notify when visiting this object.
342: * @return any object, depending on the visitor implementation, or null
343: * @throws COSVisitorException If an error occurs while visiting this object.
344: */
345: public Object accept(ICOSVisitor visitor)
346: throws COSVisitorException {
347: return visitor.visitFromString(this );
348: }
349:
350: /**
351: * {@inheritDoc}
352: */
353: public boolean equals(Object obj) {
354: return (obj instanceof COSString)
355: && java.util.Arrays.equals(
356: ((COSString) obj).getBytes(), getBytes());
357: }
358:
359: /**
360: * {@inheritDoc}
361: */
362: public int hashCode() {
363: return getBytes().hashCode();
364: }
365: }
|