001: /*
002: * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: package com.sun.xml.internal.bind.v2.runtime.output;
027:
028: import java.io.IOException;
029:
030: /**
031: * Buffer for UTF-8 encoded string.
032: *
033: * See http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8 for the UTF-8 encoding.
034: *
035: * @author Kohsuke Kawaguchi
036: */
037: public final class Encoded {
038: public byte[] buf;
039:
040: public int len;
041:
042: public Encoded() {
043: }
044:
045: public Encoded(String text) {
046: set(text);
047: }
048:
049: public void ensureSize(int size) {
050: if (buf == null || buf.length < size)
051: buf = new byte[size];
052: }
053:
054: public final void set(String text) {
055: int length = text.length();
056:
057: ensureSize(length * 3 + 1); // +1 for append
058:
059: int ptr = 0;
060:
061: for (int i = 0; i < length; i++) {
062: final char chr = text.charAt(i);
063: if (chr > 0x7F) {
064: if (chr > 0x7FF) {
065: if (Character.MIN_HIGH_SURROGATE <= chr
066: && chr <= Character.MAX_LOW_SURROGATE) {
067: // surrogate
068: int uc = (((chr & 0x3ff) << 10) | (text
069: .charAt(++i) & 0x3ff)) + 0x10000;
070:
071: buf[ptr++] = (byte) (0xF0 | ((uc >> 18)));
072: buf[ptr++] = (byte) (0x80 | ((uc >> 12) & 0x3F));
073: buf[ptr++] = (byte) (0x80 | ((uc >> 6) & 0x3F));
074: buf[ptr++] = (byte) (0x80 + (uc & 0x3F));
075: continue;
076: }
077: buf[ptr++] = (byte) (0xE0 + (chr >> 12));
078: buf[ptr++] = (byte) (0x80 + ((chr >> 6) & 0x3F));
079: } else {
080: buf[ptr++] = (byte) (0xC0 + (chr >> 6));
081: }
082: buf[ptr++] = (byte) (0x80 + (chr & 0x3F));
083: } else {
084: buf[ptr++] = (byte) chr;
085: }
086: }
087:
088: len = ptr;
089: }
090:
091: /**
092: * Fill in the buffer by encoding the specified characters
093: * while escaping characters like <
094: *
095: * @param isAttribute
096: * if true, characters like \t, \r, and \n are also escaped.
097: */
098: public final void setEscape(String text, boolean isAttribute) {
099: int length = text.length();
100: ensureSize(length * 6 + 1); // in the worst case the text is like """""", so we need 6 bytes per char
101:
102: int ptr = 0;
103:
104: for (int i = 0; i < length; i++) {
105: final char chr = text.charAt(i);
106:
107: int ptr1 = ptr;
108: if (chr > 0x7F) {
109: if (chr > 0x7FF) {
110: if (Character.MIN_HIGH_SURROGATE <= chr
111: && chr <= Character.MAX_LOW_SURROGATE) {
112: // surrogate
113: int uc = (((chr & 0x3ff) << 10) | (text
114: .charAt(++i) & 0x3ff)) + 0x10000;
115:
116: buf[ptr++] = (byte) (0xF0 | ((uc >> 18)));
117: buf[ptr++] = (byte) (0x80 | ((uc >> 12) & 0x3F));
118: buf[ptr++] = (byte) (0x80 | ((uc >> 6) & 0x3F));
119: buf[ptr++] = (byte) (0x80 + (uc & 0x3F));
120: continue;
121: }
122: buf[ptr1++] = (byte) (0xE0 + (chr >> 12));
123: buf[ptr1++] = (byte) (0x80 + ((chr >> 6) & 0x3F));
124: } else {
125: buf[ptr1++] = (byte) (0xC0 + (chr >> 6));
126: }
127: buf[ptr1++] = (byte) (0x80 + (chr & 0x3F));
128: } else {
129: byte[] ent;
130:
131: if ((ent = attributeEntities[chr]) != null) {
132: // the majority of the case is just printed as a char,
133: // so it's very important to reject them as quickly as possible
134:
135: // check again to see if this really needs to be escaped
136: if (isAttribute || entities[chr] != null)
137: ptr1 = writeEntity(ent, ptr1);
138: else
139: buf[ptr1++] = (byte) chr;
140: } else
141: buf[ptr1++] = (byte) chr;
142: }
143: ptr = ptr1;
144: }
145: len = ptr;
146: }
147:
148: private int writeEntity(byte[] entity, int ptr) {
149: System.arraycopy(entity, 0, buf, ptr, entity.length);
150: return ptr + entity.length;
151: }
152:
153: /**
154: * Writes the encoded bytes to the given output stream.
155: */
156: public final void write(UTF8XmlOutput out) throws IOException {
157: out.write(buf, 0, len);
158: }
159:
160: /**
161: * Appends a new character to the end of the buffer.
162: * This assumes that you have enough space in the buffer.
163: */
164: public void append(char b) {
165: buf[len++] = (byte) b;
166: }
167:
168: /**
169: * Reallocate the buffer to the exact size of the data
170: * to reduce the memory footprint.
171: */
172: public void compact() {
173: byte[] b = new byte[len];
174: System.arraycopy(buf, 0, b, 0, len);
175: buf = b;
176: }
177:
178: /**
179: * UTF-8 encoded entities keyed by their character code.
180: * e.g., entities['&'] == AMP_ENTITY.
181: *
182: * In attributes we need to encode more characters.
183: */
184: private static final byte[][] entities = new byte[0x80][];
185: private static final byte[][] attributeEntities = new byte[0x80][];
186:
187: static {
188: add('&', "&", false);
189: add('<', "<", false);
190: add('>', ">", false);
191: add('"', """, false);
192: add('\t', "	", true);
193: add('\r', "
", false);
194: add('\n', "
", true);
195: }
196:
197: private static void add(char c, String s, boolean attOnly) {
198: byte[] image = UTF8XmlOutput.toBytes(s);
199: attributeEntities[c] = image;
200: if (!attOnly)
201: entities[c] = image;
202: }
203: }
|