001: /*
002: * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: package com.sun.xml.internal.stream.writers;
027:
028: import java.io.Writer;
029: import java.io.OutputStream;
030: import java.io.IOException;
031:
032: import com.sun.org.apache.xerces.internal.util.XMLChar;
033:
/**
 * <p>This class is used to write a stream of chars as a stream of
 * bytes using the UTF-8 encoding. It assumes that the underlying
 * output stream is buffered or does not need additional buffering;
 * every encoded byte is handed directly to that stream.</p>
 *
 * <p>It is more efficient than using a <code>java.io.OutputStreamWriter</code>
 * because it does not need to be wrapped in a
 * <code>java.io.BufferedWriter</code>. Creating multiple instances
 * of <code>java.io.BufferedWriter</code> has been shown to be very
 * expensive in JAX-WS.</p>
 *
 * <p>Characters outside the Basic Multilingual Plane must be supplied as a
 * UTF-16 surrogate pair (high surrogate first). The two halves may arrive
 * in separate <code>write</code> calls; the high surrogate is remembered
 * until its partner is written. Malformed surrogate sequences are rejected
 * with an <code>IOException</code> instead of being encoded.</p>
 *
 * @author Santiago.PericasGeertsen@sun.com
 */
public final class UTF8OutputStreamWriter extends Writer {

    /**
     * Underlying output stream. This class assumes that this
     * output stream does not need buffering.
     */
    OutputStream out;

    /**
     * Java represents chars that are not in the Basic Multilingual
     * Plane (BMP) in UTF-16. This int stores the first code unit
     * (the high surrogate) of a code point encoded in two UTF-16
     * code units, or 0 when no surrogate pair is in progress.
     */
    int lastUTF16CodePoint = 0;

    /**
     * Creates a writer that encodes chars as UTF-8 onto the given stream.
     *
     * @param out the stream receiving the encoded bytes
     */
    public UTF8OutputStreamWriter(OutputStream out) {
        this.out = out;
    }

    /**
     * Returns the name of the encoding produced by this writer.
     *
     * @return always <code>"UTF-8"</code>
     */
    public String getEncoding() {
        return "UTF-8";
    }

    /**
     * Encodes a single UTF-16 code unit as UTF-8.
     *
     * <p>As specified by {@link Writer#write(int)}, only the 16 low-order
     * bits of <code>c</code> are used. A high surrogate produces no output
     * by itself; it is held until the following low surrogate arrives, at
     * which point the combined code point is written as four bytes.</p>
     *
     * @param c the code unit to write (16 low-order bits)
     * @throws IOException if the underlying stream fails, if a low surrogate
     *         is written without a preceding high surrogate, or if a high
     *         surrogate is followed by anything other than a low surrogate
     *         (in that case the pending high surrogate is discarded and the
     *         offending code unit is not written)
     */
    @Override
    public void write(int c) throws IOException {
        // Writer contract: the high-order 16 bits of the argument are ignored.
        final int unit = c & 0xFFFF;

        // Second half of a surrogate pair?
        if (lastUTF16CodePoint != 0) {
            final int high = lastUTF16CodePoint;
            // Clear the pending state first so a failure below does not
            // poison every subsequent write.
            lastUTF16CodePoint = 0;

            if (!Character.isLowSurrogate((char) unit)) {
                throw new IOException(
                        "Attempting to write UTF-16 high surrogate 0x"
                        + Integer.toHexString(high)
                        + " followed by 0x" + Integer.toHexString(unit)
                        + " which is not a low surrogate");
            }

            // Combine the two 10-bit payloads; result is in 0x10000..0x10FFFF.
            final int uc = (((high & 0x3FF) << 10) | (unit & 0x3FF)) + 0x10000;

            // 4 bytes, 21 bits
            out.write(0xF0 | (uc >> 18));
            out.write(0x80 | ((uc >> 12) & 0x3F));
            out.write(0x80 | ((uc >> 6) & 0x3F));
            out.write(0x80 | (uc & 0x3F));
            return;
        }

        // Otherwise, encode the char as defined in UTF-8
        if (unit < 0x80) {
            // 1 byte, 7 bits
            out.write(unit);
        } else if (unit < 0x800) {
            // 2 bytes, 11 bits
            out.write(0xC0 | (unit >> 6));          // first 5
            out.write(0x80 | (unit & 0x3F));        // second 6
        } else if (Character.isHighSurrogate((char) unit)) {
            // First half of a pair: wait for the low surrogate.
            lastUTF16CodePoint = unit;
        } else if (Character.isLowSurrogate((char) unit)) {
            // A low surrogate is only valid right after a high surrogate.
            throw new IOException(
                    "Attempting to write unpaired UTF-16 low surrogate 0x"
                    + Integer.toHexString(unit));
        } else {
            // 3 bytes, 16 bits
            out.write(0xE0 | (unit >> 12));         // first 4
            out.write(0x80 | ((unit >> 6) & 0x3F)); // second 6
            out.write(0x80 | (unit & 0x3F));        // third 6
        }
    }

    /**
     * Encodes every char of the array, in order.
     *
     * @param cbuf the chars to write
     * @throws IOException if encoding or the underlying stream fails
     */
    @Override
    public void write(char[] cbuf) throws IOException {
        for (char ch : cbuf) {
            write(ch);
        }
    }

    /**
     * Encodes <code>len</code> chars of the array starting at <code>off</code>.
     * The range is not validated up front: an out-of-range index surfaces as
     * an <code>ArrayIndexOutOfBoundsException</code> when it is reached, and
     * a non-positive <code>len</code> writes nothing.
     *
     * @param cbuf the source array
     * @param off index of the first char to write
     * @param len number of chars to write
     * @throws IOException if encoding or the underlying stream fails
     */
    @Override
    public void write(char[] cbuf, int off, int len) throws IOException {
        for (int i = 0; i < len; i++) {
            write(cbuf[off + i]);
        }
    }

    /**
     * Encodes every char of the string, in order.
     *
     * @param str the string to write
     * @throws IOException if encoding or the underlying stream fails
     */
    @Override
    public void write(String str) throws IOException {
        final int len = str.length();
        for (int i = 0; i < len; i++) {
            write(str.charAt(i));
        }
    }

    /**
     * Encodes <code>len</code> chars of the string starting at <code>off</code>.
     * The range is not validated up front: an out-of-range index surfaces as
     * a <code>StringIndexOutOfBoundsException</code> when it is reached, and
     * a non-positive <code>len</code> writes nothing.
     *
     * @param str the source string
     * @param off index of the first char to write
     * @param len number of chars to write
     * @throws IOException if encoding or the underlying stream fails
     */
    @Override
    public void write(String str, int off, int len) throws IOException {
        for (int i = 0; i < len; i++) {
            write(str.charAt(off + i));
        }
    }

    /**
     * Flushes the underlying output stream. A pending high surrogate, if
     * any, stays pending; nothing is emitted for it.
     *
     * @throws IOException if the underlying stream fails to flush
     */
    @Override
    public void flush() throws IOException {
        out.flush();
    }

    /**
     * Closes the underlying output stream.
     *
     * <p>The stream is closed even when a high surrogate is still waiting
     * for its low surrogate, so the resource is never leaked; the
     * incomplete pair is then reported.</p>
     *
     * @throws IOException if the underlying stream fails to close
     * @throws IllegalStateException if a high surrogate was written without
     *         its matching low surrogate
     */
    @Override
    public void close() throws IOException {
        final boolean awaitingLowSurrogate = lastUTF16CodePoint != 0;
        // Drop the dangling half so a repeated close() does not fail again.
        lastUTF16CodePoint = 0;

        // Release the stream before reporting the incomplete pair.
        out.close();

        if (awaitingLowSurrogate) {
            throw new IllegalStateException(
                    "Attempting to close a UTF8OutputStreamWriter"
                    + " while awaiting for a UTF-16 code unit");
        }
    }

}
|