001: /*
002:
003: Derby - Class org.apache.derby.iapi.types.ReaderToUTF8Stream
004:
005: Licensed to the Apache Software Foundation (ASF) under one or more
006: contributor license agreements. See the NOTICE file distributed with
007: this work for additional information regarding copyright ownership.
008: The ASF licenses this file to you under the Apache License, Version 2.0
009: (the "License"); you may not use this file except in compliance with
010: the License. You may obtain a copy of the License at
011:
012: http://www.apache.org/licenses/LICENSE-2.0
013:
014: Unless required by applicable law or agreed to in writing, software
015: distributed under the License is distributed on an "AS IS" BASIS,
016: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017: See the License for the specific language governing permissions and
018: limitations under the License.
019:
020: */
021:
022: package org.apache.derby.iapi.types;
023:
024: import java.io.InputStream;
025: import java.io.IOException;
026: import java.io.EOFException;
027: import java.io.Reader;
028: import java.io.UTFDataFormatException;
029: import org.apache.derby.iapi.reference.SQLState;
030: import org.apache.derby.iapi.services.i18n.MessageService;
031: import org.apache.derby.iapi.services.io.DerbyIOException;
032: import org.apache.derby.iapi.services.io.LimitReader;
033: import org.apache.derby.iapi.types.TypeId;
034:
035: /**
036: Converts a java.io.Reader to the on-disk UTF8 format used by Derby
037: for character types.
038: */
039: public final class ReaderToUTF8Stream extends InputStream {
040: /**
041: * Application's reader wrapped in a LimitReader.
042: */
043: private LimitReader reader;
044:
045: private byte[] buffer;
046: private int boff;
047: private int blen;
048: private boolean eof;
049: private boolean multipleBuffer;
050: // buffer to hold the data read from stream
051: // and converted to UTF8 format
052: private final static int BUFSIZE = 32768;
053:
054: /** Number of characters to truncate from this stream
055: The SQL standard allows for truncation of trailing spaces
056: for clobs,varchar,char.
057: If zero, no characters are truncated.
058: */
059: private final int charsToTruncate;
060: private static final char SPACE = ' ';
061:
062: /**
063: * Length of the final value, after truncation if any,
064: * in characters.
065: this stream needs to fit into a column of colWidth
066: if truncation error happens ,then the error message includes
067: information about the column width.
068: */
069: private final int valueLength;
070: /** The maximum allowed length of the stream. */
071: private final int maximumLength;
072: /** The type name for the column data is inserted into. */
073: private final String typeName;
074:
075: /**
076: * Create a stream that will truncate trailing blanks if required/allowed.
077: *
078: * If the stream must be truncated, the number of blanks to truncate
079: * is specified to allow the stream to be checked for exact length, as
080: * required by JDBC 3.0. If the stream is shorter or longer than specified,
081: * an exception is thrown during read.
082: *
083: * @param appReader application reader
084: * @param valueLength the length of the reader in characters
085: * @param numCharsToTruncate the number of trailing blanks to truncate
086: * @param typeName type name of the column data is inserted into
087: */
088: public ReaderToUTF8Stream(Reader appReader, int valueLength,
089: int numCharsToTruncate, String typeName) {
090: this .reader = new LimitReader(appReader);
091: reader.setLimit(valueLength);
092: buffer = new byte[BUFSIZE];
093: blen = -1;
094: this .charsToTruncate = numCharsToTruncate;
095: this .valueLength = valueLength;
096: this .maximumLength = -1;
097: this .typeName = typeName;
098: }
099:
100: /**
101: * Create a UTF-8 stream for a length less application reader.
102: *
103: * A limit is placed on the length of the reader. If the reader exceeds
104: * the maximum length, truncation of trailing blanks is attempted. If
105: * truncation fails, an exception is thrown.
106: *
107: * @param appReader application reader
108: * @param maximumLength maximum allowed length in number of characters for
109: * the reader
110: * @param typeName type name of the column data is inserted into
111: * @throws IllegalArgumentException if maximum length is negative, or type
112: * name is <code>null<code>
113: */
114: public ReaderToUTF8Stream(Reader appReader, int maximumLength,
115: String typeName) {
116: if (maximumLength < 0) {
117: throw new IllegalArgumentException(
118: "Maximum length for a capped "
119: + "stream cannot be negative: "
120: + maximumLength);
121: }
122: if (typeName == null) {
123: throw new IllegalArgumentException(
124: "Type name cannot be null");
125: }
126: this .reader = new LimitReader(appReader);
127: reader.setLimit(maximumLength);
128: buffer = new byte[BUFSIZE];
129: blen = -1;
130: this .maximumLength = maximumLength;
131: this .typeName = typeName;
132: this .charsToTruncate = -1;
133: this .valueLength = -1;
134: }
135:
136: /**
137: * read from stream; characters converted to utf-8 derby specific encoding.
138: * If stream has been read, and eof reached, in that case any subsequent
139: * read will throw an EOFException
140: * @see java.io.InputStream#read()
141: */
142: public int read() throws IOException {
143:
144: // when stream has been read and eof reached, stream is closed
145: // and buffer is set to null ( see close() method)
146: // since stream cannot be re-used, check if stream is closed and
147: // if so throw an EOFException
148: if (buffer == null)
149: throw new EOFException(MessageService
150: .getTextMessage(SQLState.STREAM_EOF));
151:
152: // first read
153: if (blen < 0)
154: fillBuffer(2);
155:
156: while (boff == blen) {
157: // reached end of buffer, read more?
158: if (eof) {
159: // we have reached the end of this stream
160: // cleanup here and return -1 indicating
161: // eof of stream
162: close();
163: return -1;
164: }
165:
166: fillBuffer(0);
167: }
168:
169: return buffer[boff++] & 0xff;
170:
171: }
172:
173: public int read(byte b[], int off, int len) throws IOException {
174:
175: // when stream has been read and eof reached, stream is closed
176: // and buffer is set to null ( see close() method)
177: // since stream cannot be re-used, check if stream is closed and
178: // if so throw an EOFException
179: if (buffer == null)
180: throw new EOFException(MessageService
181: .getTextMessage(SQLState.STREAM_EOF));
182:
183: // first read
184: if (blen < 0)
185: fillBuffer(2);
186:
187: int readCount = 0;
188:
189: while (len > 0) {
190:
191: int copyBytes = blen - boff;
192:
193: // buffer empty?
194: if (copyBytes == 0) {
195: if (eof) {
196: if (readCount > 0) {
197: return readCount;
198: } else {
199: // we have reached the eof, so close the stream
200: close();
201: return -1;
202: }
203:
204: }
205: fillBuffer(0);
206: continue;
207: }
208:
209: if (len < copyBytes)
210: copyBytes = len;
211:
212: System.arraycopy(buffer, boff, b, off, copyBytes);
213: boff += copyBytes;
214: len -= copyBytes;
215: readCount += copyBytes;
216: off += copyBytes;
217:
218: }
219: return readCount;
220: }
221:
222: private void fillBuffer(int startingOffset) throws IOException {
223: int off = boff = startingOffset;
224:
225: if (off == 0)
226: multipleBuffer = true;
227:
228: // 6! need to leave room for a three byte UTF8 encoding
229: // and 3 bytes for our special end of file marker.
230: for (; off <= buffer.length - 6;) {
231: int c = reader.read();
232: if (c < 0) {
233: eof = true;
234: break;
235: }
236:
237: if ((c >= 0x0001) && (c <= 0x007F)) {
238: buffer[off++] = (byte) c;
239: } else if (c > 0x07FF) {
240: buffer[off++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
241: buffer[off++] = (byte) (0x80 | ((c >> 6) & 0x3F));
242: buffer[off++] = (byte) (0x80 | ((c >> 0) & 0x3F));
243: } else {
244: buffer[off++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
245: buffer[off++] = (byte) (0x80 | ((c >> 0) & 0x3F));
246: }
247: }
248:
249: blen = off;
250: boff = 0;
251:
252: if (eof)
253: checkSufficientData();
254: }
255:
256: /**
257: * Validate the length of the stream, take corrective action if allowed.
258: *
259: * JDBC 3.0 (from tutorial book) requires that an input stream has the
260: * correct number of bytes in the stream.
261: * If the stream is too long, trailing blank truncation is attempted if
262: * allowed. If truncation fails, or is disallowed, an exception is thrown.
263: *
264: * @throws IOException if an errors occurs in the application stream
265: * @throws DerbyIOException if Derby finds a problem with the stream;
266: * stream is too long and cannot be truncated, or the stream length
267: * does not match the specified length
268: */
269: private void checkSufficientData() throws IOException {
270: // now that we finished reading from the stream; the amount
271: // of data that we can insert,start check for trailing spaces
272: if (charsToTruncate > 0) {
273: reader.setLimit(charsToTruncate);
274: truncate();
275: }
276:
277: // A length less stream that is capped, will return 0 even if there
278: // are more bytes in the application stream.
279: int remainingBytes = reader.clearLimit();
280: if (remainingBytes > 0 && valueLength > 0) {
281: // If we had a specified length, throw exception.
282: throw new DerbyIOException(
283: MessageService
284: .getTextMessage(SQLState.SET_STREAM_INEXACT_LENGTH_DATA),
285: SQLState.SET_STREAM_INEXACT_LENGTH_DATA);
286: }
287:
288: // if we had a limit try reading one more character.
289: // JDBC 3.0 states the stream must have the correct number of
290: // characters in it.
291: if (remainingBytes == 0 && reader.read() >= 0) {
292: if (valueLength > -1) {
293: throw new DerbyIOException(
294: MessageService
295: .getTextMessage(SQLState.SET_STREAM_INEXACT_LENGTH_DATA),
296: SQLState.SET_STREAM_INEXACT_LENGTH_DATA);
297: } else {
298: // Stream was capped (length less) and too long.
299: // Try to truncate if allowed, or else throw exception.
300: if (canTruncate()) {
301: truncate();
302: } else {
303: throw new DerbyIOException(
304: MessageService
305: .getTextMessage(SQLState.LANG_STRING_TRUNCATION),
306: SQLState.LANG_STRING_TRUNCATION);
307: }
308: }
309: }
310:
311: // can put the correct length into the stream.
312: if (!multipleBuffer) {
313: int utflen = blen - 2;
314:
315: buffer[0] = (byte) ((utflen >>> 8) & 0xFF);
316: buffer[1] = (byte) ((utflen >>> 0) & 0xFF);
317:
318: } else {
319: buffer[blen++] = (byte) 0xE0;
320: buffer[blen++] = (byte) 0x00;
321: buffer[blen++] = (byte) 0x00;
322: }
323: }
324:
325: /**
326: * Determine if trailing blank truncation is allowed.
327: */
328: private boolean canTruncate() {
329: // Only a few types can be truncated, default is to not allow.
330: if (typeName.equals(TypeId.CLOB_NAME)) {
331: return true;
332: } else if (typeName.equals(TypeId.VARCHAR_NAME)) {
333: return true;
334: }
335: return false;
336: }
337:
338: /**
339: * Attempt to truncate the stream by removing trailing blanks.
340: */
341: private void truncate() throws IOException {
342: int c = 0;
343: for (;;) {
344: c = reader.read();
345:
346: if (c < 0) {
347: break;
348: } else if (c != SPACE) {
349: throw new DerbyIOException(MessageService
350: .getTextMessage(
351: SQLState.LANG_STRING_TRUNCATION,
352: typeName, "XXXX", String
353: .valueOf(valueLength)),
354: SQLState.LANG_STRING_TRUNCATION);
355: }
356: }
357: }
358:
359: /**
360: * return resources
361: */
362: public void close() throws IOException {
363: // since stream has been read and eof reached, return buffer back to
364: // the vm.
365: // Instead of using another variable to indicate stream is closed
366: // a check on (buffer==null) is used instead.
367: buffer = null;
368: }
369:
370: /**
371: * Return an optimized version of bytes available to read from
372: * the stream
373: * Note, it is not exactly per java.io.InputStream#available()
374: */
375: public final int available() {
376: int remainingBytes = reader.getLimit();
377: // this object buffers BUFSIZE bytes that can be read
378: // and when that is finished it reads the next available bytes
379: // from the reader object
380: // reader.getLimit() returns the remaining bytes available
381: // on this stream
382: return (BUFSIZE > remainingBytes ? remainingBytes : BUFSIZE);
383: }
384: }
|