001: /*
002: * Licensed to the Apache Software Foundation (ASF) under one or more
003: * contributor license agreements. See the NOTICE file distributed with
004: * this work for additional information regarding copyright ownership.
005: * The ASF licenses this file to You under the Apache License, Version 2.0
006: * (the "License"); you may not use this file except in compliance with
007: * the License. You may obtain a copy of the License at
008: *
009: * http://www.apache.org/licenses/LICENSE-2.0
010: *
011: * Unless required by applicable law or agreed to in writing, software
012: * distributed under the License is distributed on an "AS IS" BASIS,
013: * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014: * See the License for the specific language governing permissions and
015: * limitations under the License.
016: */
017: package org.apache.wicket.util.upload;
018:
019: import java.io.ByteArrayOutputStream;
020: import java.io.IOException;
021: import java.io.InputStream;
022: import java.io.OutputStream;
023: import java.io.UnsupportedEncodingException;
024:
/**
 * <p>
 * Low level API for processing file uploads.
 *
 * <p>
 * This class can be used to process data streams conforming to MIME 'multipart'
 * format as defined in <a href="http://www.ietf.org/rfc/rfc1867.txt">RFC 1867</a>.
 * Arbitrarily large amounts of data in the stream can be processed under
 * constant memory usage.
 *
 * <p>
 * The format of the stream is defined in the following way:<br>
 *
 * <code>
 * multipart-body := preamble 1*encapsulation close-delimiter epilogue<br>
 * encapsulation := delimiter body CRLF<br>
 * delimiter := "--" boundary CRLF<br>
 * close-delimiter := "--" boundary "--"<br>
 * preamble := &lt;ignore&gt;<br>
 * epilogue := &lt;ignore&gt;<br>
 * body := header-part CRLF body-data<br>
 * header-part := 1*header CRLF<br>
 * header := header-name ":" header-value<br>
 * header-name := &lt;printable ascii characters except ":"&gt;<br>
 * header-value := &lt;any ascii characters except CR &amp; LF&gt;<br>
 * body-data := &lt;arbitrary data&gt;<br>
 * </code>
 *
 * <p>
 * Note that body-data can contain another multipart entity. There is limited
 * support for single pass processing of such nested streams. The nested stream
 * is <strong>required</strong> to have a boundary token of the same length as
 * the parent stream (see {@link #setBoundary(byte[])}).
 *
 * <p>
 * Here is an example of usage of this class.<br>
 *
 * <pre>
 * try {
 *     MultipartFormInputStream multipartStream =
 *         new MultipartFormInputStream(input, boundary);
 *     boolean nextPart = multipartStream.skipPreamble();
 *     OutputStream output;
 *     while (nextPart) {
 *         String headers = multipartStream.readHeaders(
 *             MultipartFormInputStream.HEADER_PART_SIZE_MAX);
 *         // process headers
 *         // create some output stream
 *         multipartStream.readBodyData(output);
 *         nextPart = multipartStream.readBoundary();
 *     }
 * } catch (MultipartFormInputStream.MalformedStreamException e) {
 *     // the stream failed to follow required syntax
 * } catch (IOException e) {
 *     // a read or write error occurred
 * }
 * </pre>
 *
 * @author <a href="mailto:Rafal.Krzewski@e-point.pl">Rafal Krzewski</a>
 * @author <a href="mailto:martinc@apache.org">Martin Cooper</a>
 * @author Sean C. Sullivan
 *
 * @version $Id: MultipartFormInputStream.java,v 1.2 2006/02/05 18:41:32
 *          jonathanlocke Exp $
 */
public class MultipartFormInputStream {

    // ----------------------------------------------------- Manifest constants

    /**
     * The Carriage Return ASCII character value.
     */
    public static final byte CR = 0x0D;

    /**
     * The Line Feed ASCII character value.
     */
    public static final byte LF = 0x0A;

    /**
     * The dash (-) ASCII character value.
     */
    public static final byte DASH = 0x2D;

    /**
     * The maximum length of <code>header-part</code> that will be processed
     * (10 kilobytes = 10240 bytes).
     */
    public static final int HEADER_PART_SIZE_MAX = 10240;

    /**
     * The default length of the buffer used for processing a request.
     */
    protected static final int DEFAULT_BUFSIZE = 4096;

    /**
     * A byte sequence that marks the end of <code>header-part</code>
     * (<code>CRLFCRLF</code>).
     */
    protected static final byte[] HEADER_SEPARATOR = { CR, LF, CR, LF };

    /**
     * A byte sequence that follows a delimiter that will be followed by an
     * encapsulation (<code>CRLF</code>).
     */
    protected static final byte[] FIELD_SEPARATOR = { CR, LF };

    /**
     * A byte sequence that follows a delimiter of the last encapsulation in
     * the stream (<code>--</code>).
     */
    protected static final byte[] STREAM_TERMINATOR = { DASH, DASH };

    // ----------------------------------------------------------- Data members

    /**
     * The input stream from which data is read.
     */
    private InputStream input;

    /**
     * The length of the boundary token plus the leading <code>CRLF--</code>.
     */
    private int boundaryLength;

    /**
     * The amount of data, in bytes, that must be kept in the buffer in order to
     * detect delimiters reliably.
     */
    private int keepRegion;

    /**
     * The byte sequence that partitions the stream.
     */
    private byte[] boundary;

    /**
     * The length of the buffer used for processing the request.
     */
    private int bufSize;

    /**
     * The buffer used for processing the request.
     */
    private byte[] buffer;

    /**
     * The index of first valid character in the buffer. <br>
     * 0 <= head < bufSize
     */
    private int head;

    /**
     * The index of last valid character in the buffer + 1. <br>
     * 0 <= tail <= bufSize
     */
    private int tail;

    /**
     * The content encoding to use when reading headers.
     */
    private String headerEncoding;

    // ----------------------------------------------------------- Constructors

    /**
     * Default constructor. It leaves the input, boundary and buffer unset, so
     * an instance created this way cannot parse anything by itself.
     *
     * @see #MultipartFormInputStream(InputStream, byte[], int)
     * @see #MultipartFormInputStream(InputStream, byte[])
     */
    public MultipartFormInputStream() {
    }

    /**
     * <p>
     * Constructs a <code>MultipartFormInputStream</code> with a custom size
     * buffer.
     *
     * <p>
     * Note that the buffer must be at least big enough to contain the boundary
     * string, plus 4 characters for CR/LF and double dash, plus at least one
     * byte of data. Too small a buffer size setting will degrade performance.
     *
     * @param input
     *            The <code>InputStream</code> to serve as a data source.
     * @param boundary
     *            The token used for dividing the stream into
     *            <code>encapsulations</code>.
     * @param bufSize
     *            The size of the buffer to be used, in bytes.
     *
     * @exception IllegalArgumentException
     *                if <code>bufSize</code> is smaller than the boundary
     *                length plus 5 bytes.
     *
     * @see #MultipartFormInputStream()
     * @see #MultipartFormInputStream(InputStream, byte[])
     */
    public MultipartFormInputStream(InputStream input, byte[] boundary,
            int bufSize) {
        // A buffer that cannot hold "CRLF--" + boundary + one data byte leaves
        // no room to refill once the keep region is full, which would make the
        // body-reading loop spin forever on zero-length reads.
        if (bufSize < boundary.length + 5) {
            throw new IllegalArgumentException(
                    "The buffer size specified for the MultipartFormInputStream is too small");
        }

        this.input = input;
        this.bufSize = bufSize;
        this.buffer = new byte[bufSize];

        // We prepend CR/LF to the boundary to chop trailing CR/LF from
        // body-data tokens.
        this.boundary = new byte[boundary.length + 4];
        this.boundaryLength = boundary.length + 4;
        this.keepRegion = boundary.length + 3;
        this.boundary[0] = CR;
        this.boundary[1] = LF;
        this.boundary[2] = DASH;
        this.boundary[3] = DASH;
        System.arraycopy(boundary, 0, this.boundary, 4, boundary.length);

        head = 0;
        tail = 0;
    }

    /**
     * <p>
     * Constructs a <code>MultipartFormInputStream</code> with a default size
     * buffer.
     *
     * @param input
     *            The <code>InputStream</code> to serve as a data source.
     * @param boundary
     *            The token used for dividing the stream into
     *            <code>encapsulations</code>.
     *
     * @see #MultipartFormInputStream()
     * @see #MultipartFormInputStream(InputStream, byte[], int)
     */
    public MultipartFormInputStream(InputStream input, byte[] boundary) {
        this(input, boundary, DEFAULT_BUFSIZE);
    }

    // --------------------------------------------------------- Public methods

    /**
     * Retrieves the character encoding used when reading the headers of an
     * individual part. When not specified, or <code>null</code>, the
     * platform default encoding is used.
     *
     * @return The encoding used to read part headers.
     */
    public String getHeaderEncoding() {
        return headerEncoding;
    }

    /**
     * Specifies the character encoding to be used when reading the headers of
     * individual parts. When not specified, or <code>null</code>, the
     * platform default encoding is used.
     *
     * @param encoding
     *            The encoding used to read part headers.
     */
    public void setHeaderEncoding(String encoding) {
        headerEncoding = encoding;
    }

    /**
     * Reads a byte from the <code>buffer</code>, and refills it as
     * necessary.
     *
     * @return The next byte from the input stream.
     *
     * @exception IOException
     *                if there is no more data available.
     */
    public byte readByte() throws IOException {
        // Buffer depleted ?
        if (head == tail) {
            // Refill from the start of the buffer.
            int bytesRead = input.read(buffer, 0, bufSize);
            head = 0;
            if (bytesRead == -1) {
                // Leave the buffer marked empty so that further calls keep
                // reporting end of stream instead of returning stale bytes.
                tail = 0;
                throw new IOException("No more data is available");
            }
            tail = bytesRead;
        }
        return buffer[head++];
    }

    /**
     * Skips a <code>boundary</code> token, and checks whether more
     * <code>encapsulations</code> are contained in the stream.
     *
     * @return <code>true</code> if there are more encapsulations in this
     *         stream; <code>false</code> otherwise.
     *
     * @exception MalformedStreamException
     *                if the stream ends unexpectedly or fails to follow
     *                required syntax.
     */
    public boolean readBoundary() throws MalformedStreamException {
        byte[] marker = new byte[2];

        head += boundaryLength;
        try {
            marker[0] = readByte();
            if (marker[0] == LF) {
                // Work around IE5 Mac bug with input type=image.
                // Because the boundary delimiter, not including the trailing
                // CRLF, must not appear within any file (RFC 2046, section
                // 5.1.1), we know the missing CR is due to a buggy browser
                // rather than a file containing something similar to a
                // boundary.
                return true;
            }
            marker[1] = readByte();
        } catch (IOException e) {
            throw streamEnded(e);
        }

        // These checks are deliberately outside the try block: the syntax
        // error below is itself an IOException and must not be rewritten into
        // an "ended unexpectedly" error.
        if (arrayequals(marker, STREAM_TERMINATOR, 2)) {
            return false;
        }
        if (arrayequals(marker, FIELD_SEPARATOR, 2)) {
            return true;
        }
        throw new MalformedStreamException(
                "Unexpected characters follow a boundary");
    }

    /**
     * <p>
     * Changes the boundary token used for partitioning the stream.
     *
     * <p>
     * This method allows single pass processing of nested multipart streams.
     *
     * <p>
     * The boundary token of the nested stream is <code>required</code> to be
     * of the same length as the boundary token in parent stream.
     *
     * <p>
     * Restoring the parent stream boundary token after processing of a nested
     * stream is left to the application.
     *
     * @param boundary
     *            The boundary to be used for parsing of the nested stream.
     *
     * @exception IllegalBoundaryException
     *                if the <code>boundary</code> has a different length than
     *                the one being currently parsed.
     */
    public void setBoundary(byte[] boundary) throws IllegalBoundaryException {
        if (boundary.length != boundaryLength - 4) {
            throw new IllegalBoundaryException(
                    "The length of a boundary token can not be changed");
        }
        // Overwrite only the token; the leading CRLF-- prefix stays in place.
        System.arraycopy(boundary, 0, this.boundary, 4, boundary.length);
    }

    /**
     * <p>
     * Reads the <code>header-part</code> of the current
     * <code>encapsulation</code>.
     * <p>
     * Headers are returned verbatim to the input stream, including the trailing
     * <code>CRLF</code> marker. Parsing is left to the application. Bytes
     * beyond {@link #HEADER_PART_SIZE_MAX} are still consumed from the stream
     * but are not included in the returned string.
     *
     * @param maxSize
     *            The maximum amount to read before giving up
     *
     * @return The <code>header-part</code> of the current encapsulation.
     *
     * @exception MalformedStreamException
     *                if the stream ends unexpectedly or the header part is
     *                longer than <code>maxSize</code> bytes.
     */
    public String readHeaders(final int maxSize)
            throws MalformedStreamException {
        // Number of HEADER_SEPARATOR bytes matched so far.
        int matched = 0;
        // Bytes are collected raw so that multi-byte characters decode
        // correctly at the end.
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        int size = 0;
        while (matched < HEADER_SEPARATOR.length) {
            byte b;
            try {
                b = readByte();
            } catch (IOException e) {
                throw streamEnded(e);
            }
            size++;
            if (size > maxSize) {
                throw new MalformedStreamException(
                        "Stream exceeded maximum of " + maxSize + " bytes");
            }
            if (b == HEADER_SEPARATOR[matched]) {
                matched++;
            } else {
                // A mismatching byte may itself start a new separator (for
                // example a stray CR right before CRLFCRLF), so restart the
                // match at 1 rather than 0 in that case.
                matched = (b == HEADER_SEPARATOR[0]) ? 1 : 0;
            }
            if (size <= HEADER_PART_SIZE_MAX) {
                baos.write(b);
            }
        }

        if (headerEncoding != null) {
            try {
                return baos.toString(headerEncoding);
            } catch (UnsupportedEncodingException e) {
                // Fall back to platform default if specified encoding is not
                // supported.
                return baos.toString();
            }
        }
        return baos.toString();
    }

    /**
     * <p>
     * Reads <code>body-data</code> from the current
     * <code>encapsulation</code> and writes its contents into the output
     * <code>Stream</code>.
     *
     * <p>
     * Arbitrary large amounts of data can be processed by this method using a
     * constant size buffer. (see {@link
     * #MultipartFormInputStream(InputStream,byte[],int) constructor}).
     *
     * @param output
     *            The <code>Stream</code> to write data into.
     *
     * @return the amount of data written.
     *
     * @exception MalformedStreamException
     *                if the stream ends unexpectedly.
     * @exception IOException
     *                if an i/o error occurs.
     */
    public int readBodyData(OutputStream output)
            throws MalformedStreamException, IOException {
        return consumeBodyData(output);
    }

    /**
     * <p>
     * Reads <code>body-data</code> from the current
     * <code>encapsulation</code> and discards it.
     *
     * <p>
     * Use this method to skip encapsulations you don't need or don't
     * understand.
     *
     * @return The amount of data discarded.
     *
     * @exception MalformedStreamException
     *                if the stream ends unexpectedly.
     * @exception IOException
     *                if an i/o error occurs.
     */
    public int discardBodyData() throws MalformedStreamException,
            IOException {
        return consumeBodyData(null);
    }

    /**
     * Finds the beginning of the first <code>encapsulation</code>.
     *
     * @return <code>true</code> if an <code>encapsulation</code> was found
     *         in the stream.
     *
     * @exception IOException
     *                if an i/o error occurs.
     */
    public boolean skipPreamble() throws IOException {
        // First delimiter may be not preceded with a CRLF, so temporarily
        // search for the delimiter without its CRLF prefix.
        System.arraycopy(boundary, 2, boundary, 0, boundary.length - 2);
        boundaryLength = boundary.length - 2;
        try {
            // Discard all data up to the delimiter.
            discardBodyData();

            // Read boundary - if succeeded, the stream contains an
            // encapsulation.
            return readBoundary();
        } catch (MalformedStreamException e) {
            return false;
        } finally {
            // Restore delimiter.
            System.arraycopy(boundary, 0, boundary, 2, boundary.length - 2);
            boundaryLength = boundary.length;
            boundary[0] = CR;
            boundary[1] = LF;
        }
    }

    /**
     * Compares <code>count</code> first bytes in the arrays <code>a</code>
     * and <code>b</code>.
     *
     * @param a
     *            The first array to compare.
     * @param b
     *            The second array to compare.
     * @param count
     *            How many bytes should be compared.
     *
     * @return <code>true</code> if <code>count</code> first bytes in arrays
     *         <code>a</code> and <code>b</code> are equal.
     */
    public static boolean arrayequals(byte[] a, byte[] b, int count) {
        for (int i = 0; i < count; i++) {
            if (a[i] != b[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Searches for a byte of specified value in the <code>buffer</code>,
     * starting at the specified <code>position</code>.
     *
     * @param value
     *            The value to find.
     * @param pos
     *            The starting position for searching.
     *
     * @return The position of byte found, counting from beginning of the
     *         <code>buffer</code>, or <code>-1</code> if not found.
     */
    protected int findByte(byte value, int pos) {
        for (int i = pos; i < tail; i++) {
            if (buffer[i] == value) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Searches for the <code>boundary</code> in the <code>buffer</code>
     * region delimited by <code>head</code> and <code>tail</code>.
     *
     * @return The position of the boundary found, counting from the beginning
     *         of the <code>buffer</code>, or <code>-1</code> if not found.
     */
    protected int findSeparator() {
        // Last index at which a complete boundary still fits before tail.
        int maxpos = tail - boundaryLength;
        int first = head;
        while (first <= maxpos) {
            first = findByte(boundary[0], first);
            if (first == -1 || first > maxpos) {
                return -1;
            }
            int match = 1;
            while (match < boundaryLength
                    && buffer[first + match] == boundary[match]) {
                match++;
            }
            if (match == boundaryLength) {
                return first;
            }
            first++;
        }
        return -1;
    }

    /**
     * Returns a string representation of this object.
     *
     * @return The string representation of this object.
     */
    public String toString() {
        StringBuffer sbTemp = new StringBuffer();
        sbTemp.append("boundary='");
        if (boundary == null) {
            sbTemp.append("null");
        } else {
            // Render each byte as the character with the same code point
            // (ISO-8859-1); String.valueOf(byte[]) would only print the
            // array's identity hash.
            for (int i = 0; i < boundary.length; i++) {
                sbTemp.append((char) (boundary[i] & 0xFF));
            }
        }
        sbTemp.append("'\nbufSize=");
        sbTemp.append(bufSize);
        return sbTemp.toString();
    }

    // -------------------------------------------------------- Private helpers

    /**
     * Consumes <code>body-data</code> up to (not including) the next boundary,
     * copying it to <code>output</code> unless that is <code>null</code>.
     *
     * @param output
     *            The stream to copy the data into, or <code>null</code> to
     *            discard it.
     *
     * @return The number of bytes consumed.
     *
     * @exception MalformedStreamException
     *                if the stream ends before a boundary is found.
     * @exception IOException
     *                if an i/o error occurs.
     */
    private int consumeBodyData(OutputStream output)
            throws MalformedStreamException, IOException {
        int total = 0;
        while (true) {
            // Is boundary token present somewhere in the buffer?
            int pos = findSeparator();
            if (pos != -1) {
                // Emit the rest of the data before the boundary.
                if (output != null) {
                    output.write(buffer, head, pos - head);
                }
                total += pos - head;
                head = pos;
                break;
            }

            // Determine how much data should be kept in the buffer: the tail
            // end may hold the beginning of a boundary that is not complete
            // yet.
            int pad = Math.min(tail - head, keepRegion);

            // Emit the data that certainly belongs to the body-data.
            int count = tail - head - pad;
            if (output != null) {
                output.write(buffer, head, count);
            }
            total += count;

            // Move the kept data to the beginning of the buffer.
            System.arraycopy(buffer, tail - pad, buffer, 0, pad);
            head = 0;

            // Refill buffer with new data: [pprrrrrrr]
            int bytesRead = input.read(buffer, pad, bufSize - pad);
            if (bytesRead == -1) {
                // The last pad amount is left in the buffer. Boundary can't
                // be in there so emit the data we have and signal an error
                // condition.
                if (output != null) {
                    output.write(buffer, 0, pad);
                    output.flush();
                }
                // Everything has been handed out; mark the buffer empty so
                // the same bytes cannot be emitted twice.
                tail = 0;
                throw new MalformedStreamException(
                        "Stream ended unexpectedly");
            }
            tail = pad + bytesRead;
        }
        if (output != null) {
            output.flush();
        }
        return total;
    }

    /**
     * Builds the exception reported when the underlying stream runs out of
     * data, keeping the original failure as its cause.
     *
     * @param cause
     *            The i/o failure that was observed.
     *
     * @return The exception to throw.
     */
    private MalformedStreamException streamEnded(IOException cause) {
        MalformedStreamException e = new MalformedStreamException(
                "Stream ended unexpectedly");
        e.initCause(cause);
        return e;
    }

    // ------------------------------------------------------ Exception classes

    // NOTE(review): both exception types are non-static inner classes and
    // therefore keep a reference to the enclosing stream. They are left that
    // way so existing code that instantiates them keeps compiling.

    /**
     * Thrown to indicate that the input stream fails to follow the required
     * syntax.
     */
    public class MalformedStreamException extends IOException {

        private static final long serialVersionUID = 1L;

        /**
         * Constructs a <code>MalformedStreamException</code> with no detail
         * message.
         */
        public MalformedStreamException() {
            super();
        }

        /**
         * Constructs an <code>MalformedStreamException</code> with the
         * specified detail message.
         *
         * @param message
         *            The detail message.
         */
        public MalformedStreamException(String message) {
            super(message);
        }
    }

    /**
     * Thrown upon attempt of setting an invalid boundary token.
     */
    public class IllegalBoundaryException extends IOException {

        private static final long serialVersionUID = 1L;

        /**
         * Constructs an <code>IllegalBoundaryException</code> with no detail
         * message.
         */
        public IllegalBoundaryException() {
            super();
        }

        /**
         * Constructs an <code>IllegalBoundaryException</code> with the
         * specified detail message.
         *
         * @param message
         *            The detail message.
         */
        public IllegalBoundaryException(String message) {
            super(message);
        }
    }
}
|