001: /*
002:
003: Licensed to the Apache Software Foundation (ASF) under one or more
004: contributor license agreements. See the NOTICE file distributed with
005: this work for additional information regarding copyright ownership.
006: The ASF licenses this file to You under the Apache License, Version 2.0
007: (the "License"); you may not use this file except in compliance with
008: the License. You may obtain a copy of the License at
009:
010: http://www.apache.org/licenses/LICENSE-2.0
011:
012: Unless required by applicable law or agreed to in writing, software
013: distributed under the License is distributed on an "AS IS" BASIS,
014: WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015: See the License for the specific language governing permissions and
016: limitations under the License.
017:
018: */
019: package org.apache.batik.xml;
020:
021: import java.io.IOException;
022: import java.io.InputStream;
023: import java.io.PushbackInputStream;
024: import java.io.Reader;
025:
026: import org.apache.batik.util.io.StreamNormalizingReader;
027: import org.apache.batik.util.io.UTF16Decoder;
028:
029: /**
030: * This class represents a normalizing reader with encoding detection
031: * management.
032: *
033: * @author <a href="mailto:stephane@hillion.org">Stephane Hillion</a>
034: * @version $Id: XMLStreamNormalizingReader.java 475477 2006-11-15 22:44:28Z cam $
035: */
036: public class XMLStreamNormalizingReader extends StreamNormalizingReader {
037:
038: /**
039: * Creates a new XMLStreamNormalizingReader.
040: * @param is The input stream to read.
041: * @param encod The character encoding to use if the auto-detection fail.
042: */
043: public XMLStreamNormalizingReader(InputStream is, String encod)
044: throws IOException {
045: PushbackInputStream pbis = new PushbackInputStream(is, 128);
046: byte[] buf = new byte[4];
047:
048: int len = pbis.read(buf);
049: if (len > 0) {
050: pbis.unread(buf, 0, len);
051: }
052:
053: if (len == 4) {
054: switch (buf[0] & 0x00FF) {
055: case 0:
056: if (buf[1] == 0x003c && buf[2] == 0x0000
057: && buf[3] == 0x003f) {
058: charDecoder = new UTF16Decoder(pbis, true);
059: return;
060: }
061: break;
062:
063: case '<':
064: switch (buf[1] & 0x00FF) {
065: case 0:
066: if (buf[2] == 0x003f && buf[3] == 0x0000) {
067: charDecoder = new UTF16Decoder(pbis, false);
068: return;
069: }
070: break;
071:
072: case '?':
073: if (buf[2] == 'x' && buf[3] == 'm') {
074: Reader r = XMLUtilities
075: .createXMLDeclarationReader(pbis,
076: "UTF8");
077: String enc = XMLUtilities
078: .getXMLDeclarationEncoding(r, "UTF-8");
079: charDecoder = createCharDecoder(pbis, enc);
080: return;
081: }
082: }
083: break;
084:
085: case 0x004C:
086: if (buf[1] == 0x006f && (buf[2] & 0x00FF) == 0x00a7
087: && (buf[3] & 0x00FF) == 0x0094) {
088: Reader r = XMLUtilities.createXMLDeclarationReader(
089: pbis, "CP037");
090: String enc = XMLUtilities
091: .getXMLDeclarationEncoding(r,
092: "EBCDIC-CP-US");
093: charDecoder = createCharDecoder(pbis, enc);
094: return;
095: }
096: break;
097:
098: case 0x00FE:
099: if ((buf[1] & 0x00FF) == 0x00FF) {
100: charDecoder = createCharDecoder(pbis, "UTF-16");
101: return;
102: }
103: break;
104:
105: case 0x00FF:
106: if ((buf[1] & 0x00FF) == 0x00FE) {
107: charDecoder = createCharDecoder(pbis, "UTF-16");
108: return;
109: }
110: }
111: }
112:
113: encod = (encod == null) ? "UTF-8" : encod;
114: charDecoder = createCharDecoder(pbis, encod);
115: }
116: }
|