0001: /*
0002: * Fast Infoset ver. 0.1 software ("Software")
0003: *
0004: * Copyright, 2004-2005 Sun Microsystems, Inc. All Rights Reserved.
0005: *
0006: * Software is licensed under the Apache License, Version 2.0 (the "License");
0007: * you may not use this file except in compliance with the License. You may
0008: * obtain a copy of the License at:
0009: *
0010: * http://www.apache.org/licenses/LICENSE-2.0
0011: *
0012: * Unless required by applicable law or agreed to in writing, software
0013: * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
0014: * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
0015: * License for the specific language governing permissions and limitations.
0016: *
0017: * Sun supports and benefits from the global community of open source
0018: * developers, and thanks the community for its important contributions and
0019: * open standards-based technology, which Sun has adopted into many of its
0020: * products.
0021: *
0022: * Please note that portions of Software may be provided with notices and
0023: * open source licenses from such communities and third parties that govern the
0024: * use of those portions, and any licenses granted hereunder do not alter any
0025: * rights and obligations you may have under such open source licenses,
0026: * however, the disclaimer of warranty and limitation of liability provisions
0027: * in this License will apply to all Software in this distribution.
0028: *
0029: * You acknowledge that the Software is not designed, licensed or intended
0030: * for use in the design, construction, operation or maintenance of any nuclear
0031: * facility.
0032: *
0033: * Apache License
0034: * Version 2.0, January 2004
0035: * http://www.apache.org/licenses/
0036: *
0037: */
0038: package com.sun.xml.fastinfoset;
0039:
0040: import com.sun.xml.fastinfoset.algorithm.BuiltInEncodingAlgorithmFactory;
0041: import com.sun.xml.fastinfoset.org.apache.xerces.util.XMLChar;
0042: import com.sun.xml.fastinfoset.util.CharArrayIntMap;
0043: import com.sun.xml.fastinfoset.util.KeyIntMap;
0044: import com.sun.xml.fastinfoset.util.LocalNameQualifiedNamesMap;
0045: import com.sun.xml.fastinfoset.util.StringIntMap;
0046: import com.sun.xml.fastinfoset.vocab.SerializerVocabulary;
0047: import java.io.IOException;
0048: import java.io.OutputStream;
0049: import java.util.HashMap;
0050: import java.util.Map;
0051: import org.jvnet.fastinfoset.EncodingAlgorithm;
0052: import org.jvnet.fastinfoset.EncodingAlgorithmException;
0053: import org.jvnet.fastinfoset.EncodingAlgorithmIndexes;
0054: import org.jvnet.fastinfoset.ExternalVocabulary;
0055: import org.jvnet.fastinfoset.FastInfosetException;
0056: import org.jvnet.fastinfoset.FastInfosetSerializer;
0057: import org.jvnet.fastinfoset.RestrictedAlphabet;
0058: import org.jvnet.fastinfoset.VocabularyApplicationData;
0059: import org.xml.sax.helpers.DefaultHandler;
0060:
0061: /**
0062: * Abstract encoder for developing concrete encoders.
0063: *
0064: * Concrete implementations extending Encoder will utilize methods on Encoder
0065: * to encode XML infoset according to the Fast Infoset standard. It is the
0066: * responsibility of the concrete implementation to ensure that methods are
0067: * invoked in the correct order to produce a valid fast infoset document.
0068: *
0069: * <p>
0070: * This class extends org.xml.sax.helpers.DefaultHandler so that concrete SAX
0071: * implementations can be used with javax.xml.parsers.SAXParser and the parse
0072: * methods that take org.xml.sax.helpers.DefaultHandler as a parameter.
0073: *
0074: * <p>
0075: * Buffering of octets that are written to an {@link java.io.OutputStream} is
0076: * supported in a similar manner to a {@link java.io.BufferedOutputStream}.
0077: * Combining buffering with encoding enables better performance.
0078: *
0079: * <p>
0080: * More than one fast infoset document may be encoded to the
0081: * {@link java.io.OutputStream}.
0082: *
0083: */
0084: public abstract class Encoder extends DefaultHandler implements
0085: FastInfosetSerializer {
0086:
0087: /**
0088: * Character encoding scheme system property for the encoding
0089: * of content and attribute values.
0090: */
0091: public static final String CHARACTER_ENCODING_SCHEME_SYSTEM_PROPERTY = "com.sun.xml.fastinfoset.serializer.character-encoding-scheme";
0092:
0093: /**
0094: * Default character encoding scheme system property for the encoding
0095: * of content and attribute values.
0096: */
0097: protected static final String _characterEncodingSchemeSystemDefault = getDefaultEncodingScheme();
0098:
0099: private static String getDefaultEncodingScheme() {
0100: String p = System.getProperty(
0101: CHARACTER_ENCODING_SCHEME_SYSTEM_PROPERTY, UTF_8);
0102: if (p.equals(UTF_16BE)) {
0103: return UTF_16BE;
0104: } else {
0105: return UTF_8;
0106: }
0107: }
0108:
0109: protected static int[] NUMERIC_CHARACTERS_TABLE;
0110:
0111: protected static int[] DATE_TIME_CHARACTERS_TABLE;
0112:
0113: static {
0114: NUMERIC_CHARACTERS_TABLE = new int[maxCharacter(RestrictedAlphabet.NUMERIC_CHARACTERS) + 1];
0115: DATE_TIME_CHARACTERS_TABLE = new int[maxCharacter(RestrictedAlphabet.DATE_TIME_CHARACTERS) + 1];
0116:
0117: for (int i = 0; i < NUMERIC_CHARACTERS_TABLE.length; i++) {
0118: NUMERIC_CHARACTERS_TABLE[i] = -1;
0119: }
0120: for (int i = 0; i < DATE_TIME_CHARACTERS_TABLE.length; i++) {
0121: DATE_TIME_CHARACTERS_TABLE[i] = -1;
0122: }
0123:
0124: for (int i = 0; i < RestrictedAlphabet.NUMERIC_CHARACTERS
0125: .length(); i++) {
0126: NUMERIC_CHARACTERS_TABLE[RestrictedAlphabet.NUMERIC_CHARACTERS
0127: .charAt(i)] = i;
0128: }
0129: for (int i = 0; i < RestrictedAlphabet.DATE_TIME_CHARACTERS
0130: .length(); i++) {
0131: DATE_TIME_CHARACTERS_TABLE[RestrictedAlphabet.DATE_TIME_CHARACTERS
0132: .charAt(i)] = i;
0133: }
0134: }
0135:
0136: private static int maxCharacter(String alphabet) {
0137: int c = 0;
0138: for (int i = 0; i < alphabet.length(); i++) {
0139: if (c < alphabet.charAt(i)) {
0140: c = alphabet.charAt(i);
0141: }
0142: }
0143:
0144: return c;
0145: }
0146:
0147: /**
0148: * True if DTD and internal subset shall be ignored.
0149: */
0150: private boolean _ignoreDTD;
0151:
0152: /**
0153: * True if comments shall be ignored.
0154: */
0155: private boolean _ignoreComments;
0156:
0157: /**
0158: * True if procesing instructions shall be ignored.
0159: */
0160: private boolean _ignoreProcessingInstructions;
0161:
0162: /**
0163: * True if white space characters for text content shall be ignored.
0164: */
0165: private boolean _ignoreWhiteSpaceTextContent;
0166:
0167: /**
0168: * True, if the local name string is used as the key to find the
0169: * associated set of qualified names.
0170: * <p>
0171: * False, if the <prefix>:<local name> string is used as the key
0172: * to find the associated set of qualified names.
0173: */
0174: private boolean _useLocalNameAsKeyForQualifiedNameLookup;
0175:
0176: /**
0177: * True if strings for text content and attribute values will be
0178: * UTF-8 encoded otherwise they will be UTF-16 encoded.
0179: */
0180: private boolean _encodingStringsAsUtf8 = true;
0181:
0182: /**
0183: * Encoding constant generated from the string encoding.
0184: */
0185: private int _nonIdentifyingStringOnThirdBitCES;
0186:
0187: /**
0188: * Encoding constant generated from the string encoding.
0189: */
0190: private int _nonIdentifyingStringOnFirstBitCES;
0191:
0192: /**
0193: * The map of URIs to algorithms.
0194: */
0195: private Map _registeredEncodingAlgorithms = new HashMap();
0196:
0197: /**
0198: * The vocabulary that is used by the encoder
0199: */
0200: protected SerializerVocabulary _v;
0201:
0202: /**
0203: * The vocabulary application data that is used by the encoder
0204: */
0205: protected VocabularyApplicationData _vData;
0206:
0207: /**
0208: * True if the vocubulary is internal to the encoder
0209: */
0210: private boolean _vIsInternal;
0211:
0212: /**
0213: * True if terminatation of an information item is required
0214: */
0215: protected boolean _terminate = false;
0216:
0217: /**
0218: * The current octet that is to be written.
0219: */
0220: protected int _b;
0221:
0222: /**
0223: * The {@link java.io.OutputStream} that the encoded XML infoset (the
0224: * fast infoset document) is written to.
0225: */
0226: protected OutputStream _s;
0227:
0228: /**
0229: * The internal buffer of characters used for the UTF-8 or UTF-16 encoding
0230: * of characters.
0231: */
0232: protected char[] _charBuffer = new char[512];
0233:
0234: /**
0235: * The internal buffer of bytes.
0236: */
0237: protected byte[] _octetBuffer = new byte[1024];
0238:
0239: /**
0240: * The current position in the internal buffer.
0241: */
0242: protected int _octetBufferIndex;
0243:
0244: /**
0245: * The current mark in the internal buffer.
0246: *
0247: * <p>
0248: * If the value of the mark is < 0 then the mark is not set.
0249: */
0250: protected int _markIndex = -1;
0251:
0252: /**
0253: * The limit on the size of [normalized value] of Attribute Information
0254: * Items that will be indexed.
0255: */
0256: protected int attributeValueSizeConstraint = FastInfosetSerializer.ATTRIBUTE_VALUE_SIZE_CONSTRAINT;
0257:
0258: /**
0259: * The limit on the size of indexed Map for attribute values
0260: * Limit is measured in characters number
0261: */
0262: protected int attributeValueMapTotalCharactersConstraint = FastInfosetSerializer.ATTRIBUTE_VALUE_MAP_MEMORY_CONSTRAINT / 2;
0263:
0264: /**
0265: * The limit on the size of character content chunks
0266: * of Character Information Items or Comment Information Items that
0267: * will be indexed.
0268: */
0269: protected int characterContentChunkSizeContraint = FastInfosetSerializer.CHARACTER_CONTENT_CHUNK_SIZE_CONSTRAINT;
0270:
0271: /**
0272: * The limit on the size of indexed Map for character content chunks
0273: * Limit is measured in characters number
0274: */
0275: protected int characterContentChunkMapTotalCharactersConstraint = FastInfosetSerializer.CHARACTER_CONTENT_CHUNK_MAP_MEMORY_CONSTRAINT / 2;
0276:
/**
 * Default constructor for the Encoder. The character encoding scheme is
 * initialised from the system default.
 */
protected Encoder() {
    setCharacterEncodingScheme(_characterEncodingSchemeSystemDefault);
}

/**
 * Constructor that also selects how qualified names are looked up.
 *
 * @param useLocalNameAsKeyForQualifiedNameLookup true if the local name
 *        string is the key for finding the associated set of qualified
 *        names, false if the {@code <prefix>:<local name>} string is.
 */
protected Encoder(boolean useLocalNameAsKeyForQualifiedNameLookup) {
    this();
    this._useLocalNameAsKeyForQualifiedNameLookup =
            useLocalNameAsKeyForQualifiedNameLookup;
}
0288:
0289: // FastInfosetSerializer interface
0290:
0291: /**
0292: * {@inheritDoc}
0293: */
0294: public final void setIgnoreDTD(boolean ignoreDTD) {
0295: _ignoreDTD = ignoreDTD;
0296: }
0297:
0298: /**
0299: * {@inheritDoc}
0300: */
0301: public final boolean getIgnoreDTD() {
0302: return _ignoreDTD;
0303: }
0304:
0305: /**
0306: * {@inheritDoc}
0307: */
0308: public final void setIgnoreComments(boolean ignoreComments) {
0309: _ignoreComments = ignoreComments;
0310: }
0311:
0312: /**
0313: * {@inheritDoc}
0314: */
0315: public final boolean getIgnoreComments() {
0316: return _ignoreComments;
0317: }
0318:
0319: /**
0320: * {@inheritDoc}
0321: */
0322: public final void setIgnoreProcesingInstructions(
0323: boolean ignoreProcesingInstructions) {
0324: _ignoreProcessingInstructions = ignoreProcesingInstructions;
0325: }
0326:
0327: /**
0328: * {@inheritDoc}
0329: */
0330: public final boolean getIgnoreProcesingInstructions() {
0331: return _ignoreProcessingInstructions;
0332: }
0333:
0334: /**
0335: * {@inheritDoc}
0336: */
0337: public final void setIgnoreWhiteSpaceTextContent(
0338: boolean ignoreWhiteSpaceTextContent) {
0339: _ignoreWhiteSpaceTextContent = ignoreWhiteSpaceTextContent;
0340: }
0341:
0342: /**
0343: * {@inheritDoc}
0344: */
0345: public final boolean getIgnoreWhiteSpaceTextContent() {
0346: return _ignoreWhiteSpaceTextContent;
0347: }
0348:
0349: /**
0350: * {@inheritDoc}
0351: */
0352: public void setCharacterEncodingScheme(
0353: String characterEncodingScheme) {
0354: if (characterEncodingScheme.equals(UTF_16BE)) {
0355: _encodingStringsAsUtf8 = false;
0356: _nonIdentifyingStringOnThirdBitCES = EncodingConstants.CHARACTER_CHUNK
0357: | EncodingConstants.CHARACTER_CHUNK_UTF_16_FLAG;
0358: _nonIdentifyingStringOnFirstBitCES = EncodingConstants.NISTRING_UTF_16_FLAG;
0359: } else {
0360: _encodingStringsAsUtf8 = true;
0361: _nonIdentifyingStringOnThirdBitCES = EncodingConstants.CHARACTER_CHUNK;
0362: _nonIdentifyingStringOnFirstBitCES = 0;
0363: }
0364: }
0365:
0366: /**
0367: * {@inheritDoc}
0368: */
0369: public String getCharacterEncodingScheme() {
0370: return (_encodingStringsAsUtf8) ? UTF_8 : UTF_16BE;
0371: }
0372:
0373: /**
0374: * {@inheritDoc}
0375: */
0376: public void setRegisteredEncodingAlgorithms(Map algorithms) {
0377: _registeredEncodingAlgorithms = algorithms;
0378: if (_registeredEncodingAlgorithms == null) {
0379: _registeredEncodingAlgorithms = new HashMap();
0380: }
0381: }
0382:
0383: /**
0384: * {@inheritDoc}
0385: */
0386: public Map getRegisteredEncodingAlgorithms() {
0387: return _registeredEncodingAlgorithms;
0388: }
0389:
0390: /**
0391: * {@inheritDoc}
0392: */
0393: public void setCharacterContentChunkSizeLimit(int size) {
0394: if (size < 0) {
0395: size = 0;
0396: }
0397:
0398: characterContentChunkSizeContraint = size;
0399: }
0400:
0401: /**
0402: * {@inheritDoc}
0403: */
0404: public int getCharacterContentChunkSizeLimit() {
0405: return characterContentChunkSizeContraint;
0406: }
0407:
0408: /**
0409: * {@inheritDoc}
0410: */
0411: public void setCharacterContentChunkMapMemoryLimit(int size) {
0412: if (size < 0) {
0413: size = 0;
0414: }
0415:
0416: characterContentChunkMapTotalCharactersConstraint = size / 2;
0417: }
0418:
0419: /**
0420: * {@inheritDoc}
0421: */
0422: public int getCharacterContentChunkMapMemoryLimit() {
0423: return characterContentChunkMapTotalCharactersConstraint * 2;
0424: }
0425:
0426: /**
0427: * Checks whether character content chunk (its length) matches limits:
0428: * length limit itself and limit for total capacity of specified CharArrayIntMap
0429: *
0430: * @param length the length of character content chunk is checking to be added to Map.
0431: * @param map the custom CharArrayIntMap, which memory limits will be checked.
0432: * @return whether character content chunk length matches limits
0433: */
0434: public boolean isCharacterContentChunkLengthMatchesLimit(
0435: int length, CharArrayIntMap map) {
0436: return (length < characterContentChunkSizeContraint)
0437: && (map.getTotalCharacterCount() + length < characterContentChunkMapTotalCharactersConstraint);
0438: }
0439:
0440: /**
0441: * {@inheritDoc}
0442: */
0443: public void setAttributeValueSizeLimit(int size) {
0444: if (size < 0) {
0445: size = 0;
0446: }
0447:
0448: attributeValueSizeConstraint = size;
0449: }
0450:
0451: /**
0452: * {@inheritDoc}
0453: */
0454: public int getAttributeValueSizeLimit() {
0455: return attributeValueSizeConstraint;
0456: }
0457:
0458: /**
0459: * {@inheritDoc}
0460: */
0461: public void setAttributeValueMapMemoryLimit(int size) {
0462: if (size < 0) {
0463: size = 0;
0464: }
0465:
0466: attributeValueMapTotalCharactersConstraint = size / 2;
0467:
0468: }
0469:
0470: /**
0471: * {@inheritDoc}
0472: */
0473: public int getAttributeValueMapMemoryLimit() {
0474: return attributeValueMapTotalCharactersConstraint * 2;
0475: }
0476:
0477: /**
0478: * Checks whether attribute value (its length) matches limits:
0479: * length limit itself and limit for index Map total capacity
0480: *
0481: * @return whether attribute value matches limits
0482: */
0483: public boolean isAttributeValueLengthMatchesLimit(int length) {
0484: return (length < attributeValueSizeConstraint)
0485: && (_v.attributeValue.getTotalCharacterCount() + length < attributeValueMapTotalCharactersConstraint);
0486: }
0487:
0488: /**
0489: * {@inheritDoc}
0490: */
0491: public void setExternalVocabulary(ExternalVocabulary v) {
0492: // Create internal serializer vocabulary
0493: _v = new SerializerVocabulary();
0494: // Set the external vocabulary
0495: SerializerVocabulary ev = new SerializerVocabulary(
0496: v.vocabulary, _useLocalNameAsKeyForQualifiedNameLookup);
0497: _v.setExternalVocabulary(v.URI, ev, false);
0498:
0499: _vIsInternal = true;
0500: }
0501:
0502: /**
0503: * {@inheritDoc}
0504: */
0505: public void setVocabularyApplicationData(
0506: VocabularyApplicationData data) {
0507: _vData = data;
0508: }
0509:
0510: /**
0511: * {@inheritDoc}
0512: */
0513: public VocabularyApplicationData getVocabularyApplicationData() {
0514: return _vData;
0515: }
0516:
0517: // End of FastInfosetSerializer interface
0518:
0519: /**
0520: * Reset the encoder for reuse encoding another XML infoset.
0521: */
0522: public void reset() {
0523: _terminate = false;
0524: }
0525:
0526: /**
0527: * Set the OutputStream to encode the XML infoset to a
0528: * fast infoset document.
0529: *
0530: * @param s the OutputStream where the fast infoset document is written to.
0531: */
0532: public void setOutputStream(OutputStream s) {
0533: _octetBufferIndex = 0;
0534: _markIndex = -1;
0535: _s = s;
0536: }
0537:
0538: /**
0539: * Set the SerializerVocabulary to be used for encoding.
0540: *
0541: * @param vocabulary the vocabulary to be used for encoding.
0542: */
0543: public void setVocabulary(SerializerVocabulary vocabulary) {
0544: _v = vocabulary;
0545: _vIsInternal = false;
0546: }
0547:
0548: /**
0549: * Encode the header of a fast infoset document.
0550: *
0551: * @param encodeXmlDecl true if the XML declaration should be encoded.
0552: */
0553: protected final void encodeHeader(boolean encodeXmlDecl)
0554: throws IOException {
0555: if (encodeXmlDecl) {
0556: _s.write(EncodingConstants.XML_DECLARATION_VALUES[0]);
0557: }
0558: _s.write(EncodingConstants.BINARY_HEADER);
0559: }
0560:
0561: /**
0562: * Encode the initial vocabulary of a fast infoset document.
0563: *
0564: */
0565: protected final void encodeInitialVocabulary() throws IOException {
0566: if (_v == null) {
0567: _v = new SerializerVocabulary();
0568: _vIsInternal = true;
0569: } else if (_vIsInternal) {
0570: _v.clear();
0571: if (_vData != null)
0572: _vData.clear();
0573: }
0574:
0575: if (!_v.hasInitialVocabulary() && !_v.hasExternalVocabulary()) {
0576: write(0);
0577: } else if (_v.hasInitialVocabulary()) {
0578: _b = EncodingConstants.DOCUMENT_INITIAL_VOCABULARY_FLAG;
0579: write(_b);
0580:
0581: SerializerVocabulary initialVocabulary = _v
0582: .getReadOnlyVocabulary();
0583:
0584: // TODO check for contents of vocabulary to assign bits
0585: if (initialVocabulary.hasExternalVocabulary()) {
0586: _b = EncodingConstants.INITIAL_VOCABULARY_EXTERNAL_VOCABULARY_FLAG;
0587: write(_b);
0588: write(0);
0589: }
0590:
0591: if (initialVocabulary.hasExternalVocabulary()) {
0592: encodeNonEmptyOctetStringOnSecondBit(_v
0593: .getExternalVocabularyURI());
0594: }
0595:
0596: // TODO check for contents of vocabulary to encode values
0597: } else if (_v.hasExternalVocabulary()) {
0598: _b = EncodingConstants.DOCUMENT_INITIAL_VOCABULARY_FLAG;
0599: write(_b);
0600:
0601: _b = EncodingConstants.INITIAL_VOCABULARY_EXTERNAL_VOCABULARY_FLAG;
0602: write(_b);
0603: write(0);
0604:
0605: encodeNonEmptyOctetStringOnSecondBit(_v
0606: .getExternalVocabularyURI());
0607: }
0608: }
0609:
0610: /**
0611: * Encode the termination of the Document Information Item.
0612: *
0613: */
0614: protected final void encodeDocumentTermination() throws IOException {
0615: encodeElementTermination();
0616: encodeTermination();
0617: _flush();
0618: _s.flush();
0619: }
0620:
0621: /**
0622: * Encode the termination of an Element Information Item.
0623: *
0624: */
0625: protected final void encodeElementTermination() throws IOException {
0626: _terminate = true;
0627: switch (_b) {
0628: case EncodingConstants.TERMINATOR:
0629: _b = EncodingConstants.DOUBLE_TERMINATOR;
0630: break;
0631: case EncodingConstants.DOUBLE_TERMINATOR:
0632: write(EncodingConstants.DOUBLE_TERMINATOR);
0633: default:
0634: _b = EncodingConstants.TERMINATOR;
0635: }
0636: }
0637:
0638: /**
0639: * Encode a termination if required.
0640: *
0641: */
0642: protected final void encodeTermination() throws IOException {
0643: if (_terminate) {
0644: write(_b);
0645: _b = 0;
0646: _terminate = false;
0647: }
0648: }
0649:
0650: /**
0651: * Encode a Attribute Information Item that is a namespace declaration.
0652: *
0653: * @param prefix the prefix of the namespace declaration,
0654: * if "" then there is no prefix for the namespace declaration.
0655: * @param uri the URI of the namespace declaration,
0656: * if "" then there is no URI for the namespace declaration.
0657: */
0658: protected final void encodeNamespaceAttribute(String prefix,
0659: String uri) throws IOException {
0660: _b = EncodingConstants.NAMESPACE_ATTRIBUTE;
0661: if (prefix.length() > 0) {
0662: _b |= EncodingConstants.NAMESPACE_ATTRIBUTE_PREFIX_FLAG;
0663: }
0664: if (uri.length() > 0) {
0665: _b |= EncodingConstants.NAMESPACE_ATTRIBUTE_NAME_FLAG;
0666: }
0667:
0668: // NOTE a prefix with out a namespace name is an undeclaration
0669: // of the namespace bound to the prefix
0670: // TODO needs to investigate how the startPrefixMapping works in
0671: // relation to undeclaration
0672:
0673: write(_b);
0674:
0675: if (prefix.length() > 0) {
0676: encodeIdentifyingNonEmptyStringOnFirstBit(prefix, _v.prefix);
0677: }
0678: if (uri.length() > 0) {
0679: encodeIdentifyingNonEmptyStringOnFirstBit(uri,
0680: _v.namespaceName);
0681: }
0682: }
0683:
0684: /**
0685: * Encode a chunk of Character Information Items.
0686: *
0687: * @param ch the array of characters.
0688: * @param offset the offset into the array of characters.
0689: * @param length the length of characters.
0690: * @throws ArrayIndexOutOfBoundsException.
0691: */
0692: protected final void encodeCharacters(char[] ch, int offset,
0693: int length) throws IOException {
0694: final boolean addToTable = isCharacterContentChunkLengthMatchesLimit(
0695: length, _v.characterContentChunk);
0696: encodeNonIdentifyingStringOnThirdBit(ch, offset, length,
0697: _v.characterContentChunk, addToTable, true);
0698: }
0699:
0700: /**
0701: * Encode a chunk of Character Information Items.
0702: *
0703: * If the array of characters is to be indexed (as determined by
0704: * {@link Encoder#characterContentChunkSizeContraint}) then the array is not cloned
0705: * when adding the array to the vocabulary.
0706: *
0707: * @param ch the array of characters.
0708: * @param offset the offset into the array of characters.
0709: * @param length the length of characters.
0710: * @throws ArrayIndexOutOfBoundsException.
0711: */
0712: protected final void encodeCharactersNoClone(char[] ch, int offset,
0713: int length) throws IOException {
0714: final boolean addToTable = isCharacterContentChunkLengthMatchesLimit(
0715: length, _v.characterContentChunk);
0716: encodeNonIdentifyingStringOnThirdBit(ch, offset, length,
0717: _v.characterContentChunk, addToTable, false);
0718: }
0719:
0720: /**
0721: * Encode a chunk of Character Information Items using a restricted
0722: * alphabet that results in the encoding of a character in 4 bits
0723: * (or two characters per octet).
0724: *
0725: * @param id the restricted alphabet identifier.
0726: * @param table the table mapping characters to 4 bit values.
0727: * @param ch the array of characters.
0728: * @param offset the offset into the array of characters.
0729: * @param length the length of characters.
0730: * @param addToTable if characters should be added to table.
0731: * @throws ArrayIndexOutOfBoundsException.
0732: */
0733: protected final void encodeFourBitCharacters(int id, int[] table,
0734: char[] ch, int offset, int length, boolean addToTable)
0735: throws FastInfosetException, IOException {
0736: if (addToTable) {
0737: final int index = _v.characterContentChunk.obtainIndex(ch,
0738: offset, length, true);
0739: if (index != KeyIntMap.NOT_PRESENT) {
0740: _b = EncodingConstants.CHARACTER_CHUNK | 0x20;
0741: encodeNonZeroIntegerOnFourthBit(index);
0742: return;
0743: }
0744: }
0745:
0746: // This procedure assumes that id <= 64
0747: _b = (addToTable) ? EncodingConstants.CHARACTER_CHUNK
0748: | EncodingConstants.CHARACTER_CHUNK_RESTRICTED_ALPHABET_FLAG
0749: | EncodingConstants.CHARACTER_CHUNK_ADD_TO_TABLE_FLAG
0750: : EncodingConstants.CHARACTER_CHUNK
0751: | EncodingConstants.CHARACTER_CHUNK_RESTRICTED_ALPHABET_FLAG;
0752: write(_b);
0753:
0754: // Encode bottom 6 bits of enoding algorithm id
0755: _b = id << 2;
0756:
0757: encodeNonEmptyFourBitCharacterStringOnSeventhBit(table, ch,
0758: offset, length);
0759: }
0760:
0761: /**
0762: * Encode a chunk of Character Information Items using a restricted
0763: * alphabet table.
0764: *
0765: * @param alphabet the alphabet defining the mapping between characters and
0766: * integer values.
0767: * @param ch the array of characters.
0768: * @param offset the offset into the array of characters.
0769: * @param length the length of characters.
0770: * @param addToTable if characters should be added to table
0771: * @throws ArrayIndexOutOfBoundsException.
0772: * @throws FastInfosetException if the alphabet is not present in the
0773: * vocabulary.
0774: */
0775: protected final void encodeAlphabetCharacters(String alphabet,
0776: char[] ch, int offset, int length, boolean addToTable)
0777: throws FastInfosetException, IOException {
0778: if (addToTable) {
0779: final int index = _v.characterContentChunk.obtainIndex(ch,
0780: offset, length, true);
0781: if (index != KeyIntMap.NOT_PRESENT) {
0782: _b = EncodingConstants.CHARACTER_CHUNK | 0x20;
0783: encodeNonZeroIntegerOnFourthBit(index);
0784: return;
0785: }
0786: }
0787:
0788: int id = _v.restrictedAlphabet.get(alphabet);
0789: if (id == KeyIntMap.NOT_PRESENT) {
0790: throw new FastInfosetException(CommonResourceBundle
0791: .getInstance().getString(
0792: "message.restrictedAlphabetNotPresent"));
0793: }
0794: id += EncodingConstants.RESTRICTED_ALPHABET_APPLICATION_START;
0795:
0796: _b = (addToTable) ? EncodingConstants.CHARACTER_CHUNK
0797: | EncodingConstants.CHARACTER_CHUNK_RESTRICTED_ALPHABET_FLAG
0798: | EncodingConstants.CHARACTER_CHUNK_ADD_TO_TABLE_FLAG
0799: : EncodingConstants.CHARACTER_CHUNK
0800: | EncodingConstants.CHARACTER_CHUNK_RESTRICTED_ALPHABET_FLAG;
0801: _b |= (id & 0xC0) >> 6;
0802: write(_b);
0803:
0804: // Encode bottom 6 bits of enoding algorithm id
0805: _b = (id & 0x3F) << 2;
0806:
0807: encodeNonEmptyNBitCharacterStringOnSeventhBit(alphabet, ch,
0808: offset, length);
0809: }
0810:
0811: /**
0812: * Encode a Processing Instruction Information Item.
0813: *
0814: * @param target the target of the processing instruction.
0815: * @param data the data of the processing instruction.
0816: */
0817: protected final void encodeProcessingInstruction(String target,
0818: String data) throws IOException {
0819: write(EncodingConstants.PROCESSING_INSTRUCTION);
0820:
0821: // Target
0822: encodeIdentifyingNonEmptyStringOnFirstBit(target,
0823: _v.otherNCName);
0824:
0825: // Data
0826: boolean addToTable = isCharacterContentChunkLengthMatchesLimit(
0827: data.length(), _v.characterContentChunk);
0828: encodeNonIdentifyingStringOnFirstBit(data, _v.otherString,
0829: addToTable);
0830: }
0831:
0832: /**
0833: * Encode a Document Type Declaration.
0834: *
0835: * @param systemId the system identifier of the external subset.
0836: * @param publicId the public identifier of the external subset.
0837: */
0838: protected final void encodeDocumentTypeDeclaration(String systemId,
0839: String publicId) throws IOException {
0840: _b = EncodingConstants.DOCUMENT_TYPE_DECLARATION;
0841: if (systemId != null && systemId.length() > 0) {
0842: _b |= EncodingConstants.DOCUMENT_TYPE_SYSTEM_IDENTIFIER_FLAG;
0843: }
0844: if (publicId != null && publicId.length() > 0) {
0845: _b |= EncodingConstants.DOCUMENT_TYPE_PUBLIC_IDENTIFIER_FLAG;
0846: }
0847: write(_b);
0848:
0849: if (systemId != null && systemId.length() > 0) {
0850: encodeIdentifyingNonEmptyStringOnFirstBit(systemId,
0851: _v.otherURI);
0852: }
0853: if (publicId != null && publicId.length() > 0) {
0854: encodeIdentifyingNonEmptyStringOnFirstBit(publicId,
0855: _v.otherURI);
0856: }
0857: }
0858:
0859: /**
0860: * Encode a Comment Information Item.
0861: *
0862: * @param ch the array of characters that is as comment.
0863: * @param offset the offset into the array of characters.
0864: * @param length the length of characters.
0865: * @throws ArrayIndexOutOfBoundsException.
0866: */
0867: protected final void encodeComment(char[] ch, int offset, int length)
0868: throws IOException {
0869: write(EncodingConstants.COMMENT);
0870:
0871: boolean addToTable = isCharacterContentChunkLengthMatchesLimit(
0872: length, _v.otherString);
0873: encodeNonIdentifyingStringOnFirstBit(ch, offset, length,
0874: _v.otherString, addToTable, true);
0875: }
0876:
0877: /**
0878: * Encode a Comment Information Item.
0879: *
0880: * If the array of characters that is a comment is to be indexed (as
0881: * determined by {@link Encoder#characterContentChunkSizeContraint}) then
0882: * the array is not cloned when adding the array to the vocabulary.
0883: *
0884: * @param ch the array of characters.
0885: * @param offset the offset into the array of characters.
0886: * @param length the length of characters.
0887: * @throws ArrayIndexOutOfBoundsException.
0888: */
0889: protected final void encodeCommentNoClone(char[] ch, int offset,
0890: int length) throws IOException {
0891: write(EncodingConstants.COMMENT);
0892:
0893: boolean addToTable = isCharacterContentChunkLengthMatchesLimit(
0894: length, _v.otherString);
0895: encodeNonIdentifyingStringOnFirstBit(ch, offset, length,
0896: _v.otherString, addToTable, false);
0897: }
0898:
0899: /**
0900: * Encode a qualified name of an Element Informaiton Item on the third bit
0901: * of an octet.
0902: * Implementation of clause C.18 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
0903: *
0904: * <p>
0905: * The index of the qualified name will be encoded if the name is present
0906: * in the vocabulary otherwise the qualified name will be encoded literally
0907: * (see {@link #encodeLiteralElementQualifiedNameOnThirdBit}).
0908: *
0909: * @param namespaceURI the namespace URI of the qualified name.
0910: * @param prefix the prefix of the qualified name.
0911: * @param localName the local name of the qualified name.
0912: */
0913: protected final void encodeElementQualifiedNameOnThirdBit(
0914: String namespaceURI, String prefix, String localName)
0915: throws IOException {
0916: LocalNameQualifiedNamesMap.Entry entry = _v.elementName
0917: .obtainEntry(localName);
0918: if (entry._valueIndex > 0) {
0919: QualifiedName[] names = entry._value;
0920: for (int i = 0; i < entry._valueIndex; i++) {
0921: if ((prefix == names[i].prefix || prefix
0922: .equals(names[i].prefix))
0923: && (namespaceURI == names[i].namespaceName || namespaceURI
0924: .equals(names[i].namespaceName))) {
0925: encodeNonZeroIntegerOnThirdBit(names[i].index);
0926: return;
0927: }
0928: }
0929: }
0930:
0931: encodeLiteralElementQualifiedNameOnThirdBit(namespaceURI,
0932: prefix, localName, entry);
0933: }
0934:
0935: /**
0936: * Encode a literal qualified name of an Element Informaiton Item on the
0937: * third bit of an octet.
0938: * Implementation of clause C.18 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
0939: *
0940: * @param namespaceURI the namespace URI of the qualified name.
0941: * @param prefix the prefix of the qualified name.
0942: * @param localName the local name of the qualified name.
0943: */
0944: protected final void encodeLiteralElementQualifiedNameOnThirdBit(
0945: String namespaceURI, String prefix, String localName,
0946: LocalNameQualifiedNamesMap.Entry entry) throws IOException {
0947: QualifiedName name = new QualifiedName(prefix, namespaceURI,
0948: localName, "", _v.elementName.getNextIndex());
0949: entry.addQualifiedName(name);
0950:
0951: int namespaceURIIndex = KeyIntMap.NOT_PRESENT;
0952: int prefixIndex = KeyIntMap.NOT_PRESENT;
0953: if (namespaceURI.length() > 0) {
0954: namespaceURIIndex = _v.namespaceName.get(namespaceURI);
0955: if (namespaceURIIndex == KeyIntMap.NOT_PRESENT) {
0956: throw new IOException(CommonResourceBundle
0957: .getInstance().getString(
0958: "message.namespaceURINotIndexed",
0959: new Object[] { namespaceURI }));
0960: }
0961:
0962: if (prefix.length() > 0) {
0963: prefixIndex = _v.prefix.get(prefix);
0964: if (prefixIndex == KeyIntMap.NOT_PRESENT) {
0965: throw new IOException(CommonResourceBundle
0966: .getInstance().getString(
0967: "message.prefixNotIndexed",
0968: new Object[] { prefix }));
0969: }
0970: }
0971: }
0972:
0973: int localNameIndex = _v.localName.obtainIndex(localName);
0974:
0975: _b |= EncodingConstants.ELEMENT_LITERAL_QNAME_FLAG;
0976: if (namespaceURIIndex >= 0) {
0977: _b |= EncodingConstants.LITERAL_QNAME_NAMESPACE_NAME_FLAG;
0978: if (prefixIndex >= 0) {
0979: _b |= EncodingConstants.LITERAL_QNAME_PREFIX_FLAG;
0980: }
0981: }
0982: write(_b);
0983:
0984: if (namespaceURIIndex >= 0) {
0985: if (prefixIndex >= 0) {
0986: encodeNonZeroIntegerOnSecondBitFirstBitOne(prefixIndex);
0987: }
0988: encodeNonZeroIntegerOnSecondBitFirstBitOne(namespaceURIIndex);
0989: }
0990:
0991: if (localNameIndex >= 0) {
0992: encodeNonZeroIntegerOnSecondBitFirstBitOne(localNameIndex);
0993: } else {
0994: encodeNonEmptyOctetStringOnSecondBit(localName);
0995: }
0996: }
0997:
0998: /**
0999: * Encode a qualified name of an Attribute Informaiton Item on the third bit
1000: * of an octet.
1001: * Implementation of clause C.17 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1002: *
1003: * <p>
1004: * The index of the qualified name will be encoded if the name is present
1005: * in the vocabulary otherwise the qualified name will be encoded literally
1006: * (see {@link #encodeLiteralAttributeQualifiedNameOnSecondBit}).
1007: *
1008: * @param namespaceURI the namespace URI of the qualified name.
1009: * @param prefix the prefix of the qualified name.
1010: * @param localName the local name of the qualified name.
1011: */
1012: protected final void encodeAttributeQualifiedNameOnSecondBit(
1013: String namespaceURI, String prefix, String localName)
1014: throws IOException {
1015: LocalNameQualifiedNamesMap.Entry entry = _v.attributeName
1016: .obtainEntry(localName);
1017: if (entry._valueIndex > 0) {
1018: QualifiedName[] names = entry._value;
1019: for (int i = 0; i < entry._valueIndex; i++) {
1020: if ((prefix == names[i].prefix || prefix
1021: .equals(names[i].prefix))
1022: && (namespaceURI == names[i].namespaceName || namespaceURI
1023: .equals(names[i].namespaceName))) {
1024: encodeNonZeroIntegerOnSecondBitFirstBitZero(names[i].index);
1025: return;
1026: }
1027: }
1028: }
1029:
1030: encodeLiteralAttributeQualifiedNameOnSecondBit(namespaceURI,
1031: prefix, localName, entry);
1032: }
1033:
1034: /**
1035: * Encode a literal qualified name of an Attribute Informaiton Item on the
1036: * third bit of an octet.
1037: * Implementation of clause C.17 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1038: *
1039: * @param namespaceURI the namespace URI of the qualified name.
1040: * @param prefix the prefix of the qualified name.
1041: * @param localName the local name of the qualified name.
1042: */
1043: protected final boolean encodeLiteralAttributeQualifiedNameOnSecondBit(
1044: String namespaceURI, String prefix, String localName,
1045: LocalNameQualifiedNamesMap.Entry entry) throws IOException {
1046: int namespaceURIIndex = KeyIntMap.NOT_PRESENT;
1047: int prefixIndex = KeyIntMap.NOT_PRESENT;
1048: if (namespaceURI.length() > 0) {
1049: namespaceURIIndex = _v.namespaceName.get(namespaceURI);
1050: if (namespaceURIIndex == KeyIntMap.NOT_PRESENT) {
1051: if (namespaceURI == EncodingConstants.XMLNS_NAMESPACE_NAME
1052: || namespaceURI
1053: .equals(EncodingConstants.XMLNS_NAMESPACE_NAME)) {
1054: return false;
1055: } else {
1056: throw new IOException(CommonResourceBundle
1057: .getInstance().getString(
1058: "message.namespaceURINotIndexed",
1059: new Object[] { namespaceURI }));
1060: }
1061: }
1062:
1063: if (prefix.length() > 0) {
1064: prefixIndex = _v.prefix.get(prefix);
1065: if (prefixIndex == KeyIntMap.NOT_PRESENT) {
1066: throw new IOException(CommonResourceBundle
1067: .getInstance().getString(
1068: "message.prefixNotIndexed",
1069: new Object[] { prefix }));
1070: }
1071: }
1072: }
1073:
1074: int localNameIndex = _v.localName.obtainIndex(localName);
1075:
1076: QualifiedName name = new QualifiedName(prefix, namespaceURI,
1077: localName, "", _v.attributeName.getNextIndex());
1078: entry.addQualifiedName(name);
1079:
1080: _b = EncodingConstants.ATTRIBUTE_LITERAL_QNAME_FLAG;
1081: if (namespaceURI.length() > 0) {
1082: _b |= EncodingConstants.LITERAL_QNAME_NAMESPACE_NAME_FLAG;
1083: if (prefix.length() > 0) {
1084: _b |= EncodingConstants.LITERAL_QNAME_PREFIX_FLAG;
1085: }
1086: }
1087:
1088: write(_b);
1089:
1090: if (namespaceURIIndex >= 0) {
1091: if (prefixIndex >= 0) {
1092: encodeNonZeroIntegerOnSecondBitFirstBitOne(prefixIndex);
1093: }
1094: encodeNonZeroIntegerOnSecondBitFirstBitOne(namespaceURIIndex);
1095: } else if (namespaceURI != "") {
1096: // XML prefix and namespace name
1097: encodeNonEmptyOctetStringOnSecondBit("xml");
1098: encodeNonEmptyOctetStringOnSecondBit("http://www.w3.org/XML/1998/namespace");
1099: }
1100:
1101: if (localNameIndex >= 0) {
1102: encodeNonZeroIntegerOnSecondBitFirstBitOne(localNameIndex);
1103: } else {
1104: encodeNonEmptyOctetStringOnSecondBit(localName);
1105: }
1106:
1107: return true;
1108: }
1109:
1110: /**
1111: * Encode a non identifying string on the first bit of an octet.
1112: * Implementation of clause C.14 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1113: *
1114: * @param s the string to encode
1115: * @param map the vocabulary table of strings to indexes.
1116: * @param addToTable true if the string should be added to the vocabulary
1117: * table (if not already present in the table).
1118: */
1119: protected final void encodeNonIdentifyingStringOnFirstBit(String s,
1120: StringIntMap map, boolean addToTable) throws IOException {
1121: if (s == null || s.length() == 0) {
1122: // C.26 an index (first bit '1') with seven '1' bits for an empty string
1123: write(0xFF);
1124: } else {
1125: if (addToTable) {
1126: int index = map.obtainIndex(s);
1127: if (index == KeyIntMap.NOT_PRESENT) {
1128: _b = EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG
1129: | _nonIdentifyingStringOnFirstBitCES;
1130: encodeNonEmptyCharacterStringOnFifthBit(s);
1131: } else {
1132: encodeNonZeroIntegerOnSecondBitFirstBitOne(index);
1133: }
1134: } else {
1135: _b = _nonIdentifyingStringOnFirstBitCES;
1136: encodeNonEmptyCharacterStringOnFifthBit(s);
1137: }
1138: }
1139: }
1140:
1141: /**
1142: * Encode a non identifying string on the first bit of an octet.
1143: * Implementation of clause C.14 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1144: *
1145: * @param s the string to encode
1146: * @param map the vocabulary table of character arrays to indexes.
1147: * @param addToTable true if the string should be added to the vocabulary
1148: * table (if not already present in the table).
1149: */
1150: protected final void encodeNonIdentifyingStringOnFirstBit(String s,
1151: CharArrayIntMap map, boolean addToTable) throws IOException {
1152: if (s == null || s.length() == 0) {
1153: // C.26 an index (first bit '1') with seven '1' bits for an empty string
1154: write(0xFF);
1155: } else {
1156: if (addToTable) {
1157: final char[] ch = s.toCharArray();
1158: final int length = s.length();
1159: int index = map.obtainIndex(ch, 0, length, false);
1160: if (index == KeyIntMap.NOT_PRESENT) {
1161: _b = EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG
1162: | _nonIdentifyingStringOnFirstBitCES;
1163: encodeNonEmptyCharacterStringOnFifthBit(ch, 0,
1164: length);
1165: } else {
1166: encodeNonZeroIntegerOnSecondBitFirstBitOne(index);
1167: }
1168: } else {
1169: _b = _nonIdentifyingStringOnFirstBitCES;
1170: encodeNonEmptyCharacterStringOnFifthBit(s);
1171: }
1172: }
1173: }
1174:
1175: /**
1176: * Encode a non identifying string on the first bit of an octet.
1177: * Implementation of clause C.14 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1178: *
1179: * @param ch the array of characters.
1180: * @param offset the offset into the array of characters.
1181: * @param length the length of characters.
1182: * @param map the vocabulary table of character arrays to indexes.
1183: * @param addToTable true if the string should be added to the vocabulary
1184: * table (if not already present in the table).
1185: * @param clone true if the array of characters should be cloned if added
1186: * to the vocabulary table.
1187: */
1188: protected final void encodeNonIdentifyingStringOnFirstBit(
1189: char[] ch, int offset, int length, CharArrayIntMap map,
1190: boolean addToTable, boolean clone) throws IOException {
1191: if (length == 0) {
1192: // C.26 an index (first bit '1') with seven '1' bits for an empty string
1193: write(0xFF);
1194: } else {
1195: if (addToTable) {
1196: int index = map.obtainIndex(ch, offset, length, clone);
1197: if (index == KeyIntMap.NOT_PRESENT) {
1198: _b = EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG
1199: | _nonIdentifyingStringOnFirstBitCES;
1200: encodeNonEmptyCharacterStringOnFifthBit(ch, offset,
1201: length);
1202: } else {
1203: encodeNonZeroIntegerOnSecondBitFirstBitOne(index);
1204: }
1205: } else {
1206: _b = _nonIdentifyingStringOnFirstBitCES;
1207: encodeNonEmptyCharacterStringOnFifthBit(ch, offset,
1208: length);
1209: }
1210: }
1211: }
1212:
1213: protected final void encodeNonIdentifyingStringOnFirstBit(int id,
1214: int[] table, String s, boolean addToTable)
1215: throws IOException, FastInfosetException {
1216: if (s == null || s.length() == 0) {
1217: // C.26 an index (first bit '1') with seven '1' bits for an empty string
1218: write(0xFF);
1219: return;
1220: } else if (addToTable) {
1221: final int index = _v.attributeValue.obtainIndex(s);
1222: if (index != KeyIntMap.NOT_PRESENT) {
1223: encodeNonZeroIntegerOnSecondBitFirstBitOne(index);
1224: return;
1225: }
1226: }
1227:
1228: _b = (addToTable) ? EncodingConstants.NISTRING_RESTRICTED_ALPHABET_FLAG
1229: | EncodingConstants.NISTRING_ADD_TO_TABLE_FLAG
1230: : EncodingConstants.NISTRING_RESTRICTED_ALPHABET_FLAG;
1231: // Encode identification and top four bits of alphabet id
1232: write(_b | ((id & 0xF0) >> 4));
1233: // Encode bottom 4 bits of alphabet id
1234: _b = (id & 0x0F) << 4;
1235:
1236: final int length = s.length();
1237: final int octetPairLength = length / 2;
1238: final int octetSingleLength = length % 2;
1239: encodeNonZeroOctetStringLengthOnFifthBit(octetPairLength
1240: + octetSingleLength);
1241: encodeNonEmptyFourBitCharacterString(table, s.toCharArray(), 0,
1242: octetPairLength, octetSingleLength);
1243: }
1244:
1245: /**
1246: * Encode a non identifying string on the first bit of an octet as binary
1247: * data using an encoding algorithm.
1248: * Implementation of clause C.14 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1249: *
1250: * @param URI the encoding algorithm URI. If the URI == null then the
1251: * encoding algorithm identifier takes precendence.
1252: * @param id the encoding algorithm identifier.
1253: * @param data the data to be encoded using an encoding algorithm.
1254: * @throws EncodingAlgorithmException if the encoding algorithm URI is not
1255: * present in the vocabulary, or the encoding algorithm identifier
1256: * is not with the required range.
1257: */
1258: protected final void encodeNonIdentifyingStringOnFirstBit(
1259: String URI, int id, Object data)
1260: throws FastInfosetException, IOException {
1261: if (URI != null) {
1262: id = _v.encodingAlgorithm.get(URI);
1263: if (id == KeyIntMap.NOT_PRESENT) {
1264: throw new EncodingAlgorithmException(
1265: CommonResourceBundle.getInstance().getString(
1266: "message.EncodingAlgorithmURI",
1267: new Object[] { URI }));
1268: }
1269: id += EncodingConstants.ENCODING_ALGORITHM_APPLICATION_START;
1270:
1271: EncodingAlgorithm ea = (EncodingAlgorithm) _registeredEncodingAlgorithms
1272: .get(URI);
1273: if (ea != null) {
1274: encodeAIIObjectAlgorithmData(id, data, ea);
1275: } else {
1276: if (data instanceof byte[]) {
1277: byte[] d = (byte[]) data;
1278: encodeAIIOctetAlgorithmData(id, d, 0, d.length);
1279: } else {
1280: throw new EncodingAlgorithmException(
1281: CommonResourceBundle
1282: .getInstance()
1283: .getString(
1284: "message.nullEncodingAlgorithmURI"));
1285: }
1286: }
1287: } else if (id <= EncodingConstants.ENCODING_ALGORITHM_BUILTIN_END) {
1288: int length = 0;
1289: switch (id) {
1290: case EncodingAlgorithmIndexes.HEXADECIMAL:
1291: case EncodingAlgorithmIndexes.BASE64:
1292: length = ((byte[]) data).length;
1293: break;
1294: case EncodingAlgorithmIndexes.SHORT:
1295: length = ((short[]) data).length;
1296: break;
1297: case EncodingAlgorithmIndexes.INT:
1298: length = ((int[]) data).length;
1299: break;
1300: case EncodingAlgorithmIndexes.LONG:
1301: case EncodingAlgorithmIndexes.UUID:
1302: length = ((long[]) data).length;
1303: break;
1304: case EncodingAlgorithmIndexes.BOOLEAN:
1305: length = ((boolean[]) data).length;
1306: break;
1307: case EncodingAlgorithmIndexes.FLOAT:
1308: length = ((float[]) data).length;
1309: break;
1310: case EncodingAlgorithmIndexes.DOUBLE:
1311: length = ((double[]) data).length;
1312: break;
1313: case EncodingAlgorithmIndexes.CDATA:
1314: throw new UnsupportedOperationException(
1315: CommonResourceBundle.getInstance().getString(
1316: "message.CDATA"));
1317: default:
1318: throw new EncodingAlgorithmException(
1319: CommonResourceBundle.getInstance().getString(
1320: "message.UnsupportedBuiltInAlgorithm",
1321: new Object[] { Integer.valueOf(id) }));
1322: }
1323: encodeAIIBuiltInAlgorithmData(id, data, 0, length);
1324: } else if (id >= EncodingConstants.ENCODING_ALGORITHM_APPLICATION_START) {
1325: if (data instanceof byte[]) {
1326: byte[] d = (byte[]) data;
1327: encodeAIIOctetAlgorithmData(id, d, 0, d.length);
1328: } else {
1329: throw new EncodingAlgorithmException(
1330: CommonResourceBundle.getInstance().getString(
1331: "message.nullEncodingAlgorithmURI"));
1332: }
1333: } else {
1334: throw new EncodingAlgorithmException(CommonResourceBundle
1335: .getInstance().getString(
1336: "message.identifiers10to31Reserved"));
1337: }
1338: }
1339:
1340: /**
1341: * Encode the [normalized value] of an Attribute Information Item using
1342: * using an encoding algorithm.
1343: * Implementation of clause C.14 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1344: *
1345: * @param id the encoding algorithm identifier.
1346: * @param d the data, as an array of bytes, to be encoded.
1347: * @param offset the offset into the array of bytes.
1348: * @param length the length of bytes.
1349: */
1350: protected final void encodeAIIOctetAlgorithmData(int id, byte[] d,
1351: int offset, int length) throws IOException {
1352: // Encode identification and top four bits of encoding algorithm id
1353: write(EncodingConstants.NISTRING_ENCODING_ALGORITHM_FLAG
1354: | ((id & 0xF0) >> 4));
1355:
1356: // Encode bottom 4 bits of enoding algorithm id
1357: _b = (id & 0x0F) << 4;
1358:
1359: // Encode the length
1360: encodeNonZeroOctetStringLengthOnFifthBit(length);
1361:
1362: write(d, offset, length);
1363: }
1364:
1365: /**
1366: * Encode the [normalized value] of an Attribute Information Item using
1367: * using an encoding algorithm.
1368: * Implementation of clause C.14 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1369: *
1370: * @param id the encoding algorithm identifier.
1371: * @param data the data to be encoded using an encoding algorithm.
1372: * @param ea the encoding algorithm to use to encode the data into an
1373: * array of bytes.
1374: */
1375: protected final void encodeAIIObjectAlgorithmData(int id,
1376: Object data, EncodingAlgorithm ea)
1377: throws FastInfosetException, IOException {
1378: // Encode identification and top four bits of encoding algorithm id
1379: write(EncodingConstants.NISTRING_ENCODING_ALGORITHM_FLAG
1380: | ((id & 0xF0) >> 4));
1381:
1382: // Encode bottom 4 bits of enoding algorithm id
1383: _b = (id & 0x0F) << 4;
1384:
1385: _encodingBufferOutputStream.reset();
1386: ea.encodeToOutputStream(data, _encodingBufferOutputStream);
1387: encodeNonZeroOctetStringLengthOnFifthBit(_encodingBufferIndex);
1388: write(_encodingBuffer, _encodingBufferIndex);
1389: }
1390:
1391: /**
1392: * Encode the [normalized value] of an Attribute Information Item using
1393: * using a built in encoding algorithm.
1394: * Implementation of clause C.14 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1395: *
1396: * @param id the built in encoding algorithm identifier.
1397: * @param data the data to be encoded using an encoding algorithm. The data
1398: * represents an array of items specified by the encoding algorithm
1399: * identifier
1400: * @param offset the offset into the array of bytes.
1401: * @param length the length of bytes.
1402: */
1403: protected final void encodeAIIBuiltInAlgorithmData(int id,
1404: Object data, int offset, int length) throws IOException {
1405: // Encode identification and top four bits of encoding algorithm id
1406: write(EncodingConstants.NISTRING_ENCODING_ALGORITHM_FLAG
1407: | ((id & 0xF0) >> 4));
1408:
1409: // Encode bottom 4 bits of enoding algorithm id
1410: _b = (id & 0x0F) << 4;
1411:
1412: final int octetLength = BuiltInEncodingAlgorithmFactory.table[id]
1413: .getOctetLengthFromPrimitiveLength(length);
1414:
1415: encodeNonZeroOctetStringLengthOnFifthBit(octetLength);
1416:
1417: ensureSize(octetLength);
1418: BuiltInEncodingAlgorithmFactory.table[id].encodeToBytes(data,
1419: offset, length, _octetBuffer, _octetBufferIndex);
1420: _octetBufferIndex += octetLength;
1421: }
1422:
1423: /**
1424: * Encode a non identifying string on the third bit of an octet.
1425: * Implementation of clause C.15 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1426: *
1427: * @param ch the array of characters.
1428: * @param offset the offset into the array of characters.
1429: * @param length the length of characters.
1430: * @param map the vocabulary table of character arrays to indexes.
1431: * @param addToTable true if the array of characters should be added to the vocabulary
1432: * table (if not already present in the table).
1433: * @param clone true if the array of characters should be cloned if added
1434: * to the vocabulary table.
1435: */
1436: protected final void encodeNonIdentifyingStringOnThirdBit(
1437: char[] ch, int offset, int length, CharArrayIntMap map,
1438: boolean addToTable, boolean clone) throws IOException {
1439: // length cannot be zero since sequence of CIIs has to be > 0
1440:
1441: if (addToTable) {
1442: int index = map.obtainIndex(ch, offset, length, clone);
1443: if (index == KeyIntMap.NOT_PRESENT) {
1444: _b = EncodingConstants.CHARACTER_CHUNK_ADD_TO_TABLE_FLAG
1445: | _nonIdentifyingStringOnThirdBitCES;
1446: encodeNonEmptyCharacterStringOnSeventhBit(ch, offset,
1447: length);
1448: } else {
1449: _b = EncodingConstants.CHARACTER_CHUNK | 0x20;
1450: encodeNonZeroIntegerOnFourthBit(index);
1451: }
1452: } else {
1453: _b = _nonIdentifyingStringOnThirdBitCES;
1454: encodeNonEmptyCharacterStringOnSeventhBit(ch, offset,
1455: length);
1456: }
1457: }
1458:
1459: /**
1460: * Encode a non identifying string on the third bit of an octet as binary
1461: * data using an encoding algorithm.
1462: * Implementation of clause C.15 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1463: *
1464: * @param URI the encoding algorithm URI. If the URI == null then the
1465: * encoding algorithm identifier takes precendence.
1466: * @param id the encoding algorithm identifier.
1467: * @param data the data to be encoded using an encoding algorithm.
1468: * @throws EncodingAlgorithmException if the encoding algorithm URI is not
1469: * present in the vocabulary, or the encoding algorithm identifier
1470: * is not with the required range.
1471: */
1472: protected final void encodeNonIdentifyingStringOnThirdBit(
1473: String URI, int id, Object data)
1474: throws FastInfosetException, IOException {
1475: if (URI != null) {
1476: id = _v.encodingAlgorithm.get(URI);
1477: if (id == KeyIntMap.NOT_PRESENT) {
1478: throw new EncodingAlgorithmException(
1479: CommonResourceBundle.getInstance().getString(
1480: "message.EncodingAlgorithmURI",
1481: new Object[] { URI }));
1482: }
1483: id += EncodingConstants.ENCODING_ALGORITHM_APPLICATION_START;
1484:
1485: EncodingAlgorithm ea = (EncodingAlgorithm) _registeredEncodingAlgorithms
1486: .get(URI);
1487: if (ea != null) {
1488: encodeCIIObjectAlgorithmData(id, data, ea);
1489: } else {
1490: if (data instanceof byte[]) {
1491: byte[] d = (byte[]) data;
1492: encodeCIIOctetAlgorithmData(id, d, 0, d.length);
1493: } else {
1494: throw new EncodingAlgorithmException(
1495: CommonResourceBundle
1496: .getInstance()
1497: .getString(
1498: "message.nullEncodingAlgorithmURI"));
1499: }
1500: }
1501: } else if (id <= EncodingConstants.ENCODING_ALGORITHM_BUILTIN_END) {
1502: int length = 0;
1503: switch (id) {
1504: case EncodingAlgorithmIndexes.HEXADECIMAL:
1505: case EncodingAlgorithmIndexes.BASE64:
1506: length = ((byte[]) data).length;
1507: break;
1508: case EncodingAlgorithmIndexes.SHORT:
1509: length = ((short[]) data).length;
1510: break;
1511: case EncodingAlgorithmIndexes.INT:
1512: length = ((int[]) data).length;
1513: break;
1514: case EncodingAlgorithmIndexes.LONG:
1515: case EncodingAlgorithmIndexes.UUID:
1516: length = ((long[]) data).length;
1517: break;
1518: case EncodingAlgorithmIndexes.BOOLEAN:
1519: length = ((boolean[]) data).length;
1520: break;
1521: case EncodingAlgorithmIndexes.FLOAT:
1522: length = ((float[]) data).length;
1523: break;
1524: case EncodingAlgorithmIndexes.DOUBLE:
1525: length = ((double[]) data).length;
1526: break;
1527: case EncodingAlgorithmIndexes.CDATA:
1528: throw new UnsupportedOperationException(
1529: CommonResourceBundle.getInstance().getString(
1530: "message.CDATA"));
1531: default:
1532: throw new EncodingAlgorithmException(
1533: CommonResourceBundle.getInstance().getString(
1534: "message.UnsupportedBuiltInAlgorithm",
1535: new Object[] { Integer.valueOf(id) }));
1536: }
1537: encodeCIIBuiltInAlgorithmData(id, data, 0, length);
1538: } else if (id >= EncodingConstants.ENCODING_ALGORITHM_APPLICATION_START) {
1539: if (data instanceof byte[]) {
1540: byte[] d = (byte[]) data;
1541: encodeCIIOctetAlgorithmData(id, d, 0, d.length);
1542: } else {
1543: throw new EncodingAlgorithmException(
1544: CommonResourceBundle.getInstance().getString(
1545: "message.nullEncodingAlgorithmURI"));
1546: }
1547: } else {
1548: throw new EncodingAlgorithmException(CommonResourceBundle
1549: .getInstance().getString(
1550: "message.identifiers10to31Reserved"));
1551: }
1552: }
1553:
1554: /**
1555: * Encode a non identifying string on the third bit of an octet as binary
1556: * data using an encoding algorithm.
1557: * Implementation of clause C.15 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1558: *
1559: * @param URI the encoding algorithm URI. If the URI == null then the
1560: * encoding algorithm identifier takes precendence.
1561: * @param id the encoding algorithm identifier.
1562: * @param d the data, as an array of bytes, to be encoded.
1563: * @param offset the offset into the array of bytes.
1564: * @param length the length of bytes.
1565: * @throws EncodingAlgorithmException if the encoding algorithm URI is not
1566: * present in the vocabulary.
1567: */
1568: protected final void encodeNonIdentifyingStringOnThirdBit(
1569: String URI, int id, byte[] d, int offset, int length)
1570: throws FastInfosetException, IOException {
1571: if (URI != null) {
1572: id = _v.encodingAlgorithm.get(URI);
1573: if (id == KeyIntMap.NOT_PRESENT) {
1574: throw new EncodingAlgorithmException(
1575: CommonResourceBundle.getInstance().getString(
1576: "message.EncodingAlgorithmURI",
1577: new Object[] { URI }));
1578: }
1579: id += EncodingConstants.ENCODING_ALGORITHM_APPLICATION_START;
1580: }
1581:
1582: encodeCIIOctetAlgorithmData(id, d, offset, length);
1583: }
1584:
1585: /**
1586: * Encode a chunk of Character Information Items using
1587: * using an encoding algorithm.
1588: * Implementation of clause C.15 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1589: *
1590: * @param id the encoding algorithm identifier.
1591: * @param d the data, as an array of bytes, to be encoded.
1592: * @param offset the offset into the array of bytes.
1593: * @param length the length of bytes.
1594: */
1595: protected final void encodeCIIOctetAlgorithmData(int id, byte[] d,
1596: int offset, int length) throws IOException {
1597: // Encode identification and top two bits of encoding algorithm id
1598: write(EncodingConstants.CHARACTER_CHUNK
1599: | EncodingConstants.CHARACTER_CHUNK_ENCODING_ALGORITHM_FLAG
1600: | ((id & 0xC0) >> 6));
1601:
1602: // Encode bottom 6 bits of enoding algorithm id
1603: _b = (id & 0x3F) << 2;
1604:
1605: // Encode the length
1606: encodeNonZeroOctetStringLengthOnSenventhBit(length);
1607:
1608: write(d, offset, length);
1609: }
1610:
1611: /**
1612: * Encode a chunk of Character Information Items using
1613: * using an encoding algorithm.
1614: * Implementation of clause C.15 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1615: *
1616: * @param id the encoding algorithm identifier.
1617: * @param data the data to be encoded using an encoding algorithm.
1618: * @param ea the encoding algorithm to use to encode the data into an
1619: * array of bytes.
1620: */
1621: protected final void encodeCIIObjectAlgorithmData(int id,
1622: Object data, EncodingAlgorithm ea)
1623: throws FastInfosetException, IOException {
1624: // Encode identification and top two bits of encoding algorithm id
1625: write(EncodingConstants.CHARACTER_CHUNK
1626: | EncodingConstants.CHARACTER_CHUNK_ENCODING_ALGORITHM_FLAG
1627: | ((id & 0xC0) >> 6));
1628:
1629: // Encode bottom 6 bits of enoding algorithm id
1630: _b = (id & 0x3F) << 2;
1631:
1632: _encodingBufferOutputStream.reset();
1633: ea.encodeToOutputStream(data, _encodingBufferOutputStream);
1634: encodeNonZeroOctetStringLengthOnSenventhBit(_encodingBufferIndex);
1635: write(_encodingBuffer, _encodingBufferIndex);
1636: }
1637:
1638: /**
1639: * Encode a chunk of Character Information Items using
1640: * using an encoding algorithm.
1641: * Implementation of clause C.15 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1642: *
1643: * @param id the built in encoding algorithm identifier.
1644: * @param data the data to be encoded using an encoding algorithm. The data
1645: * represents an array of items specified by the encoding algorithm
1646: * identifier
1647: * @param offset the offset into the array of bytes.
1648: * @param length the length of bytes.
1649: */
1650: protected final void encodeCIIBuiltInAlgorithmData(int id,
1651: Object data, int offset, int length)
1652: throws FastInfosetException, IOException {
1653: // Encode identification and top two bits of encoding algorithm id
1654: write(EncodingConstants.CHARACTER_CHUNK
1655: | EncodingConstants.CHARACTER_CHUNK_ENCODING_ALGORITHM_FLAG
1656: | ((id & 0xC0) >> 6));
1657:
1658: // Encode bottom 6 bits of enoding algorithm id
1659: _b = (id & 0x3F) << 2;
1660:
1661: final int octetLength = BuiltInEncodingAlgorithmFactory.table[id]
1662: .getOctetLengthFromPrimitiveLength(length);
1663:
1664: encodeNonZeroOctetStringLengthOnSenventhBit(octetLength);
1665:
1666: ensureSize(octetLength);
1667: BuiltInEncodingAlgorithmFactory.table[id].encodeToBytes(data,
1668: offset, length, _octetBuffer, _octetBufferIndex);
1669: _octetBufferIndex += octetLength;
1670: }
1671:
1672: /**
1673: * Encode a chunk of Character Information Items using
1674: * using the CDATA built in encoding algorithm.
1675: * Implementation of clause C.15 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1676: *
1677: * @param ch the array of characters.
1678: * @param offset the offset into the array of characters.
1679: * @param length the length of characters.
1680: */
1681: protected final void encodeCIIBuiltInAlgorithmDataAsCDATA(
1682: char[] ch, int offset, int length)
1683: throws FastInfosetException, IOException {
1684: // Encode identification and top two bits of encoding algorithm id
1685: write(EncodingConstants.CHARACTER_CHUNK
1686: | EncodingConstants.CHARACTER_CHUNK_ENCODING_ALGORITHM_FLAG);
1687:
1688: // Encode bottom 6 bits of enoding algorithm id
1689: _b = EncodingAlgorithmIndexes.CDATA << 2;
1690:
1691: length = encodeUTF8String(ch, offset, length);
1692: encodeNonZeroOctetStringLengthOnSenventhBit(length);
1693: write(_encodingBuffer, length);
1694: }
1695:
1696: /**
1697: * Encode a non empty identifying string on the first bit of an octet.
1698: * Implementation of clause C.13 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1699: *
1700: * @param s the identifying string.
1701: * @param map the vocabulary table to use to determin the index of the
1702: * identifying string
1703: */
1704: protected final void encodeIdentifyingNonEmptyStringOnFirstBit(
1705: String s, StringIntMap map) throws IOException {
1706: int index = map.obtainIndex(s);
1707: if (index == KeyIntMap.NOT_PRESENT) {
1708: // _b = 0;
1709: encodeNonEmptyOctetStringOnSecondBit(s);
1710: } else {
1711: // _b = 0x80;
1712: encodeNonZeroIntegerOnSecondBitFirstBitOne(index);
1713: }
1714: }
1715:
1716: /**
1717: * Encode a non empty string on the second bit of an octet using the UTF-8
1718: * encoding.
1719: * Implementation of clause C.22 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1720: *
1721: * @param s the string.
1722: */
1723: protected final void encodeNonEmptyOctetStringOnSecondBit(String s)
1724: throws IOException {
1725: final int length = encodeUTF8String(s);
1726: encodeNonZeroOctetStringLengthOnSecondBit(length);
1727: write(_encodingBuffer, length);
1728: }
1729:
1730: /**
1731: * Encode the length of a UTF-8 encoded string on the second bit of an octet.
1732: * Implementation of clause C.22 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1733: *
1734: * @param length the length to encode.
1735: */
1736: protected final void encodeNonZeroOctetStringLengthOnSecondBit(
1737: int length) throws IOException {
1738: if (length < EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_SMALL_LIMIT) {
1739: // [1, 64]
1740: write(length - 1);
1741: } else if (length < EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_MEDIUM_LIMIT) {
1742: // [65, 320]
1743: write(EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_MEDIUM_FLAG); // 010 00000
1744: write(length
1745: - EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_SMALL_LIMIT);
1746: } else {
1747: // [321, 4294967296]
1748: write(EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_LARGE_FLAG); // 0110 0000
1749: length -= EncodingConstants.OCTET_STRING_LENGTH_2ND_BIT_MEDIUM_LIMIT;
1750: write(length >>> 24);
1751: write((length >> 16) & 0xFF);
1752: write((length >> 8) & 0xFF);
1753: write(length & 0xFF);
1754: }
1755: }
1756:
1757: /**
1758: * Encode a non empty string on the fifth bit of an octet using the UTF-8
1759: * or UTF-16 encoding.
1760: * Implementation of clause C.23 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1761: *
1762: * @param s the string.
1763: */
1764: protected final void encodeNonEmptyCharacterStringOnFifthBit(
1765: String s) throws IOException {
1766: final int length = (_encodingStringsAsUtf8) ? encodeUTF8String(s)
1767: : encodeUtf16String(s);
1768: encodeNonZeroOctetStringLengthOnFifthBit(length);
1769: write(_encodingBuffer, length);
1770: }
1771:
1772: /**
1773: * Encode a non empty string on the fifth bit of an octet using the UTF-8
1774: * or UTF-16 encoding.
1775: * Implementation of clause C.23 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1776: *
1777: * @param ch the array of characters.
1778: * @param offset the offset into the array of characters.
1779: * @param length the length of characters.
1780: */
1781: protected final void encodeNonEmptyCharacterStringOnFifthBit(
1782: char[] ch, int offset, int length) throws IOException {
1783: length = (_encodingStringsAsUtf8) ? encodeUTF8String(ch,
1784: offset, length) : encodeUtf16String(ch, offset, length);
1785: encodeNonZeroOctetStringLengthOnFifthBit(length);
1786: write(_encodingBuffer, length);
1787: }
1788:
1789: /**
1790: * Encode the length of a UTF-8 or UTF-16 encoded string on the fifth bit
1791: * of an octet.
1792: * Implementation of clause C.23 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1793: *
1794: * @param length the length to encode.
1795: */
1796: protected final void encodeNonZeroOctetStringLengthOnFifthBit(
1797: int length) throws IOException {
1798: if (length < EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_SMALL_LIMIT) {
1799: // [1, 8]
1800: write(_b | (length - 1));
1801: } else if (length < EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_MEDIUM_LIMIT) {
1802: // [9, 264]
1803: write(_b
1804: | EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_MEDIUM_FLAG); // 000010 00
1805: write(length
1806: - EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_SMALL_LIMIT);
1807: } else {
1808: // [265, 4294967296]
1809: write(_b
1810: | EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_LARGE_FLAG); // 000011 00
1811: length -= EncodingConstants.OCTET_STRING_LENGTH_5TH_BIT_MEDIUM_LIMIT;
1812: write(length >>> 24);
1813: write((length >> 16) & 0xFF);
1814: write((length >> 8) & 0xFF);
1815: write(length & 0xFF);
1816: }
1817: }
1818:
1819: /**
1820: * Encode a non empty string on the seventh bit of an octet using the UTF-8
1821: * or UTF-16 encoding.
1822: * Implementation of clause C.24 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1823: *
1824: * @param ch the array of characters.
1825: * @param offset the offset into the array of characters.
1826: * @param length the length of characters.
1827: */
1828: protected final void encodeNonEmptyCharacterStringOnSeventhBit(
1829: char[] ch, int offset, int length) throws IOException {
1830: length = (_encodingStringsAsUtf8) ? encodeUTF8String(ch,
1831: offset, length) : encodeUtf16String(ch, offset, length);
1832: encodeNonZeroOctetStringLengthOnSenventhBit(length);
1833: write(_encodingBuffer, length);
1834: }
1835:
1836: /**
1837: * Encode a non empty string on the seventh bit of an octet using a restricted
1838: * alphabet that results in the encoding of a character in 4 bits
1839: * (or two characters per octet).
1840: * Implementation of clause C.24 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1841: *
1842: * @param table the table mapping characters to 4 bit values.
1843: * @param ch the array of characters.
1844: * @param offset the offset into the array of characters.
1845: * @param length the length of characters.
1846: */
1847: protected final void encodeNonEmptyFourBitCharacterStringOnSeventhBit(
1848: int[] table, char[] ch, int offset, int length)
1849: throws FastInfosetException, IOException {
1850: final int octetPairLength = length / 2;
1851: final int octetSingleLength = length % 2;
1852:
1853: // Encode the length
1854: encodeNonZeroOctetStringLengthOnSenventhBit(octetPairLength
1855: + octetSingleLength);
1856: encodeNonEmptyFourBitCharacterString(table, ch, offset,
1857: octetPairLength, octetSingleLength);
1858: }
1859:
1860: protected final void encodeNonEmptyFourBitCharacterString(
1861: int[] table, char[] ch, int offset, int octetPairLength,
1862: int octetSingleLength) throws FastInfosetException,
1863: IOException {
1864: ensureSize(octetPairLength + octetSingleLength);
1865: // Encode all pairs
1866: int v = 0;
1867: for (int i = 0; i < octetPairLength; i++) {
1868: v = (table[ch[offset++]] << 4) | table[ch[offset++]];
1869: if (v < 0) {
1870: throw new FastInfosetException(CommonResourceBundle
1871: .getInstance().getString(
1872: "message.characterOutofAlphabetRange"));
1873: }
1874: _octetBuffer[_octetBufferIndex++] = (byte) v;
1875: }
1876: // Encode single character at end with termination bits
1877: if (octetSingleLength == 1) {
1878: v = (table[ch[offset]] << 4) | 0x0F;
1879: if (v < 0) {
1880: throw new FastInfosetException(CommonResourceBundle
1881: .getInstance().getString(
1882: "message.characterOutofAlphabetRange"));
1883: }
1884: _octetBuffer[_octetBufferIndex++] = (byte) v;
1885: }
1886: }
1887:
1888: /**
1889: * Encode a non empty string on the seventh bit of an octet using a restricted
1890: * alphabet table.
1891: * Implementation of clause C.24 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1892: *
1893: * @param alphabet the alphabet defining the mapping between characters and
1894: * integer values.
1895: * @param ch the array of characters.
1896: * @param offset the offset into the array of characters.
1897: * @param length the length of characters.
1898: */
1899: protected final void encodeNonEmptyNBitCharacterStringOnSeventhBit(
1900: String alphabet, char[] ch, int offset, int length)
1901: throws FastInfosetException, IOException {
1902: int bitsPerCharacter = 1;
1903: while ((1 << bitsPerCharacter) <= alphabet.length()) {
1904: bitsPerCharacter++;
1905: }
1906:
1907: final int bits = length * bitsPerCharacter;
1908: final int octets = bits / 8;
1909: final int bitsOfLastOctet = bits % 8;
1910: final int totalOctets = octets
1911: + ((bitsOfLastOctet > 0) ? 1 : 0);
1912:
1913: // Encode the length
1914: encodeNonZeroOctetStringLengthOnSenventhBit(totalOctets);
1915:
1916: resetBits();
1917: ensureSize(totalOctets);
1918: int v = 0;
1919: for (int i = 0; i < length; i++) {
1920: final char c = ch[offset + i];
1921: // This is grotesquely slow, need to use hash table of character to int value
1922: for (v = 0; v < alphabet.length(); v++) {
1923: if (c == alphabet.charAt(v)) {
1924: break;
1925: }
1926: }
1927: if (v == alphabet.length()) {
1928: throw new FastInfosetException(CommonResourceBundle
1929: .getInstance().getString(
1930: "message.characterOutofAlphabetRange"));
1931: }
1932: writeBits(bitsPerCharacter, v);
1933: }
1934:
1935: if (bitsOfLastOctet > 0) {
1936: _b |= (1 << (8 - bitsOfLastOctet)) - 1;
1937: write(_b);
1938: }
1939: }
1940:
// Number of bit positions still free in the octet being assembled in _b by
// writeBits(); resetBits() sets it to 8 and writeBits() counts it down.
private int _bitsLeftInOctet;
1942:
1943: private final void resetBits() {
1944: _bitsLeftInOctet = 8;
1945: _b = 0;
1946: }
1947:
1948: private final void writeBits(int bits, int v) throws IOException {
1949: while (bits > 0) {
1950: final int bit = (v & (1 << --bits)) > 0 ? 1 : 0;
1951: _b |= bit << (--_bitsLeftInOctet);
1952: if (_bitsLeftInOctet == 0) {
1953: write(_b);
1954: _bitsLeftInOctet = 8;
1955: _b = 0;
1956: }
1957: }
1958: }
1959:
1960: /**
1961: * Encode the length of a encoded string on the seventh bit
1962: * of an octet.
1963: * Implementation of clause C.24 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1964: *
1965: * @param length the length to encode.
1966: */
1967: protected final void encodeNonZeroOctetStringLengthOnSenventhBit(
1968: int length) throws IOException {
1969: if (length < EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_SMALL_LIMIT) {
1970: // [1, 2]
1971: write(_b | (length - 1));
1972: } else if (length < EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_MEDIUM_LIMIT) {
1973: // [3, 258]
1974: write(_b
1975: | EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_MEDIUM_FLAG); // 00000010
1976: write(length
1977: - EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_SMALL_LIMIT);
1978: } else {
1979: // [259, 4294967296]
1980: write(_b
1981: | EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_LARGE_FLAG); // 00000011
1982: length -= EncodingConstants.OCTET_STRING_LENGTH_7TH_BIT_MEDIUM_LIMIT;
1983: write(length >>> 24);
1984: write((length >> 16) & 0xFF);
1985: write((length >> 8) & 0xFF);
1986: write(length & 0xFF);
1987: }
1988: }
1989:
1990: /**
1991: * Encode a non zero integer on the second bit of an octet, setting
1992: * the first bit to 1.
1993: * Implementation of clause C.24 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
1994: *
1995: * <p>
1996: * The first bit of the first octet is set, as specified in clause C.13 of
1997: * ITU-T Rec. X.891 | ISO/IEC 24824-1
1998: *
1999: * @param i The integer to encode, which is a member of the interval
2000: * [0, 1048575]. In the specification the interval is [1, 1048576]
2001: *
2002: */
2003: protected final void encodeNonZeroIntegerOnSecondBitFirstBitOne(
2004: int i) throws IOException {
2005: if (i < EncodingConstants.INTEGER_2ND_BIT_SMALL_LIMIT) {
2006: // [1, 64] ( [0, 63] ) 6 bits
2007: write(0x80 | i);
2008: } else if (i < EncodingConstants.INTEGER_2ND_BIT_MEDIUM_LIMIT) {
2009: // [65, 8256] ( [64, 8255] ) 13 bits
2010: i -= EncodingConstants.INTEGER_2ND_BIT_SMALL_LIMIT;
2011: _b = (0x80 | EncodingConstants.INTEGER_2ND_BIT_MEDIUM_FLAG)
2012: | (i >> 8); // 010 00000
2013: // _b = 0xC0 | (i >> 8); // 010 00000
2014: write(_b);
2015: write(i & 0xFF);
2016: } else if (i < EncodingConstants.INTEGER_2ND_BIT_LARGE_LIMIT) {
2017: // [8257, 1048576] ( [8256, 1048575] ) 20 bits
2018: i -= EncodingConstants.INTEGER_2ND_BIT_MEDIUM_LIMIT;
2019: _b = (0x80 | EncodingConstants.INTEGER_2ND_BIT_LARGE_FLAG)
2020: | (i >> 16); // 0110 0000
2021: // _b = 0xE0 | (i >> 16); // 0110 0000
2022: write(_b);
2023: write((i >> 8) & 0xFF);
2024: write(i & 0xFF);
2025: } else {
2026: throw new IOException(
2027: CommonResourceBundle
2028: .getInstance()
2029: .getString(
2030: "message.integerMaxSize",
2031: new Object[] { Integer
2032: .valueOf(EncodingConstants.INTEGER_2ND_BIT_LARGE_LIMIT) }));
2033: }
2034: }
2035:
2036: /**
2037: * Encode a non zero integer on the second bit of an octet, setting
2038: * the first bit to 0.
2039: * Implementation of clause C.25 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
2040: *
2041: * <p>
2042: * The first bit of the first octet is set, as specified in clause C.13 of
2043: * ITU-T Rec. X.891 | ISO/IEC 24824-1
2044: *
2045: * @param i The integer to encode, which is a member of the interval
2046: * [0, 1048575]. In the specification the interval is [1, 1048576]
2047: *
2048: */
2049: protected final void encodeNonZeroIntegerOnSecondBitFirstBitZero(
2050: int i) throws IOException {
2051: if (i < EncodingConstants.INTEGER_2ND_BIT_SMALL_LIMIT) {
2052: // [1, 64] ( [0, 63] ) 6 bits
2053: write(i);
2054: } else if (i < EncodingConstants.INTEGER_2ND_BIT_MEDIUM_LIMIT) {
2055: // [65, 8256] ( [64, 8255] ) 13 bits
2056: i -= EncodingConstants.INTEGER_2ND_BIT_SMALL_LIMIT;
2057: _b = EncodingConstants.INTEGER_2ND_BIT_MEDIUM_FLAG
2058: | (i >> 8); // 010 00000
2059: write(_b);
2060: write(i & 0xFF);
2061: } else {
2062: // [8257, 1048576] ( [8256, 1048575] ) 20 bits
2063: i -= EncodingConstants.INTEGER_2ND_BIT_MEDIUM_LIMIT;
2064: _b = EncodingConstants.INTEGER_2ND_BIT_LARGE_FLAG
2065: | (i >> 16); // 0110 0000
2066: write(_b);
2067: write((i >> 8) & 0xFF);
2068: write(i & 0xFF);
2069: }
2070: }
2071:
2072: /**
2073: * Encode a non zero integer on the third bit of an octet.
2074: * Implementation of clause C.27 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
2075: *
2076: * @param i The integer to encode, which is a member of the interval
2077: * [0, 1048575]. In the specification the interval is [1, 1048576]
2078: *
2079: */
2080: protected final void encodeNonZeroIntegerOnThirdBit(int i)
2081: throws IOException {
2082: if (i < EncodingConstants.INTEGER_3RD_BIT_SMALL_LIMIT) {
2083: // [1, 32] ( [0, 31] ) 5 bits
2084: write(_b | i);
2085: } else if (i < EncodingConstants.INTEGER_3RD_BIT_MEDIUM_LIMIT) {
2086: // [33, 2080] ( [32, 2079] ) 11 bits
2087: i -= EncodingConstants.INTEGER_3RD_BIT_SMALL_LIMIT;
2088: _b |= EncodingConstants.INTEGER_3RD_BIT_MEDIUM_FLAG
2089: | (i >> 8); // 00100 000
2090: write(_b);
2091: write(i & 0xFF);
2092: } else if (i < EncodingConstants.INTEGER_3RD_BIT_LARGE_LIMIT) {
2093: // [2081, 526368] ( [2080, 526367] ) 19 bits
2094: i -= EncodingConstants.INTEGER_3RD_BIT_MEDIUM_LIMIT;
2095: _b |= EncodingConstants.INTEGER_3RD_BIT_LARGE_FLAG
2096: | (i >> 16); // 00101 000
2097: write(_b);
2098: write((i >> 8) & 0xFF);
2099: write(i & 0xFF);
2100: } else {
2101: // [526369, 1048576] ( [526368, 1048575] ) 20 bits
2102: i -= EncodingConstants.INTEGER_3RD_BIT_LARGE_LIMIT;
2103: _b |= EncodingConstants.INTEGER_3RD_BIT_LARGE_LARGE_FLAG; // 00110 000
2104: write(_b);
2105: write(i >> 16);
2106: write((i >> 8) & 0xFF);
2107: write(i & 0xFF);
2108: }
2109: }
2110:
2111: /**
2112: * Encode a non zero integer on the fourth bit of an octet.
2113: * Implementation of clause C.28 of ITU-T Rec. X.891 | ISO/IEC 24824-1.
2114: *
2115: * @param i The integer to encode, which is a member of the interval
2116: * [0, 1048575]. In the specification the interval is [1, 1048576]
2117: *
2118: */
2119: protected final void encodeNonZeroIntegerOnFourthBit(int i)
2120: throws IOException {
2121: if (i < EncodingConstants.INTEGER_4TH_BIT_SMALL_LIMIT) {
2122: // [1, 16] ( [0, 15] ) 4 bits
2123: write(_b | i);
2124: } else if (i < EncodingConstants.INTEGER_4TH_BIT_MEDIUM_LIMIT) {
2125: // [17, 1040] ( [16, 1039] ) 10 bits
2126: i -= EncodingConstants.INTEGER_4TH_BIT_SMALL_LIMIT;
2127: _b |= EncodingConstants.INTEGER_4TH_BIT_MEDIUM_FLAG
2128: | (i >> 8); // 000 100 00
2129: write(_b);
2130: write(i & 0xFF);
2131: } else if (i < EncodingConstants.INTEGER_4TH_BIT_LARGE_LIMIT) {
2132: // [1041, 263184] ( [1040, 263183] ) 18 bits
2133: i -= EncodingConstants.INTEGER_4TH_BIT_MEDIUM_LIMIT;
2134: _b |= EncodingConstants.INTEGER_4TH_BIT_LARGE_FLAG
2135: | (i >> 16); // 000 101 00
2136: write(_b);
2137: write((i >> 8) & 0xFF);
2138: write(i & 0xFF);
2139: } else {
2140: // [263185, 1048576] ( [263184, 1048575] ) 20 bits
2141: i -= EncodingConstants.INTEGER_4TH_BIT_LARGE_LIMIT;
2142: _b |= EncodingConstants.INTEGER_4TH_BIT_LARGE_LARGE_FLAG; // 000 110 00
2143: write(_b);
2144: write(i >> 16);
2145: write((i >> 8) & 0xFF);
2146: write(i & 0xFF);
2147: }
2148: }
2149:
2150: /**
2151: * Encode a non empty string using the UTF-8 encoding.
2152: *
2153: * @param b the current octet that is being written.
2154: * @param s the string to be UTF-8 encoded.
2155: * @param constants the array of constants to use when encoding to determin
2156: * how the length of the UTF-8 encoded string is encoded.
2157: */
2158: protected final void encodeNonEmptyUTF8StringAsOctetString(int b,
2159: String s, int[] constants) throws IOException {
2160: final char[] ch = s.toCharArray();
2161: encodeNonEmptyUTF8StringAsOctetString(b, ch, 0, ch.length,
2162: constants);
2163: }
2164:
2165: /**
2166: * Encode a non empty string using the UTF-8 encoding.
2167: *
2168: * @param b the current octet that is being written.
2169: * @param ch the array of characters.
2170: * @param offset the offset into the array of characters.
2171: * @param length the length of characters.
2172: * how the length of the UTF-8 encoded string is encoded.
2173: * @param constants the array of constants to use when encoding to determin
2174: * how the length of the UTF-8 encoded string is encoded.
2175: */
2176: protected final void encodeNonEmptyUTF8StringAsOctetString(int b,
2177: char ch[], int offset, int length, int[] constants)
2178: throws IOException {
2179: length = encodeUTF8String(ch, offset, length);
2180: encodeNonZeroOctetStringLength(b, length, constants);
2181: write(_encodingBuffer, length);
2182: }
2183:
2184: /**
2185: * Encode the length of non empty UTF-8 encoded string.
2186: *
2187: * @param b the current octet that is being written.
2188: * @param length the length of the UTF-8 encoded string.
2189: * how the length of the UTF-8 encoded string is encoded.
2190: * @param constants the array of constants to use when encoding to determin
2191: * how the length of the UTF-8 encoded string is encoded.
2192: */
2193: protected final void encodeNonZeroOctetStringLength(int b,
2194: int length, int[] constants) throws IOException {
2195: if (length < constants[EncodingConstants.OCTET_STRING_LENGTH_SMALL_LIMIT]) {
2196: write(b | (length - 1));
2197: } else if (length < constants[EncodingConstants.OCTET_STRING_LENGTH_MEDIUM_LIMIT]) {
2198: write(b
2199: | constants[EncodingConstants.OCTET_STRING_LENGTH_MEDIUM_FLAG]);
2200: write(length
2201: - constants[EncodingConstants.OCTET_STRING_LENGTH_SMALL_LIMIT]);
2202: } else {
2203: write(b
2204: | constants[EncodingConstants.OCTET_STRING_LENGTH_LARGE_FLAG]);
2205: length -= constants[EncodingConstants.OCTET_STRING_LENGTH_MEDIUM_LIMIT];
2206: write(length >>> 24);
2207: write((length >> 16) & 0xFF);
2208: write((length >> 8) & 0xFF);
2209: write(length & 0xFF);
2210: }
2211: }
2212:
2213: /**
2214: * Encode a non zero integer.
2215: *
2216: * @param b the current octet that is being written.
2217: * @param i the non zero integer.
2218: * @param constants the array of constants to use when encoding to determin
2219: * how the non zero integer is encoded.
2220: */
2221: protected final void encodeNonZeroInteger(int b, int i,
2222: int[] constants) throws IOException {
2223: if (i < constants[EncodingConstants.INTEGER_SMALL_LIMIT]) {
2224: write(b | i);
2225: } else if (i < constants[EncodingConstants.INTEGER_MEDIUM_LIMIT]) {
2226: i -= constants[EncodingConstants.INTEGER_SMALL_LIMIT];
2227: write(b | constants[EncodingConstants.INTEGER_MEDIUM_FLAG]
2228: | (i >> 8));
2229: write(i & 0xFF);
2230: } else if (i < constants[EncodingConstants.INTEGER_LARGE_LIMIT]) {
2231: i -= constants[EncodingConstants.INTEGER_MEDIUM_LIMIT];
2232: write(b | constants[EncodingConstants.INTEGER_LARGE_FLAG]
2233: | (i >> 16));
2234: write((i >> 8) & 0xFF);
2235: write(i & 0xFF);
2236: } else if (i < EncodingConstants.INTEGER_MAXIMUM_SIZE) {
2237: i -= constants[EncodingConstants.INTEGER_LARGE_LIMIT];
2238: write(b
2239: | constants[EncodingConstants.INTEGER_LARGE_LARGE_FLAG]);
2240: write(i >> 16);
2241: write((i >> 8) & 0xFF);
2242: write(i & 0xFF);
2243: } else {
2244: throw new IOException(
2245: CommonResourceBundle
2246: .getInstance()
2247: .getString(
2248: "message.integerMaxSize",
2249: new Object[] { Integer
2250: .valueOf(EncodingConstants.INTEGER_MAXIMUM_SIZE) }));
2251: }
2252: }
2253:
2254: /**
2255: * Mark the current position in the buffered stream.
2256: */
2257: protected final void mark() {
2258: _markIndex = _octetBufferIndex;
2259: }
2260:
2261: /**
2262: * Reset the marked position in the buffered stream.
2263: */
2264: protected final void resetMark() {
2265: _markIndex = -1;
2266: }
2267:
2268: /**
2269: * @return true if the mark has been set, otherwise false if the mark
2270: * has not been set.
2271: */
2272: protected final boolean hasMark() {
2273: return _markIndex != -1;
2274: }
2275:
2276: /**
2277: * Write a byte to the buffered stream.
2278: */
2279: protected final void write(int i) throws IOException {
2280: if (_octetBufferIndex < _octetBuffer.length) {
2281: _octetBuffer[_octetBufferIndex++] = (byte) i;
2282: } else {
2283: if (_markIndex == -1) {
2284: _s.write(_octetBuffer);
2285: _octetBufferIndex = 1;
2286: _octetBuffer[0] = (byte) i;
2287: } else {
2288: resize(_octetBuffer.length * 3 / 2);
2289: _octetBuffer[_octetBufferIndex++] = (byte) i;
2290: }
2291: }
2292: }
2293:
2294: /**
2295: * Write an array of bytes to the buffered stream.
2296: *
2297: * @param b the array of bytes.
2298: * @param length the length of bytes.
2299: */
2300: protected final void write(byte[] b, int length) throws IOException {
2301: write(b, 0, length);
2302: }
2303:
2304: /**
2305: * Write an array of bytes to the buffered stream.
2306: *
2307: * @param b the array of bytes.
2308: * @param offset the offset into the array of bytes.
2309: * @param length the length of bytes.
2310: */
2311: protected final void write(byte[] b, int offset, int length)
2312: throws IOException {
2313: if ((_octetBufferIndex + length) < _octetBuffer.length) {
2314: System.arraycopy(b, offset, _octetBuffer,
2315: _octetBufferIndex, length);
2316: _octetBufferIndex += length;
2317: } else {
2318: if (_markIndex == -1) {
2319: _s.write(_octetBuffer, 0, _octetBufferIndex);
2320: _s.write(b, offset, length);
2321: _octetBufferIndex = 0;
2322: } else {
2323: resize((_octetBuffer.length + length) * 3 / 2 + 1);
2324: System.arraycopy(b, offset, _octetBuffer,
2325: _octetBufferIndex, length);
2326: _octetBufferIndex += length;
2327: }
2328: }
2329: }
2330:
2331: private void ensureSize(int length) {
2332: if ((_octetBufferIndex + length) > _octetBuffer.length) {
2333: resize((_octetBufferIndex + length) * 3 / 2 + 1);
2334: }
2335: }
2336:
2337: private void resize(int length) {
2338: byte[] b = new byte[length];
2339: System.arraycopy(_octetBuffer, 0, b, 0, _octetBufferIndex);
2340: _octetBuffer = b;
2341: }
2342:
2343: private void _flush() throws IOException {
2344: if (_octetBufferIndex > 0) {
2345: _s.write(_octetBuffer, 0, _octetBufferIndex);
2346: _octetBufferIndex = 0;
2347: }
2348: }
2349:
// Stream view over _encodingBuffer: octets written to it are appended to
// _encodingBuffer at _encodingBufferIndex.
private EncodingBufferOutputStream _encodingBufferOutputStream = new EncodingBufferOutputStream();

// Staging buffer that receives encoded octets (for example UTF-8 or UTF-16
// encoded strings) before they are copied to the octet buffer. It is
// replaced by a larger array when more room is needed.
private byte[] _encodingBuffer = new byte[512];

// Next write position in _encodingBuffer used by EncodingBufferOutputStream.
private int _encodingBufferIndex;
2355:
2356: private class EncodingBufferOutputStream extends OutputStream {
2357:
2358: public void write(int b) throws IOException {
2359: if (_encodingBufferIndex < _encodingBuffer.length) {
2360: _encodingBuffer[_encodingBufferIndex++] = (byte) b;
2361: } else {
2362: byte newbuf[] = new byte[Math.max(
2363: _encodingBuffer.length << 1,
2364: _encodingBufferIndex)];
2365: System.arraycopy(_encodingBuffer, 0, newbuf, 0,
2366: _encodingBufferIndex);
2367: _encodingBuffer = newbuf;
2368:
2369: _encodingBuffer[_encodingBufferIndex++] = (byte) b;
2370: }
2371: }
2372:
2373: public void write(byte b[], int off, int len)
2374: throws IOException {
2375: if ((off < 0) || (off > b.length) || (len < 0)
2376: || ((off + len) > b.length) || ((off + len) < 0)) {
2377: throw new IndexOutOfBoundsException();
2378: } else if (len == 0) {
2379: return;
2380: }
2381: final int newoffset = _encodingBufferIndex + len;
2382: if (newoffset > _encodingBuffer.length) {
2383: byte newbuf[] = new byte[Math.max(
2384: _encodingBuffer.length << 1, newoffset)];
2385: System.arraycopy(_encodingBuffer, 0, newbuf, 0,
2386: _encodingBufferIndex);
2387: _encodingBuffer = newbuf;
2388: }
2389: System.arraycopy(b, off, _encodingBuffer,
2390: _encodingBufferIndex, len);
2391: _encodingBufferIndex = newoffset;
2392: }
2393:
2394: public int getLength() {
2395: return _encodingBufferIndex;
2396: }
2397:
2398: public void reset() {
2399: _encodingBufferIndex = 0;
2400: }
2401: }
2402:
2403: /**
2404: * Encode a string using the UTF-8 encoding.
2405: *
2406: * @param s the string to encode.
2407: */
2408: protected final int encodeUTF8String(String s) throws IOException {
2409: final int length = s.length();
2410: if (length < _charBuffer.length) {
2411: s.getChars(0, length, _charBuffer, 0);
2412: return encodeUTF8String(_charBuffer, 0, length);
2413: } else {
2414: char[] ch = s.toCharArray();
2415: return encodeUTF8String(ch, 0, length);
2416: }
2417: }
2418:
2419: private void ensureEncodingBufferSizeForUtf8String(int length) {
2420: final int newLength = 4 * length;
2421: if (_encodingBuffer.length < newLength) {
2422: _encodingBuffer = new byte[newLength];
2423: }
2424: }
2425:
2426: /**
2427: * Encode a string using the UTF-8 encoding.
2428: *
2429: * @param ch the array of characters.
2430: * @param offset the offset into the array of characters.
2431: * @param length the length of characters.
2432: */
2433: protected final int encodeUTF8String(char[] ch, int offset,
2434: int length) throws IOException {
2435: int bpos = 0;
2436:
2437: // Make sure buffer is large enough
2438: ensureEncodingBufferSizeForUtf8String(length);
2439:
2440: final int end = offset + length;
2441: int c;
2442: while (end != offset) {
2443: c = ch[offset++];
2444: if (c < 0x80) {
2445: // 1 byte, 7 bits
2446: _encodingBuffer[bpos++] = (byte) c;
2447: } else if (c < 0x800) {
2448: // 2 bytes, 11 bits
2449: _encodingBuffer[bpos++] = (byte) (0xC0 | (c >> 6)); // first 5
2450: _encodingBuffer[bpos++] = (byte) (0x80 | (c & 0x3F)); // second 6
2451: } else if (c <= '\uFFFF') {
2452: if (!XMLChar.isHighSurrogate(c)
2453: && !XMLChar.isLowSurrogate(c)) {
2454: // 3 bytes, 16 bits
2455: _encodingBuffer[bpos++] = (byte) (0xE0 | (c >> 12)); // first 4
2456: _encodingBuffer[bpos++] = (byte) (0x80 | ((c >> 6) & 0x3F)); // second 6
2457: _encodingBuffer[bpos++] = (byte) (0x80 | (c & 0x3F)); // third 6
2458: } else {
2459: // 4 bytes, high and low surrogate
2460: encodeCharacterAsUtf8FourByte(c, ch, offset, end,
2461: bpos);
2462: bpos += 4;
2463: offset++;
2464: }
2465: }
2466: }
2467:
2468: return bpos;
2469: }
2470:
2471: private void encodeCharacterAsUtf8FourByte(int c, char[] ch,
2472: int chpos, int chend, int bpos) throws IOException {
2473: if (chpos == chend) {
2474: throw new IOException("");
2475: }
2476:
2477: final char d = ch[chpos];
2478: if (!XMLChar.isLowSurrogate(d)) {
2479: throw new IOException("");
2480: }
2481:
2482: final int uc = (((c & 0x3ff) << 10) | (d & 0x3ff)) + 0x10000;
2483: if (uc < 0 || uc >= 0x200000) {
2484: throw new IOException("");
2485: }
2486:
2487: _encodingBuffer[bpos++] = (byte) (0xF0 | ((uc >> 18)));
2488: _encodingBuffer[bpos++] = (byte) (0x80 | ((uc >> 12) & 0x3F));
2489: _encodingBuffer[bpos++] = (byte) (0x80 | ((uc >> 6) & 0x3F));
2490: _encodingBuffer[bpos++] = (byte) (0x80 | (uc & 0x3F));
2491: }
2492:
2493: /**
2494: * Encode a string using the UTF-16 encoding.
2495: *
2496: * @param s the string to encode.
2497: */
2498: protected final int encodeUtf16String(String s) throws IOException {
2499: final int length = s.length();
2500: if (length < _charBuffer.length) {
2501: s.getChars(0, length, _charBuffer, 0);
2502: return encodeUtf16String(_charBuffer, 0, length);
2503: } else {
2504: char[] ch = s.toCharArray();
2505: return encodeUtf16String(ch, 0, length);
2506: }
2507: }
2508:
2509: private void ensureEncodingBufferSizeForUtf16String(int length) {
2510: final int newLength = 2 * length;
2511: if (_encodingBuffer.length < newLength) {
2512: _encodingBuffer = new byte[newLength];
2513: }
2514: }
2515:
2516: /**
2517: * Encode a string using the UTF-16 encoding.
2518: *
2519: * @param ch the array of characters.
2520: * @param offset the offset into the array of characters.
2521: * @param length the length of characters.
2522: */
2523: protected final int encodeUtf16String(char[] ch, int offset,
2524: int length) throws IOException {
2525: int byteLength = 0;
2526:
2527: // Make sure buffer is large enough
2528: ensureEncodingBufferSizeForUtf16String(length);
2529:
2530: final int n = offset + length;
2531: for (int i = offset; i < n; i++) {
2532: final int c = (int) ch[i];
2533: _encodingBuffer[byteLength++] = (byte) (c >> 8);
2534: _encodingBuffer[byteLength++] = (byte) (c & 0xFF);
2535: }
2536:
2537: return byteLength;
2538: }
2539:
2540: /**
2541: * Obtain the prefix from a qualified name.
2542: *
2543: * @param qName the qualified name
2544: * @return the prefix, or "" if there is no prefix.
2545: */
2546: public static String getPrefixFromQualifiedName(String qName) {
2547: int i = qName.indexOf(':');
2548: String prefix = "";
2549: if (i != -1) {
2550: prefix = qName.substring(0, i);
2551: }
2552: return prefix;
2553: }
2554:
2555: /**
2556: * Check if character array contains characters that are all white space.
2557: *
2558: * @param ch the character array
2559: * @param start the starting character index into the array to check from
2560: * @param length the number of characters to check
2561: * @return true if all characters are white space, false otherwise
2562: */
2563: public static boolean isWhiteSpace(final char[] ch, int start,
2564: final int length) {
2565: if (!XMLChar.isSpace(ch[start]))
2566: return false;
2567:
2568: final int end = start + length;
2569: while (++start < end && XMLChar.isSpace(ch[start]))
2570: ;
2571:
2572: return start == end;
2573: }
2574:
2575: /**
2576: * Check if a String contains characters that are all white space.
2577: *
2578: * @param s the string
2579: * @return true if all characters are white space, false otherwise
2580: */
2581: public static boolean isWhiteSpace(String s) {
2582: if (!XMLChar.isSpace(s.charAt(0)))
2583: return false;
2584:
2585: final int end = s.length();
2586: int start = 1;
2587: while (start < end && XMLChar.isSpace(s.charAt(start++)))
2588: ;
2589: return start == end;
2590: }
2591: }
|