001: /*
002: * Portions Copyright 2003-2005 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: /*
027: *******************************************************************************
028: * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved *
029: * *
030: * The original version of this source code and documentation is copyrighted *
031: * and owned by IBM, These materials are provided under terms of a License *
032: * Agreement between IBM and Sun. This technology is protected by multiple *
033: * US and International patents. This notice and attribution to IBM may not *
034: * to removed. *
035: *******************************************************************************
036: */
037:
038: package sun.text.normalizer;
039:
040: import java.io.InputStream;
041: import java.io.DataInputStream;
042: import java.io.IOException;
043: import java.util.Arrays;
044:
045: /**
046: * Trie implementation which stores data in int, 32 bits.
047: * @author synwee
048: * @see com.ibm.icu.impl.Trie
049: * @since release 2.1, Jan 01 2002
050: */
051: public class IntTrie extends Trie {
052: // public constructors ---------------------------------------------
053:
054: /**
055: * <p>Creates a new Trie with the settings for the trie data.</p>
056: * <p>Unserialize the 32-bit-aligned input stream and use the data for the
057: * trie.</p>
058: * @param inputStream file input stream to a ICU data file, containing
059: * the trie
060: * @param dataManipulate object which provides methods to parse the char
061: * data
062: * @throws IOException thrown when data reading fails
063: * @draft 2.1
064: */
065: public IntTrie(InputStream inputStream,
066: DataManipulate datamanipulate) throws IOException {
067: super (inputStream, datamanipulate);
068: if (!isIntTrie()) {
069: throw new IllegalArgumentException(
070: "Data given does not belong to a int trie.");
071: }
072: }
073:
074: // public methods --------------------------------------------------
075:
076: /**
077: * Gets the value associated with the codepoint.
078: * If no value is associated with the codepoint, a default value will be
079: * returned.
080: * @param ch codepoint
081: * @return offset to data
082: * @draft 2.1
083: */
084: public final int getCodePointValue(int ch) {
085: int offset = getCodePointOffset(ch);
086: return (offset >= 0) ? m_data_[offset] : m_initialValue_;
087: }
088:
089: /**
090: * Gets the value to the data which this lead surrogate character points
091: * to.
092: * Returned data may contain folding offset information for the next
093: * trailing surrogate character.
094: * This method does not guarantee correct results for trail surrogates.
095: * @param ch lead surrogate character
096: * @return data value
097: * @draft 2.1
098: */
099: public final int getLeadValue(char ch) {
100: return m_data_[getLeadOffset(ch)];
101: }
102:
103: /**
104: * Get a value from a folding offset (from the value of a lead surrogate)
105: * and a trail surrogate.
106: * @param leadvalue the value of a lead surrogate that contains the
107: * folding offset
108: * @param trail surrogate
109: * @return trie data value associated with the trail character
110: * @draft 2.1
111: */
112: public final int getTrailValue(int leadvalue, char trail) {
113: if (m_dataManipulate_ == null) {
114: throw new NullPointerException(
115: "The field DataManipulate in this Trie is null");
116: }
117: int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
118: if (offset > 0) {
119: return m_data_[getRawOffset(offset,
120: (char) (trail & SURROGATE_MASK_))];
121: }
122: return m_initialValue_;
123: }
124:
125: // protected methods -----------------------------------------------
126:
127: /**
128: * <p>Parses the input stream and stores its trie content into a index and
129: * data array</p>
130: * @param inputStream data input stream containing trie data
131: * @exception IOException thrown when data reading fails
132: */
133: protected final void unserialize(InputStream inputStream)
134: throws IOException {
135: super .unserialize(inputStream);
136: // one used for initial value
137: m_data_ = new int[m_dataLength_];
138: DataInputStream input = new DataInputStream(inputStream);
139: for (int i = 0; i < m_dataLength_; i++) {
140: m_data_[i] = input.readInt();
141: }
142: m_initialValue_ = m_data_[0];
143: }
144:
145: /**
146: * Gets the offset to the data which the surrogate pair points to.
147: * @param lead lead surrogate
148: * @param trail trailing surrogate
149: * @return offset to data
150: * @draft 2.1
151: */
152: protected final int getSurrogateOffset(char lead, char trail) {
153: if (m_dataManipulate_ == null) {
154: throw new NullPointerException(
155: "The field DataManipulate in this Trie is null");
156: }
157: // get fold position for the next trail surrogate
158: int offset = m_dataManipulate_
159: .getFoldingOffset(getLeadValue(lead));
160:
161: // get the real data from the folded lead/trail units
162: if (offset > 0) {
163: return getRawOffset(offset,
164: (char) (trail & SURROGATE_MASK_));
165: }
166:
167: // return -1 if there is an error, in this case we return the default
168: // value: m_initialValue_
169: return -1;
170: }
171:
172: /**
173: * Gets the value at the argument index.
174: * For use internally in TrieIterator
175: * @param index value at index will be retrieved
176: * @return 32 bit value
177: * @see com.ibm.icu.impl.TrieIterator
178: * @draft 2.1
179: */
180: protected final int getValue(int index) {
181: return m_data_[index];
182: }
183:
184: /**
185: * Gets the default initial value
186: * @return 32 bit value
187: * @draft 2.1
188: */
189: protected final int getInitialValue() {
190: return m_initialValue_;
191: }
192:
193: // package private methods -----------------------------------------
194:
195: /**
196: * Internal constructor for builder use
197: * @param index the index array to be slotted into this trie
198: * @param data the data array to be slotted into this trie
199: * @param initialvalue the initial value for this trie
200: * @param options trie options to use
201: * @param datamanipulate folding implementation
202: */
203: IntTrie(char index[], int data[], int initialvalue, int options,
204: DataManipulate datamanipulate) {
205: super (index, options, datamanipulate);
206: m_data_ = data;
207: m_dataLength_ = m_data_.length;
208: m_initialValue_ = initialvalue;
209: }
210:
211: // private data members --------------------------------------------
212:
213: /**
214: * Default value
215: */
216: private int m_initialValue_;
217: /**
218: * Array of char data
219: */
220: private int m_data_[];
221: }
|