001: /*
002: * Portions Copyright 2003-2005 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: /*
027: *******************************************************************************
028: * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved *
029: * *
030: * The original version of this source code and documentation is copyrighted *
031: * and owned by IBM, These materials are provided under terms of a License *
032: * Agreement between IBM and Sun. This technology is protected by multiple *
033: * US and International patents. This notice and attribution to IBM may not *
034: * to removed. *
035: *******************************************************************************
036: */
037:
038: package sun.text.normalizer;
039:
040: import java.io.InputStream;
041: import java.io.DataInputStream;
042: import java.io.IOException;
043:
044: /**
045: * Trie implementation which stores data in char, 16 bits.
046: * @author synwee
047: * @see com.ibm.icu.impl.Trie
048: * @since release 2.1, Jan 01 2002
049: */
050:
// Note that I need to handle the block calculations later, since CharTrie
// in ICU4C uses the same index array.
053: public class CharTrie extends Trie {
054: // public constructors ---------------------------------------------
055:
056: /**
057: * <p>Creates a new Trie with the settings for the trie data.</p>
058: * <p>Unserialize the 32-bit-aligned input stream and use the data for the
059: * trie.</p>
060: * @param inputStream file input stream to a ICU data file, containing
061: * the trie
062: * @param dataManipulate object which provides methods to parse the char
063: * data
064: * @throws IOException thrown when data reading fails
065: * @draft 2.1
066: */
067: public CharTrie(InputStream inputStream,
068: DataManipulate dataManipulate) throws IOException {
069: super (inputStream, dataManipulate);
070:
071: if (!isCharTrie()) {
072: throw new IllegalArgumentException(
073: "Data given does not belong to a char trie.");
074: }
075: m_friendAgent_ = new FriendAgent();
076: }
077:
078: /**
079: * Java friend implementation
080: */
081: public class FriendAgent {
082: /**
083: * Gives out the index array of the trie
084: * @return index array of trie
085: */
086: public char[] getPrivateIndex() {
087: return m_index_;
088: }
089:
090: /**
091: * Gives out the data array of the trie
092: * @return data array of trie
093: */
094: public char[] getPrivateData() {
095: return m_data_;
096: }
097:
098: /**
099: * Gives out the data offset in the trie
100: * @return data offset in the trie
101: */
102: public int getPrivateInitialValue() {
103: return m_initialValue_;
104: }
105: }
106:
107: // public methods --------------------------------------------------
108:
109: /**
110: * Java friend implementation
111: * To store the index and data array into the argument.
112: * @param friend java friend UCharacterProperty object to store the array
113: */
114: public void putIndexData(UCharacterProperty friend) {
115: friend.setIndexData(m_friendAgent_);
116: }
117:
118: /**
119: * Gets the value associated with the codepoint.
120: * If no value is associated with the codepoint, a default value will be
121: * returned.
122: * @param ch codepoint
123: * @return offset to data
124: * @draft 2.1
125: */
126: public final char getCodePointValue(int ch) {
127: int offset = getCodePointOffset(ch);
128:
129: // return -1 if there is an error, in this case we return the default
130: // value: m_initialValue_
131: return (offset >= 0) ? m_data_[offset] : m_initialValue_;
132: }
133:
134: /**
135: * Gets the value to the data which this lead surrogate character points
136: * to.
137: * Returned data may contain folding offset information for the next
138: * trailing surrogate character.
139: * This method does not guarantee correct results for trail surrogates.
140: * @param ch lead surrogate character
141: * @return data value
142: * @draft 2.1
143: */
144: public final char getLeadValue(char ch) {
145: return m_data_[getLeadOffset(ch)];
146: }
147:
148: /**
149: * Get the value associated with a pair of surrogates.
150: * @param lead a lead surrogate
151: * @param trail a trail surrogate
152: * @draft 2.1
153: */
154: public final char getSurrogateValue(char lead, char trail) {
155: int offset = getSurrogateOffset(lead, trail);
156: if (offset > 0) {
157: return m_data_[offset];
158: }
159: return m_initialValue_;
160: }
161:
162: /**
163: * <p>Get a value from a folding offset (from the value of a lead surrogate)
164: * and a trail surrogate.</p>
165: * <p>If the
166: * @param leadvalue value associated with the lead surrogate which contains
167: * the folding offset
168: * @param trail surrogate
169: * @return trie data value associated with the trail character
170: * @draft 2.1
171: */
172: public final char getTrailValue(int leadvalue, char trail) {
173: if (m_dataManipulate_ == null) {
174: throw new NullPointerException(
175: "The field DataManipulate in this Trie is null");
176: }
177: int offset = m_dataManipulate_.getFoldingOffset(leadvalue);
178: if (offset > 0) {
179: return m_data_[getRawOffset(offset,
180: (char) (trail & SURROGATE_MASK_))];
181: }
182: return m_initialValue_;
183: }
184:
185: // protected methods -----------------------------------------------
186:
187: /**
188: * <p>Parses the input stream and stores its trie content into a index and
189: * data array</p>
190: * @param inputStream data input stream containing trie data
191: * @exception IOException thrown when data reading fails
192: */
193: protected final void unserialize(InputStream inputStream)
194: throws IOException {
195: DataInputStream input = new DataInputStream(inputStream);
196: int indexDataLength = m_dataOffset_ + m_dataLength_;
197: m_index_ = new char[indexDataLength];
198: for (int i = 0; i < indexDataLength; i++) {
199: m_index_[i] = input.readChar();
200: }
201: m_data_ = m_index_;
202: m_initialValue_ = m_data_[m_dataOffset_];
203: }
204:
205: /**
206: * Gets the offset to the data which the surrogate pair points to.
207: * @param lead lead surrogate
208: * @param trail trailing surrogate
209: * @return offset to data
210: * @draft 2.1
211: */
212: protected final int getSurrogateOffset(char lead, char trail) {
213: if (m_dataManipulate_ == null) {
214: throw new NullPointerException(
215: "The field DataManipulate in this Trie is null");
216: }
217:
218: // get fold position for the next trail surrogate
219: int offset = m_dataManipulate_
220: .getFoldingOffset(getLeadValue(lead));
221:
222: // get the real data from the folded lead/trail units
223: if (offset > 0) {
224: return getRawOffset(offset,
225: (char) (trail & SURROGATE_MASK_));
226: }
227:
228: // return -1 if there is an error, in this case we return the default
229: // value: m_initialValue_
230: return -1;
231: }
232:
233: /**
234: * Gets the value at the argument index.
235: * For use internally in TrieIterator.
236: * @param index value at index will be retrieved
237: * @return 32 bit value
238: * @see com.ibm.icu.impl.TrieIterator
239: * @draft 2.1
240: */
241: protected final int getValue(int index) {
242: return m_data_[index];
243: }
244:
245: /**
246: * Gets the default initial value
247: * @return 32 bit value
248: * @draft 2.1
249: */
250: protected final int getInitialValue() {
251: return m_initialValue_;
252: }
253:
254: // private data members --------------------------------------------
255:
256: /**
257: * Default value
258: */
259: private char m_initialValue_;
260: /**
261: * Array of char data
262: */
263: private char m_data_[];
264: /**
265: * Agent for friends
266: */
267: private FriendAgent m_friendAgent_;
268: }
|