001: /*
002: * Portions Copyright 2003-2006 Sun Microsystems, Inc. All Rights Reserved.
003: * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
004: *
005: * This code is free software; you can redistribute it and/or modify it
006: * under the terms of the GNU General Public License version 2 only, as
007: * published by the Free Software Foundation. Sun designates this
008: * particular file as subject to the "Classpath" exception as provided
009: * by Sun in the LICENSE file that accompanied this code.
010: *
011: * This code is distributed in the hope that it will be useful, but WITHOUT
012: * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
013: * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
014: * version 2 for more details (a copy is included in the LICENSE file that
015: * accompanied this code).
016: *
017: * You should have received a copy of the GNU General Public License version
018: * 2 along with this work; if not, write to the Free Software Foundation,
019: * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
020: *
021: * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
022: * CA 95054 USA or visit www.sun.com if you need additional information or
023: * have any questions.
024: */
025:
026: /*
027: *******************************************************************************
028: * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved *
029: * *
030: * The original version of this source code and documentation is copyrighted *
031: * and owned by IBM, These materials are provided under terms of a License *
032: * Agreement between IBM and Sun. This technology is protected by multiple *
033: * US and International patents. This notice and attribution to IBM may not *
034: * to removed. *
035: *******************************************************************************
036: */
037:
038: package sun.text.normalizer;
039:
040: import java.text.CharacterIterator;
041:
042: /**
043: * Abstract class that defines an API for iteration on text objects.This is an
044: * interface for forward and backward iteration and random access into a text
045: * object. Forward iteration is done with post-increment and backward iteration
046: * is done with pre-decrement semantics, while the
047: * <code>java.text.CharacterIterator</code> interface methods provided forward
048: * iteration with "pre-increment" and backward iteration with pre-decrement
049: * semantics. This API is more efficient for forward iteration over code points.
050: * The other major difference is that this API can do both code unit and code point
051: * iteration, <code>java.text.CharacterIterator</code> can only iterate over
052: * code units and is limited to BMP (0 - 0xFFFF)
053: * @author Ram
054: * @stable ICU 2.4
055: */
056: public abstract class UCharacterIterator implements Cloneable {
057:
058: /**
059: * Protected default constructor for the subclasses
060: * @stable ICU 2.4
061: */
062: protected UCharacterIterator() {
063: }
064:
065: /**
066: * Indicator that we have reached the ends of the UTF16 text.
067: * Moved from UForwardCharacterIterator.java
068: * @stable ICU 2.4
069: */
070: public static final int DONE = -1;
071:
072: // static final methods ----------------------------------------------------
073:
074: /**
075: * Returns a <code>UCharacterIterator</code> object given a
076: * source string.
077: * @param source a string
078: * @return UCharacterIterator object
079: * @exception IllegalArgumentException if the argument is null
080: * @stable ICU 2.4
081: */
082: public static final UCharacterIterator getInstance(String source) {
083: return new ReplaceableUCharacterIterator(source);
084: }
085:
086: //// for StringPrep
087: /**
088: * Returns a <code>UCharacterIterator</code> object given a
089: * source StringBuffer.
090: * @param source an string buffer of UTF-16 code units
091: * @return UCharacterIterator object
092: * @exception IllegalArgumentException if the argument is null
093: * @stable ICU 2.4
094: */
095: public static final UCharacterIterator getInstance(
096: StringBuffer source) {
097: return new ReplaceableUCharacterIterator(source);
098: }
099:
100: /**
101: * Returns a <code>UCharacterIterator</code> object given a
102: * CharacterIterator.
103: * @param source a valid CharacterIterator object.
104: * @return UCharacterIterator object
105: * @exception IllegalArgumentException if the argument is null
106: * @stable ICU 2.4
107: */
108: public static final UCharacterIterator getInstance(
109: CharacterIterator source) {
110: return new CharacterIteratorWrapper(source);
111: }
112:
113: // public methods ----------------------------------------------------------
114:
115: /**
116: * Returns the code unit at the current index. If index is out
117: * of range, returns DONE. Index is not changed.
118: * @return current code unit
119: * @stable ICU 2.4
120: */
121: public abstract int current();
122:
123: /**
124: * Returns the length of the text
125: * @return length of the text
126: * @stable ICU 2.4
127: */
128: public abstract int getLength();
129:
130: /**
131: * Gets the current index in text.
132: * @return current index in text.
133: * @stable ICU 2.4
134: */
135: public abstract int getIndex();
136:
137: /**
138: * Returns the UTF16 code unit at index, and increments to the next
139: * code unit (post-increment semantics). If index is out of
140: * range, DONE is returned, and the iterator is reset to the limit
141: * of the text.
142: * @return the next UTF16 code unit, or DONE if the index is at the limit
143: * of the text.
144: * @stable ICU 2.4
145: */
146: public abstract int next();
147:
148: /**
149: * Returns the code point at index, and increments to the next code
150: * point (post-increment semantics). If index does not point to a
151: * valid surrogate pair, the behavior is the same as
152: * <code>next()</code>. Otherwise the iterator is incremented past
153: * the surrogate pair, and the code point represented by the pair
154: * is returned.
155: * @return the next codepoint in text, or DONE if the index is at
156: * the limit of the text.
157: * @stable ICU 2.4
158: */
159: public int nextCodePoint() {
160: int ch1 = next();
161: if (UTF16.isLeadSurrogate((char) ch1)) {
162: int ch2 = next();
163: if (UTF16.isTrailSurrogate((char) ch2)) {
164: return UCharacterProperty.getRawSupplementary(
165: (char) ch1, (char) ch2);
166: } else if (ch2 != DONE) {
167: // unmatched surrogate so back out
168: previous();
169: }
170: }
171: return ch1;
172: }
173:
174: /**
175: * Decrement to the position of the previous code unit in the
176: * text, and return it (pre-decrement semantics). If the
177: * resulting index is less than 0, the index is reset to 0 and
178: * DONE is returned.
179: * @return the previous code unit in the text, or DONE if the new
180: * index is before the start of the text.
181: * @stable ICU 2.4
182: */
183: public abstract int previous();
184:
185: /**
186: * Sets the index to the specified index in the text.
187: * @param index the index within the text.
188: * @exception IndexOutOfBoundsException is thrown if an invalid index is
189: * supplied
190: * @stable ICU 2.4
191: */
192: public abstract void setIndex(int index);
193:
194: //// for StringPrep
195: /**
196: * Fills the buffer with the underlying text storage of the iterator
197: * If the buffer capacity is not enough a exception is thrown. The capacity
198: * of the fill in buffer should at least be equal to length of text in the
199: * iterator obtained by calling <code>getLength()</code>.
200: * <b>Usage:</b>
201: *
202: * <code>
203: * <pre>
204: * UChacterIterator iter = new UCharacterIterator.getInstance(text);
205: * char[] buf = new char[iter.getLength()];
206: * iter.getText(buf);
207: *
208: * OR
209: * char[] buf= new char[1];
210: * int len = 0;
211: * for(;;){
212: * try{
213: * len = iter.getText(buf);
214: * break;
215: * }catch(IndexOutOfBoundsException e){
216: * buf = new char[iter.getLength()];
217: * }
218: * }
219: * </pre>
220: * </code>
221: *
222: * @param fillIn an array of chars to fill with the underlying UTF-16 code
223: * units.
224: * @param offset the position within the array to start putting the data.
225: * @return the number of code units added to fillIn, as a convenience
226: * @exception IndexOutOfBounds exception if there is not enough
227: * room after offset in the array, or if offset < 0.
228: * @stable ICU 2.4
229: */
230: public abstract int getText(char[] fillIn, int offset);
231:
232: //// for StringPrep
233: /**
234: * Convenience override for <code>getText(char[], int)</code> that provides
235: * an offset of 0.
236: * @param fillIn an array of chars to fill with the underlying UTF-16 code
237: * units.
238: * @return the number of code units added to fillIn, as a convenience
239: * @exception IndexOutOfBounds exception if there is not enough
240: * room in the array.
241: * @stable ICU 2.4
242: */
243: public final int getText(char[] fillIn) {
244: return getText(fillIn, 0);
245: }
246:
247: //// for StringPrep
248: /**
249: * Convenience method for returning the underlying text storage as as string
250: * @return the underlying text storage in the iterator as a string
251: * @stable ICU 2.4
252: */
253: public String getText() {
254: char[] text = new char[getLength()];
255: getText(text);
256: return new String(text);
257: }
258:
259: /**
260: * Moves the current position by the number of code units
261: * specified, either forward or backward depending on the sign
262: * of delta (positive or negative respectively). If the resulting
263: * index would be less than zero, the index is set to zero, and if
264: * the resulting index would be greater than limit, the index is
265: * set to limit.
266: *
267: * @param delta the number of code units to move the current
268: * index.
269: * @return the new index.
270: * @exception IndexOutOfBoundsException is thrown if an invalid index is
271: * supplied
272: * @stable ICU 2.4
273: *
274: */
275: public int moveIndex(int delta) {
276: int x = Math.max(0, Math.min(getIndex() + delta, getLength()));
277: setIndex(x);
278: return x;
279: }
280:
281: /**
282: * Creates a copy of this iterator, independent from other iterators.
283: * If it is not possible to clone the iterator, returns null.
284: * @return copy of this iterator
285: * @stable ICU 2.4
286: */
287: public Object clone() throws CloneNotSupportedException {
288: return super.clone();
289: }
290:
291: }
|