blob: 6059beeeefe438ab4bb60c1a53be9e2e56a5b421 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Portions Copyright 2003-2006 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26/*
27 *******************************************************************************
28 * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved *
29 * *
30 * The original version of this source code and documentation is copyrighted *
31 * and owned by IBM, These materials are provided under terms of a License *
32 * Agreement between IBM and Sun. This technology is protected by multiple *
33 * US and International patents. This notice and attribution to IBM may not *
34 * to removed. *
35 *******************************************************************************
36 */
37
38package sun.text.normalizer;
39
40import java.text.CharacterIterator;
41
42/**
43 * Abstract class that defines an API for iteration on text objects.This is an
44 * interface for forward and backward iteration and random access into a text
45 * object. Forward iteration is done with post-increment and backward iteration
46 * is done with pre-decrement semantics, while the
47 * <code>java.text.CharacterIterator</code> interface methods provided forward
48 * iteration with "pre-increment" and backward iteration with pre-decrement
49 * semantics. This API is more efficient for forward iteration over code points.
50 * The other major difference is that this API can do both code unit and code point
51 * iteration, <code>java.text.CharacterIterator</code> can only iterate over
52 * code units and is limited to BMP (0 - 0xFFFF)
53 * @author Ram
54 * @stable ICU 2.4
55 */
56public abstract class UCharacterIterator
57 implements Cloneable {
58
59 /**
60 * Protected default constructor for the subclasses
61 * @stable ICU 2.4
62 */
63 protected UCharacterIterator(){
64 }
65
66 /**
67 * Indicator that we have reached the ends of the UTF16 text.
68 * Moved from UForwardCharacterIterator.java
69 * @stable ICU 2.4
70 */
71 public static final int DONE = -1;
72
73 // static final methods ----------------------------------------------------
74
75 /**
76 * Returns a <code>UCharacterIterator</code> object given a
77 * source string.
78 * @param source a string
79 * @return UCharacterIterator object
80 * @exception IllegalArgumentException if the argument is null
81 * @stable ICU 2.4
82 */
83 public static final UCharacterIterator getInstance(String source){
84 return new ReplaceableUCharacterIterator(source);
85 }
86
87 //// for StringPrep
88 /**
89 * Returns a <code>UCharacterIterator</code> object given a
90 * source StringBuffer.
91 * @param source an string buffer of UTF-16 code units
92 * @return UCharacterIterator object
93 * @exception IllegalArgumentException if the argument is null
94 * @stable ICU 2.4
95 */
96 public static final UCharacterIterator getInstance(StringBuffer source){
97 return new ReplaceableUCharacterIterator(source);
98 }
99
100 /**
101 * Returns a <code>UCharacterIterator</code> object given a
102 * CharacterIterator.
103 * @param source a valid CharacterIterator object.
104 * @return UCharacterIterator object
105 * @exception IllegalArgumentException if the argument is null
106 * @stable ICU 2.4
107 */
108 public static final UCharacterIterator getInstance(CharacterIterator source){
109 return new CharacterIteratorWrapper(source);
110 }
111
112 // public methods ----------------------------------------------------------
113
114 /**
115 * Returns the code unit at the current index. If index is out
116 * of range, returns DONE. Index is not changed.
117 * @return current code unit
118 * @stable ICU 2.4
119 */
120 public abstract int current();
121
122 /**
123 * Returns the length of the text
124 * @return length of the text
125 * @stable ICU 2.4
126 */
127 public abstract int getLength();
128
129
130 /**
131 * Gets the current index in text.
132 * @return current index in text.
133 * @stable ICU 2.4
134 */
135 public abstract int getIndex();
136
137
138 /**
139 * Returns the UTF16 code unit at index, and increments to the next
140 * code unit (post-increment semantics). If index is out of
141 * range, DONE is returned, and the iterator is reset to the limit
142 * of the text.
143 * @return the next UTF16 code unit, or DONE if the index is at the limit
144 * of the text.
145 * @stable ICU 2.4
146 */
147 public abstract int next();
148
149 /**
150 * Returns the code point at index, and increments to the next code
151 * point (post-increment semantics). If index does not point to a
152 * valid surrogate pair, the behavior is the same as
153 * <code>next()</code>. Otherwise the iterator is incremented past
154 * the surrogate pair, and the code point represented by the pair
155 * is returned.
156 * @return the next codepoint in text, or DONE if the index is at
157 * the limit of the text.
158 * @stable ICU 2.4
159 */
160 public int nextCodePoint(){
161 int ch1 = next();
162 if(UTF16.isLeadSurrogate((char)ch1)){
163 int ch2 = next();
164 if(UTF16.isTrailSurrogate((char)ch2)){
165 return UCharacterProperty.getRawSupplementary((char)ch1,
166 (char)ch2);
167 }else if (ch2 != DONE) {
168 // unmatched surrogate so back out
169 previous();
170 }
171 }
172 return ch1;
173 }
174
175 /**
176 * Decrement to the position of the previous code unit in the
177 * text, and return it (pre-decrement semantics). If the
178 * resulting index is less than 0, the index is reset to 0 and
179 * DONE is returned.
180 * @return the previous code unit in the text, or DONE if the new
181 * index is before the start of the text.
182 * @stable ICU 2.4
183 */
184 public abstract int previous();
185
186 /**
187 * Sets the index to the specified index in the text.
188 * @param index the index within the text.
189 * @exception IndexOutOfBoundsException is thrown if an invalid index is
190 * supplied
191 * @stable ICU 2.4
192 */
193 public abstract void setIndex(int index);
194
195 //// for StringPrep
196 /**
197 * Fills the buffer with the underlying text storage of the iterator
198 * If the buffer capacity is not enough a exception is thrown. The capacity
199 * of the fill in buffer should at least be equal to length of text in the
200 * iterator obtained by calling <code>getLength()</code>.
201 * <b>Usage:</b>
202 *
203 * <code>
204 * <pre>
205 * UChacterIterator iter = new UCharacterIterator.getInstance(text);
206 * char[] buf = new char[iter.getLength()];
207 * iter.getText(buf);
208 *
209 * OR
210 * char[] buf= new char[1];
211 * int len = 0;
212 * for(;;){
213 * try{
214 * len = iter.getText(buf);
215 * break;
216 * }catch(IndexOutOfBoundsException e){
217 * buf = new char[iter.getLength()];
218 * }
219 * }
220 * </pre>
221 * </code>
222 *
223 * @param fillIn an array of chars to fill with the underlying UTF-16 code
224 * units.
225 * @param offset the position within the array to start putting the data.
226 * @return the number of code units added to fillIn, as a convenience
227 * @exception IndexOutOfBounds exception if there is not enough
228 * room after offset in the array, or if offset < 0.
229 * @stable ICU 2.4
230 */
231 public abstract int getText(char[] fillIn, int offset);
232
233 //// for StringPrep
234 /**
235 * Convenience override for <code>getText(char[], int)</code> that provides
236 * an offset of 0.
237 * @param fillIn an array of chars to fill with the underlying UTF-16 code
238 * units.
239 * @return the number of code units added to fillIn, as a convenience
240 * @exception IndexOutOfBounds exception if there is not enough
241 * room in the array.
242 * @stable ICU 2.4
243 */
244 public final int getText(char[] fillIn) {
245 return getText(fillIn, 0);
246 }
247
248 //// for StringPrep
249 /**
250 * Convenience method for returning the underlying text storage as as string
251 * @return the underlying text storage in the iterator as a string
252 * @stable ICU 2.4
253 */
254 public String getText() {
255 char[] text = new char[getLength()];
256 getText(text);
257 return new String(text);
258 }
259
260 /**
261 * Moves the current position by the number of code units
262 * specified, either forward or backward depending on the sign
263 * of delta (positive or negative respectively). If the resulting
264 * index would be less than zero, the index is set to zero, and if
265 * the resulting index would be greater than limit, the index is
266 * set to limit.
267 *
268 * @param delta the number of code units to move the current
269 * index.
270 * @return the new index.
271 * @exception IndexOutOfBoundsException is thrown if an invalid index is
272 * supplied
273 * @stable ICU 2.4
274 *
275 */
276 public int moveIndex(int delta) {
277 int x = Math.max(0, Math.min(getIndex() + delta, getLength()));
278 setIndex(x);
279 return x;
280 }
281
282 /**
283 * Creates a copy of this iterator, independent from other iterators.
284 * If it is not possible to clone the iterator, returns null.
285 * @return copy of this iterator
286 * @stable ICU 2.4
287 */
288 public Object clone() throws CloneNotSupportedException{
289 return super.clone();
290 }
291
292}