/*
 * Portions Copyright 2003-2006 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
| 25 | |
/*
 *******************************************************************************
 * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved                     *
 *                                                                             *
 * The original version of this source code and documentation is copyrighted   *
 * and owned by IBM, These materials are provided under terms of a License     *
 * Agreement between IBM and Sun. This technology is protected by multiple     *
 * US and International patents. This notice and attribution to IBM may not    *
 * to removed.                                                                 *
 *******************************************************************************
 */
| 37 | |
| 38 | package sun.text.normalizer; |
| 39 | |
| 40 | import java.text.CharacterIterator; |
| 41 | |
| 42 | /** |
| 43 | * Abstract class that defines an API for iteration on text objects.This is an |
| 44 | * interface for forward and backward iteration and random access into a text |
| 45 | * object. Forward iteration is done with post-increment and backward iteration |
| 46 | * is done with pre-decrement semantics, while the |
| 47 | * <code>java.text.CharacterIterator</code> interface methods provided forward |
| 48 | * iteration with "pre-increment" and backward iteration with pre-decrement |
| 49 | * semantics. This API is more efficient for forward iteration over code points. |
| 50 | * The other major difference is that this API can do both code unit and code point |
| 51 | * iteration, <code>java.text.CharacterIterator</code> can only iterate over |
| 52 | * code units and is limited to BMP (0 - 0xFFFF) |
| 53 | * @author Ram |
| 54 | * @stable ICU 2.4 |
| 55 | */ |
| 56 | public abstract class UCharacterIterator |
| 57 | implements Cloneable { |
| 58 | |
| 59 | /** |
| 60 | * Protected default constructor for the subclasses |
| 61 | * @stable ICU 2.4 |
| 62 | */ |
| 63 | protected UCharacterIterator(){ |
| 64 | } |
| 65 | |
| 66 | /** |
| 67 | * Indicator that we have reached the ends of the UTF16 text. |
| 68 | * Moved from UForwardCharacterIterator.java |
| 69 | * @stable ICU 2.4 |
| 70 | */ |
| 71 | public static final int DONE = -1; |
| 72 | |
| 73 | // static final methods ---------------------------------------------------- |
| 74 | |
| 75 | /** |
| 76 | * Returns a <code>UCharacterIterator</code> object given a |
| 77 | * source string. |
| 78 | * @param source a string |
| 79 | * @return UCharacterIterator object |
| 80 | * @exception IllegalArgumentException if the argument is null |
| 81 | * @stable ICU 2.4 |
| 82 | */ |
| 83 | public static final UCharacterIterator getInstance(String source){ |
| 84 | return new ReplaceableUCharacterIterator(source); |
| 85 | } |
| 86 | |
| 87 | //// for StringPrep |
| 88 | /** |
| 89 | * Returns a <code>UCharacterIterator</code> object given a |
| 90 | * source StringBuffer. |
| 91 | * @param source an string buffer of UTF-16 code units |
| 92 | * @return UCharacterIterator object |
| 93 | * @exception IllegalArgumentException if the argument is null |
| 94 | * @stable ICU 2.4 |
| 95 | */ |
| 96 | public static final UCharacterIterator getInstance(StringBuffer source){ |
| 97 | return new ReplaceableUCharacterIterator(source); |
| 98 | } |
| 99 | |
| 100 | /** |
| 101 | * Returns a <code>UCharacterIterator</code> object given a |
| 102 | * CharacterIterator. |
| 103 | * @param source a valid CharacterIterator object. |
| 104 | * @return UCharacterIterator object |
| 105 | * @exception IllegalArgumentException if the argument is null |
| 106 | * @stable ICU 2.4 |
| 107 | */ |
| 108 | public static final UCharacterIterator getInstance(CharacterIterator source){ |
| 109 | return new CharacterIteratorWrapper(source); |
| 110 | } |
| 111 | |
| 112 | // public methods ---------------------------------------------------------- |
| 113 | |
| 114 | /** |
| 115 | * Returns the code unit at the current index. If index is out |
| 116 | * of range, returns DONE. Index is not changed. |
| 117 | * @return current code unit |
| 118 | * @stable ICU 2.4 |
| 119 | */ |
| 120 | public abstract int current(); |
| 121 | |
| 122 | /** |
| 123 | * Returns the length of the text |
| 124 | * @return length of the text |
| 125 | * @stable ICU 2.4 |
| 126 | */ |
| 127 | public abstract int getLength(); |
| 128 | |
| 129 | |
| 130 | /** |
| 131 | * Gets the current index in text. |
| 132 | * @return current index in text. |
| 133 | * @stable ICU 2.4 |
| 134 | */ |
| 135 | public abstract int getIndex(); |
| 136 | |
| 137 | |
| 138 | /** |
| 139 | * Returns the UTF16 code unit at index, and increments to the next |
| 140 | * code unit (post-increment semantics). If index is out of |
| 141 | * range, DONE is returned, and the iterator is reset to the limit |
| 142 | * of the text. |
| 143 | * @return the next UTF16 code unit, or DONE if the index is at the limit |
| 144 | * of the text. |
| 145 | * @stable ICU 2.4 |
| 146 | */ |
| 147 | public abstract int next(); |
| 148 | |
| 149 | /** |
| 150 | * Returns the code point at index, and increments to the next code |
| 151 | * point (post-increment semantics). If index does not point to a |
| 152 | * valid surrogate pair, the behavior is the same as |
| 153 | * <code>next()</code>. Otherwise the iterator is incremented past |
| 154 | * the surrogate pair, and the code point represented by the pair |
| 155 | * is returned. |
| 156 | * @return the next codepoint in text, or DONE if the index is at |
| 157 | * the limit of the text. |
| 158 | * @stable ICU 2.4 |
| 159 | */ |
| 160 | public int nextCodePoint(){ |
| 161 | int ch1 = next(); |
| 162 | if(UTF16.isLeadSurrogate((char)ch1)){ |
| 163 | int ch2 = next(); |
| 164 | if(UTF16.isTrailSurrogate((char)ch2)){ |
| 165 | return UCharacterProperty.getRawSupplementary((char)ch1, |
| 166 | (char)ch2); |
| 167 | }else if (ch2 != DONE) { |
| 168 | // unmatched surrogate so back out |
| 169 | previous(); |
| 170 | } |
| 171 | } |
| 172 | return ch1; |
| 173 | } |
| 174 | |
| 175 | /** |
| 176 | * Decrement to the position of the previous code unit in the |
| 177 | * text, and return it (pre-decrement semantics). If the |
| 178 | * resulting index is less than 0, the index is reset to 0 and |
| 179 | * DONE is returned. |
| 180 | * @return the previous code unit in the text, or DONE if the new |
| 181 | * index is before the start of the text. |
| 182 | * @stable ICU 2.4 |
| 183 | */ |
| 184 | public abstract int previous(); |
| 185 | |
| 186 | /** |
| 187 | * Sets the index to the specified index in the text. |
| 188 | * @param index the index within the text. |
| 189 | * @exception IndexOutOfBoundsException is thrown if an invalid index is |
| 190 | * supplied |
| 191 | * @stable ICU 2.4 |
| 192 | */ |
| 193 | public abstract void setIndex(int index); |
| 194 | |
| 195 | //// for StringPrep |
| 196 | /** |
| 197 | * Fills the buffer with the underlying text storage of the iterator |
| 198 | * If the buffer capacity is not enough a exception is thrown. The capacity |
| 199 | * of the fill in buffer should at least be equal to length of text in the |
| 200 | * iterator obtained by calling <code>getLength()</code>. |
| 201 | * <b>Usage:</b> |
| 202 | * |
| 203 | * <code> |
| 204 | * <pre> |
| 205 | * UChacterIterator iter = new UCharacterIterator.getInstance(text); |
| 206 | * char[] buf = new char[iter.getLength()]; |
| 207 | * iter.getText(buf); |
| 208 | * |
| 209 | * OR |
| 210 | * char[] buf= new char[1]; |
| 211 | * int len = 0; |
| 212 | * for(;;){ |
| 213 | * try{ |
| 214 | * len = iter.getText(buf); |
| 215 | * break; |
| 216 | * }catch(IndexOutOfBoundsException e){ |
| 217 | * buf = new char[iter.getLength()]; |
| 218 | * } |
| 219 | * } |
| 220 | * </pre> |
| 221 | * </code> |
| 222 | * |
| 223 | * @param fillIn an array of chars to fill with the underlying UTF-16 code |
| 224 | * units. |
| 225 | * @param offset the position within the array to start putting the data. |
| 226 | * @return the number of code units added to fillIn, as a convenience |
| 227 | * @exception IndexOutOfBounds exception if there is not enough |
| 228 | * room after offset in the array, or if offset < 0. |
| 229 | * @stable ICU 2.4 |
| 230 | */ |
| 231 | public abstract int getText(char[] fillIn, int offset); |
| 232 | |
| 233 | //// for StringPrep |
| 234 | /** |
| 235 | * Convenience override for <code>getText(char[], int)</code> that provides |
| 236 | * an offset of 0. |
| 237 | * @param fillIn an array of chars to fill with the underlying UTF-16 code |
| 238 | * units. |
| 239 | * @return the number of code units added to fillIn, as a convenience |
| 240 | * @exception IndexOutOfBounds exception if there is not enough |
| 241 | * room in the array. |
| 242 | * @stable ICU 2.4 |
| 243 | */ |
| 244 | public final int getText(char[] fillIn) { |
| 245 | return getText(fillIn, 0); |
| 246 | } |
| 247 | |
| 248 | //// for StringPrep |
| 249 | /** |
| 250 | * Convenience method for returning the underlying text storage as as string |
| 251 | * @return the underlying text storage in the iterator as a string |
| 252 | * @stable ICU 2.4 |
| 253 | */ |
| 254 | public String getText() { |
| 255 | char[] text = new char[getLength()]; |
| 256 | getText(text); |
| 257 | return new String(text); |
| 258 | } |
| 259 | |
| 260 | /** |
| 261 | * Moves the current position by the number of code units |
| 262 | * specified, either forward or backward depending on the sign |
| 263 | * of delta (positive or negative respectively). If the resulting |
| 264 | * index would be less than zero, the index is set to zero, and if |
| 265 | * the resulting index would be greater than limit, the index is |
| 266 | * set to limit. |
| 267 | * |
| 268 | * @param delta the number of code units to move the current |
| 269 | * index. |
| 270 | * @return the new index. |
| 271 | * @exception IndexOutOfBoundsException is thrown if an invalid index is |
| 272 | * supplied |
| 273 | * @stable ICU 2.4 |
| 274 | * |
| 275 | */ |
| 276 | public int moveIndex(int delta) { |
| 277 | int x = Math.max(0, Math.min(getIndex() + delta, getLength())); |
| 278 | setIndex(x); |
| 279 | return x; |
| 280 | } |
| 281 | |
| 282 | /** |
| 283 | * Creates a copy of this iterator, independent from other iterators. |
| 284 | * If it is not possible to clone the iterator, returns null. |
| 285 | * @return copy of this iterator |
| 286 | * @stable ICU 2.4 |
| 287 | */ |
| 288 | public Object clone() throws CloneNotSupportedException{ |
| 289 | return super.clone(); |
| 290 | } |
| 291 | |
| 292 | } |