J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Portions Copyright 2005 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | |
| 26 | /* |
| 27 | ******************************************************************************* |
| 28 | * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved * |
| 29 | * * |
| 30 | * The original version of this source code and documentation is copyrighted * |
| 31 | * and owned by IBM, These materials are provided under terms of a License * |
| 32 | * Agreement between IBM and Sun. This technology is protected by multiple * |
| 33 | * US and International patents. This notice and attribution to IBM may not * |
| 34 | * to removed. * |
| 35 | ******************************************************************************* |
| 36 | */ |
| 37 | |
| 38 | package sun.text.normalizer; |
| 39 | |
| 40 | import java.util.Iterator; |
| 41 | |
| 42 | /** |
| 43 | * UnicodeSetIterator iterates over the contents of a UnicodeSet. It |
| 44 | * iterates over either code points or code point ranges. After all |
| 45 | * code points or ranges have been returned, it returns the |
| 46 | * multicharacter strings of the UnicodSet, if any. |
| 47 | * |
| 48 | * <p>To iterate over code points, use a loop like this: |
| 49 | * <pre> |
| 50 | * UnicodeSetIterator it(set); |
| 51 | * while (set.next()) { |
| 52 | * if (set.codepoint != UnicodeSetIterator::IS_STRING) { |
| 53 | * processCodepoint(set.codepoint); |
| 54 | * } else { |
| 55 | * processString(set.string); |
| 56 | * } |
| 57 | * } |
| 58 | * </pre> |
| 59 | * |
| 60 | * <p>To iterate over code point ranges, use a loop like this: |
| 61 | * <pre> |
| 62 | * UnicodeSetIterator it(set); |
| 63 | * while (set.nextRange()) { |
| 64 | * if (set.codepoint != UnicodeSetIterator::IS_STRING) { |
| 65 | * processCodepointRange(set.codepoint, set.codepointEnd); |
| 66 | * } else { |
| 67 | * processString(set.string); |
| 68 | * } |
| 69 | * } |
| 70 | * </pre> |
| 71 | * @author M. Davis |
| 72 | * @stable ICU 2.0 |
| 73 | */ |
| 74 | public class UnicodeSetIterator { |
| 75 | |
| 76 | /** |
| 77 | * Value of <tt>codepoint</tt> if the iterator points to a string. |
| 78 | * If <tt>codepoint == IS_STRING</tt>, then examine |
| 79 | * <tt>string</tt> for the current iteration result. |
| 80 | * @stable ICU 2.0 |
| 81 | */ |
| 82 | public static int IS_STRING = -1; |
| 83 | |
| 84 | /** |
| 85 | * Current code point, or the special value <tt>IS_STRING</tt>, if |
| 86 | * the iterator points to a string. |
| 87 | * @stable ICU 2.0 |
| 88 | */ |
| 89 | public int codepoint; |
| 90 | |
| 91 | /** |
| 92 | * When iterating over ranges using <tt>nextRange()</tt>, |
| 93 | * <tt>codepointEnd</tt> contains the inclusive end of the |
| 94 | * iteration range, if <tt>codepoint != IS_STRING</tt>. If |
| 95 | * iterating over code points using <tt>next()</tt>, or if |
| 96 | * <tt>codepoint == IS_STRING</tt>, then the value of |
| 97 | * <tt>codepointEnd</tt> is undefined. |
| 98 | * @stable ICU 2.0 |
| 99 | */ |
| 100 | public int codepointEnd; |
| 101 | |
| 102 | /** |
| 103 | * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points |
| 104 | * to the current string. If <tt>codepoint != IS_STRING</tt>, the |
| 105 | * value of <tt>string</tt> is undefined. |
| 106 | * @stable ICU 2.0 |
| 107 | */ |
| 108 | public String string; |
| 109 | |
| 110 | /** |
| 111 | * Create an iterator over the given set. |
| 112 | * @param set set to iterate over |
| 113 | * @stable ICU 2.0 |
| 114 | */ |
| 115 | public UnicodeSetIterator(UnicodeSet set) { |
| 116 | reset(set); |
| 117 | } |
| 118 | |
| 119 | /** |
| 120 | * Returns the next element in the set, either a code point range |
| 121 | * or a string. If there are no more elements in the set, return |
| 122 | * false. If <tt>codepoint == IS_STRING</tt>, the value is a |
| 123 | * string in the <tt>string</tt> field. Otherwise the value is a |
| 124 | * range of one or more code points from <tt>codepoint</tt> to |
| 125 | * <tt>codepointeEnd</tt> inclusive. |
| 126 | * |
| 127 | * <p>The order of iteration is all code points ranges in sorted |
| 128 | * order, followed by all strings sorted order. Ranges are |
| 129 | * disjoint and non-contiguous. <tt>string</tt> is undefined |
| 130 | * unless <tt>codepoint == IS_STRING</tt>. Do not mix calls to |
| 131 | * <tt>next()</tt> and <tt>nextRange()</tt> without calling |
| 132 | * <tt>reset()</tt> between them. The results of doing so are |
| 133 | * undefined. |
| 134 | * |
| 135 | * @return true if there was another element in the set and this |
| 136 | * object contains the element. |
| 137 | * @stable ICU 2.0 |
| 138 | */ |
| 139 | public boolean nextRange() { |
| 140 | if (nextElement <= endElement) { |
| 141 | codepointEnd = endElement; |
| 142 | codepoint = nextElement; |
| 143 | nextElement = endElement+1; |
| 144 | return true; |
| 145 | } |
| 146 | if (range < endRange) { |
| 147 | loadRange(++range); |
| 148 | codepointEnd = endElement; |
| 149 | codepoint = nextElement; |
| 150 | nextElement = endElement+1; |
| 151 | return true; |
| 152 | } |
| 153 | |
| 154 | // stringIterator == null iff there are no string elements remaining |
| 155 | |
| 156 | if (stringIterator == null) return false; |
| 157 | codepoint = IS_STRING; // signal that value is actually a string |
| 158 | string = (String)stringIterator.next(); |
| 159 | if (!stringIterator.hasNext()) stringIterator = null; |
| 160 | return true; |
| 161 | } |
| 162 | |
| 163 | /** |
| 164 | * Sets this iterator to visit the elements of the given set and |
| 165 | * resets it to the start of that set. The iterator is valid only |
| 166 | * so long as <tt>set</tt> is valid. |
| 167 | * @param set the set to iterate over. |
| 168 | * @stable ICU 2.0 |
| 169 | */ |
| 170 | public void reset(UnicodeSet set) { |
| 171 | this.set = set; |
| 172 | reset(); |
| 173 | } |
| 174 | |
| 175 | /** |
| 176 | * Resets this iterator to the start of the set. |
| 177 | * @stable ICU 2.0 |
| 178 | */ |
| 179 | public void reset() { |
| 180 | endRange = set.getRangeCount() - 1; |
| 181 | range = 0; |
| 182 | endElement = -1; |
| 183 | nextElement = 0; |
| 184 | if (endRange >= 0) { |
| 185 | loadRange(range); |
| 186 | } |
| 187 | stringIterator = null; |
| 188 | if (set.strings != null) { |
| 189 | stringIterator = set.strings.iterator(); |
| 190 | if (!stringIterator.hasNext()) stringIterator = null; |
| 191 | } |
| 192 | } |
| 193 | |
| 194 | // ======================= PRIVATES =========================== |
| 195 | |
| 196 | private UnicodeSet set; |
| 197 | private int endRange = 0; |
| 198 | private int range = 0; |
| 199 | /** |
| 200 | * @internal |
| 201 | */ |
| 202 | protected int endElement; |
| 203 | /** |
| 204 | * @internal |
| 205 | */ |
| 206 | protected int nextElement; |
| 207 | private Iterator stringIterator = null; |
| 208 | |
| 209 | /** |
| 210 | * Invariant: stringIterator is null when there are no (more) strings remaining |
| 211 | */ |
| 212 | |
| 213 | /** |
| 214 | * @internal |
| 215 | */ |
| 216 | protected void loadRange(int range) { |
| 217 | nextElement = set.getRangeStart(range); |
| 218 | endElement = set.getRangeEnd(range); |
| 219 | } |
| 220 | } |