J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Portions Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | |
| 26 | /* |
| 27 | ******************************************************************************* |
| 28 | * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved * |
| 29 | * * |
| 30 | * The original version of this source code and documentation is copyrighted * |
| 31 | * and owned by IBM, These materials are provided under terms of a License * |
| 32 | * Agreement between IBM and Sun. This technology is protected by multiple * |
| 33 | * US and International patents. This notice and attribution to IBM may not * |
| 34 | * to removed. * |
| 35 | ******************************************************************************* |
| 36 | */ |
| 37 | |
| 38 | package sun.text.normalizer; |
| 39 | |
| 40 | /** |
| 41 | * <p>Class enabling iteration of the values in a Trie.</p> |
| 42 | * <p>Result of each iteration contains the interval of codepoints that have |
| 43 | * the same value type and the value type itself.</p> |
| 44 | * <p>The comparison of each codepoint value is done via extract(), which the |
| 45 | * default implementation is to return the value as it is.</p> |
| 46 | * <p>Method extract() can be overwritten to perform manipulations on |
| 47 | * codepoint values in order to perform specialized comparison.</p> |
| 48 | * <p>TrieIterator is designed to be a generic iterator for the CharTrie |
| 49 | * and the IntTrie, hence to accommodate both types of data, the return |
| 50 | * result will be in terms of int (32 bit) values.</p> |
| 51 | * <p>See com.ibm.icu.text.UCharacterTypeIterator for examples of use.</p> |
| 52 | * <p>Notes for porting utrie_enum from icu4c to icu4j:<br> |
| 53 | * Internally, icu4c's utrie_enum performs all iterations in its body. In Java |
| 54 | * sense, the caller will have to pass a object with a callback function |
| 55 | * UTrieEnumRange(const void *context, UChar32 start, UChar32 limit, |
| 56 | * uint32_t value) into utrie_enum. utrie_enum will then find ranges of |
| 57 | * codepoints with the same value as determined by |
| 58 | * UTrieEnumValue(const void *context, uint32_t value). for each range, |
| 59 | * utrie_enum calls the callback function to perform a task. In this way, |
| 60 | * icu4c performs the iteration within utrie_enum. |
| 61 | * To follow the JDK model, icu4j is slightly different from icu4c. |
| 62 | * Instead of requesting the caller to implement an object for a callback. |
| 63 | * The caller will have to implement a subclass of TrieIterator, fleshing out |
| 64 | * the method extract(int) (equivalent to UTrieEnumValue). Independent of icu4j, |
| 65 | * the caller will have to code his own iteration and flesh out the task |
| 66 | * (equivalent to UTrieEnumRange) to be performed in the iteration loop. |
| 67 | * </p> |
| 68 | * <p>There are basically 3 usage scenarios for porting:</p> |
| 69 | * <p>1) UTrieEnumValue is the only implemented callback then just implement a |
| 70 | * subclass of TrieIterator and override the extract(int) method. The |
| 71 | * extract(int) method is analogus to UTrieEnumValue callback. |
| 72 | * </p> |
| 73 | * <p>2) UTrieEnumValue and UTrieEnumRange both are implemented then implement |
| 74 | * a subclass of TrieIterator, override the extract method and iterate, e.g |
| 75 | * </p> |
| 76 | * <p>utrie_enum(&normTrie, _enumPropertyStartsValue, _enumPropertyStartsRange, |
| 77 | * set);<br> |
| 78 | * In Java :<br> |
| 79 | * <pre> |
| 80 | * class TrieIteratorImpl extends TrieIterator{ |
| 81 | * public TrieIteratorImpl(Trie data){ |
| 82 | * super(data); |
| 83 | * } |
| 84 | * public int extract(int value){ |
| 85 | * // port the implementation of _enumPropertyStartsValue here |
| 86 | * } |
| 87 | * } |
| 88 | * .... |
| 89 | * TrieIterator fcdIter = new TrieIteratorImpl(fcdTrieImpl.fcdTrie); |
| 90 | * while(fcdIter.next(result)) { |
| 91 | * // port the implementation of _enumPropertyStartsRange |
| 92 | * } |
| 93 | * </pre> |
| 94 | * </p> |
| 95 | * <p>3) UTrieEnumRange is the only implemented callback then just implement |
| 96 | * the while loop, when utrie_enum is called |
| 97 | * <pre> |
| 98 | * // utrie_enum(&fcdTrie, NULL, _enumPropertyStartsRange, set); |
| 99 | * TrieIterator fcdIter = new TrieIterator(fcdTrieImpl.fcdTrie); |
| 100 | * while(fcdIter.next(result)){ |
| 101 | * set.add(result.start); |
| 102 | * } |
| 103 | * </pre> |
| 104 | * </p> |
| 105 | * @author synwee |
| 106 | * @see com.ibm.icu.impl.Trie |
| 107 | * @see com.ibm.icu.lang.UCharacterTypeIterator |
| 108 | * @since release 2.1, Jan 17 2002 |
| 109 | */ |
| 110 | public class TrieIterator implements RangeValueIterator |
| 111 | |
| 112 | { |
| 113 | // public constructor --------------------------------------------- |
| 114 | |
| 115 | /** |
| 116 | * TrieEnumeration constructor |
| 117 | * @param trie to be used |
| 118 | * @exception IllegalArgumentException throw when argument is null. |
| 119 | * @draft 2.1 |
| 120 | */ |
| 121 | public TrieIterator(Trie trie) |
| 122 | { |
| 123 | if (trie == null) { |
| 124 | throw new IllegalArgumentException( |
| 125 | "Argument trie cannot be null"); |
| 126 | } |
| 127 | m_trie_ = trie; |
| 128 | // synwee: check that extract belongs to the child class |
| 129 | m_initialValue_ = extract(m_trie_.getInitialValue()); |
| 130 | reset(); |
| 131 | } |
| 132 | |
| 133 | // public methods ------------------------------------------------- |
| 134 | |
| 135 | /** |
| 136 | * <p>Returns true if we are not at the end of the iteration, false |
| 137 | * otherwise.</p> |
| 138 | * <p>The next set of codepoints with the same value type will be |
| 139 | * calculated during this call and returned in the arguement element.</p> |
| 140 | * @param element return result |
| 141 | * @return true if we are not at the end of the iteration, false otherwise. |
| 142 | * @exception NoSuchElementException - if no more elements exist. |
| 143 | * @see com.ibm.icu.util.RangeValueIterator.Element |
| 144 | * @draft 2.1 |
| 145 | */ |
| 146 | public final boolean next(Element element) |
| 147 | { |
| 148 | if (m_nextCodepoint_ > UCharacter.MAX_VALUE) { |
| 149 | return false; |
| 150 | } |
| 151 | if (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE && |
| 152 | calculateNextBMPElement(element)) { |
| 153 | return true; |
| 154 | } |
| 155 | calculateNextSupplementaryElement(element); |
| 156 | return true; |
| 157 | } |
| 158 | |
| 159 | /** |
| 160 | * Resets the iterator to the beginning of the iteration |
| 161 | * @draft 2.1 |
| 162 | */ |
| 163 | public final void reset() |
| 164 | { |
| 165 | m_currentCodepoint_ = 0; |
| 166 | m_nextCodepoint_ = 0; |
| 167 | m_nextIndex_ = 0; |
| 168 | m_nextBlock_ = m_trie_.m_index_[0] << Trie.INDEX_STAGE_2_SHIFT_; |
| 169 | if (m_nextBlock_ == 0) { |
| 170 | m_nextValue_ = m_initialValue_; |
| 171 | } |
| 172 | else { |
| 173 | m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_)); |
| 174 | } |
| 175 | m_nextBlockIndex_ = 0; |
| 176 | m_nextTrailIndexOffset_ = TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_; |
| 177 | } |
| 178 | |
| 179 | // protected methods ---------------------------------------------- |
| 180 | |
| 181 | /** |
| 182 | * Called by next() to extracts a 32 bit value from a trie value |
| 183 | * used for comparison. |
| 184 | * This method is to be overwritten if special manipulation is to be done |
| 185 | * to retrieve a relevant comparison. |
| 186 | * The default function is to return the value as it is. |
| 187 | * @param value a value from the trie |
| 188 | * @return extracted value |
| 189 | * @draft 2.1 |
| 190 | */ |
| 191 | protected int extract(int value) |
| 192 | { |
| 193 | return value; |
| 194 | } |
| 195 | |
| 196 | // private methods ------------------------------------------------ |
| 197 | |
| 198 | /** |
| 199 | * Set the result values |
| 200 | * @param element return result object |
| 201 | * @param start codepoint of range |
| 202 | * @param limit (end + 1) codepoint of range |
| 203 | * @param value common value of range |
| 204 | */ |
| 205 | private final void setResult(Element element, int start, int limit, |
| 206 | int value) |
| 207 | { |
| 208 | element.start = start; |
| 209 | element.limit = limit; |
| 210 | element.value = value; |
| 211 | } |
| 212 | |
| 213 | /** |
| 214 | * Finding the next element. |
| 215 | * This method is called just before returning the result of |
| 216 | * next(). |
| 217 | * We always store the next element before it is requested. |
| 218 | * In the case that we have to continue calculations into the |
| 219 | * supplementary planes, a false will be returned. |
| 220 | * @param element return result object |
| 221 | * @return true if the next range is found, false if we have to proceed to |
| 222 | * the supplementary range. |
| 223 | */ |
| 224 | private final boolean calculateNextBMPElement(Element element) |
| 225 | { |
| 226 | int currentBlock = m_nextBlock_; |
| 227 | int currentValue = m_nextValue_; |
| 228 | m_currentCodepoint_ = m_nextCodepoint_; |
| 229 | m_nextCodepoint_ ++; |
| 230 | m_nextBlockIndex_ ++; |
| 231 | if (!checkBlockDetail(currentValue)) { |
| 232 | setResult(element, m_currentCodepoint_, m_nextCodepoint_, |
| 233 | currentValue); |
| 234 | return true; |
| 235 | } |
| 236 | // synwee check that next block index == 0 here |
| 237 | // enumerate BMP - the main loop enumerates data blocks |
| 238 | while (m_nextCodepoint_ < UCharacter.SUPPLEMENTARY_MIN_VALUE) { |
| 239 | m_nextIndex_ ++; |
| 240 | // because of the way the character is split to form the index |
| 241 | // the lead surrogate and trail surrogate can not be in the |
| 242 | // mid of a block |
| 243 | if (m_nextCodepoint_ == LEAD_SURROGATE_MIN_VALUE_) { |
| 244 | // skip lead surrogate code units, |
| 245 | // go to lead surrogate codepoints |
| 246 | m_nextIndex_ = BMP_INDEX_LENGTH_; |
| 247 | } |
| 248 | else if (m_nextCodepoint_ == TRAIL_SURROGATE_MIN_VALUE_) { |
| 249 | // go back to regular BMP code points |
| 250 | m_nextIndex_ = m_nextCodepoint_ >> Trie.INDEX_STAGE_1_SHIFT_; |
| 251 | } |
| 252 | |
| 253 | m_nextBlockIndex_ = 0; |
| 254 | if (!checkBlock(currentBlock, currentValue)) { |
| 255 | setResult(element, m_currentCodepoint_, m_nextCodepoint_, |
| 256 | currentValue); |
| 257 | return true; |
| 258 | } |
| 259 | } |
| 260 | m_nextCodepoint_ --; // step one back since this value has not been |
| 261 | m_nextBlockIndex_ --; // retrieved yet. |
| 262 | return false; |
| 263 | } |
| 264 | |
| 265 | /** |
| 266 | * Finds the next supplementary element. |
| 267 | * For each entry in the trie, the value to be delivered is passed through |
| 268 | * extract(). |
| 269 | * We always store the next element before it is requested. |
| 270 | * Called after calculateNextBMP() completes its round of BMP characters. |
| 271 | * There is a slight difference in the usage of m_currentCodepoint_ |
| 272 | * here as compared to calculateNextBMP(). Though both represents the |
| 273 | * lower bound of the next element, in calculateNextBMP() it gets set |
| 274 | * at the start of any loop, where-else, in calculateNextSupplementary() |
| 275 | * since m_currentCodepoint_ already contains the lower bound of the |
| 276 | * next element (passed down from calculateNextBMP()), we keep it till |
| 277 | * the end before resetting it to the new value. |
| 278 | * Note, if there are no more iterations, it will never get to here. |
| 279 | * Blocked out by next(). |
| 280 | * @param element return result object |
| 281 | * @draft 2.1 |
| 282 | */ |
| 283 | private final void calculateNextSupplementaryElement(Element element) |
| 284 | { |
| 285 | int currentValue = m_nextValue_; |
| 286 | int currentBlock = m_nextBlock_; |
| 287 | m_nextCodepoint_ ++; |
| 288 | m_nextBlockIndex_ ++; |
| 289 | |
| 290 | if (UTF16.getTrailSurrogate(m_nextCodepoint_) |
| 291 | != UTF16.TRAIL_SURROGATE_MIN_VALUE) { |
| 292 | // this piece is only called when we are in the middle of a lead |
| 293 | // surrogate block |
| 294 | if (!checkNullNextTrailIndex() && !checkBlockDetail(currentValue)) { |
| 295 | setResult(element, m_currentCodepoint_, m_nextCodepoint_, |
| 296 | currentValue); |
| 297 | m_currentCodepoint_ = m_nextCodepoint_; |
| 298 | return; |
| 299 | } |
| 300 | // we have cleared one block |
| 301 | m_nextIndex_ ++; |
| 302 | m_nextTrailIndexOffset_ ++; |
| 303 | if (!checkTrailBlock(currentBlock, currentValue)) { |
| 304 | setResult(element, m_currentCodepoint_, m_nextCodepoint_, |
| 305 | currentValue); |
| 306 | m_currentCodepoint_ = m_nextCodepoint_; |
| 307 | return; |
| 308 | } |
| 309 | } |
| 310 | int nextLead = UTF16.getLeadSurrogate(m_nextCodepoint_); |
| 311 | // enumerate supplementary code points |
| 312 | while (nextLead < TRAIL_SURROGATE_MIN_VALUE_) { |
| 313 | // lead surrogate access |
| 314 | int leadBlock = |
| 315 | m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] << |
| 316 | Trie.INDEX_STAGE_2_SHIFT_; |
| 317 | if (leadBlock == m_trie_.m_dataOffset_) { |
| 318 | // no entries for a whole block of lead surrogates |
| 319 | if (currentValue != m_initialValue_) { |
| 320 | m_nextValue_ = m_initialValue_; |
| 321 | m_nextBlock_ = 0; |
| 322 | m_nextBlockIndex_ = 0; |
| 323 | setResult(element, m_currentCodepoint_, m_nextCodepoint_, |
| 324 | currentValue); |
| 325 | m_currentCodepoint_ = m_nextCodepoint_; |
| 326 | return; |
| 327 | } |
| 328 | |
| 329 | nextLead += DATA_BLOCK_LENGTH_; |
| 330 | // number of total affected supplementary codepoints in one |
| 331 | // block |
| 332 | // this is not a simple addition of |
| 333 | // DATA_BLOCK_SUPPLEMENTARY_LENGTH since we need to consider |
| 334 | // that we might have moved some of the codepoints |
| 335 | m_nextCodepoint_ = UCharacterProperty.getRawSupplementary( |
| 336 | (char)nextLead, |
| 337 | (char)UTF16.TRAIL_SURROGATE_MIN_VALUE); |
| 338 | continue; |
| 339 | } |
| 340 | if (m_trie_.m_dataManipulate_ == null) { |
| 341 | throw new NullPointerException( |
| 342 | "The field DataManipulate in this Trie is null"); |
| 343 | } |
| 344 | // enumerate trail surrogates for this lead surrogate |
| 345 | m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset( |
| 346 | m_trie_.getValue(leadBlock + |
| 347 | (nextLead & Trie.INDEX_STAGE_3_MASK_))); |
| 348 | if (m_nextIndex_ <= 0) { |
| 349 | // no data for this lead surrogate |
| 350 | if (currentValue != m_initialValue_) { |
| 351 | m_nextValue_ = m_initialValue_; |
| 352 | m_nextBlock_ = 0; |
| 353 | m_nextBlockIndex_ = 0; |
| 354 | setResult(element, m_currentCodepoint_, m_nextCodepoint_, |
| 355 | currentValue); |
| 356 | m_currentCodepoint_ = m_nextCodepoint_; |
| 357 | return; |
| 358 | } |
| 359 | m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_; |
| 360 | } else { |
| 361 | m_nextTrailIndexOffset_ = 0; |
| 362 | if (!checkTrailBlock(currentBlock, currentValue)) { |
| 363 | setResult(element, m_currentCodepoint_, m_nextCodepoint_, |
| 364 | currentValue); |
| 365 | m_currentCodepoint_ = m_nextCodepoint_; |
| 366 | return; |
| 367 | } |
| 368 | } |
| 369 | nextLead ++; |
| 370 | } |
| 371 | |
| 372 | // deliver last range |
| 373 | setResult(element, m_currentCodepoint_, UCharacter.MAX_VALUE + 1, |
| 374 | currentValue); |
| 375 | } |
| 376 | |
| 377 | /** |
| 378 | * Internal block value calculations |
| 379 | * Performs calculations on a data block to find codepoints in m_nextBlock_ |
| 380 | * after the index m_nextBlockIndex_ that has the same value. |
| 381 | * Note m_*_ variables at this point is the next codepoint whose value |
| 382 | * has not been calculated. |
| 383 | * But when returned with false, it will be the last codepoint whose |
| 384 | * value has been calculated. |
| 385 | * @param currentValue the value which other codepoints are tested against |
| 386 | * @return true if the whole block has the same value as currentValue or if |
| 387 | * the whole block has been calculated, false otherwise. |
| 388 | */ |
| 389 | private final boolean checkBlockDetail(int currentValue) |
| 390 | { |
| 391 | while (m_nextBlockIndex_ < DATA_BLOCK_LENGTH_) { |
| 392 | m_nextValue_ = extract(m_trie_.getValue(m_nextBlock_ + |
| 393 | m_nextBlockIndex_)); |
| 394 | if (m_nextValue_ != currentValue) { |
| 395 | return false; |
| 396 | } |
| 397 | ++ m_nextBlockIndex_; |
| 398 | ++ m_nextCodepoint_; |
| 399 | } |
| 400 | return true; |
| 401 | } |
| 402 | |
| 403 | /** |
| 404 | * Internal block value calculations |
| 405 | * Performs calculations on a data block to find codepoints in m_nextBlock_ |
| 406 | * that has the same value. |
| 407 | * Will call checkBlockDetail() if highlevel check fails. |
| 408 | * Note m_*_ variables at this point is the next codepoint whose value |
| 409 | * has not been calculated. |
| 410 | * @param currentBlock the initial block containing all currentValue |
| 411 | * @param currentValue the value which other codepoints are tested against |
| 412 | * @return true if the whole block has the same value as currentValue or if |
| 413 | * the whole block has been calculated, false otherwise. |
| 414 | */ |
| 415 | private final boolean checkBlock(int currentBlock, int currentValue) |
| 416 | { |
| 417 | m_nextBlock_ = m_trie_.m_index_[m_nextIndex_] << |
| 418 | Trie.INDEX_STAGE_2_SHIFT_; |
| 419 | if (m_nextBlock_ == currentBlock && |
| 420 | (m_nextCodepoint_ - m_currentCodepoint_) >= DATA_BLOCK_LENGTH_) { |
| 421 | // the block is the same as the previous one, filled with |
| 422 | // currentValue |
| 423 | m_nextCodepoint_ += DATA_BLOCK_LENGTH_; |
| 424 | } |
| 425 | else if (m_nextBlock_ == 0) { |
| 426 | // this is the all-initial-value block |
| 427 | if (currentValue != m_initialValue_) { |
| 428 | m_nextValue_ = m_initialValue_; |
| 429 | m_nextBlockIndex_ = 0; |
| 430 | return false; |
| 431 | } |
| 432 | m_nextCodepoint_ += DATA_BLOCK_LENGTH_; |
| 433 | } |
| 434 | else { |
| 435 | if (!checkBlockDetail(currentValue)) { |
| 436 | return false; |
| 437 | } |
| 438 | } |
| 439 | return true; |
| 440 | } |
| 441 | |
| 442 | /** |
| 443 | * Internal block value calculations |
| 444 | * Performs calculations on multiple data blocks for a set of trail |
| 445 | * surrogates to find codepoints in m_nextBlock_ that has the same value. |
| 446 | * Will call checkBlock() for internal block checks. |
| 447 | * Note m_*_ variables at this point is the next codepoint whose value |
| 448 | * has not been calculated. |
| 449 | * @param currentBlock the initial block containing all currentValue |
| 450 | * @param currentValue the value which other codepoints are tested against |
| 451 | * @return true if the whole block has the same value as currentValue or if |
| 452 | * the whole block has been calculated, false otherwise. |
| 453 | */ |
| 454 | private final boolean checkTrailBlock(int currentBlock, |
| 455 | int currentValue) |
| 456 | { |
| 457 | // enumerate code points for this lead surrogate |
| 458 | while (m_nextTrailIndexOffset_ < TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_) |
| 459 | { |
| 460 | // if we ever reach here, we are at the start of a new block |
| 461 | m_nextBlockIndex_ = 0; |
| 462 | // copy of most of the body of the BMP loop |
| 463 | if (!checkBlock(currentBlock, currentValue)) { |
| 464 | return false; |
| 465 | } |
| 466 | m_nextTrailIndexOffset_ ++; |
| 467 | m_nextIndex_ ++; |
| 468 | } |
| 469 | return true; |
| 470 | } |
| 471 | |
| 472 | /** |
| 473 | * Checks if we are beginning at the start of a initial block. |
| 474 | * If we are then the rest of the codepoints in this initial block |
| 475 | * has the same values. |
| 476 | * We increment m_nextCodepoint_ and relevant data members if so. |
| 477 | * This is used only in for the supplementary codepoints because |
| 478 | * the offset to the trail indexes could be 0. |
| 479 | * @return true if we are at the start of a initial block. |
| 480 | */ |
| 481 | private final boolean checkNullNextTrailIndex() |
| 482 | { |
| 483 | if (m_nextIndex_ <= 0) { |
| 484 | m_nextCodepoint_ += TRAIL_SURROGATE_COUNT_ - 1; |
| 485 | int nextLead = UTF16.getLeadSurrogate(m_nextCodepoint_); |
| 486 | int leadBlock = |
| 487 | m_trie_.m_index_[nextLead >> Trie.INDEX_STAGE_1_SHIFT_] << |
| 488 | Trie.INDEX_STAGE_2_SHIFT_; |
| 489 | if (m_trie_.m_dataManipulate_ == null) { |
| 490 | throw new NullPointerException( |
| 491 | "The field DataManipulate in this Trie is null"); |
| 492 | } |
| 493 | m_nextIndex_ = m_trie_.m_dataManipulate_.getFoldingOffset( |
| 494 | m_trie_.getValue(leadBlock + |
| 495 | (nextLead & Trie.INDEX_STAGE_3_MASK_))); |
| 496 | m_nextIndex_ --; |
| 497 | m_nextBlockIndex_ = DATA_BLOCK_LENGTH_; |
| 498 | return true; |
| 499 | } |
| 500 | return false; |
| 501 | } |
| 502 | |
| 503 | // private data members -------------------------------------------- |
| 504 | |
| 505 | /** |
| 506 | * Size of the stage 1 BMP indexes |
| 507 | */ |
| 508 | private static final int BMP_INDEX_LENGTH_ = |
| 509 | 0x10000 >> Trie.INDEX_STAGE_1_SHIFT_; |
| 510 | /** |
| 511 | * Lead surrogate minimum value |
| 512 | */ |
| 513 | private static final int LEAD_SURROGATE_MIN_VALUE_ = 0xD800; |
| 514 | /** |
| 515 | * Trail surrogate minimum value |
| 516 | */ |
| 517 | private static final int TRAIL_SURROGATE_MIN_VALUE_ = 0xDC00; |
| 518 | /** |
| 519 | * Trail surrogate maximum value |
| 520 | */ |
| 521 | private static final int TRAIL_SURROGATE_MAX_VALUE_ = 0xDFFF; |
| 522 | /** |
| 523 | * Number of trail surrogate |
| 524 | */ |
| 525 | private static final int TRAIL_SURROGATE_COUNT_ = 0x400; |
| 526 | /** |
| 527 | * Number of stage 1 indexes for supplementary calculations that maps to |
| 528 | * each lead surrogate character. |
| 529 | * See second pass into getRawOffset for the trail surrogate character. |
| 530 | * 10 for significant number of bits for trail surrogates, 5 for what we |
| 531 | * discard during shifting. |
| 532 | */ |
| 533 | private static final int TRAIL_SURROGATE_INDEX_BLOCK_LENGTH_ = |
| 534 | 1 << (10 - Trie.INDEX_STAGE_1_SHIFT_); |
| 535 | /** |
| 536 | * Number of data values in a stage 2 (data array) block. |
| 537 | */ |
| 538 | private static final int DATA_BLOCK_LENGTH_ = |
| 539 | 1 << Trie.INDEX_STAGE_1_SHIFT_; |
| 540 | /** |
| 541 | * Number of codepoints in a stage 2 block |
| 542 | */ |
| 543 | private static final int DATA_BLOCK_SUPPLEMENTARY_LENGTH_ = |
| 544 | DATA_BLOCK_LENGTH_ << 10; |
| 545 | /** |
| 546 | * Trie instance |
| 547 | */ |
| 548 | private Trie m_trie_; |
| 549 | /** |
| 550 | * Initial value for trie values |
| 551 | */ |
| 552 | private int m_initialValue_; |
| 553 | /** |
| 554 | * Next element results and data. |
| 555 | */ |
| 556 | private int m_currentCodepoint_; |
| 557 | private int m_nextCodepoint_; |
| 558 | private int m_nextValue_; |
| 559 | private int m_nextIndex_; |
| 560 | private int m_nextBlock_; |
| 561 | private int m_nextBlockIndex_; |
| 562 | private int m_nextTrailIndexOffset_; |
| 563 | /** |
| 564 | * This is the return result element |
| 565 | */ |
| 566 | private int m_start_; |
| 567 | private int m_limit_; |
| 568 | private int m_value_; |
| 569 | } |