J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Portions Copyright 2000-2003 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | |
| 26 | /* |
| 27 | * (C) Copyright IBM Corp. 1999-2003 - All Rights Reserved |
| 28 | * |
| 29 | * The original version of this source code and documentation is |
| 30 | * copyrighted and owned by IBM. These materials are provided |
| 31 | * under terms of a License Agreement between IBM and Sun. |
| 32 | * This technology is protected by multiple US and International |
| 33 | * patents. This notice and attribution to IBM may not be removed. |
| 34 | */ |
| 35 | |
| 36 | /* |
| 37 | * file name: ubidiln.c |
| 38 | * encoding: US-ASCII |
| 39 | * tab size: 8 (not used) |
| 40 | * indentation:4 |
| 41 | * |
| 42 | * created on: 1999aug06 |
| 43 | * created by: Markus W. Scherer |
| 44 | */ |
| 45 | |
| 46 | /* set import/export definitions */ |
| 47 | #ifndef U_COMMON_IMPLEMENTATION |
| 48 | # define U_COMMON_IMPLEMENTATION |
| 49 | #endif |
| 50 | |
| 51 | #include "cmemory.h" |
| 52 | #include "utypes.h" |
| 53 | #include "uchardir.h" |
| 54 | #include "ubidi.h" |
| 55 | #include "ubidiimp.h" |
| 56 | |
| 57 | /* |
| 58 | * General remarks about the functions in this file: |
| 59 | * |
| 60 | * These functions deal with the aspects of potentially mixed-directional |
| 61 | * text in a single paragraph or in a line of a single paragraph |
| 62 | * which has already been processed according to |
| 63 | * the Unicode 3.0 BiDi algorithm as defined in |
| 64 | * http://www.unicode.org/unicode/reports/tr9/ , version 5, |
| 65 | * also described in The Unicode Standard, Version 3.0 . |
| 66 | * |
| 67 | * This means that there is a UBiDi object with a levels |
| 68 | * and a dirProps array. |
| 69 | * paraLevel and direction are also set. |
| 70 | * Only if the length of the text is zero, then levels==dirProps==NULL. |
| 71 | * |
| 72 | * The overall directionality of the paragraph |
| 73 | * or line is used to bypass the reordering steps if possible. |
| 74 | * Even purely RTL text does not need reordering there because |
| 75 | * the ubidi_getLogical/VisualIndex() functions can compute the |
| 76 | * index on the fly in such a case. |
| 77 | * |
| 78 | * The implementation of the access to same-level-runs and of the reordering |
| 79 | * do attempt to provide better performance and less memory usage compared to |
| 80 | * a direct implementation of especially rule (L2) with an array of |
| 81 | * one (32-bit) integer per text character. |
| 82 | * |
| 83 | * Here, the levels array is scanned as soon as necessary, and a vector of |
| 84 | * same-level-runs is created. Reordering then is done on this vector. |
| 85 | * For each run of text positions that were resolved to the same level, |
| 86 | * only 8 bytes are stored: the first text position of the run and the visual |
| 87 | * position behind the run after reordering. |
| 88 | * One sign bit is used to hold the directionality of the run. |
| 89 | * This is inefficient if there are many very short runs. If the average run |
| 90 | * length is <2, then this uses more memory. |
| 91 | * |
| 92 | * In a further attempt to save memory, the levels array is never changed |
| 93 | * after all the resolution rules (Xn, Wn, Nn, In). |
| 94 | * Many functions have to consider the field trailingWSStart: |
| 95 | * if it is less than length, then there is an implicit trailing run |
| 96 | * at the paraLevel, |
| 97 | * which is not reflected in the levels array. |
| 98 | * This allows a line UBiDi object to use the same levels array as |
| 99 | * its paragraph parent object. |
| 100 | * |
| 101 | * When a UBiDi object is created for a line of a paragraph, then the |
| 102 | * paragraph's levels and dirProps arrays are reused by way of setting |
| 103 | * a pointer into them, not by copying. This again saves memory and forbids to |
| 104 | * change the now shared levels for (L1). |
| 105 | */ |
| 106 | |
| 107 | /* prototypes --------------------------------------------------------------- */ |
| 108 | |
| 109 | static void |
| 110 | setTrailingWSStart(UBiDi *pBiDi); |
| 111 | |
| 112 | static void |
| 113 | getSingleRun(UBiDi *pBiDi, UBiDiLevel level); |
| 114 | |
| 115 | static void |
| 116 | reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel); |
| 117 | |
| 118 | static bool_t |
| 119 | prepareReorder(const UBiDiLevel *levels, int32_t length, |
| 120 | int32_t *indexMap, |
| 121 | UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel); |
| 122 | |
| 123 | /* ubidi_setLine ------------------------------------------------------------ */ |
| 124 | |
| 125 | U_CAPI void U_EXPORT2 |
| 126 | ubidi_setLine(const UBiDi *pParaBiDi, |
| 127 | int32_t start, int32_t limit, |
| 128 | UBiDi *pLineBiDi, |
| 129 | UErrorCode *pErrorCode) { |
| 130 | int32_t length; |
| 131 | |
| 132 | /* check the argument values */ |
| 133 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
| 134 | return; |
| 135 | } else if(pParaBiDi==NULL || pLineBiDi==NULL) { |
| 136 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 137 | return; |
| 138 | } else if(start<0 || start>limit || limit>pParaBiDi->length) { |
| 139 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
| 140 | return; |
| 141 | } |
| 142 | |
| 143 | /* set the values in pLineBiDi from its pParaBiDi parent */ |
| 144 | pLineBiDi->text=pParaBiDi->text+start; |
| 145 | length=pLineBiDi->length=limit-start; |
| 146 | pLineBiDi->paraLevel=pParaBiDi->paraLevel; |
| 147 | |
| 148 | pLineBiDi->runs=NULL; |
| 149 | pLineBiDi->flags=0; |
| 150 | |
| 151 | if(length>0) { |
| 152 | pLineBiDi->dirProps=pParaBiDi->dirProps+start; |
| 153 | pLineBiDi->levels=pParaBiDi->levels+start; |
| 154 | pLineBiDi->runCount=-1; |
| 155 | |
| 156 | if(pParaBiDi->direction!=UBIDI_MIXED) { |
| 157 | /* the parent is already trivial */ |
| 158 | pLineBiDi->direction=pParaBiDi->direction; |
| 159 | |
| 160 | /* |
| 161 | * The parent's levels are all either |
| 162 | * implicitly or explicitly ==paraLevel; |
| 163 | * do the same here. |
| 164 | */ |
| 165 | if(pParaBiDi->trailingWSStart<=start) { |
| 166 | pLineBiDi->trailingWSStart=0; |
| 167 | } else if(pParaBiDi->trailingWSStart<limit) { |
| 168 | pLineBiDi->trailingWSStart=pParaBiDi->trailingWSStart-start; |
| 169 | } else { |
| 170 | pLineBiDi->trailingWSStart=length; |
| 171 | } |
| 172 | } else { |
| 173 | const UBiDiLevel *levels=pLineBiDi->levels; |
| 174 | int32_t i, trailingWSStart; |
| 175 | UBiDiLevel level; |
| 176 | |
| 177 | setTrailingWSStart(pLineBiDi); |
| 178 | trailingWSStart=pLineBiDi->trailingWSStart; |
| 179 | |
| 180 | /* recalculate pLineBiDi->direction */ |
| 181 | if(trailingWSStart==0) { |
| 182 | /* all levels are at paraLevel */ |
| 183 | pLineBiDi->direction=(UBiDiDirection)(pLineBiDi->paraLevel&1); |
| 184 | } else { |
| 185 | /* get the level of the first character */ |
| 186 | level=(UBiDiLevel)(levels[0]&1); |
| 187 | |
| 188 | /* if there is anything of a different level, then the line is mixed */ |
| 189 | if(trailingWSStart<length && (pLineBiDi->paraLevel&1)!=level) { |
| 190 | /* the trailing WS is at paraLevel, which differs from levels[0] */ |
| 191 | pLineBiDi->direction=UBIDI_MIXED; |
| 192 | } else { |
| 193 | /* see if levels[1..trailingWSStart-1] have the same direction as levels[0] and paraLevel */ |
| 194 | i=1; |
| 195 | for(;;) { |
| 196 | if(i==trailingWSStart) { |
| 197 | /* the direction values match those in level */ |
| 198 | pLineBiDi->direction=(UBiDiDirection)level; |
| 199 | break; |
| 200 | } else if((levels[i]&1)!=level) { |
| 201 | pLineBiDi->direction=UBIDI_MIXED; |
| 202 | break; |
| 203 | } |
| 204 | ++i; |
| 205 | } |
| 206 | } |
| 207 | } |
| 208 | |
| 209 | switch(pLineBiDi->direction) { |
| 210 | case UBIDI_LTR: |
| 211 | /* make sure paraLevel is even */ |
| 212 | pLineBiDi->paraLevel=(UBiDiLevel)((pLineBiDi->paraLevel+1)&~1); |
| 213 | |
| 214 | /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ |
| 215 | pLineBiDi->trailingWSStart=0; |
| 216 | break; |
| 217 | case UBIDI_RTL: |
| 218 | /* make sure paraLevel is odd */ |
| 219 | pLineBiDi->paraLevel|=1; |
| 220 | |
| 221 | /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */ |
| 222 | pLineBiDi->trailingWSStart=0; |
| 223 | break; |
| 224 | default: |
| 225 | break; |
| 226 | } |
| 227 | } |
| 228 | } else { |
| 229 | /* create an object for a zero-length line */ |
| 230 | pLineBiDi->direction=pLineBiDi->paraLevel&1 ? UBIDI_RTL : UBIDI_LTR; |
| 231 | pLineBiDi->trailingWSStart=pLineBiDi->runCount=0; |
| 232 | |
| 233 | pLineBiDi->dirProps=NULL; |
| 234 | pLineBiDi->levels=NULL; |
| 235 | } |
| 236 | return; |
| 237 | } |
| 238 | |
| 239 | U_CAPI UBiDiLevel U_EXPORT2 |
| 240 | ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex) { |
| 241 | /* return paraLevel if in the trailing WS run, otherwise the real level */ |
| 242 | if(pBiDi==NULL || charIndex<0 || pBiDi->length<=charIndex) { |
| 243 | return 0; |
| 244 | } else if(pBiDi->direction!=UBIDI_MIXED || charIndex>=pBiDi->trailingWSStart) { |
| 245 | return pBiDi->paraLevel; |
| 246 | } else { |
| 247 | return pBiDi->levels[charIndex]; |
| 248 | } |
| 249 | } |
| 250 | |
| 251 | U_CAPI const UBiDiLevel * U_EXPORT2 |
| 252 | ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { |
| 253 | int32_t start, length; |
| 254 | |
| 255 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
| 256 | return NULL; |
| 257 | } else if(pBiDi==NULL || (length=pBiDi->length)<=0) { |
| 258 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 259 | return NULL; |
| 260 | } |
| 261 | |
| 262 | if((start=pBiDi->trailingWSStart)==length) { |
| 263 | /* the current levels array reflects the WS run */ |
| 264 | return pBiDi->levels; |
| 265 | } |
| 266 | |
| 267 | /* |
| 268 | * After the previous if(), we know that the levels array |
| 269 | * has an implicit trailing WS run and therefore does not fully |
| 270 | * reflect itself all the levels. |
| 271 | * This must be a UBiDi object for a line, and |
| 272 | * we need to create a new levels array. |
| 273 | */ |
| 274 | |
| 275 | if(getLevelsMemory(pBiDi, length)) { |
| 276 | UBiDiLevel *levels=pBiDi->levelsMemory; |
| 277 | |
| 278 | if(start>0 && levels!=pBiDi->levels) { |
| 279 | icu_memcpy(levels, pBiDi->levels, start); |
| 280 | } |
| 281 | icu_memset(levels+start, pBiDi->paraLevel, length-start); |
| 282 | |
| 283 | /* this new levels array is set for the line and reflects the WS run */ |
| 284 | pBiDi->trailingWSStart=length; |
| 285 | return pBiDi->levels=levels; |
| 286 | } else { |
| 287 | /* out of memory */ |
| 288 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 289 | return NULL; |
| 290 | } |
| 291 | } |
| 292 | |
| 293 | U_CAPI void U_EXPORT2 |
| 294 | ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalStart, |
| 295 | int32_t *pLogicalLimit, UBiDiLevel *pLevel) { |
| 296 | int32_t length; |
| 297 | |
| 298 | if(pBiDi==NULL || logicalStart<0 || (length=pBiDi->length)<=logicalStart) { |
| 299 | return; |
| 300 | } |
| 301 | |
| 302 | if(pBiDi->direction!=UBIDI_MIXED || logicalStart>=pBiDi->trailingWSStart) { |
| 303 | if(pLogicalLimit!=NULL) { |
| 304 | *pLogicalLimit=length; |
| 305 | } |
| 306 | if(pLevel!=NULL) { |
| 307 | *pLevel=pBiDi->paraLevel; |
| 308 | } |
| 309 | } else { |
| 310 | UBiDiLevel *levels=pBiDi->levels; |
| 311 | UBiDiLevel level=levels[logicalStart]; |
| 312 | |
| 313 | /* search for the end of the run */ |
| 314 | length=pBiDi->trailingWSStart; |
| 315 | while(++logicalStart<length && level==levels[logicalStart]) {} |
| 316 | |
| 317 | if(pLogicalLimit!=NULL) { |
| 318 | *pLogicalLimit=logicalStart; |
| 319 | } |
| 320 | if(pLevel!=NULL) { |
| 321 | *pLevel=level; |
| 322 | } |
| 323 | } |
| 324 | } |
| 325 | |
| 326 | /* handle trailing WS (L1) -------------------------------------------------- */ |
| 327 | |
| 328 | /* |
| 329 | * setTrailingWSStart() sets the start index for a trailing |
| 330 | * run of WS in the line. This is necessary because we do not modify |
| 331 | * the paragraph's levels array that we just point into. |
| 332 | * Using trailingWSStart is another form of performing (L1). |
| 333 | * |
| 334 | * To make subsequent operations easier, we also include the run |
| 335 | * before the WS if it is at the paraLevel - we merge the two here. |
| 336 | */ |
| 337 | static void |
| 338 | setTrailingWSStart(UBiDi *pBiDi) { |
| 339 | /* pBiDi->direction!=UBIDI_MIXED */ |
| 340 | |
| 341 | const DirProp *dirProps=pBiDi->dirProps; |
| 342 | UBiDiLevel *levels=pBiDi->levels; |
| 343 | int32_t start=pBiDi->length; |
| 344 | UBiDiLevel paraLevel=pBiDi->paraLevel; |
| 345 | |
| 346 | /* go backwards across all WS, BN, explicit codes */ |
| 347 | while(start>0 && DIRPROP_FLAG(dirProps[start-1])&MASK_WS) { |
| 348 | --start; |
| 349 | } |
| 350 | |
| 351 | /* if the WS run can be merged with the previous run then do so here */ |
| 352 | while(start>0 && levels[start-1]==paraLevel) { |
| 353 | --start; |
| 354 | } |
| 355 | |
| 356 | pBiDi->trailingWSStart=start; |
| 357 | } |
| 358 | |
| 359 | /* runs API functions ------------------------------------------------------- */ |
| 360 | |
| 361 | U_CAPI int32_t U_EXPORT2 |
| 362 | ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode) { |
| 363 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
| 364 | return -1; |
| 365 | } else if(pBiDi==NULL || (pBiDi->runCount<0 && !ubidi_getRuns(pBiDi))) { |
| 366 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 367 | return -1; |
| 368 | } else { |
| 369 | return pBiDi->runCount; |
| 370 | } |
| 371 | } |
| 372 | |
| 373 | U_CAPI UBiDiDirection U_EXPORT2 |
| 374 | ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, |
| 375 | int32_t *pLogicalStart, int32_t *pLength) { |
| 376 | if( pBiDi==NULL || runIndex<0 || |
| 377 | (pBiDi->runCount==-1 && !ubidi_getRuns(pBiDi)) || |
| 378 | runIndex>=pBiDi->runCount |
| 379 | ) { |
| 380 | return UBIDI_LTR; |
| 381 | } else { |
| 382 | int32_t start=pBiDi->runs[runIndex].logicalStart; |
| 383 | if(pLogicalStart!=NULL) { |
| 384 | *pLogicalStart=GET_INDEX(start); |
| 385 | } |
| 386 | if(pLength!=NULL) { |
| 387 | if(runIndex>0) { |
| 388 | *pLength=pBiDi->runs[runIndex].visualLimit- |
| 389 | pBiDi->runs[runIndex-1].visualLimit; |
| 390 | } else { |
| 391 | *pLength=pBiDi->runs[0].visualLimit; |
| 392 | } |
| 393 | } |
| 394 | return (UBiDiDirection)GET_ODD_BIT(start); |
| 395 | } |
| 396 | } |
| 397 | |
| 398 | /* compute the runs array --------------------------------------------------- */ |
| 399 | |
| 400 | /* |
| 401 | * Compute the runs array from the levels array. |
| 402 | * After ubidi_getRuns() returns TRUE, runCount is guaranteed to be >0 |
| 403 | * and the runs are reordered. |
| 404 | * Odd-level runs have visualStart on their visual right edge and |
| 405 | * they progress visually to the left. |
| 406 | */ |
| 407 | U_CFUNC bool_t |
| 408 | ubidi_getRuns(UBiDi *pBiDi) { |
| 409 | if(pBiDi->direction!=UBIDI_MIXED) { |
| 410 | /* simple, single-run case - this covers length==0 */ |
| 411 | getSingleRun(pBiDi, pBiDi->paraLevel); |
| 412 | } else /* UBIDI_MIXED, length>0 */ { |
| 413 | /* mixed directionality */ |
| 414 | int32_t length=pBiDi->length, limit; |
| 415 | |
| 416 | /* |
| 417 | * If there are WS characters at the end of the line |
| 418 | * and the run preceding them has a level different from |
| 419 | * paraLevel, then they will form their own run at paraLevel (L1). |
| 420 | * Count them separately. |
| 421 | * We need some special treatment for this in order to not |
| 422 | * modify the levels array which a line UBiDi object shares |
| 423 | * with its paragraph parent and its other line siblings. |
| 424 | * In other words, for the trailing WS, it may be |
| 425 | * levels[]!=paraLevel but we have to treat it like it were so. |
| 426 | */ |
| 427 | limit=pBiDi->trailingWSStart; |
| 428 | if(limit==0) { |
| 429 | /* there is only WS on this line */ |
| 430 | getSingleRun(pBiDi, pBiDi->paraLevel); |
| 431 | } else { |
| 432 | UBiDiLevel *levels=pBiDi->levels; |
| 433 | int32_t i, runCount; |
| 434 | UBiDiLevel level=UBIDI_DEFAULT_LTR; /* initialize with no valid level */ |
| 435 | |
| 436 | /* count the runs, there is at least one non-WS run, and limit>0 */ |
| 437 | runCount=0; |
| 438 | for(i=0; i<limit; ++i) { |
| 439 | /* increment runCount at the start of each run */ |
| 440 | if(levels[i]!=level) { |
| 441 | ++runCount; |
| 442 | level=levels[i]; |
| 443 | } |
| 444 | } |
| 445 | |
| 446 | /* |
| 447 | * We don't need to see if the last run can be merged with a trailing |
| 448 | * WS run because setTrailingWSStart() would have done that. |
| 449 | */ |
| 450 | if(runCount==1 && limit==length) { |
| 451 | /* There is only one non-WS run and no trailing WS-run. */ |
| 452 | getSingleRun(pBiDi, levels[0]); |
| 453 | } else /* runCount>1 || limit<length */ { |
| 454 | /* allocate and set the runs */ |
| 455 | Run *runs; |
| 456 | int32_t runIndex, start; |
| 457 | UBiDiLevel minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1, maxLevel=0; |
| 458 | |
| 459 | /* now, count a (non-mergable) WS run */ |
| 460 | if(limit<length) { |
| 461 | ++runCount; |
| 462 | } |
| 463 | |
| 464 | /* runCount>1 */ |
| 465 | if(getRunsMemory(pBiDi, runCount)) { |
| 466 | runs=pBiDi->runsMemory; |
| 467 | } else { |
| 468 | return FALSE; |
| 469 | } |
| 470 | |
| 471 | /* set the runs */ |
| 472 | /* this could be optimized, e.g.: 464->444, 484->444, 575->555, 595->555 */ |
| 473 | /* however, that would take longer and make other functions more complicated */ |
| 474 | runIndex=0; |
| 475 | |
| 476 | /* search for the run limits and initialize visualLimit values with the run lengths */ |
| 477 | i=0; |
| 478 | do { |
| 479 | /* prepare this run */ |
| 480 | start=i; |
| 481 | level=levels[i]; |
| 482 | if(level<minLevel) { |
| 483 | minLevel=level; |
| 484 | } |
| 485 | if(level>maxLevel) { |
| 486 | maxLevel=level; |
| 487 | } |
| 488 | |
| 489 | /* look for the run limit */ |
| 490 | while(++i<limit && levels[i]==level) {} |
| 491 | |
| 492 | /* i is another run limit */ |
| 493 | runs[runIndex].logicalStart=start; |
| 494 | runs[runIndex].visualLimit=i-start; |
| 495 | ++runIndex; |
| 496 | } while(i<limit); |
| 497 | |
| 498 | if(limit<length) { |
| 499 | /* there is a separate WS run */ |
| 500 | runs[runIndex].logicalStart=limit; |
| 501 | runs[runIndex].visualLimit=length-limit; |
| 502 | if(pBiDi->paraLevel<minLevel) { |
| 503 | minLevel=pBiDi->paraLevel; |
| 504 | } |
| 505 | } |
| 506 | |
| 507 | /* set the object fields */ |
| 508 | pBiDi->runs=runs; |
| 509 | pBiDi->runCount=runCount; |
| 510 | |
| 511 | reorderLine(pBiDi, minLevel, maxLevel); |
| 512 | |
| 513 | /* now add the direction flags and adjust the visualLimit's to be just that */ |
| 514 | ADD_ODD_BIT_FROM_LEVEL(runs[0].logicalStart, levels[runs[0].logicalStart]); |
| 515 | limit=runs[0].visualLimit; |
| 516 | for(i=1; i<runIndex; ++i) { |
| 517 | ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, levels[runs[i].logicalStart]); |
| 518 | limit=runs[i].visualLimit+=limit; |
| 519 | } |
| 520 | |
| 521 | /* same for the trailing WS run */ |
| 522 | if(runIndex<runCount) { |
| 523 | ADD_ODD_BIT_FROM_LEVEL(runs[i].logicalStart, pBiDi->paraLevel); |
| 524 | runs[runIndex].visualLimit+=limit; |
| 525 | } |
| 526 | } |
| 527 | } |
| 528 | } |
| 529 | return TRUE; |
| 530 | } |
| 531 | |
| 532 | /* in trivial cases there is only one trivial run; called by ubidi_getRuns() */ |
| 533 | static void |
| 534 | getSingleRun(UBiDi *pBiDi, UBiDiLevel level) { |
| 535 | /* simple, single-run case */ |
| 536 | pBiDi->runs=pBiDi->simpleRuns; |
| 537 | pBiDi->runCount=1; |
| 538 | |
| 539 | /* fill and reorder the single run */ |
| 540 | pBiDi->runs[0].logicalStart=MAKE_INDEX_ODD_PAIR(0, level); |
| 541 | pBiDi->runs[0].visualLimit=pBiDi->length; |
| 542 | } |
| 543 | |
| 544 | /* reorder the runs array (L2) ---------------------------------------------- */ |
| 545 | |
| 546 | /* |
| 547 | * Reorder the same-level runs in the runs array. |
| 548 | * Here, runCount>1 and maxLevel>=minLevel>=paraLevel. |
| 549 | * All the visualStart fields=logical start before reordering. |
| 550 | * The "odd" bits are not set yet. |
| 551 | * |
| 552 | * Reordering with this data structure lends itself to some handy shortcuts: |
| 553 | * |
| 554 | * Since each run is moved but not modified, and since at the initial maxLevel |
| 555 | * each sequence of same-level runs consists of only one run each, we |
| 556 | * don't need to do anything there and can predecrement maxLevel. |
| 557 | * In many simple cases, the reordering is thus done entirely in the |
| 558 | * index mapping. |
| 559 | * Also, reordering occurs only down to the lowest odd level that occurs, |
| 560 | * which is minLevel|1. However, if the lowest level itself is odd, then |
| 561 | * in the last reordering the sequence of the runs at this level or higher |
| 562 | * will be all runs, and we don't need the elaborate loop to search for them. |
| 563 | * This is covered by ++minLevel instead of minLevel|=1 followed |
| 564 | * by an extra reorder-all after the reorder-some loop. |
| 565 | * About a trailing WS run: |
| 566 | * Such a run would need special treatment because its level is not |
| 567 | * reflected in levels[] if this is not a paragraph object. |
| 568 | * Instead, all characters from trailingWSStart on are implicitly at |
| 569 | * paraLevel. |
| 570 | * However, for all maxLevel>paraLevel, this run will never be reordered |
| 571 | * and does not need to be taken into account. maxLevel==paraLevel is only reordered |
| 572 | * if minLevel==paraLevel is odd, which is done in the extra segment. |
| 573 | * This means that for the main reordering loop we don't need to consider |
| 574 | * this run and can --runCount. If it is later part of the all-runs |
| 575 | * reordering, then runCount is adjusted accordingly. |
| 576 | */ |
| 577 | static void |
| 578 | reorderLine(UBiDi *pBiDi, UBiDiLevel minLevel, UBiDiLevel maxLevel) { |
| 579 | Run *runs; |
| 580 | UBiDiLevel *levels; |
| 581 | int32_t firstRun, endRun, limitRun, runCount, |
| 582 | temp; |
| 583 | |
| 584 | /* nothing to do? */ |
| 585 | if(maxLevel<=(minLevel|1)) { |
| 586 | return; |
| 587 | } |
| 588 | |
| 589 | /* |
| 590 | * Reorder only down to the lowest odd level |
| 591 | * and reorder at an odd minLevel in a separate, simpler loop. |
| 592 | * See comments above for why minLevel is always incremented. |
| 593 | */ |
| 594 | ++minLevel; |
| 595 | |
| 596 | runs=pBiDi->runs; |
| 597 | levels=pBiDi->levels; |
| 598 | runCount=pBiDi->runCount; |
| 599 | |
| 600 | /* do not include the WS run at paraLevel<=old minLevel except in the simple loop */ |
| 601 | if(pBiDi->trailingWSStart<pBiDi->length) { |
| 602 | --runCount; |
| 603 | } |
| 604 | |
| 605 | while(--maxLevel>=minLevel) { |
| 606 | firstRun=0; |
| 607 | |
| 608 | /* loop for all sequences of runs */ |
| 609 | for(;;) { |
| 610 | /* look for a sequence of runs that are all at >=maxLevel */ |
| 611 | /* look for the first run of such a sequence */ |
| 612 | while(firstRun<runCount && levels[runs[firstRun].logicalStart]<maxLevel) { |
| 613 | ++firstRun; |
| 614 | } |
| 615 | if(firstRun>=runCount) { |
| 616 | break; /* no more such runs */ |
| 617 | } |
| 618 | |
| 619 | /* look for the limit run of such a sequence (the run behind it) */ |
| 620 | for(limitRun=firstRun; ++limitRun<runCount && levels[runs[limitRun].logicalStart]>=maxLevel;) {} |
| 621 | |
| 622 | /* Swap the entire sequence of runs from firstRun to limitRun-1. */ |
| 623 | endRun=limitRun-1; |
| 624 | while(firstRun<endRun) { |
| 625 | temp=runs[firstRun].logicalStart; |
| 626 | runs[firstRun].logicalStart=runs[endRun].logicalStart; |
| 627 | runs[endRun].logicalStart=temp; |
| 628 | |
| 629 | temp=runs[firstRun].visualLimit; |
| 630 | runs[firstRun].visualLimit=runs[endRun].visualLimit; |
| 631 | runs[endRun].visualLimit=temp; |
| 632 | |
| 633 | ++firstRun; |
| 634 | --endRun; |
| 635 | } |
| 636 | |
| 637 | if(limitRun==runCount) { |
| 638 | break; /* no more such runs */ |
| 639 | } else { |
| 640 | firstRun=limitRun+1; |
| 641 | } |
| 642 | } |
| 643 | } |
| 644 | |
| 645 | /* now do maxLevel==old minLevel (==odd!), see above */ |
| 646 | if(!(minLevel&1)) { |
| 647 | firstRun=0; |
| 648 | |
| 649 | /* include the trailing WS run in this complete reordering */ |
| 650 | if(pBiDi->trailingWSStart==pBiDi->length) { |
| 651 | --runCount; |
| 652 | } |
| 653 | |
| 654 | /* Swap the entire sequence of all runs. (endRun==runCount) */ |
| 655 | while(firstRun<runCount) { |
| 656 | temp=runs[firstRun].logicalStart; |
| 657 | runs[firstRun].logicalStart=runs[runCount].logicalStart; |
| 658 | runs[runCount].logicalStart=temp; |
| 659 | |
| 660 | temp=runs[firstRun].visualLimit; |
| 661 | runs[firstRun].visualLimit=runs[runCount].visualLimit; |
| 662 | runs[runCount].visualLimit=temp; |
| 663 | |
| 664 | ++firstRun; |
| 665 | --runCount; |
| 666 | } |
| 667 | } |
| 668 | } |
| 669 | |
| 670 | /* reorder a line based on a levels array (L2) ------------------------------ */ |
| 671 | |
| 672 | U_CAPI void U_EXPORT2 |
| 673 | ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { |
| 674 | int32_t start, limit, sumOfSosEos; |
| 675 | UBiDiLevel minLevel, maxLevel; |
| 676 | |
| 677 | if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { |
| 678 | return; |
| 679 | } |
| 680 | |
| 681 | /* nothing to do? */ |
| 682 | if(minLevel==maxLevel && (minLevel&1)==0) { |
| 683 | return; |
| 684 | } |
| 685 | |
| 686 | /* reorder only down to the lowest odd level */ |
| 687 | minLevel|=1; |
| 688 | |
| 689 | /* loop maxLevel..minLevel */ |
| 690 | do { |
| 691 | start=0; |
| 692 | |
| 693 | /* loop for all sequences of levels to reorder at the current maxLevel */ |
| 694 | for(;;) { |
| 695 | /* look for a sequence of levels that are all at >=maxLevel */ |
| 696 | /* look for the first index of such a sequence */ |
| 697 | while(start<length && levels[start]<maxLevel) { |
| 698 | ++start; |
| 699 | } |
| 700 | if(start>=length) { |
| 701 | break; /* no more such sequences */ |
| 702 | } |
| 703 | |
| 704 | /* look for the limit of such a sequence (the index behind it) */ |
| 705 | for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {} |
| 706 | |
| 707 | /* |
| 708 | * sos=start of sequence, eos=end of sequence |
| 709 | * |
| 710 | * The closed (inclusive) interval from sos to eos includes all the logical |
| 711 | * and visual indexes within this sequence. They are logically and |
| 712 | * visually contiguous and in the same range. |
| 713 | * |
| 714 | * For each run, the new visual index=sos+eos-old visual index; |
| 715 | * we pre-add sos+eos into sumOfSosEos -> |
| 716 | * new visual index=sumOfSosEos-old visual index; |
| 717 | */ |
| 718 | sumOfSosEos=start+limit-1; |
| 719 | |
| 720 | /* reorder each index in the sequence */ |
| 721 | do { |
| 722 | indexMap[start]=sumOfSosEos-indexMap[start]; |
| 723 | } while(++start<limit); |
| 724 | |
| 725 | /* start==limit */ |
| 726 | if(limit==length) { |
| 727 | break; /* no more such sequences */ |
| 728 | } else { |
| 729 | start=limit+1; |
| 730 | } |
| 731 | } |
| 732 | } while(--maxLevel>=minLevel); |
| 733 | } |
| 734 | |
| 735 | U_CAPI void U_EXPORT2 |
| 736 | ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap) { |
| 737 | int32_t start, end, limit, temp; |
| 738 | UBiDiLevel minLevel, maxLevel; |
| 739 | |
| 740 | if(indexMap==NULL || !prepareReorder(levels, length, indexMap, &minLevel, &maxLevel)) { |
| 741 | return; |
| 742 | } |
| 743 | |
| 744 | /* nothing to do? */ |
| 745 | if(minLevel==maxLevel && (minLevel&1)==0) { |
| 746 | return; |
| 747 | } |
| 748 | |
| 749 | /* reorder only down to the lowest odd level */ |
| 750 | minLevel|=1; |
| 751 | |
| 752 | /* loop maxLevel..minLevel */ |
| 753 | do { |
| 754 | start=0; |
| 755 | |
| 756 | /* loop for all sequences of levels to reorder at the current maxLevel */ |
| 757 | for(;;) { |
| 758 | /* look for a sequence of levels that are all at >=maxLevel */ |
| 759 | /* look for the first index of such a sequence */ |
| 760 | while(start<length && levels[start]<maxLevel) { |
| 761 | ++start; |
| 762 | } |
| 763 | if(start>=length) { |
| 764 | break; /* no more such runs */ |
| 765 | } |
| 766 | |
| 767 | /* look for the limit of such a sequence (the index behind it) */ |
| 768 | for(limit=start; ++limit<length && levels[limit]>=maxLevel;) {} |
| 769 | |
| 770 | /* |
| 771 | * Swap the entire interval of indexes from start to limit-1. |
| 772 | * We don't need to swap the levels for the purpose of this |
| 773 | * algorithm: the sequence of levels that we look at does not |
| 774 | * move anyway. |
| 775 | */ |
| 776 | end=limit-1; |
| 777 | while(start<end) { |
| 778 | temp=indexMap[start]; |
| 779 | indexMap[start]=indexMap[end]; |
| 780 | indexMap[end]=temp; |
| 781 | |
| 782 | ++start; |
| 783 | --end; |
| 784 | } |
| 785 | |
| 786 | if(limit==length) { |
| 787 | break; /* no more such sequences */ |
| 788 | } else { |
| 789 | start=limit+1; |
| 790 | } |
| 791 | } |
| 792 | } while(--maxLevel>=minLevel); |
| 793 | } |
| 794 | |
| 795 | static bool_t |
| 796 | prepareReorder(const UBiDiLevel *levels, int32_t length, |
| 797 | int32_t *indexMap, |
| 798 | UBiDiLevel *pMinLevel, UBiDiLevel *pMaxLevel) { |
| 799 | int32_t start; |
| 800 | UBiDiLevel level, minLevel, maxLevel; |
| 801 | |
| 802 | if(levels==NULL || length<=0) { |
| 803 | return FALSE; |
| 804 | } |
| 805 | |
| 806 | /* determine minLevel and maxLevel */ |
| 807 | minLevel=UBIDI_MAX_EXPLICIT_LEVEL+1; |
| 808 | maxLevel=0; |
| 809 | for(start=length; start>0;) { |
| 810 | level=levels[--start]; |
| 811 | if(level>UBIDI_MAX_EXPLICIT_LEVEL+1) { |
| 812 | return FALSE; |
| 813 | } |
| 814 | if(level<minLevel) { |
| 815 | minLevel=level; |
| 816 | } |
| 817 | if(level>maxLevel) { |
| 818 | maxLevel=level; |
| 819 | } |
| 820 | } |
| 821 | *pMinLevel=minLevel; |
| 822 | *pMaxLevel=maxLevel; |
| 823 | |
| 824 | /* initialize the index map */ |
| 825 | for(start=length; start>0;) { |
| 826 | --start; |
| 827 | indexMap[start]=start; |
| 828 | } |
| 829 | |
| 830 | return TRUE; |
| 831 | } |
| 832 | |
| 833 | /* API functions for logical<->visual mapping ------------------------------- */ |
| 834 | |
| 835 | U_CAPI int32_t U_EXPORT2 |
| 836 | ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode) { |
| 837 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
| 838 | return 0; |
| 839 | } else if(pBiDi==NULL) { |
| 840 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 841 | return 0; |
| 842 | } else if(logicalIndex<0 || pBiDi->length<=logicalIndex) { |
| 843 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
| 844 | return 0; |
| 845 | } else { |
| 846 | /* we can do the trivial cases without the runs array */ |
| 847 | switch(pBiDi->direction) { |
| 848 | case UBIDI_LTR: |
| 849 | return logicalIndex; |
| 850 | case UBIDI_RTL: |
| 851 | return pBiDi->length-logicalIndex-1; |
| 852 | default: |
| 853 | if(pBiDi->runCount<0 && !ubidi_getRuns(pBiDi)) { |
| 854 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 855 | return 0; |
| 856 | } else { |
| 857 | Run *runs=pBiDi->runs; |
| 858 | int32_t i, visualStart=0, offset, length; |
| 859 | |
| 860 | /* linear search for the run, search on the visual runs */ |
| 861 | for(i=0;; ++i) { |
| 862 | length=runs[i].visualLimit-visualStart; |
| 863 | offset=logicalIndex-GET_INDEX(runs[i].logicalStart); |
| 864 | if(offset>=0 && offset<length) { |
| 865 | if(IS_EVEN_RUN(runs[i].logicalStart)) { |
| 866 | /* LTR */ |
| 867 | return visualStart+offset; |
| 868 | } else { |
| 869 | /* RTL */ |
| 870 | return visualStart+length-offset-1; |
| 871 | } |
| 872 | } |
| 873 | visualStart+=length; |
| 874 | } |
| 875 | } |
| 876 | } |
| 877 | } |
| 878 | } |
| 879 | |
| 880 | U_CAPI int32_t U_EXPORT2 |
| 881 | ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode) { |
| 882 | if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { |
| 883 | return 0; |
| 884 | } else if(pBiDi==NULL) { |
| 885 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 886 | return 0; |
| 887 | } else if(visualIndex<0 || pBiDi->length<=visualIndex) { |
| 888 | *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; |
| 889 | return 0; |
| 890 | } else { |
| 891 | /* we can do the trivial cases without the runs array */ |
| 892 | switch(pBiDi->direction) { |
| 893 | case UBIDI_LTR: |
| 894 | return visualIndex; |
| 895 | case UBIDI_RTL: |
| 896 | return pBiDi->length-visualIndex-1; |
| 897 | default: |
| 898 | if(pBiDi->runCount<0 && !ubidi_getRuns(pBiDi)) { |
| 899 | *pErrorCode=U_MEMORY_ALLOCATION_ERROR; |
| 900 | return 0; |
| 901 | } else { |
| 902 | Run *runs=pBiDi->runs; |
| 903 | int32_t i, runCount=pBiDi->runCount, start; |
| 904 | |
| 905 | if(runCount<=10) { |
| 906 | /* linear search for the run */ |
| 907 | for(i=0; visualIndex>=runs[i].visualLimit; ++i) {} |
| 908 | } else { |
| 909 | /* binary search for the run */ |
| 910 | int32_t begin=0, limit=runCount; |
| 911 | |
| 912 | /* the middle if() will guaranteed find the run, we don't need a loop limit */ |
| 913 | for(;;) { |
| 914 | i=(begin+limit)/2; |
| 915 | if(visualIndex>=runs[i].visualLimit) { |
| 916 | begin=i+1; |
| 917 | } else if(i==0 || visualIndex>=runs[i-1].visualLimit) { |
| 918 | break; |
| 919 | } else { |
| 920 | limit=i; |
| 921 | } |
| 922 | } |
| 923 | } |
| 924 | |
| 925 | start=runs[i].logicalStart; |
| 926 | if(IS_EVEN_RUN(start)) { |
| 927 | /* LTR */ |
| 928 | /* the offset in runs[i] is visualIndex-runs[i-1].visualLimit */ |
| 929 | if(i>0) { |
| 930 | visualIndex-=runs[i-1].visualLimit; |
| 931 | } |
| 932 | return GET_INDEX(start)+visualIndex; |
| 933 | } else { |
| 934 | /* RTL */ |
| 935 | return GET_INDEX(start)+runs[i].visualLimit-visualIndex-1; |
| 936 | } |
| 937 | } |
| 938 | } |
| 939 | } |
| 940 | } |
| 941 | |
| 942 | U_CAPI void U_EXPORT2 |
| 943 | ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { |
| 944 | UBiDiLevel *levels; |
| 945 | |
| 946 | /* ubidi_getLevels() checks all of its and our arguments */ |
| 947 | if((levels=(UBiDiLevel *)ubidi_getLevels(pBiDi, pErrorCode))==NULL) { |
| 948 | /* no op */ |
| 949 | } else if(indexMap==NULL) { |
| 950 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 951 | } else { |
| 952 | ubidi_reorderLogical(levels, pBiDi->length, indexMap); |
| 953 | } |
| 954 | } |
| 955 | |
| 956 | U_CAPI void U_EXPORT2 |
| 957 | ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode) { |
| 958 | /* ubidi_countRuns() checks all of its and our arguments */ |
| 959 | if(ubidi_countRuns(pBiDi, pErrorCode)<=0) { |
| 960 | /* no op */ |
| 961 | } else if(indexMap==NULL) { |
| 962 | *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 963 | } else { |
| 964 | /* fill a visual-to-logical index map using the runs[] */ |
| 965 | Run *runs=pBiDi->runs, *runsLimit=runs+pBiDi->runCount; |
| 966 | int32_t logicalStart, visualStart, visualLimit; |
| 967 | |
| 968 | visualStart=0; |
| 969 | for(; runs<runsLimit; ++runs) { |
| 970 | logicalStart=runs->logicalStart; |
| 971 | visualLimit=runs->visualLimit; |
| 972 | if(IS_EVEN_RUN(logicalStart)) { |
| 973 | do { /* LTR */ |
| 974 | *indexMap++ = logicalStart++; |
| 975 | } while(++visualStart<visualLimit); |
| 976 | } else { |
| 977 | REMOVE_ODD_BIT(logicalStart); |
| 978 | logicalStart+=visualLimit-visualStart; /* logicalLimit */ |
| 979 | do { /* RTL */ |
| 980 | *indexMap++ = --logicalStart; |
| 981 | } while(++visualStart<visualLimit); |
| 982 | } |
| 983 | /* visualStart==visualLimit; */ |
| 984 | } |
| 985 | } |
| 986 | } |
| 987 | |
| 988 | U_CAPI void U_EXPORT2 |
| 989 | ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length) { |
| 990 | if(srcMap!=NULL && destMap!=NULL) { |
| 991 | srcMap+=length; |
| 992 | while(length>0) { |
| 993 | destMap[*--srcMap]=--length; |
| 994 | } |
| 995 | } |
| 996 | } |