J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Portions Copyright 2000-2003 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | |
| 26 | /* |
| 27 | * (C) Copyright IBM Corp. 1999-2003 - All Rights Reserved |
| 28 | * |
| 29 | * The original version of this source code and documentation is |
| 30 | * copyrighted and owned by IBM. These materials are provided |
| 31 | * under terms of a License Agreement between IBM and Sun. |
| 32 | * This technology is protected by multiple US and International |
| 33 | * patents. This notice and attribution to IBM may not be removed. |
| 34 | */ |
| 35 | |
| 36 | /* |
| 37 | * file name: ubidi.h |
| 38 | * encoding: US-ASCII |
| 39 | * tab size: 8 (not used) |
| 40 | * indentation:4 |
| 41 | * |
| 42 | * created on: 1999jul27 |
| 43 | * created by: Markus W. Scherer |
| 44 | */ |
| 45 | |
| 46 | #ifndef UBIDI_H |
| 47 | #define UBIDI_H |
| 48 | |
| 49 | #include "utypes.h" |
| 50 | #include "uchardir.h" |
| 51 | |
| 52 | /* |
| 53 | * javadoc-style comments are intended to be transformed into HTML |
| 54 | * using DOC++ - see |
| 55 | * http://www.zib.de/Visual/software/doc++/index.html . |
| 56 | * |
| 57 | * The HTML documentation is created with |
| 58 | * doc++ -H ubidi.h |
| 59 | * |
| 60 | * The following #define trick allows us to do it all in one file |
| 61 | * and still be able to compile it. |
| 62 | */ |
| 63 | #define DOCXX_TAG |
| 64 | #define BIDI_SAMPLE_CODE |
| 65 | |
| 66 | /** |
| 67 | * @name BiDi algorithm for ICU |
| 68 | * |
| 69 | * <h2>BiDi algorithm for ICU</h2> |
| 70 | * |
| 71 | * This is an implementation of the Unicode Bidirectional algorithm. |
| 72 | * The algorithm is defined in the |
| 73 | * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>, |
| 74 | * version 5, also described in The Unicode Standard, Version 3.0 .<p> |
| 75 | * |
| 76 | * <h3>General remarks about the API:</h3> |
| 77 | * |
| 78 | * In functions with an error code parameter, |
| 79 | * the <code>pErrorCode</code> pointer must be valid |
| 80 | * and the value that it points to must not indicate a failure before |
| 81 | * the function call. Otherwise, the function returns immediately. |
| 82 | * After the function call, the value indicates success or failure.<p> |
| 83 | * |
| 84 | * The <quote>limit</quote> of a sequence of characters is the position just after their |
| 85 | * last character, i.e., one more than that position.<p> |
| 86 | * |
| 87 | * Some of the API functions provide access to <quote>runs</quote>. |
| 88 | * Such a <quote>run</quote> is defined as a sequence of characters |
| 89 | * that are at the same embedding level |
| 90 | * after performing the BiDi algorithm.<p> |
| 91 | * |
| 92 | * @author Markus W. Scherer |
| 93 | */ |
| 94 | DOCXX_TAG |
| 95 | /*@{*/ |
| 96 | |
| 97 | /** |
| 98 | * UBiDiLevel is the type of the level values in this |
| 99 | * BiDi implementation. |
| 100 | * It holds an embedding level and indicates the visual direction |
| 101 | * by its bit 0 (even/odd value).<p> |
| 102 | * |
| 103 | * It can also hold non-level values for the |
| 104 | * <code>paraLevel</code> and <code>embeddingLevels</code> |
| 105 | * arguments of <code>ubidi_setPara()</code>; there: |
| 106 | * <ul> |
| 107 | * <li>bit 7 of an <code>embeddingLevels[]</code> |
| 108 | * value indicates whether the using application is |
| 109 | * specifying the level of a character to <i>override</i> whatever the |
| 110 | * BiDi implementation would resolve it to.</li> |
| 111 | * <li><code>paraLevel</code> can be set to the |
| 112 | * pseudo-level values <code>UBIDI_DEFAULT_LTR</code> |
| 113 | * and <code>UBIDI_DEFAULT_RTL</code>.</li> |
| 114 | * </ul> |
| 115 | * @see ubidi_setPara |
| 116 | * |
| 117 | * <p>The related constants are not real, valid level values. |
| 118 | * <code>UBIDI_DEFAULT_XXX</code> can be used to specify |
| 119 | * a default for the paragraph level for |
| 120 | * when the <code>ubidi_setPara()</code> function |
| 121 | * shall determine it but there is no |
| 122 | * strongly typed character in the input.<p> |
| 123 | * |
| 124 | * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even |
| 125 | * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd, |
| 126 | * just like with normal LTR and RTL level values - |
| 127 | * these special values are designed that way. Also, the implementation |
| 128 | * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. |
| 129 | * |
| 130 | * @see UBIDI_DEFAULT_LTR |
| 131 | * @see UBIDI_DEFAULT_RTL |
| 132 | * @see UBIDI_LEVEL_OVERRIDE |
| 133 | * @see UBIDI_MAX_EXPLICIT_LEVEL |
| 134 | */ |
| 135 | typedef uint8_t UBiDiLevel; |
| 136 | |
| 137 | /** @memo If there is no strong character, then set the paragraph level to 0 (left-to-right). */ |
| 138 | #define UBIDI_DEFAULT_LTR 0xfe |
| 139 | |
| 140 | /** @memo If there is no strong character, then set the paragraph level to 1 (right-to-left). */ |
| 141 | #define UBIDI_DEFAULT_RTL 0xff |
| 142 | |
| 143 | /** |
| 144 | * @memo Maximum explicit embedding level |
| 145 | * (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>). |
| 146 | */ |
| 147 | #define UBIDI_MAX_EXPLICIT_LEVEL 61 |
| 148 | |
| 149 | /** @memo Bit flag for level input: overrides directional properties. */ |
| 150 | #define UBIDI_LEVEL_OVERRIDE 0x80 |
| 151 | |
| 152 | /** |
| 153 | * @memo <code>UBiDiDirection</code> values indicate the text direction. |
| 154 | */ |
| 155 | enum UBiDiDirection { |
| 156 | /** @memo All left-to-right text. This is a 0 value. */ |
| 157 | UBIDI_LTR, |
| 158 | /** @memo All right-to-left text. This is a 1 value. */ |
| 159 | UBIDI_RTL, |
| 160 | /** @memo Mixed-directional text. This is a 2 value. */ |
| 161 | UBIDI_MIXED |
| 162 | }; |
| 163 | |
| 164 | typedef enum UBiDiDirection UBiDiDirection; |
| 165 | |
| 166 | /** |
| 167 | * Forward declaration of the <code>UBiDi</code> structure for the declaration of |
| 168 | * the API functions. Its fields are implementation-specific.<p> |
| 169 | * This structure holds information about a paragraph of text |
| 170 | * with BiDi-algorithm-related details, or about one line of |
| 171 | * such a paragraph.<p> |
| 172 | * Reordering can be done on a line, or on a paragraph which is |
| 173 | * then interpreted as one single line. |
| 174 | */ |
| 175 | struct UBiDi; |
| 176 | |
| 177 | typedef struct UBiDi UBiDi; |
| 178 | |
| 179 | /** |
| 180 | * Allocate a <code>UBiDi</code> structure. |
| 181 | * Such an object is initially empty. It is assigned |
| 182 | * the BiDi properties of a paragraph by <code>ubidi_setPara()</code> |
| 183 | * or the BiDi properties of a line of a paragraph by |
| 184 | * <code>ubidi_setLine()</code>.<p> |
| 185 | * This object can be reused for as long as it is not deallocated |
| 186 | * by calling <code>ubidi_close()</code>.<p> |
| 187 | * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate |
| 188 | * additional memory for internal structures as necessary. |
| 189 | * |
| 190 | * @return An empty <code>UBiDi</code> object. |
| 191 | */ |
| 192 | U_CAPI UBiDi * U_EXPORT2 |
| 193 | ubidi_open(void); |
| 194 | |
| 195 | /** |
| 196 | * Allocate a <code>UBiDi</code> structure with preallocated memory |
| 197 | * for internal structures. |
| 198 | * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code> |
| 199 | * with no arguments, but it also preallocates memory for internal structures |
| 200 | * according to the sizings supplied by the caller.<p> |
| 201 | * Subsequent functions will not allocate any more memory, and are thus |
| 202 | * guaranteed not to fail because of lack of memory.<p> |
| 203 | * The preallocation can be limited to some of the internal memory |
| 204 | * by setting some values to 0 here. That means that if, e.g., |
| 205 | * <code>maxRunCount</code> cannot be reasonably predetermined and should not |
| 206 | * be set to <code>maxLength</code> (the only failproof value) to avoid |
| 207 | * wasting memory, then <code>maxRunCount</code> could be set to 0 here |
| 208 | * and the internal structures that are associated with it will be allocated |
| 209 | * on demand, just like with <code>ubidi_open()</code>. |
| 210 | * |
| 211 | * @param maxLength is the maximum paragraph or line length that internal memory |
| 212 | * will be preallocated for. An attempt to associate this object with a |
| 213 | * longer text will fail, unless this value is 0, which leaves the allocation |
| 214 | * up to the implementation. |
| 215 | * |
| 216 | * @param maxRunCount is the maximum anticipated number of same-level runs |
| 217 | * that internal memory will be preallocated for. An attempt to access |
| 218 | * visual runs on an object that was not preallocated for as many runs |
| 219 | * as the text was actually resolved to will fail, |
| 220 | * unless this value is 0, which leaves the allocation up to the implementation.<p> |
| 221 | * The number of runs depends on the actual text and may be anywhere between |
| 222 | * 1 and <code>maxLength</code>. It is typically small.<p> |
| 223 | * |
| 224 | * @param pErrorCode must be a valid pointer to an error code value, |
| 225 | * which must not indicate a failure before the function call. |
| 226 | * |
| 227 | * @return An empty <code>UBiDi</code> object with preallocated memory. |
| 228 | */ |
| 229 | U_CAPI UBiDi * U_EXPORT2 |
| 230 | ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode); |
| 231 | |
| 232 | /** |
| 233 | * <code>ubidi_close()</code> must be called to free the memory |
| 234 | * associated with a UBiDi object.<p> |
| 235 | * |
| 236 | * <strong>Important: </strong> |
| 237 | * If a <code>UBiDi</code> object is the <quote>child</quote> |
| 238 | * of another one (its <quote>parent</quote>), after calling |
| 239 | * <code>ubidi_setLine()</code>, then the child object must |
| 240 | * be destroyed (closed) or reused (by calling |
| 241 | * <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>) |
| 242 | * before the parent object. |
| 243 | * |
| 244 | * @param pBiDi is a <code>UBiDi</code> object. |
| 245 | * |
| 246 | * @see ubidi_setPara |
| 247 | * @see ubidi_setLine |
| 248 | */ |
| 249 | U_CAPI void U_EXPORT2 |
| 250 | ubidi_close(UBiDi *pBiDi); |
| 251 | |
| 252 | /** |
| 253 | * Perform the Unicode BiDi algorithm. It is defined in the |
| 254 | * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Technical Report 9</a>, |
| 255 | * version 5, |
| 256 | * also described in The Unicode Standard, Version 3.0 .<p> |
| 257 | * |
| 258 | * This function takes a single plain text paragraph with or without |
| 259 | * externally specified embedding levels from <quote>styled</quote> text |
| 260 | * and computes the left-right-directionality of each character.<p> |
| 261 | * |
| 262 | * If the entire paragraph consists of text of only one direction, then |
| 263 | * the function may not perform all the steps described by the algorithm, |
| 264 | * i.e., some levels may not be the same as if all steps were performed. |
| 265 | * This is not relevant for unidirectional text.<br> |
| 266 | * For example, in pure LTR text with numbers the numbers would get |
| 267 | * a resolved level of 2 higher than the surrounding text according to |
| 268 | * the algorithm. This implementation may set all resolved levels to |
| 269 | * the same value in such a case.<p> |
| 270 | * |
| 271 | * The text must be externally split into separate paragraphs (rule P1). |
| 272 | * Paragraph separators (B) should appear at most at the very end. |
| 273 | * |
| 274 | * @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code> |
| 275 | * which will be set to contain the reordering information, |
| 276 | * especially the resolved levels for all the characters in <code>text</code>. |
| 277 | * |
| 278 | * @param text is a pointer to the single-paragraph text that the |
| 279 | * BiDi algorithm will be performed on |
| 280 | * (step (P1) of the algorithm is performed externally). |
| 281 | * <strong>The text must be (at least) <code>length</code> long.</strong> |
| 282 | * |
| 283 | * @param length is the length of the text; if <code>length==-1</code> then |
| 284 | * the text must be zero-terminated. |
| 285 | * |
| 286 | * @param paraLevel specifies the default level for the paragraph; |
| 287 | * it is typically 0 (LTR) or 1 (RTL). |
| 288 | * If the function shall determine the paragraph level from the text, |
| 289 | * then <code>paraLevel</code> can be set to |
| 290 | * either <code>UBIDI_DEFAULT_LTR</code> |
| 291 | * or <code>UBIDI_DEFAULT_RTL</code>; |
| 292 | * if there is no strongly typed character, then |
| 293 | * the desired default is used (0 for LTR or 1 for RTL). |
| 294 | * Any other value between 0 and <code>UBIDI_MAX_EXPLICIT_LEVEL</code> is also valid, |
| 295 | * with odd levels indicating RTL. |
| 296 | * |
| 297 | * @param embeddingLevels (in) may be used to preset the embedding and override levels, |
| 298 | * ignoring characters like LRE and PDF in the text. |
| 299 | * A level overrides the directional property of its corresponding |
| 300 | * (same index) character if the level has the |
| 301 | * <code>UBIDI_LEVEL_OVERRIDE</code> bit set.<p> |
| 302 | * Except for that bit, it must be |
| 303 | * <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>.<p> |
| 304 | * <strong>Caution: </strong>A copy of this pointer, not of the levels, |
| 305 | * will be stored in the <code>UBiDi</code> object; |
| 306 | * the <code>embeddingLevels</code> array must not be |
| 307 | * deallocated before the <code>UBiDi</code> structure is destroyed or reused, |
| 308 | * and the <code>embeddingLevels</code> |
| 309 | * should not be modified to avoid unexpected results on subsequent BiDi operations. |
| 310 | * However, the <code>ubidi_setPara()</code> and |
| 311 | * <code>ubidi_setLine()</code> functions may modify some or all of the levels.<p> |
| 312 | * After the <code>UBiDi</code> object is reused or destroyed, the caller |
| 313 | * must take care of the deallocation of the <code>embeddingLevels</code> array.<p> |
| 314 | * <strong>The <code>embeddingLevels</code> array must be |
| 315 | * at least <code>length</code> long.</strong> |
| 316 | * |
| 317 | * @param pErrorCode must be a valid pointer to an error code value, |
| 318 | * which must not indicate a failure before the function call. |
| 319 | */ |
| 320 | U_CAPI void U_EXPORT2 |
| 321 | ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, |
| 322 | UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, |
| 323 | UErrorCode *pErrorCode); |
| 324 | |
| 325 | /** |
| 326 | * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to |
| 327 | * contain the reordering information, especially the resolved levels, |
| 328 | * for all the characters in a line of text. This line of text is |
| 329 | * specified by referring to a <code>UBiDi</code> object representing |
| 330 | * this information for a paragraph of text, and by specifying |
| 331 | * a range of indexes in this paragraph.<p> |
| 332 | * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p> |
| 333 | * |
| 334 | * This is used after calling <code>ubidi_setPara()</code> |
| 335 | * for a paragraph, and after line-breaking on that paragraph. |
| 336 | * It is not necessary if the paragraph is treated as a single line.<p> |
| 337 | * |
| 338 | * After line-breaking, rules (L1) and (L2) for the treatment of |
| 339 | * trailing WS and for reordering are performed on |
| 340 | * a <code>UBiDi</code> object that represents a line.<p> |
| 341 | * |
| 342 | * <strong>Important: </strong><code>pLineBiDi</code> shares data with |
| 343 | * <code>pParaBiDi</code>. |
| 344 | * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>. |
| 345 | * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line |
| 346 | * before the object for its parent paragraph. |
| 347 | * |
| 348 | * @param pParaBiDi is the parent paragraph object. |
| 349 | * |
| 350 | * @param start is the line's first index into the paragraph text. |
| 351 | * |
| 352 | * @param limit is just behind the line's last index into the paragraph text |
| 353 | * (its last index +1).<br> |
| 354 | * It must be <code>0<=start<=limit<=</code>paragraph length. |
| 355 | * |
| 356 | * @param pLineBiDi is the object that will now represent a line of the paragraph. |
| 357 | * |
| 358 | * @param pErrorCode must be a valid pointer to an error code value, |
| 359 | * which must not indicate a failure before the function call. |
| 360 | * |
| 361 | * @see ubidi_setPara |
| 362 | */ |
| 363 | U_CAPI void U_EXPORT2 |
| 364 | ubidi_setLine(const UBiDi *pParaBiDi, |
| 365 | int32_t start, int32_t limit, |
| 366 | UBiDi *pLineBiDi, |
| 367 | UErrorCode *pErrorCode); |
| 368 | |
| 369 | /** |
| 370 | * Get the directionality of the text. |
| 371 | * |
| 372 | * @param pBiDi is the paragraph or line <code>UBiDi</code> object. |
| 373 | * |
| 374 | * @return A <code>UBIDI_XXX</code> value that indicates if the entire text |
| 375 | * represented by this object is unidirectional, |
| 376 | * and which direction, or if it is mixed-directional. |
| 377 | * |
| 378 | * @see UBiDiDirection |
| 379 | */ |
| 380 | U_CAPI UBiDiDirection U_EXPORT2 |
| 381 | ubidi_getDirection(const UBiDi *pBiDi); |
| 382 | |
| 383 | /** |
| 384 | * Get the length of the text. |
| 385 | * |
| 386 | * @param pBiDi is the paragraph or line <code>UBiDi</code> object. |
| 387 | * |
| 388 | * @return The length of the text that the UBiDi object was created for. |
| 389 | */ |
| 390 | U_CAPI int32_t U_EXPORT2 |
| 391 | ubidi_getLength(const UBiDi *pBiDi); |
| 392 | |
| 393 | /** |
| 394 | * Get the paragraph level of the text. |
| 395 | * |
| 396 | * @param pBiDi is the paragraph or line <code>UBiDi</code> object. |
| 397 | * |
| 398 | * @return The paragraph level. |
| 399 | * |
| 400 | * @see UBiDiLevel |
| 401 | */ |
| 402 | U_CAPI UBiDiLevel U_EXPORT2 |
| 403 | ubidi_getParaLevel(const UBiDi *pBiDi); |
| 404 | |
| 405 | /** |
| 406 | * Get the level for one character. |
| 407 | * |
| 408 | * @param pBiDi is the paragraph or line <code>UBiDi</code> object. |
| 409 | * |
| 410 | * @param charIndex the index of a character. |
| 411 | * |
| 412 | * @return The level for the character at charIndex. |
| 413 | * |
| 414 | * @see UBiDiLevel |
| 415 | */ |
| 416 | U_CAPI UBiDiLevel U_EXPORT2 |
| 417 | ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex); |
| 418 | |
| 419 | /** |
| 420 | * Get an array of levels for each character.<p> |
| 421 | * |
| 422 | * Note that this function may allocate memory under some |
| 423 | * circumstances, unlike <code>ubidi_getLevelAt()</code>. |
| 424 | * |
| 425 | * @param pBiDi is the paragraph or line <code>UBiDi</code> object. |
| 426 | * |
| 427 | * @param pErrorCode must be a valid pointer to an error code value, |
| 428 | * which must not indicate a failure before the function call. |
| 429 | * |
| 430 | * @return The levels array for the text, |
| 431 | * or <code>NULL</code> if an error occurs. |
| 432 | * |
| 433 | * @see UBiDiLevel |
| 434 | */ |
| 435 | U_CAPI const UBiDiLevel * U_EXPORT2 |
| 436 | ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode); |
| 437 | |
| 438 | /** |
| 439 | * Get a logical run. |
| 440 | * This function returns information about a run and is used |
| 441 | * to retrieve runs in logical order.<p> |
| 442 | * This is especially useful for line-breaking on a paragraph. |
| 443 | * |
| 444 | * @param pBiDi is the paragraph or line <code>UBiDi</code> object. |
| 445 | * |
| 446 | * @param logicalStart is the first character of the run. |
| 447 | * |
| 448 | * @param pLogicalLimit will receive the limit of the run. |
| 449 | * The l-value that you point to here may be the |
| 450 | * same expression (variable) as the one for |
| 451 | * <code>logicalStart</code>. |
| 452 | * This pointer can be <code>NULL</code> if this |
| 453 | * value is not necessary. |
| 454 | * |
| 455 | * @param pLevel will receive the level of the run. |
| 456 | * This pointer can be <code>NULL</code> if this |
| 457 | * value is not necessary. |
| 458 | */ |
| 459 | U_CAPI void U_EXPORT2 |
| 460 | ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalStart, |
| 461 | int32_t *pLogicalLimit, UBiDiLevel *pLevel); |
| 462 | |
| 463 | /** |
| 464 | * Get the number of runs. |
| 465 | * This function may invoke the actual reordering on the |
| 466 | * <code>UBiDi</code> object, after <code>ubidi_setPara()</code> |
| 467 | * may have resolved only the levels of the text. Therefore, |
| 468 | * <code>ubidi_countRuns()</code> may have to allocate memory, |
| 469 | * and may fail doing so. |
| 470 | * |
| 471 | * @param pBiDi is the paragraph or line <code>UBiDi</code> object. |
| 472 | * |
| 473 | * @param pErrorCode must be a valid pointer to an error code value, |
| 474 | * which must not indicate a failure before the function call. |
| 475 | * |
| 476 | * @return The number of runs. |
| 477 | */ |
| 478 | U_CAPI int32_t U_EXPORT2 |
| 479 | ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode); |
| 480 | |
| 481 | /** |
| 482 | * Get one run's logical start, length, and directionality, |
| 483 | * which can be 0 for LTR or 1 for RTL. |
| 484 | * In an RTL run, the character at the logical start is |
| 485 | * visually on the right of the displayed run. |
| 486 | * The length is the number of characters in the run.<p> |
| 487 | * <code>ubidi_countRuns()</code> should be called |
| 488 | * before the runs are retrieved. |
| 489 | * |
| 490 | * @param pBiDi is the paragraph or line <code>UBiDi</code> object. |
| 491 | * |
| 492 | * @param runIndex is the number of the run in visual order, in the |
| 493 | * range <code>[0..ubidi_countRuns(pBiDi)-1]</code>. |
| 494 | * |
| 495 | * @param pLogicalStart is the first logical character index in the text. |
| 496 | * The pointer may be <code>NULL</code> if this index is not needed. |
| 497 | * |
| 498 | * @param pLength is the number of characters (at least one) in the run. |
| 499 | * The pointer may be <code>NULL</code> if this is not needed. |
| 500 | * |
| 501 | * @return the directionality of the run, |
| 502 | * <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>, |
| 503 | * never <code>UBIDI_MIXED</code>. |
| 504 | * |
| 505 | * @see ubidi_countRuns |
| 506 | * |
| 507 | * Example (pBiDi, text and pErrorCode are assumed to be set up by the caller): |
| 508 | * <pre> |
| 509 | * int32_t i, count=ubidi_countRuns(pBiDi, pErrorCode), |
| 510 | * logicalStart, visualIndex=0, length; |
| 511 | * for(i=0; i<count; ++i) { |
| 512 | * if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) { |
| 513 | * do { // LTR |
| 514 | * show_char(text[logicalStart++], visualIndex++); |
| 515 | * } while(--length>0); |
| 516 | * } else { |
| 517 | * logicalStart+=length; // logicalLimit |
| 518 | * do { // RTL |
| 519 | * show_char(text[--logicalStart], visualIndex++); |
| 520 | * } while(--length>0); |
| 521 | * } |
| 522 | * } |
| 523 | * </pre> |
| 524 | * |
| 525 | * Note that in right-to-left runs, code like this places |
| 526 | * modifier letters before base characters and second surrogates |
| 527 | * before first ones. |
| 528 | */ |
| 529 | U_CAPI UBiDiDirection U_EXPORT2 |
| 530 | ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex, |
| 531 | int32_t *pLogicalStart, int32_t *pLength); |
| 532 | |
| 533 | /** |
| 534 | * Get the visual position from a logical text position. |
| 535 | * If such a mapping is used many times on the same |
| 536 | * <code>UBiDi</code> object, then calling |
| 537 | * <code>ubidi_getLogicalMap()</code> is more efficient.<p> |
| 538 | * |
| 539 | * Note that in right-to-left runs, this mapping places |
| 540 | * modifier letters before base characters and second surrogates |
| 541 | * before first ones. |
| 542 | * |
| 543 | * @param pBiDi is the paragraph or line <code>UBiDi</code> object. |
| 544 | * |
| 545 | * @param logicalIndex is the index of a character in the text. |
| 546 | * |
| 547 | * @param pErrorCode must be a valid pointer to an error code value, |
| 548 | * which must not indicate a failure before the function call. |
| 549 | * |
| 550 | * @return The visual position of this character. |
| 551 | * |
| 552 | * @see ubidi_getLogicalMap |
| 553 | * @see ubidi_getLogicalIndex |
| 554 | */ |
| 555 | U_CAPI int32_t U_EXPORT2 |
| 556 | ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode); |
| 557 | |
| 558 | /** |
| 559 | * Get the logical text position from a visual position. |
| 560 | * If such a mapping is used many times on the same |
| 561 | * <code>UBiDi</code> object, then calling |
| 562 | * <code>ubidi_getVisualMap()</code> is more efficient.<p> |
| 563 | * |
| 564 | * This is the inverse function to <code>ubidi_getVisualIndex()</code>. |
| 565 | * |
| 566 | * @param pBiDi is the paragraph or line <code>UBiDi</code> object. |
| 567 | * |
| 568 | * @param visualIndex is the visual position of a character. |
| 569 | * |
| 570 | * @param pErrorCode must be a valid pointer to an error code value, |
| 571 | * which must not indicate a failure before the function call. |
| 572 | * |
| 573 | * @return The index of this character in the text. |
| 574 | * |
| 575 | * @see ubidi_getVisualMap |
| 576 | * @see ubidi_getVisualIndex |
| 577 | */ |
| 578 | U_CAPI int32_t U_EXPORT2 |
| 579 | ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode); |
| 580 | |
| 581 | /** |
| 582 | * Get a logical-to-visual index map (array) for the characters in the UBiDi |
| 583 | * (paragraph or line) object. |
| 584 | * |
| 585 | * @param pBiDi is the paragraph or line <code>UBiDi</code> object. |
| 586 | * |
| 587 | * @param indexMap is a pointer to an array of <code>ubidi_getLength()</code> |
| 588 | * indexes which will reflect the reordering of the characters. |
| 589 | * The array does not need to be initialized.<p> |
| 590 | * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.<p> |
| 591 | * |
| 592 | * @param pErrorCode must be a valid pointer to an error code value, |
| 593 | * which must not indicate a failure before the function call. |
| 594 | * |
| 595 | * @see ubidi_getVisualMap |
| 596 | * @see ubidi_getVisualIndex |
| 597 | */ |
| 598 | U_CAPI void U_EXPORT2 |
| 599 | ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode); |
| 600 | |
| 601 | /** |
| 602 | * Get a visual-to-logical index map (array) for the characters in the UBiDi |
| 603 | * (paragraph or line) object. |
| 604 | * |
| 605 | * @param pBiDi is the paragraph or line <code>UBiDi</code> object. |
| 606 | * |
| 607 | * @param indexMap is a pointer to an array of <code>ubidi_getLength()</code> |
| 608 | * indexes which will reflect the reordering of the characters. |
| 609 | * The array does not need to be initialized.<p> |
| 610 | * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.<p> |
| 611 | * |
| 612 | * @param pErrorCode must be a valid pointer to an error code value, |
| 613 | * which must not indicate a failure before the function call. |
| 614 | * |
| 615 | * @see ubidi_getLogicalMap |
| 616 | * @see ubidi_getLogicalIndex |
| 617 | */ |
| 618 | U_CAPI void U_EXPORT2 |
| 619 | ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode); |
| 620 | |
| 621 | /** |
| 622 | * This is a convenience function that does not use a UBiDi object. |
| 623 | * It is intended to be used for when an application has determined the levels |
| 624 | * of objects (character sequences) and just needs to have them reordered (L2). |
| 625 | * This is equivalent to using <code>ubidi_getLogicalMap</code> on a |
| 626 | * <code>UBiDi</code> object. |
| 627 | * |
| 628 | * @param levels is an array with <code>length</code> levels that have been determined by |
| 629 | * the application. |
| 630 | * |
| 631 | * @param length is the number of levels in the array, or, semantically, |
| 632 | * the number of objects to be reordered. |
| 633 | * It must be <code>length>0</code>. |
| 634 | * |
| 635 | * @param indexMap is a pointer to an array of <code>length</code> |
| 636 | * indexes which will reflect the reordering of the characters. |
| 637 | * The array does not need to be initialized.<p> |
| 638 | * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>. |
| 639 | */ |
| 640 | U_CAPI void U_EXPORT2 |
| 641 | ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap); |
| 642 | |
| 643 | /** |
| 644 | * This is a convenience function that does not use a UBiDi object. |
| 645 | * It is intended to be used for when an application has determined the levels |
| 646 | * of objects (character sequences) and just needs to have them reordered (L2). |
| 647 | * This is equivalent to using <code>ubidi_getVisualMap</code> on a |
| 648 | * <code>UBiDi</code> object. |
| 649 | * |
| 650 | * @param levels is an array with <code>length</code> levels that have been determined by |
| 651 | * the application. |
| 652 | * |
| 653 | * @param length is the number of levels in the array, or, semantically, |
| 654 | * the number of objects to be reordered. |
| 655 | * It must be <code>length>0</code>. |
| 656 | * |
| 657 | * @param indexMap is a pointer to an array of <code>length</code> |
| 658 | * indexes which will reflect the reordering of the characters. |
| 659 | * The array does not need to be initialized.<p> |
| 660 | * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>. |
| 661 | */ |
| 662 | U_CAPI void U_EXPORT2 |
| 663 | ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap); |
| 664 | |
| 665 | /** |
| 666 | * Invert an index map. |
| 667 | * The one-to-one index mapping of the first map is inverted and written to |
| 668 | * the second one. |
| 669 | * |
| 670 | * @param srcMap is an array with <code>length</code> indexes |
| 671 | * which define the original mapping. |
| 672 | * |
| 673 | * @param destMap is an array with <code>length</code> indexes |
| 674 | * which will be filled with the inverse mapping. |
| 675 | * |
| 676 | * @param length is the length of each array. |
| 677 | */ |
| 678 | U_CAPI void U_EXPORT2 |
| 679 | ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length); |
| 680 | |
| 681 | /** |
| 682 | * @name Sample code for the ICU BiDi API |
| 683 | * |
| 684 | * <h2>Rendering a paragraph with the ICU BiDi API</h2> |
| 685 | * |
| 686 | * This is (hypothetical) sample code that illustrates |
| 687 | * how the ICU BiDi API could be used to render a paragraph of text. |
| 688 | * Rendering code depends highly on the graphics system, |
| 689 | * therefore this sample code must make a lot of assumptions, |
| 690 | * which may or may not match any existing graphics system's properties. |
| 691 | * |
| 692 | * <p>The basic assumptions are:</p> |
| 693 | * <ul> |
| 694 | * <li>Rendering is done from left to right on a horizontal line.</li> |
| 695 | * <li>A run of single-style, unidirectional text can be rendered at once.</li> |
| 696 | * <li>Such a run of text is passed to the graphics system with |
| 697 | * characters (code units) in logical order.</li> |
| 698 | * <li>The line-breaking algorithm is very complicated |
| 699 | * and locale-dependent - |
| 700 | * and therefore its implementation is omitted from this sample code.</li> |
| 701 | * </ul> |
| 702 | * |
| 703 | * <pre> |
| 704 | * #include "ubidi.h" |
| 705 | * |
| 706 | * typedef enum { |
| 707 | * styleNormal=0, styleSelected=1, |
| 708 | * styleBold=2, styleItalics=4, |
| 709 | * styleSuper=8, styleSub=16 |
| 710 | * } Style; |
| 711 | * |
| 712 | * typedef struct { int32_t limit; Style style; } StyleRun; |
| 713 | * |
| 714 | * int getTextWidth(const UChar *text, int32_t start, int32_t limit, |
| 715 | * const StyleRun *styleRuns, int styleRunCount); |
| 716 | * |
| 717 | * // set *pLimit and *pStyleRunLimit for a line |
| 718 | * // from text[start] and from styleRuns[styleRunStart] |
| 719 | * // using ubidi_getLogicalRun(para, ...) |
| 720 | * void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit, |
| 721 | * UBiDi *para, |
| 722 | * const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit, |
| 723 | * int *pLineWidth); |
| 724 | * |
| 725 | * // render runs on a line sequentially, always from left to right |
| 726 | * |
| 727 | * // prepare rendering a new line |
| 728 | * void startLine(UBiDiDirection textDirection, int lineWidth); |
| 729 | * |
| 730 | * // render a run of text and advance to the right by the run width |
| 731 | * // the text[start..limit-1] is always in logical order |
| 732 | * void renderRun(const UChar *text, int32_t start, int32_t limit, |
| 733 | * UBiDiDirection textDirection, Style style); |
| 734 | * |
| 735 | * // We could compute a cross-product |
| 736 | * // from the style runs with the directional runs |
| 737 | * // and then reorder it. |
| 738 | * // Instead, here we iterate over each run type |
| 739 | * // and render the intersections - |
| 740 | * // with shortcuts in simple (and common) cases. |
| 741 | * // renderParagraph() is the main function. |
| 742 | * |
| 743 | * // render a directional run with |
| 744 | * // (possibly) multiple style runs intersecting with it |
| 745 | * void renderDirectionalRun(const UChar *text, |
| 746 | * int32_t start, int32_t limit, |
| 747 | * UBiDiDirection direction, |
| 748 | * const StyleRun *styleRuns, int styleRunCount) { |
| 749 | * int i; |
| 750 | * |
| 751 | * // iterate over style runs |
| 752 | * if(direction==UBIDI_LTR) { |
| 753 | * int styleLimit; |
| 754 | * |
| 755 | * for(i=0; i<styleRunCount; ++i) { |
| 756 | * styleLimit=styleRuns[i].limit; |
| 757 | * if(start<styleLimit) { |
| 758 | * if(styleLimit>limit) { styleLimit=limit; } |
| 759 | * renderRun(text, start, styleLimit, |
| 760 | * direction, styleRuns[i].style); |
| 761 | * if(styleLimit==limit) { break; } |
| 762 | * start=styleLimit; |
| 763 | * } |
| 764 | * } |
| 765 | * } else { |
| 766 | * int styleStart; |
| 767 | * |
| 768 | * for(i=styleRunCount-1; i>=0; --i) { |
| 769 | * if(i>0) { |
| 770 | * styleStart=styleRuns[i-1].limit; |
| 771 | * } else { |
| 772 | * styleStart=0; |
| 773 | * } |
| 774 | * if(limit>=styleStart) { |
| 775 | * if(styleStart<start) { styleStart=start; } |
| 776 | * renderRun(text, styleStart, limit, |
| 777 | * direction, styleRuns[i].style); |
| 778 | * if(styleStart==start) { break; } |
| 779 | * limit=styleStart; |
| 780 | * } |
| 781 | * } |
| 782 | * } |
| 783 | * } |
| 784 | * |
| 785 | * // the line object represents text[start..limit-1] |
| 786 | * void renderLine(UBiDi *line, const UChar *text, |
| 787 | * int32_t start, int32_t limit, |
| 788 | * const StyleRun *styleRuns, int styleRunCount) { |
| 789 | * UBiDiDirection direction=ubidi_getDirection(line); |
| 790 | * if(direction!=UBIDI_MIXED) { |
| 791 | * // unidirectional |
| 792 | * if(styleRunCount<=1) { |
| 793 | * renderRun(text, start, limit, direction, styleRuns[0].style); |
| 794 | * } else { |
| 795 | * renderDirectionalRun(text, start, limit, |
| 796 | * direction, styleRuns, styleRunCount); |
| 797 | * } |
| 798 | * } else { |
| 799 | * // mixed-directional |
| 800 | * int32_t count, i, length; |
| 801 | * UBiDiLevel level; |
| 802 | * UErrorCode errorCode=U_ZERO_ERROR; |
| 803 | * count=ubidi_countRuns(line, &errorCode); |
| 804 | * if(U_SUCCESS(errorCode)) { |
| 805 | * if(styleRunCount<=1) { |
| 806 | * Style style=styleRuns[0].style; |
| 807 | * |
| 808 | * // iterate over directional runs |
| 809 | * for(i=0; i<count; ++i) { |
| 810 | * direction=ubidi_getVisualRun(line, i, &start, &length); |
| 811 | * renderRun(text, start, start+length, direction, style); |
| 812 | * } |
| 813 | * } else { |
| 814 | * int32_t j; |
| 815 | * |
| 816 | * // iterate over both directional and style runs |
| 817 | * for(i=0; i<count; ++i) { |
| 818 | * direction=ubidi_getVisualRun(line, i, &start, &length); |
| 819 | * renderDirectionalRun(text, start, start+length, |
| 820 | * direction, styleRuns, styleRunCount); |
| 821 | * } |
| 822 | * } |
| 823 | * } |
| 824 | * } |
| 825 | * } |
| 826 | * |
| 827 | * void renderParagraph(const UChar *text, int32_t length, |
| 828 | * UBiDiDirection textDirection, |
| 829 | * const StyleRun *styleRuns, int styleRunCount, |
| 830 | * int lineWidth, |
| 831 | * UErrorCode *pErrorCode) { |
| 832 | * UBiDi *para; |
| 833 | * |
| 834 | * if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) { |
| 835 | * return; |
| 836 | * } |
| 837 | * |
| 838 | * para=ubidi_openSized(length, 0, pErrorCode); |
| 839 | * if(para==NULL) { return; } |
| 840 | * |
| 841 | * ubidi_setPara(para, text, length, |
| 842 | * textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR, |
| 843 | * NULL, pErrorCode); |
| 844 | * if(U_SUCCESS(*pErrorCode)) { |
| 845 | * UBiDiLevel paraLevel=1&ubidi_getParaLevel(para); |
| 846 | * StyleRun styleRun={ length, styleNormal }; |
| 847 | * int width; |
| 848 | * |
| 849 | * if(styleRuns==NULL || styleRunCount<=0) { |
| 850 | * styleRunCount=1; |
| 851 | * styleRuns=&styleRun; |
| 852 | * } |
| 853 | * |
| 854 | * // assume styleRuns[styleRunCount-1].limit>=length |
| 855 | * |
| 856 | * width=getTextWidth(text, 0, length, styleRuns, styleRunCount); |
| 857 | * if(width<=lineWidth) { |
| 858 | * // everything fits onto one line |
| 859 | * |
| 860 | * // prepare rendering a new line from either left or right |
| 861 | * startLine(paraLevel, width); |
| 862 | * |
| 863 | * renderLine(para, text, 0, length, |
| 864 | * styleRuns, styleRunCount); |
| 865 | * } else { |
| 866 | * UBiDi *line; |
| 867 | * |
| 868 | * // we need to render several lines |
| 869 | * line=ubidi_openSized(length, 0, pErrorCode); |
| 870 | * if(line!=NULL) { |
| 871 | * int32_t start=0, limit; |
| 872 | * int styleRunStart=0, styleRunLimit; |
| 873 | * |
| 874 | * for(;;) { |
| 875 | * limit=length; |
| 876 | * styleRunLimit=styleRunCount; |
| 877 | * getLineBreak(text, start, &limit, para, |
| 878 | * styleRuns, styleRunStart, &styleRunLimit, |
| 879 | * &width); |
| 880 | * ubidi_setLine(para, start, limit, line, pErrorCode); |
| 881 | * if(U_SUCCESS(*pErrorCode)) { |
| 882 | * // prepare rendering a new line |
| 883 | * // from either left or right |
| 884 | * startLine(paraLevel, width); |
| 885 | * |
| 886 | * renderLine(line, text, start, limit, |
| 887 | * styleRuns+styleRunStart, |
| 888 | * styleRunLimit-styleRunStart); |
| 889 | * } |
| 890 | * if(limit==length) { break; } |
| 891 | * start=limit; |
| 892 | * styleRunStart=styleRunLimit-1; |
| 893 | * if(start>=styleRuns[styleRunStart].limit) { |
| 894 | * ++styleRunStart; |
| 895 | * } |
| 896 | * } |
| 897 | * |
| 898 | * ubidi_close(line); |
| 899 | * } |
| 900 | * } |
| 901 | * } |
| 902 | * |
| 903 | * ubidi_close(para); |
| 904 | * } |
| 905 | * </pre> |
| 906 | */ |
| 907 | BIDI_SAMPLE_CODE |
| 908 | /*@{*/ |
| 909 | /*@}*/ |
| 910 | |
| 911 | /*@}*/ |
| 912 | |
| 913 | #endif |