blob: 63d0e45cb7f3c535eb37f2b46ab809ddba9aab09 [file] [log] [blame]
Victor Chang73229502020-09-17 13:39:19 +01001// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5*
6* Copyright (C) 1999-2013, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9******************************************************************************
10* file name: ubidi.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 1999jul27
16* created by: Markus W. Scherer, updated by Matitiahu Allouche
17*/
18
19#ifndef UBIDI_H
20#define UBIDI_H
21
22#include "unicode/utypes.h"
23#include "unicode/uchar.h"
24
25#if U_SHOW_CPLUSPLUS_API
26#include "unicode/localpointer.h"
27#endif // U_SHOW_CPLUSPLUS_API
28
29/**
30 *\file
31 * \brief C API: Bidi algorithm
32 *
33 * <h2>Bidi algorithm for ICU</h2>
34 *
35 * This is an implementation of the Unicode Bidirectional Algorithm.
36 * The algorithm is defined in the
37 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>.<p>
38 *
39 * Note: Libraries that perform a bidirectional algorithm and
40 * reorder strings accordingly are sometimes called "Storage Layout Engines".
41 * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such
42 * "Storage Layout Engines".
43 *
44 * <h3>General remarks about the API:</h3>
45 *
46 * In functions with an error code parameter,
47 * the <code>pErrorCode</code> pointer must be valid
48 * and the value that it points to must not indicate a failure before
49 * the function call. Otherwise, the function returns immediately.
50 * After the function call, the value indicates success or failure.<p>
51 *
52 * The &quot;limit&quot; of a sequence of characters is the position just after their
53 * last character, i.e., one more than the position of that last character.<p>
54 *
55 * Some of the API functions provide access to &quot;runs&quot;.
56 * Such a &quot;run&quot; is defined as a sequence of characters
57 * that are at the same embedding level
58 * after performing the Bidi algorithm.<p>
59 *
60 * @author Markus W. Scherer
61 * @version 1.0
62 *
63 *
64 * <h4> Sample code for the ICU Bidi API </h4>
65 *
66 * <h5>Rendering a paragraph with the ICU Bidi API</h5>
67 *
68 * This is (hypothetical) sample code that illustrates
69 * how the ICU Bidi API could be used to render a paragraph of text.
70 * Rendering code depends highly on the graphics system,
71 * therefore this sample code must make a lot of assumptions,
72 * which may or may not match any existing graphics system's properties.
73 *
74 * <p>The basic assumptions are:</p>
75 * <ul>
76 * <li>Rendering is done from left to right on a horizontal line.</li>
77 * <li>A run of single-style, unidirectional text can be rendered at once.</li>
78 * <li>Such a run of text is passed to the graphics system with
79 * characters (code units) in logical order.</li>
80 * <li>The line-breaking algorithm is very complicated
81 * and Locale-dependent -
82 * and therefore its implementation is omitted from this sample code.</li>
83 * </ul>
84 *
85 * <pre>
86 * \code
87 *#include "unicode/ubidi.h"
88 *
89 *typedef enum {
90 * styleNormal=0, styleSelected=1,
91 * styleBold=2, styleItalics=4,
92 * styleSuper=8, styleSub=16
93 *} Style;
94 *
95 *typedef struct { int32_t limit; Style style; } StyleRun;
96 *
97 *int getTextWidth(const UChar *text, int32_t start, int32_t limit,
98 * const StyleRun *styleRuns, int styleRunCount);
99 *
100 * // set *pLimit and *pStyleRunLimit for a line
101 * // from text[start] and from styleRuns[styleRunStart]
102 * // using ubidi_getLogicalRun(para, ...)
103 *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit,
104 * UBiDi *para,
105 * const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
106 * int *pLineWidth);
107 *
108 * // render runs on a line sequentially, always from left to right
109 *
110 * // prepare rendering a new line
111 * void startLine(UBiDiDirection textDirection, int lineWidth);
112 *
113 * // render a run of text and advance to the right by the run width
114 * // the text[start..limit-1] is always in logical order
115 * void renderRun(const UChar *text, int32_t start, int32_t limit,
116 * UBiDiDirection textDirection, Style style);
117 *
118 * // We could compute a cross-product
119 * // from the style runs with the directional runs
120 * // and then reorder it.
121 * // Instead, here we iterate over each run type
122 * // and render the intersections -
123 * // with shortcuts in simple (and common) cases.
124 * // renderParagraph() is the main function.
125 *
126 * // render a directional run with
127 * // (possibly) multiple style runs intersecting with it
128 * void renderDirectionalRun(const UChar *text,
129 * int32_t start, int32_t limit,
130 * UBiDiDirection direction,
131 * const StyleRun *styleRuns, int styleRunCount) {
132 * int i;
133 *
134 * // iterate over style runs
135 * if(direction==UBIDI_LTR) {
136 * int styleLimit;
137 *
138 * for(i=0; i<styleRunCount; ++i) {
139 * styleLimit=styleRuns[i].limit;
140 * if(start<styleLimit) {
141 * if(styleLimit>limit) { styleLimit=limit; }
142 * renderRun(text, start, styleLimit,
143 * direction, styleRuns[i].style);
144 * if(styleLimit==limit) { break; }
145 * start=styleLimit;
146 * }
147 * }
148 * } else {
149 * int styleStart;
150 *
151 * for(i=styleRunCount-1; i>=0; --i) {
152 * if(i>0) {
153 * styleStart=styleRuns[i-1].limit;
154 * } else {
155 * styleStart=0;
156 * }
157 * if(limit>=styleStart) {
158 * if(styleStart<start) { styleStart=start; }
159 * renderRun(text, styleStart, limit,
160 * direction, styleRuns[i].style);
161 * if(styleStart==start) { break; }
162 * limit=styleStart;
163 * }
164 * }
165 * }
166 * }
167 *
168 * // the line object represents text[start..limit-1]
169 * void renderLine(UBiDi *line, const UChar *text,
170 * int32_t start, int32_t limit,
171 * const StyleRun *styleRuns, int styleRunCount) {
172 * UBiDiDirection direction=ubidi_getDirection(line);
173 * if(direction!=UBIDI_MIXED) {
174 * // unidirectional
175 * if(styleRunCount<=1) {
176 * renderRun(text, start, limit, direction, styleRuns[0].style);
177 * } else {
178 * renderDirectionalRun(text, start, limit,
179 * direction, styleRuns, styleRunCount);
180 * }
181 * } else {
182 * // mixed-directional
183 * int32_t count, i, length;
184 * UBiDiLevel level;
185 *
186 * count=ubidi_countRuns(line, pErrorCode);
187 * if(U_SUCCESS(*pErrorCode)) {
188 * if(styleRunCount<=1) {
189 * Style style=styleRuns[0].style;
190 *
191 * // iterate over directional runs
192 * for(i=0; i<count; ++i) {
193 * direction=ubidi_getVisualRun(line, i, &start, &length);
194 * renderRun(text, start, start+length, direction, style);
195 * }
196 * } else {
197 * int32_t j;
198 *
199 * // iterate over both directional and style runs
200 * for(i=0; i<count; ++i) {
201 * direction=ubidi_getVisualRun(line, i, &start, &length);
202 * renderDirectionalRun(text, start, start+length,
203 * direction, styleRuns, styleRunCount);
204 * }
205 * }
206 * }
207 * }
208 * }
209 *
210 *void renderParagraph(const UChar *text, int32_t length,
211 * UBiDiDirection textDirection,
212 * const StyleRun *styleRuns, int styleRunCount,
213 * int lineWidth,
214 * UErrorCode *pErrorCode) {
215 * UBiDi *para;
216 *
217 * if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) {
218 * return;
219 * }
220 *
221 * para=ubidi_openSized(length, 0, pErrorCode);
222 * if(para==NULL) { return; }
223 *
224 * ubidi_setPara(para, text, length,
225 * textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
226 * NULL, pErrorCode);
227 * if(U_SUCCESS(*pErrorCode)) {
228 * UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
229 * StyleRun styleRun={ length, styleNormal };
230 * int width;
231 *
232 * if(styleRuns==NULL || styleRunCount<=0) {
233 * styleRunCount=1;
234 * styleRuns=&styleRun;
235 * }
236 *
237 * // assume styleRuns[styleRunCount-1].limit>=length
238 *
239 * width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
240 * if(width<=lineWidth) {
241 * // everything fits onto one line
242 *
243 * // prepare rendering a new line from either left or right
244 * startLine(paraLevel, width);
245 *
246 * renderLine(para, text, 0, length,
247 * styleRuns, styleRunCount);
248 * } else {
249 * UBiDi *line;
250 *
251 * // we need to render several lines
252 * line=ubidi_openSized(length, 0, pErrorCode);
253 * if(line!=NULL) {
254 * int32_t start=0, limit;
255 * int styleRunStart=0, styleRunLimit;
256 *
257 * for(;;) {
258 * limit=length;
259 * styleRunLimit=styleRunCount;
260 * getLineBreak(text, start, &limit, para,
261 * styleRuns, styleRunStart, &styleRunLimit,
262 * &width);
263 * ubidi_setLine(para, start, limit, line, pErrorCode);
264 * if(U_SUCCESS(*pErrorCode)) {
265 * // prepare rendering a new line
266 * // from either left or right
267 * startLine(paraLevel, width);
268 *
269 * renderLine(line, text, start, limit,
270 * styleRuns+styleRunStart,
271 * styleRunLimit-styleRunStart);
272 * }
273 * if(limit==length) { break; }
274 * start=limit;
275 * styleRunStart=styleRunLimit-1;
276 * if(start>=styleRuns[styleRunStart].limit) {
277 * ++styleRunStart;
278 * }
279 * }
280 *
281 * ubidi_close(line);
282 * }
283 * }
284 * }
285 *
286 * ubidi_close(para);
287 *}
288 *\endcode
289 * </pre>
290 */
291
292/*DOCXX_TAG*/
293/*@{*/
294
295/**
296 * UBiDiLevel is the type of the level values in this
297 * Bidi implementation.
298 * It holds an embedding level and indicates the visual direction
299 * by its bit&nbsp;0 (even/odd value).<p>
300 *
301 * It can also hold non-level values for the
302 * <code>paraLevel</code> and <code>embeddingLevels</code>
303 * arguments of <code>ubidi_setPara()</code>; there:
304 * <ul>
305 * <li>bit&nbsp;7 of an <code>embeddingLevels[]</code>
306 * value indicates whether the using application is
307 * specifying the level of a character to <i>override</i> whatever the
308 * Bidi implementation would resolve it to.</li>
309 * <li><code>paraLevel</code> can be set to the
310 * pseudo-level values <code>UBIDI_DEFAULT_LTR</code>
311 * and <code>UBIDI_DEFAULT_RTL</code>.</li>
312 * </ul>
313 *
314 * @see ubidi_setPara
315 *
316 * <p>The related constants are not real, valid level values.
317 * <code>UBIDI_DEFAULT_XXX</code> can be used to specify
318 * a default for the paragraph level for
319 * when the <code>ubidi_setPara()</code> function
320 * shall determine it but there is no
321 * strongly typed character in the input.<p>
322 *
323 * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even
324 * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd,
325 * just like with normal LTR and RTL level values -
326 * these special values are designed that way. Also, the implementation
327 * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
328 *
329 * Note: The numeric values of the related constants will not change:
330 * They are tied to the use of 7-bit byte values (plus the override bit)
331 * and of the UBiDiLevel=uint8_t data type in this API.
332 *
333 * @see UBIDI_DEFAULT_LTR
334 * @see UBIDI_DEFAULT_RTL
335 * @see UBIDI_LEVEL_OVERRIDE
336 * @see UBIDI_MAX_EXPLICIT_LEVEL
337 * @stable ICU 2.0
338 */
339typedef uint8_t UBiDiLevel;
340
341/** Paragraph level setting.<p>
342 *
343 * Constant indicating that the base direction depends on the first strong
344 * directional character in the text according to the Unicode Bidirectional
345 * Algorithm. If no strong directional character is present,
346 * then set the paragraph level to 0 (left-to-right).<p>
347 *
348 * If this value is used in conjunction with reordering modes
349 * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
350 * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
351 * is assumed to be visual LTR, and the text after reordering is required
352 * to be the corresponding logical string with appropriate contextual
353 * direction. The direction of the result string will be RTL if either
354 * the rightmost or leftmost strong character of the source text is RTL
355 * or Arabic Letter, the direction will be LTR otherwise.<p>
356 *
357 * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
358 * be added at the beginning of the result string to ensure round trip
359 * (that the result string, when reordered back to visual, will produce
360 * the original source text).
361 * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
362 * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
363 * @stable ICU 2.0
364 */
365#define UBIDI_DEFAULT_LTR 0xfe
366
367/** Paragraph level setting.<p>
368 *
369 * Constant indicating that the base direction depends on the first strong
370 * directional character in the text according to the Unicode Bidirectional
371 * Algorithm. If no strong directional character is present,
372 * then set the paragraph level to 1 (right-to-left).<p>
373 *
374 * If this value is used in conjunction with reordering modes
375 * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
376 * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
377 * is assumed to be visual LTR, and the text after reordering is required
378 * to be the corresponding logical string with appropriate contextual
379 * direction. The direction of the result string will be RTL if either
380 * the rightmost or leftmost strong character of the source text is RTL
381 * or Arabic Letter, or if the text contains no strong character;
382 * the direction will be LTR otherwise.<p>
383 *
384 * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
385 * be added at the beginning of the result string to ensure round trip
386 * (that the result string, when reordered back to visual, will produce
387 * the original source text).
388 * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
389 * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
390 * @stable ICU 2.0
391 */
392#define UBIDI_DEFAULT_RTL 0xff
393
394/**
395 * Maximum explicit embedding level.
396 * Same as the max_depth value in the
397 * <a href="http://www.unicode.org/reports/tr9/#BD2">Unicode Bidirectional Algorithm</a>.
398 * (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
399 * @stable ICU 2.0
400 */
401#define UBIDI_MAX_EXPLICIT_LEVEL 125
402
403/** Bit flag for level input.
404 * Overrides directional properties.
405 * @stable ICU 2.0
406 */
407#define UBIDI_LEVEL_OVERRIDE 0x80
408
409/**
410 * Special value which can be returned by the mapping functions when a logical
411 * index has no corresponding visual index or vice-versa. This may happen
412 * for the logical-to-visual mapping of a Bidi control when option
413 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is specified. This can also happen
414 * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted
415 * by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
416 * @see ubidi_getVisualIndex
417 * @see ubidi_getVisualMap
418 * @see ubidi_getLogicalIndex
419 * @see ubidi_getLogicalMap
420 * @stable ICU 3.6
421 */
422#define UBIDI_MAP_NOWHERE (-1)
423
424/**
425 * <code>UBiDiDirection</code> values indicate the text direction.
426 * @stable ICU 2.0
427 */
428enum UBiDiDirection {
429 /** Left-to-right text. This is a 0 value.
430 * <ul>
431 * <li>As return value for <code>ubidi_getDirection()</code>, it means
432 * that the source string contains no right-to-left characters, or
433 * that the source string is empty and the paragraph level is even.</li>
434 * <li> As return value for <code>ubidi_getBaseDirection()</code>, it
435 * means that the first strong character of the source string has
436 * a left-to-right direction.</li>
437 * </ul>
438 * @stable ICU 2.0
439 */
440 UBIDI_LTR,
441 /** Right-to-left text. This is a 1 value.
442 * <ul>
443 * <li>As return value for <code>ubidi_getDirection()</code>, it means
444 * that the source string contains no left-to-right characters, or
445 * that the source string is empty and the paragraph level is odd.</li>
446 * <li> As return value for <code>ubidi_getBaseDirection()</code>, it
447 * means that the first strong character of the source string has
448 * a right-to-left direction.</li>
449 * </ul>
450 * @stable ICU 2.0
451 */
452 UBIDI_RTL,
453 /** Mixed-directional text. This is a 2 value.
454 * <p>As return value for <code>ubidi_getDirection()</code>, it means
455 * that the source string contains both left-to-right and
456 * right-to-left characters.
457 * @stable ICU 2.0
458 */
459 UBIDI_MIXED,
460 /** No strongly directional text. This is a 3 value.
461 * <p>As return value for <code>ubidi_getBaseDirection()</code>, it means
462 * that the source string is missing or empty, or contains neither left-to-right
463 * nor right-to-left characters.
464 * @stable ICU 4.6
465 */
466 UBIDI_NEUTRAL
467};
468
469/** @stable ICU 2.0 */
470typedef enum UBiDiDirection UBiDiDirection;
471
472/**
473 * Forward declaration of the <code>UBiDi</code> structure for the declaration of
474 * the API functions. Its fields are implementation-specific.<p>
475 * This structure holds information about a paragraph (or multiple paragraphs)
476 * of text with Bidi-algorithm-related details, or about one line of
477 * such a paragraph.<p>
478 * Reordering can be done on a line, or on one or more paragraphs which are
479 * then interpreted each as one single line.
480 * @stable ICU 2.0
481 */
482struct UBiDi;
483
484/** @stable ICU 2.0 */
485typedef struct UBiDi UBiDi;
486
487/**
488 * Allocate a <code>UBiDi</code> structure.
489 * Such an object is initially empty. It is assigned
490 * the Bidi properties of a piece of text containing one or more paragraphs
491 * by <code>ubidi_setPara()</code>
492 * or the Bidi properties of a line within a paragraph by
493 * <code>ubidi_setLine()</code>.<p>
494 * This object can be reused for as long as it is not deallocated
495 * by calling <code>ubidi_close()</code>.<p>
496 * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate
497 * additional memory for internal structures as necessary.
498 *
499 * @return An empty <code>UBiDi</code> object.
500 * @stable ICU 2.0
501 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000502U_CAPI UBiDi * U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100503ubidi_open(void);
504
505/**
506 * Allocate a <code>UBiDi</code> structure with preallocated memory
507 * for internal structures.
508 * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code>
509 * with no arguments, but it also preallocates memory for internal structures
510 * according to the sizings supplied by the caller.<p>
511 * Subsequent functions will not allocate any more memory, and are thus
512 * guaranteed not to fail because of lack of memory.<p>
513 * The preallocation can be limited to some of the internal memory
514 * by setting some values to 0 here. That means that if, e.g.,
515 * <code>maxRunCount</code> cannot be reasonably predetermined and should not
516 * be set to <code>maxLength</code> (the only failproof value) to avoid
517 * wasting memory, then <code>maxRunCount</code> could be set to 0 here
518 * and the internal structures that are associated with it will be allocated
519 * on demand, just like with <code>ubidi_open()</code>.
520 *
521 * @param maxLength is the maximum text or line length that internal memory
522 * will be preallocated for. An attempt to associate this object with a
523 * longer text will fail, unless this value is 0, which leaves the allocation
524 * up to the implementation.
525 *
526 * @param maxRunCount is the maximum anticipated number of same-level runs
527 * that internal memory will be preallocated for. An attempt to access
528 * visual runs on an object that was not preallocated for as many runs
529 * as the text was actually resolved to will fail,
530 * unless this value is 0, which leaves the allocation up to the implementation.<br><br>
531 * The number of runs depends on the actual text and may be anywhere between
532 * 1 and <code>maxLength</code>. It is typically small.
533 *
534 * @param pErrorCode must be a valid pointer to an error code value.
535 *
536 * @return An empty <code>UBiDi</code> object with preallocated memory.
537 * @stable ICU 2.0
538 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000539U_CAPI UBiDi * U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100540ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode);
541
542/**
543 * <code>ubidi_close()</code> must be called to free the memory
544 * associated with a UBiDi object.<p>
545 *
546 * <strong>Important: </strong>
547 * A parent <code>UBiDi</code> object must not be destroyed or reused if
548 * it still has children.
549 * If a <code>UBiDi</code> object has become the <i>child</i>
550 * of another one (its <i>parent</i>) by calling
551 * <code>ubidi_setLine()</code>, then the child object must
552 * be destroyed (closed) or reused (by calling
553 * <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>)
554 * before the parent object.
555 *
556 * @param pBiDi is a <code>UBiDi</code> object.
557 *
558 * @see ubidi_setPara
559 * @see ubidi_setLine
560 * @stable ICU 2.0
561 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000562U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100563ubidi_close(UBiDi *pBiDi);
564
565#if U_SHOW_CPLUSPLUS_API
566
567U_NAMESPACE_BEGIN
568
569/**
570 * \class LocalUBiDiPointer
571 * "Smart pointer" class, closes a UBiDi via ubidi_close().
572 * For most methods see the LocalPointerBase base class.
573 *
574 * @see LocalPointerBase
575 * @see LocalPointer
576 * @stable ICU 4.4
577 */
578U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiPointer, UBiDi, ubidi_close);
579
580U_NAMESPACE_END
581
582#endif
583
584/**
585 * Modify the operation of the Bidi algorithm such that it
586 * approximates an "inverse Bidi" algorithm. This function
587 * must be called before <code>ubidi_setPara()</code>.
588 *
589 * <p>The normal operation of the Bidi algorithm as described
590 * in the Unicode Technical Report is to take text stored in logical
591 * (keyboard, typing) order and to determine the reordering of it for visual
592 * rendering.
593 * Some legacy systems store text in visual order, and for operations
594 * with standard, Unicode-based algorithms, the text needs to be transformed
595 * to logical order. This is effectively the inverse algorithm of the
596 * described Bidi algorithm. Note that there is no standard algorithm for
597 * this "inverse Bidi" and that the current implementation provides only an
598 * approximation of "inverse Bidi".</p>
599 *
Victor Changce4bf3c2021-01-19 16:34:24 +0000600 * <p>With <code>isInverse</code> set to <code>true</code>,
Victor Chang73229502020-09-17 13:39:19 +0100601 * this function changes the behavior of some of the subsequent functions
602 * in a way that they can be used for the inverse Bidi algorithm.
603 * Specifically, runs of text with numeric characters will be treated in a
604 * special way and may need to be surrounded with LRM characters when they are
605 * written in reordered sequence.</p>
606 *
607 * <p>Output runs should be retrieved using <code>ubidi_getVisualRun()</code>.
608 * Since the actual input for "inverse Bidi" is visually ordered text and
609 * <code>ubidi_getVisualRun()</code> gets the reordered runs, these are actually
610 * the runs of the logically ordered output.</p>
611 *
612 * <p>Calling this function with argument <code>isInverse</code> set to
Victor Changce4bf3c2021-01-19 16:34:24 +0000613 * <code>true</code> is equivalent to calling
Victor Chang73229502020-09-17 13:39:19 +0100614 * <code>ubidi_setReorderingMode</code> with argument
615 * <code>reorderingMode</code>
616 * set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
617 * Calling this function with argument <code>isInverse</code> set to
Victor Changce4bf3c2021-01-19 16:34:24 +0000618 * <code>false</code> is equivalent to calling
Victor Chang73229502020-09-17 13:39:19 +0100619 * <code>ubidi_setReorderingMode</code> with argument
620 * <code>reorderingMode</code>
621 * set to <code>#UBIDI_REORDER_DEFAULT</code>.
622 *
623 * @param pBiDi is a <code>UBiDi</code> object.
624 *
625 * @param isInverse specifies "forward" or "inverse" Bidi operation.
626 *
627 * @see ubidi_setPara
628 * @see ubidi_writeReordered
629 * @see ubidi_setReorderingMode
630 * @stable ICU 2.0
631 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000632U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100633ubidi_setInverse(UBiDi *pBiDi, UBool isInverse);
634
635/**
636 * Is this Bidi object set to perform the inverse Bidi algorithm?
637 * <p>Note: calling this function after setting the reordering mode with
Victor Changce4bf3c2021-01-19 16:34:24 +0000638 * <code>ubidi_setReorderingMode</code> will return <code>true</code> if the
Victor Chang73229502020-09-17 13:39:19 +0100639 * reordering mode was set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>,
Victor Changce4bf3c2021-01-19 16:34:24 +0000640 * <code>false</code> for all other values.</p>
Victor Chang73229502020-09-17 13:39:19 +0100641 *
642 * @param pBiDi is a <code>UBiDi</code> object.
Victor Changce4bf3c2021-01-19 16:34:24 +0000643 * @return true if the Bidi object is set to perform the inverse Bidi algorithm
Victor Chang73229502020-09-17 13:39:19 +0100644 * by handling numbers as L.
645 *
646 * @see ubidi_setInverse
647 * @see ubidi_setReorderingMode
648 * @stable ICU 2.0
649 */
650
Victor Changce4bf3c2021-01-19 16:34:24 +0000651U_CAPI UBool U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100652ubidi_isInverse(UBiDi *pBiDi);
653
654/**
655 * Specify whether block separators must be allocated level zero,
656 * so that successive paragraphs will progress from left to right.
657 * This function must be called before <code>ubidi_setPara()</code>.
658 * Paragraph separators (B) may appear in the text. Setting them to level zero
659 * means that all paragraph separators (including one possibly appearing
660 * in the last text position) are kept in the reordered text after the text
661 * that they follow in the source text.
662 * When this feature is not enabled, a paragraph separator at the last
663 * position of the text before reordering will go to the first position
664 * of the reordered text when the paragraph level is odd.
665 *
666 * @param pBiDi is a <code>UBiDi</code> object.
667 *
668 * @param orderParagraphsLTR specifies whether paragraph separators (B) must
669 * receive level 0, so that successive paragraphs progress from left to right.
670 *
671 * @see ubidi_setPara
672 * @stable ICU 3.4
673 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000674U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100675ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR);
676
677/**
678 * Is this Bidi object set to allocate level 0 to block separators so that
679 * successive paragraphs progress from left to right?
680 *
681 * @param pBiDi is a <code>UBiDi</code> object.
Victor Changce4bf3c2021-01-19 16:34:24 +0000682 * @return true if the Bidi object is set to allocate level 0 to block
Victor Chang73229502020-09-17 13:39:19 +0100683 * separators.
684 *
685 * @see ubidi_orderParagraphsLTR
686 * @stable ICU 3.4
687 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000688U_CAPI UBool U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100689ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);
690
691/**
692 * <code>UBiDiReorderingMode</code> values indicate which variant of the Bidi
693 * algorithm to use.
694 *
695 * @see ubidi_setReorderingMode
696 * @stable ICU 3.6
697 */
698typedef enum UBiDiReorderingMode {
699 /** Regular Logical to Visual Bidi algorithm according to Unicode.
700 * This is a 0 value.
701 * @stable ICU 3.6 */
702 UBIDI_REORDER_DEFAULT = 0,
703 /** Logical to Visual algorithm which handles numbers in a way which
704 * mimics the behavior of Windows XP.
 * This is a 1 value.
705 * @stable ICU 3.6 */
706 UBIDI_REORDER_NUMBERS_SPECIAL,
707 /** Logical to Visual algorithm grouping numbers with adjacent R characters
708 * (reversible algorithm).
 * This is a 2 value.
709 * @stable ICU 3.6 */
710 UBIDI_REORDER_GROUP_NUMBERS_WITH_R,
711 /** Reorder runs only to transform a Logical LTR string to the Logical RTL
712 * string with the same display, or vice-versa.<br>
713 * If this mode is set together with option
714 * <code>#UBIDI_OPTION_INSERT_MARKS</code>, some Bidi controls in the source
715 * text may be removed and other controls may be added to produce the
716 * minimum combination which has the required display.
 * This is a 3 value.
717 * @stable ICU 3.6 */
718 UBIDI_REORDER_RUNS_ONLY,
719 /** Visual to Logical algorithm which handles numbers like L
Victor Changce4bf3c2021-01-19 16:34:24 +0000720 * (same algorithm as selected by <code>ubidi_setInverse(true)</code>).
 * This is a 4 value.
Victor Chang73229502020-09-17 13:39:19 +0100721 * @see ubidi_setInverse
722 * @stable ICU 3.6 */
723 UBIDI_REORDER_INVERSE_NUMBERS_AS_L,
724 /** Visual to Logical algorithm equivalent to the regular Logical to Visual
725 * algorithm.
 * This is a 5 value.
726 * @stable ICU 3.6 */
727 UBIDI_REORDER_INVERSE_LIKE_DIRECT,
728 /** Inverse Bidi (Visual to Logical) algorithm for the
729 * <code>UBIDI_REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
 * This is a 6 value.
730 * @stable ICU 3.6 */
731 UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL,
732#ifndef U_HIDE_DEPRECATED_API
733 /**
734 * Number of values for reordering mode.
735 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
736 */
737 UBIDI_REORDER_COUNT
738#endif // U_HIDE_DEPRECATED_API
739} UBiDiReorderingMode;
740
741/**
742 * Modify the operation of the Bidi algorithm such that it implements some
743 * variant to the basic Bidi algorithm or approximates an "inverse Bidi"
744 * algorithm, depending on different values of the "reordering mode".
745 * This function must be called before <code>ubidi_setPara()</code>, and stays
746 * in effect until called again with a different argument.
747 *
748 * <p>The normal operation of the Bidi algorithm as described
749 * in the Unicode Standard Annex #9 is to take text stored in logical
750 * (keyboard, typing) order and to determine how to reorder it for visual
751 * rendering.</p>
752 *
753 * <p>With the reordering mode set to a value other than
754 * <code>#UBIDI_REORDER_DEFAULT</code>, this function changes the behavior of
755 * some of the subsequent functions in a way such that they implement an
756 * inverse Bidi algorithm or some other algorithm variants.</p>
757 *
758 * <p>Some legacy systems store text in visual order, and for operations
759 * with standard, Unicode-based algorithms, the text needs to be transformed
760 * into logical order. This is effectively the inverse algorithm of the
761 * described Bidi algorithm. Note that there is no standard algorithm for
762 * this "inverse Bidi", so a number of variants are implemented here.</p>
763 *
764 * <p>In other cases, it may be desirable to emulate some variant of the
765 * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
766 * Logical to Logical transformation.</p>
767 *
768 * <ul>
769 * <li>When the reordering mode is set to <code>#UBIDI_REORDER_DEFAULT</code>,
770 * the standard Bidi Logical to Visual algorithm is applied.</li>
771 *
772 * <li>When the reordering mode is set to
773 * <code>#UBIDI_REORDER_NUMBERS_SPECIAL</code>,
774 * the algorithm used to perform Bidi transformations when calling
775 * <code>ubidi_setPara</code> should approximate the algorithm used in
776 * Microsoft Windows XP rather than strictly conform to the Unicode Bidi
777 * algorithm.
778 * <br>
779 * The differences between the basic algorithm and the algorithm addressed
780 * by this option are as follows:
781 * <ul>
782 * <li>Within text at an even embedding level, the sequence "123AB"
783 * (where AB represent R or AL letters) is transformed to "123BA" by the
784 * Unicode algorithm and to "BA123" by the Windows algorithm.</li>
785 * <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just
786 * like regular numbers (EN).</li>
787 * </ul></li>
788 *
789 * <li>When the reordering mode is set to
790 * <code>#UBIDI_REORDER_GROUP_NUMBERS_WITH_R</code>,
791 * numbers located between LTR text and RTL text are associated with the RTL
792 * text. For instance, an LTR paragraph with content "abc 123 DEF" (where
793 * upper case letters represent RTL characters) will be transformed to
794 * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
795 * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
796 * This makes the algorithm reversible and makes it useful when round trip
797 * (from visual to logical and back to visual) must be achieved without
798 * adding LRM characters. However, this is a variation from the standard
799 * Unicode Bidi algorithm.<br>
800 * The source text should not contain Bidi control characters other than LRM
801 * or RLM.</li>
802 *
803 * <li>When the reordering mode is set to
804 * <code>#UBIDI_REORDER_RUNS_ONLY</code>,
805 * a "Logical to Logical" transformation must be performed:
806 * <ul>
807 * <li>If the default text level of the source text (argument <code>paraLevel</code>
808 * in <code>ubidi_setPara</code>) is even, the source text will be handled as
809 * LTR logical text and will be transformed to the RTL logical text which has
810 * the same LTR visual display.</li>
811 * <li>If the default level of the source text is odd, the source text
812 * will be handled as RTL logical text and will be transformed to the
813 * LTR logical text which has the same LTR visual display.</li>
814 * </ul>
815 * This mode may be needed when logical text which is basically Arabic or
816 * Hebrew, with possible included numbers or phrases in English, has to be
817 * displayed as if it had an even embedding level (this can happen if the
818 * displaying application treats all text as if it was basically LTR).
819 * <br>
820 * This mode may also be needed in the reverse case, when logical text which is
821 * basically English, with possible included phrases in Arabic or Hebrew, has to
822 * be displayed as if it had an odd embedding level.
823 * <br>
824 * Both cases could be handled by adding LRE or RLE at the head of the text,
825 * if the display subsystem supports these formatting controls. If it does not,
826 * the problem may be handled by transforming the source text in this mode
827 * before displaying it, so that it will be displayed properly.<br>
828 * The source text should not contain Bidi control characters other than LRM
829 * or RLM.</li>
830 *
831 * <li>When the reordering mode is set to
832 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi" algorithm
833 * is applied.
834 * Runs of text with numeric characters will be treated like LTR letters and
835 * may need to be surrounded with LRM characters when they are written in
836 * reordered sequence (the option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> can
837 * be used with function <code>ubidi_writeReordered</code> to this end). This
838 * mode is equivalent to calling <code>ubidi_setInverse()</code> with
Victor Changce4bf3c2021-01-19 16:34:24 +0000839 * argument <code>isInverse</code> set to <code>true</code>.</li>
Victor Chang73229502020-09-17 13:39:19 +0100840 *
841 * <li>When the reordering mode is set to
842 * <code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to Visual
843 * Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm.
844 * This mode is similar to mode <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>
845 * but is closer to the regular Bidi algorithm.
846 * <br>
847 * For example, an LTR paragraph with the content "FED 123 456 CBA" (where
848 * upper case represents RTL characters) will be transformed to
849 * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
850 * with mode <code>UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
851 * When used in conjunction with option
852 * <code>#UBIDI_OPTION_INSERT_MARKS</code>, this mode generally
853 * adds Bidi marks to the output significantly more sparingly than mode
854 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> with option
855 * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to
856 * <code>ubidi_writeReordered</code>.</li>
857 *
858 * <li>When the reordering mode is set to
859 * <code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual
860 * Bidi algorithm used in Windows XP is used as an approximation of an "inverse Bidi" algorithm.
861 * <br>
862 * For example, an LTR paragraph with the content "abc FED123" (where
863 * upper case represents RTL characters) will be transformed to "abc 123DEF".</li>
864 * </ul>
865 *
866 * <p>In all the reordering modes specifying an "inverse Bidi" algorithm
867 * (i.e. those with a name starting with <code>UBIDI_REORDER_INVERSE</code>),
868 * output runs should be retrieved using
869 * <code>ubidi_getVisualRun()</code>, and the output text with
870 * <code>ubidi_writeReordered()</code>. The caller should keep in mind that in
871 * "inverse Bidi" modes the input is actually visually ordered text, and the
872 * reordered output returned by <code>ubidi_getVisualRun()</code> or
873 * <code>ubidi_writeReordered()</code> is actually a set of runs or a character
874 * string of logically ordered output.<br>
875 * For all the "inverse Bidi" modes, the source text should not contain
876 * Bidi control characters other than LRM or RLM.</p>
877 *
878 * <p>Note that option <code>#UBIDI_OUTPUT_REVERSE</code> of
879 * <code>ubidi_writeReordered</code> has no useful meaning and should not be
880 * used in conjunction with any value of the reordering mode specifying
881 * "inverse Bidi" or with value <code>UBIDI_REORDER_RUNS_ONLY</code>.</p>
882 *
883 * @param pBiDi is a <code>UBiDi</code> object.
884 * @param reorderingMode specifies the required variant of the Bidi algorithm.
885 *
886 * @see UBiDiReorderingMode
887 * @see ubidi_setInverse
888 * @see ubidi_setPara
889 * @see ubidi_writeReordered
890 * @stable ICU 3.6
891 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000892U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100893ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode);
894
895/**
896 * What is the requested reordering mode for a given Bidi object?
897 *
898 * @param pBiDi is a <code>UBiDi</code> object.
899 * @return the current reordering mode of the Bidi object
900 * @see ubidi_setReorderingMode
901 * @stable ICU 3.6
902 */
Victor Changce4bf3c2021-01-19 16:34:24 +0000903U_CAPI UBiDiReorderingMode U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +0100904ubidi_getReorderingMode(UBiDi *pBiDi);
905
906/**
907 * <code>UBiDiReorderingOption</code> values indicate which options are
908 * specified to affect the Bidi algorithm.
909 *
910 * @see ubidi_setReorderingOptions
911 * @stable ICU 3.6
912 */
913typedef enum UBiDiReorderingOption {
914 /**
915 * option value for <code>ubidi_setReorderingOptions</code>:
916 * disable all the options which can be set with this function
917 * @see ubidi_setReorderingOptions
918 * @stable ICU 3.6
919 */
920 UBIDI_OPTION_DEFAULT = 0,
921
922 /**
923 * option bit for <code>ubidi_setReorderingOptions</code>:
924 * insert Bidi marks (LRM or RLM) when needed to ensure correct result of
925 * a reordering to a Logical order
926 *
927 * <p>This option must be set or reset before calling
928 * <code>ubidi_setPara</code>.</p>
929 *
930 * <p>This option is significant only with reordering modes which generate
931 * a result with Logical order, specifically:</p>
932 * <ul>
933 * <li><code>#UBIDI_REORDER_RUNS_ONLY</code></li>
934 * <li><code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code></li>
935 * <li><code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code></li>
936 * <li><code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
937 * </ul>
938 *
939 * <p>If this option is set in conjunction with reordering mode
940 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
Victor Changce4bf3c2021-01-19 16:34:24 +0000941 * <code>ubidi_setInverse(true)</code>, it implies
Victor Chang73229502020-09-17 13:39:19 +0100942 * option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>
943 * in calls to function <code>ubidi_writeReordered()</code>.</p>
944 *
945 * <p>For other reordering modes, a minimum number of LRM or RLM characters
946 * will be added to the source text after reordering it so as to ensure
947 * round trip, i.e. when applying the inverse reordering mode on the
948 * resulting logical text with removal of Bidi marks
949 * (option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> set before calling
950 * <code>ubidi_setPara()</code> or option <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
951 * in <code>ubidi_writeReordered</code>), the result will be identical to the
952 * source text in the first transformation.</p>
953 *
954 * <p>This option will be ignored if specified together with option
955 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. It inhibits option
956 * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to function
957 * <code>ubidi_writeReordered()</code> and it implies option
958 * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to function
959 * <code>ubidi_writeReordered()</code> if the reordering mode is
960 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.</p>
961 *
962 * @see ubidi_setReorderingMode
963 * @see ubidi_setReorderingOptions
964 * @stable ICU 3.6
965 */
966 UBIDI_OPTION_INSERT_MARKS = 1,
967
968 /**
969 * option bit for <code>ubidi_setReorderingOptions</code>:
970 * remove Bidi control characters
971 *
972 * <p>This option must be set or reset before calling
973 * <code>ubidi_setPara</code>.</p>
974 *
975 * <p>This option nullifies option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
976 * It inhibits option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls
977 * to function <code>ubidi_writeReordered()</code> and it implies option
978 * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to that function.</p>
979 *
980 * @see ubidi_setReorderingMode
981 * @see ubidi_setReorderingOptions
982 * @stable ICU 3.6
983 */
984 UBIDI_OPTION_REMOVE_CONTROLS = 2,
985
986 /**
987 * option bit for <code>ubidi_setReorderingOptions</code>:
988 * process the output as part of a stream to be continued
989 *
990 * <p>This option must be set or reset before calling
991 * <code>ubidi_setPara</code>.</p>
992 *
993 * <p>This option specifies that the caller is interested in processing a large
994 * text object in parts.
995 * The results of the successive calls are expected to be concatenated by the
996 * caller. Only the call for the last part will have this option bit off.</p>
997 *
998 * <p>When this option bit is on, <code>ubidi_setPara()</code> may process
999 * less than the full source text in order to truncate the text at a meaningful
1000 * boundary. The caller should call <code>ubidi_getProcessedLength()</code>
1001 * immediately after calling <code>ubidi_setPara()</code> in order to
1002 * determine how much of the source text has been processed.
1003 * Source text beyond that length should be resubmitted in following calls to
1004 * <code>ubidi_setPara</code>. The processed length may be less than
1005 * the length of the source text if a character preceding the last character of
1006 * the source text constitutes a reasonable boundary (like a block separator)
1007 * for text to be continued.<br>
1008 * If the last character of the source text constitutes a reasonable
1009 * boundary, the whole text will be processed at once.<br>
1010 * If no such reasonable boundary exists anywhere in the source text,
1011 * the processed length will be zero.<br>
1012 * The caller should check for such an occurrence and do one of the following:
1013 * <ul><li>submit a larger amount of text with a better chance to include
1014 * a reasonable boundary.</li>
1015 * <li>resubmit the same text after turning off option
1016 * <code>UBIDI_OPTION_STREAMING</code>.</li></ul>
1017 * In all cases, this option should be turned off before processing the last
1018 * part of the text.</p>
1019 *
1020 * <p>When the <code>UBIDI_OPTION_STREAMING</code> option is used,
1021 * it is recommended to call <code>ubidi_orderParagraphsLTR()</code> with
Victor Changce4bf3c2021-01-19 16:34:24 +00001022 * argument <code>orderParagraphsLTR</code> set to <code>true</code> before
Victor Chang73229502020-09-17 13:39:19 +01001023 * calling <code>ubidi_setPara</code> so that later paragraphs may be
1024 * concatenated to previous paragraphs on the right.</p>
1025 *
1026 * @see ubidi_setReorderingMode
1027 * @see ubidi_setReorderingOptions
1028 * @see ubidi_getProcessedLength
1029 * @see ubidi_orderParagraphsLTR
1030 * @stable ICU 3.6
1031 */
1032 UBIDI_OPTION_STREAMING = 4
1033} UBiDiReorderingOption;
1034
1035/**
1036 * Specify which of the reordering options
1037 * should be applied during Bidi transformations.
1038 *
1039 * @param pBiDi is a <code>UBiDi</code> object.
1040 * @param reorderingOptions is a combination of zero or more of the following
1041 * options:
1042 * <code>#UBIDI_OPTION_DEFAULT</code>, <code>#UBIDI_OPTION_INSERT_MARKS</code>,
1043 * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>, <code>#UBIDI_OPTION_STREAMING</code>.
1044 *
1045 * @see ubidi_getReorderingOptions
1046 * @stable ICU 3.6
1047 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001048U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001049ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions);
1050
1051/**
1052 * What are the reordering options applied to a given Bidi object?
1053 *
1054 * @param pBiDi is a <code>UBiDi</code> object.
1055 * @return the current reordering options of the Bidi object
1056 * @see ubidi_setReorderingOptions
1057 * @stable ICU 3.6
1058 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001059U_CAPI uint32_t U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001060ubidi_getReorderingOptions(UBiDi *pBiDi);
1061
1062/**
1063 * Set the context before a call to ubidi_setPara().<p>
1064 *
1065 * ubidi_setPara() computes the left-right directionality for a given piece
1066 * of text which is supplied as one of its arguments. Sometimes this piece
1067 * of text (the "main text") should be considered in context, because text
1068 * appearing before ("prologue") and/or after ("epilogue") the main text
1069 * may affect the result of this computation.<p>
1070 *
1071 * This function specifies the prologue and/or the epilogue for the next
1072 * call to ubidi_setPara(). The characters specified as prologue and
1073 * epilogue should not be modified by the calling program until the call
1074 * to ubidi_setPara() has returned. If successive calls to ubidi_setPara()
1075 * all need specification of a context, ubidi_setContext() must be called
1076 * before each call to ubidi_setPara(). In other words, a context is not
1077 * "remembered" after the following successful call to ubidi_setPara().<p>
1078 *
1079 * If a call to ubidi_setPara() specifies UBIDI_DEFAULT_LTR or
1080 * UBIDI_DEFAULT_RTL as paraLevel and is preceded by a call to
1081 * ubidi_setContext() which specifies a prologue, the paragraph level will
1082 * be computed taking into consideration the text in the prologue.<p>
1083 *
1084 * When ubidi_setPara() is called without a previous call to
1085 * ubidi_setContext, the main text is handled as if preceded and followed
1086 * by strong directional characters at the current paragraph level.
1087 * Calling ubidi_setContext() with specification of a prologue will change
1088 * this behavior by handling the main text as if preceded by the last
1089 * strong character appearing in the prologue, if any.
1090 * Calling ubidi_setContext() with specification of an epilogue will change
1091 * the behavior of ubidi_setPara() by handling the main text as if followed
1092 * by the first strong character or digit appearing in the epilogue, if any.<p>
1093 *
1094 * Note 1: if <code>ubidi_setContext</code> is called repeatedly without
1095 * calling <code>ubidi_setPara</code>, the earlier calls have no effect,
1096 * only the last call will be remembered for the next call to
1097 * <code>ubidi_setPara</code>.<p>
1098 *
1099 * Note 2: calling <code>ubidi_setContext(pBiDi, NULL, 0, NULL, 0, &errorCode)</code>
1100 * cancels any previous setting of non-empty prologue or epilogue.
1101 * The next call to <code>ubidi_setPara()</code> will process no
1102 * prologue or epilogue.<p>
1103 *
1104 * Note 3: users must be aware that even after setting the context
1105 * before a call to ubidi_setPara() to perform e.g. a logical to visual
1106 * transformation, the resulting string may not be identical to what it
1107 * would have been if all the text, including prologue and epilogue, had
1108 * been processed together.<br>
1109 * Example (upper case letters represent RTL characters):<br>
1110 * &nbsp;&nbsp;prologue = "<code>abc DE</code>"<br>
1111 * &nbsp;&nbsp;epilogue = none<br>
1112 * &nbsp;&nbsp;main text = "<code>FGH xyz</code>"<br>
1113 * &nbsp;&nbsp;paraLevel = UBIDI_LTR<br>
1114 * &nbsp;&nbsp;display without prologue = "<code>HGF xyz</code>"
1115 * ("HGF" is adjacent to "xyz")<br>
1116 * &nbsp;&nbsp;display with prologue = "<code>abc HGFED xyz</code>"
1117 * ("HGF" is not adjacent to "xyz")<br>
1118 *
1119 * @param pBiDi is a paragraph <code>UBiDi</code> object.
1120 *
1121 * @param prologue is a pointer to the text which precedes the text that
1122 * will be specified in a coming call to ubidi_setPara().
1123 * If there is no prologue to consider, then <code>proLength</code>
1124 * must be zero and this pointer can be NULL.
1125 *
1126 * @param proLength is the length of the prologue; if <code>proLength==-1</code>
1127 * then the prologue must be zero-terminated.
1128 * Otherwise proLength must be >= 0. If <code>proLength==0</code>, it means
1129 * that there is no prologue to consider.
1130 *
1131 * @param epilogue is a pointer to the text which follows the text that
1132 * will be specified in a coming call to ubidi_setPara().
1133 * If there is no epilogue to consider, then <code>epiLength</code>
1134 * must be zero and this pointer can be NULL.
1135 *
1136 * @param epiLength is the length of the epilogue; if <code>epiLength==-1</code>
1137 * then the epilogue must be zero-terminated.
1138 * Otherwise epiLength must be >= 0. If <code>epiLength==0</code>, it means
1139 * that there is no epilogue to consider.
1140 *
1141 * @param pErrorCode must be a valid pointer to an error code value.
1142 *
1143 * @see ubidi_setPara
1144 * @stable ICU 4.8
1145 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001146U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001147ubidi_setContext(UBiDi *pBiDi,
1148 const UChar *prologue, int32_t proLength,
1149 const UChar *epilogue, int32_t epiLength,
1150 UErrorCode *pErrorCode);
1151
1152/**
1153 * Perform the Unicode Bidi algorithm. It is defined in the
1154 * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
1155 * version 13,
1156 * also described in The Unicode Standard, Version 4.0.<p>
1157 *
1158 * This function takes a piece of plain text containing one or more paragraphs,
1159 * with or without externally specified embedding levels from <i>styled</i>
1160 * text and computes the left-right-directionality of each character.<p>
1161 *
1162 * If the entire text is all of the same directionality, then
1163 * the function may not perform all the steps described by the algorithm,
1164 * i.e., some levels may not be the same as if all steps were performed.
1165 * This is not relevant for unidirectional text.<br>
1166 * For example, in pure LTR text with numbers the numbers would get
1167 * a resolved level of 2 higher than the surrounding text according to
1168 * the algorithm. This implementation may set all resolved levels to
1169 * the same value in such a case.<p>
1170 *
1171 * The text can be composed of multiple paragraphs. Occurrence of a block
1172 * separator in the text terminates a paragraph, and whatever comes next starts
1173 * a new paragraph. The exception to this rule is when a Carriage Return (CR)
1174 * is followed by a Line Feed (LF). Both CR and LF are block separators, but
1175 * in that case, the pair of characters is considered as terminating the
1176 * preceding paragraph, and a new paragraph will be started by a character
1177 * coming after the LF.
1178 *
1179 * @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code>
1180 * which will be set to contain the reordering information,
1181 * especially the resolved levels for all the characters in <code>text</code>.
1182 *
1183 * @param text is a pointer to the text that the Bidi algorithm will be performed on.
1184 * This pointer is stored in the UBiDi object and can be retrieved
1185 * with <code>ubidi_getText()</code>.<br>
1186 * <strong>Note:</strong> the text must be (at least) <code>length</code> long.
1187 *
1188 * @param length is the length of the text; if <code>length==-1</code> then
1189 * the text must be zero-terminated.
1190 *
1191 * @param paraLevel specifies the default level for the text;
1192 * it is typically 0 (LTR) or 1 (RTL).
1193 * If the function shall determine the paragraph level from the text,
1194 * then <code>paraLevel</code> can be set to
1195 * either <code>#UBIDI_DEFAULT_LTR</code>
1196 * or <code>#UBIDI_DEFAULT_RTL</code>; if the text contains multiple
1197 * paragraphs, the paragraph level shall be determined separately for
1198 * each paragraph; if a paragraph does not include any strongly typed
1199 * character, then the desired default is used (0 for LTR or 1 for RTL).
1200 * Any other value between 0 and <code>#UBIDI_MAX_EXPLICIT_LEVEL</code>
1201 * is also valid, with odd levels indicating RTL.
1202 *
1203 * @param embeddingLevels (in) may be used to preset the embedding and override levels,
1204 * ignoring characters like LRE and PDF in the text.
1205 * A level overrides the directional property of its corresponding
1206 * (same index) character if the level has the
1207 * <code>#UBIDI_LEVEL_OVERRIDE</code> bit set.<br><br>
1208 * Aside from that bit, it must be
1209 * <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>,
1210 * except that level 0 is always allowed.
1211 * Level 0 for a paragraph separator prevents reordering of paragraphs;
1212 * this only works reliably if <code>#UBIDI_LEVEL_OVERRIDE</code>
1213 * is also set for paragraph separators.
1214 * Level 0 for other characters is treated as a wildcard
1215 * and is lifted up to the resolved level of the surrounding paragraph.<br><br>
1216 * <strong>Caution: </strong>A copy of this pointer, not of the levels,
1217 * will be stored in the <code>UBiDi</code> object;
1218 * the <code>embeddingLevels</code> array must not be
1219 * deallocated before the <code>UBiDi</code> structure is destroyed or reused,
1220 * and the <code>embeddingLevels</code>
1221 * should not be modified to avoid unexpected results on subsequent Bidi operations.
1222 * However, the <code>ubidi_setPara()</code> and
1223 * <code>ubidi_setLine()</code> functions may modify some or all of the levels.<br><br>
1224 * After the <code>UBiDi</code> object is reused or destroyed, the caller
1225 * must take care of the deallocation of the <code>embeddingLevels</code> array.<br><br>
1226 * <strong>Note:</strong> the <code>embeddingLevels</code> array must be
1227 * at least <code>length</code> long.
1228 * This pointer can be <code>NULL</code> if this
1229 * value is not necessary.
1230 *
1231 * @param pErrorCode must be a valid pointer to an error code value.
1232 * @stable ICU 2.0
1233 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001234U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001235ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
1236 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
1237 UErrorCode *pErrorCode);
1238
1239/**
1240 * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to
1241 * contain the reordering information, especially the resolved levels,
1242 * for all the characters in a line of text. This line of text is
1243 * specified by referring to a <code>UBiDi</code> object representing
1244 * this information for a piece of text containing one or more paragraphs,
1245 * and by specifying a range of indexes in this text.<p>
1246 * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
1247 *
1248 * This is used after calling <code>ubidi_setPara()</code>
1249 * for a piece of text, and after line-breaking on that text.
1250 * It is not necessary if each paragraph is treated as a single line.<p>
1251 *
1252 * After line-breaking, rules (L1) and (L2) for the treatment of
1253 * trailing WS and for reordering are performed on
1254 * a <code>UBiDi</code> object that represents a line.<p>
1255 *
1256 * <strong>Important: </strong><code>pLineBiDi</code> shares data with
1257 * <code>pParaBiDi</code>.
1258 * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>.
1259 * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line
1260 * before the object for its parent paragraph.<p>
1261 *
1262 * The text pointer that was stored in <code>pParaBiDi</code> is also copied,
1263 * and <code>start</code> is added to it so that it points to the beginning of the
1264 * line for this object.
1265 *
1266 * @param pParaBiDi is the parent paragraph object. It must have been set
1267 * by a successful call to ubidi_setPara.
1268 *
1269 * @param start is the line's first index into the text.
1270 *
1271 * @param limit is just behind the line's last index into the text
1272 * (its last index +1).<br>
1273 * It must be <code>0<=start<limit<=</code>containing paragraph limit.
1274 * If the specified line crosses a paragraph boundary, the function
1275 * will terminate with error code U_ILLEGAL_ARGUMENT_ERROR.
1276 *
1277 * @param pLineBiDi is the object that will now represent a line of the text.
1278 *
1279 * @param pErrorCode must be a valid pointer to an error code value.
1280 *
1281 * @see ubidi_setPara
1282 * @see ubidi_getProcessedLength
1283 * @stable ICU 2.0
1284 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001285U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001286ubidi_setLine(const UBiDi *pParaBiDi,
1287 int32_t start, int32_t limit,
1288 UBiDi *pLineBiDi,
1289 UErrorCode *pErrorCode);
1290
1291/**
1292 * Get the directionality of the text.
1293 *
1294 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1295 *
1296 * @return a value of <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>
1297 * or <code>UBIDI_MIXED</code>
1298 * that indicates if the entire text
1299 * represented by this object is unidirectional,
1300 * and which direction, or if it is mixed-directional.
1301 * Note - The value <code>UBIDI_NEUTRAL</code> is never returned from this method.
1302 *
1303 * @see UBiDiDirection
1304 * @stable ICU 2.0
1305 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001306U_CAPI UBiDiDirection U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001307ubidi_getDirection(const UBiDi *pBiDi);
1308
1309/**
1310 * Gets the base direction of the text provided according
1311 * to the Unicode Bidirectional Algorithm. The base direction
1312 * is derived from the first character in the string with bidirectional
1313 * character type L, R, or AL. If the first such character has type L,
1314 * <code>UBIDI_LTR</code> is returned. If the first such character has
1315 * type R or AL, <code>UBIDI_RTL</code> is returned. If the string does
1316 * not contain any character of these types, then
1317 * <code>UBIDI_NEUTRAL</code> is returned.
1318 *
1319 * This is a lightweight function for use when only the base direction
1320 * is needed and no further bidi processing of the text is needed.
1321 *
1322 * @param text is a pointer to the text whose base
1323 * direction is needed.
1324 * Note: the text must be (at least) @c length long.
1325 *
1326 * @param length is the length of the text;
1327 * if <code>length==-1</code> then the text
1328 * must be zero-terminated.
1329 *
1330 * @return <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>,
1331 * <code>UBIDI_NEUTRAL</code>
1332 *
1333 * @see UBiDiDirection
1334 * @stable ICU 4.6
1335 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001336U_CAPI UBiDiDirection U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001337ubidi_getBaseDirection(const UChar *text, int32_t length );
1338
1339/**
1340 * Get the pointer to the text.
1341 *
1342 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1343 *
1344 * @return The pointer to the text that the UBiDi object was created for.
1345 *
1346 * @see ubidi_setPara
1347 * @see ubidi_setLine
1348 * @stable ICU 2.0
1349 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001350U_CAPI const UChar * U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001351ubidi_getText(const UBiDi *pBiDi);
1352
1353/**
1354 * Get the length of the text.
1355 *
1356 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1357 *
1358 * @return The length of the text that the UBiDi object was created for.
1359 * @stable ICU 2.0
1360 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001361U_CAPI int32_t U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001362ubidi_getLength(const UBiDi *pBiDi);
1363
1364/**
1365 * Get the paragraph level of the text.
1366 *
1367 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1368 *
1369 * @return The paragraph level. If there are multiple paragraphs, their
1370 * level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or
1371 * UBIDI_DEFAULT_RTL. In that case, the level of the first paragraph
1372 * is returned.
1373 *
1374 * @see UBiDiLevel
1375 * @see ubidi_getParagraph
1376 * @see ubidi_getParagraphByIndex
1377 * @stable ICU 2.0
1378 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001379U_CAPI UBiDiLevel U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001380ubidi_getParaLevel(const UBiDi *pBiDi);
1381
1382/**
1383 * Get the number of paragraphs.
1384 *
1385 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1386 *
1387 * @return The number of paragraphs.
1388 * @stable ICU 3.4
1389 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001390U_CAPI int32_t U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001391ubidi_countParagraphs(UBiDi *pBiDi);
1392
1393/**
1394 * Get a paragraph, given a position within the text.
1395 * This function returns information about a paragraph.<br>
1396 * Note: if the paragraph index is known, it is more efficient to
1397 * retrieve the paragraph information using ubidi_getParagraphByIndex().<p>
1398 *
1399 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1400 *
1401 * @param charIndex is the index of a character within the text, in the
1402 * range <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>.
1403 *
1404 * @param pParaStart will receive the index of the first character of the
1405 * paragraph in the text.
1406 * This pointer can be <code>NULL</code> if this
1407 * value is not necessary.
1408 *
1409 * @param pParaLimit will receive the limit of the paragraph.
1410 * The l-value that you point to here may be the
1411 * same expression (variable) as the one for
1412 * <code>charIndex</code>.
1413 * This pointer can be <code>NULL</code> if this
1414 * value is not necessary.
1415 *
1416 * @param pParaLevel will receive the level of the paragraph.
1417 * This pointer can be <code>NULL</code> if this
1418 * value is not necessary.
1419 *
1420 * @param pErrorCode must be a valid pointer to an error code value.
1421 *
1422 * @return The index of the paragraph containing the specified position.
1423 *
1424 * @see ubidi_getProcessedLength
1425 * @stable ICU 3.4
1426 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001427U_CAPI int32_t U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001428ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart,
1429 int32_t *pParaLimit, UBiDiLevel *pParaLevel,
1430 UErrorCode *pErrorCode);
1431
1432/**
1433 * Get a paragraph, given the index of this paragraph.
1434 *
1435 * This function returns information about a paragraph.<p>
1436 *
1437 * @param pBiDi is the paragraph <code>UBiDi</code> object.
1438 *
1439 * @param paraIndex is the number of the paragraph, in the
1440 * range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>.
1441 *
1442 * @param pParaStart will receive the index of the first character of the
1443 * paragraph in the text.
1444 * This pointer can be <code>NULL</code> if this
1445 * value is not necessary.
1446 *
1447 * @param pParaLimit will receive the limit of the paragraph.
1448 * This pointer can be <code>NULL</code> if this
1449 * value is not necessary.
1450 *
1451 * @param pParaLevel will receive the level of the paragraph.
1452 * This pointer can be <code>NULL</code> if this
1453 * value is not necessary.
1454 *
1455 * @param pErrorCode must be a valid pointer to an error code value.
1456 *
1457 * @stable ICU 3.4
1458 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001459U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001460ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
1461 int32_t *pParaStart, int32_t *pParaLimit,
1462 UBiDiLevel *pParaLevel, UErrorCode *pErrorCode);
1463
1464/**
1465 * Get the level for one character.
1466 *
1467 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1468 *
1469 * @param charIndex the index of a character. It must be in the range
1470 * <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>.
1471 *
1472 * @return The level for the character at charIndex (0 if charIndex is not
1473 * in the valid range).
1474 *
1475 * @see UBiDiLevel
1476 * @see ubidi_getProcessedLength
1477 * @stable ICU 2.0
1478 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001479U_CAPI UBiDiLevel U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001480ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex);
1481
1482/**
1483 * Get an array of levels for each character.<p>
1484 *
1485 * Note that this function may allocate memory under some
1486 * circumstances, unlike <code>ubidi_getLevelAt()</code>.
1487 *
1488 * @param pBiDi is the paragraph or line <code>UBiDi</code> object, whose
1489 * text length must be strictly positive.
1490 *
1491 * @param pErrorCode must be a valid pointer to an error code value.
1492 *
1493 * @return The levels array for the text,
1494 * or <code>NULL</code> if an error occurs.
1495 *
1496 * @see UBiDiLevel
1497 * @see ubidi_getProcessedLength
1498 * @stable ICU 2.0
1499 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001500U_CAPI const UBiDiLevel * U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001501ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
1502
1503/**
1504 * Get a logical run.
1505 * This function returns information about a run and is used
1506 * to retrieve runs in logical order.<p>
1507 * This is especially useful for line-breaking on a paragraph.
1508 *
1509 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1510 *
1511 * @param logicalPosition is a logical position within the source text.
1512 *
1513 * @param pLogicalLimit will receive the limit of the corresponding run.
1514 * The l-value that you point to here may be the
1515 * same expression (variable) as the one for
1516 * <code>logicalPosition</code>.
1517 * This pointer can be <code>NULL</code> if this
1518 * value is not necessary.
1519 *
1520 * @param pLevel will receive the level of the corresponding run.
1521 * This pointer can be <code>NULL</code> if this
1522 * value is not necessary.
1523 *
1524 * @see ubidi_getProcessedLength
1525 * @stable ICU 2.0
1526 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001527U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001528ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
1529 int32_t *pLogicalLimit, UBiDiLevel *pLevel);
1530
1531/**
1532 * Get the number of runs.
1533 * This function may invoke the actual reordering on the
1534 * <code>UBiDi</code> object, after <code>ubidi_setPara()</code>
1535 * may have resolved only the levels of the text. Therefore,
1536 * <code>ubidi_countRuns()</code> may have to allocate memory,
1537 * and may fail doing so.
1538 *
1539 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1540 *
1541 * @param pErrorCode must be a valid pointer to an error code value.
1542 *
1543 * @return The number of runs.
1544 * @stable ICU 2.0
1545 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001546U_CAPI int32_t U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001547ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
1548
1549/**
1550 * Get one run's logical start, length, and directionality,
1551 * which can be 0 for LTR or 1 for RTL.
1552 * In an RTL run, the character at the logical start is
1553 * visually on the right of the displayed run.
1554 * The length is the number of characters in the run.<p>
1555 * <code>ubidi_countRuns()</code> should be called
1556 * before the runs are retrieved.
1557 *
1558 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1559 *
1560 * @param runIndex is the number of the run in visual order, in the
1561 * range <code>[0..ubidi_countRuns(pBiDi)-1]</code>.
1562 *
1563 * @param pLogicalStart is the first logical character index in the text.
1564 * The pointer may be <code>NULL</code> if this index is not needed.
1565 *
1566 * @param pLength is the number of characters (at least one) in the run.
1567 * The pointer may be <code>NULL</code> if this is not needed.
1568 *
1569 * @return the directionality of the run,
1570 * <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>,
1571 * never <code>UBIDI_MIXED</code>,
1572 * never <code>UBIDI_NEUTRAL</code>.
1573 *
1574 * @see ubidi_countRuns
1575 *
1576 * Example:
1577 * <pre>
1578 * \code
1579 * int32_t i, count=ubidi_countRuns(pBiDi, pErrorCode),
1580 * logicalStart, visualIndex=0, length;
1581 * for(i=0; i<count; ++i) {
1582 * if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) {
1583 * do { // LTR
1584 * show_char(text[logicalStart++], visualIndex++);
1585 * } while(--length>0);
1586 * } else {
1587 * logicalStart+=length; // logicalLimit
1588 * do { // RTL
1589 * show_char(text[--logicalStart], visualIndex++);
1590 * } while(--length>0);
1591 * }
1592 * }
1593 *\endcode
1594 * </pre>
1595 *
1596 * Note that in right-to-left runs, code like this places
1597 * second surrogates before first ones (which is generally a bad idea)
1598 * and combining characters before base characters.
1599 * <p>
1600 * Use of <code>ubidi_writeReordered()</code>, optionally with the
1601 * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered in order
1602 * to avoid these issues.
1603 * @stable ICU 2.0
1604 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001605U_CAPI UBiDiDirection U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001606ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
1607 int32_t *pLogicalStart, int32_t *pLength);
1608
1609/**
1610 * Get the visual position from a logical text position.
1611 * If such a mapping is used many times on the same
1612 * <code>UBiDi</code> object, then calling
1613 * <code>ubidi_getLogicalMap()</code> is more efficient.<p>
1614 *
1615 * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
1616 * visual position because the corresponding text character is a Bidi control
1617 * removed from output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
1618 * <p>
1619 * When the visual output is altered by using options of
1620 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
1621 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
1622 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual position returned may not
1623 * be correct. It is advised to use, when possible, reordering options
1624 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
1625 * <p>
1626 * Note that in right-to-left runs, this mapping places
1627 * second surrogates before first ones (which is generally a bad idea)
1628 * and combining characters before base characters.
1629 * Use of <code>ubidi_writeReordered()</code>, optionally with the
1630 * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead
1631 * of using the mapping, in order to avoid these issues.
1632 *
1633 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1634 *
1635 * @param logicalIndex is the index of a character in the text.
1636 *
1637 * @param pErrorCode must be a valid pointer to an error code value.
1638 *
1639 * @return The visual position of this character.
1640 *
1641 * @see ubidi_getLogicalMap
1642 * @see ubidi_getLogicalIndex
1643 * @see ubidi_getProcessedLength
1644 * @stable ICU 2.0
1645 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001646U_CAPI int32_t U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001647ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode);
1648
1649/**
1650 * Get the logical text position from a visual position.
1651 * If such a mapping is used many times on the same
1652 * <code>UBiDi</code> object, then calling
1653 * <code>ubidi_getVisualMap()</code> is more efficient.<p>
1654 *
1655 * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
1656 * logical position because the corresponding text character is a Bidi mark
1657 * inserted in the output by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
1658 * <p>
1659 * This is the inverse function to <code>ubidi_getVisualIndex()</code>.
1660 * <p>
1661 * When the visual output is altered by using options of
1662 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
1663 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
1664 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical position returned may not
1665 * be correct. It is advised to use, when possible, reordering options
1666 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
1667 *
1668 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1669 *
1670 * @param visualIndex is the visual position of a character.
1671 *
1672 * @param pErrorCode must be a valid pointer to an error code value.
1673 *
1674 * @return The index of this character in the text.
1675 *
1676 * @see ubidi_getVisualMap
1677 * @see ubidi_getVisualIndex
1678 * @see ubidi_getResultLength
1679 * @stable ICU 2.0
1680 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001681U_CAPI int32_t U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001682ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode);
1683
1684/**
1685 * Get a logical-to-visual index map (array) for the characters in the UBiDi
1686 * (paragraph or line) object.
1687 * <p>
1688 * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
1689 * corresponding text characters are Bidi controls removed from the visual
1690 * output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
1691 * <p>
1692 * When the visual output is altered by using options of
1693 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
1694 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
1695 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not
1696 * be correct. It is advised to use, when possible, reordering options
1697 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
1698 * <p>
1699 * Note that in right-to-left runs, this mapping places
1700 * second surrogates before first ones (which is generally a bad idea)
1701 * and combining characters before base characters.
1702 * Use of <code>ubidi_writeReordered()</code>, optionally with the
1703 * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead
1704 * of using the mapping, in order to avoid these issues.
1705 *
1706 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1707 *
1708 * @param indexMap is a pointer to an array of <code>ubidi_getProcessedLength()</code>
1709 * indexes which will reflect the reordering of the characters.
1710 * If option <code>#UBIDI_OPTION_INSERT_MARKS</code> is set, the number
1711 * of elements allocated in <code>indexMap</code> must be no less than
1712 * <code>ubidi_getResultLength()</code>.
1713 * The array does not need to be initialized.<br><br>
1714 * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
1715 *
1716 * @param pErrorCode must be a valid pointer to an error code value.
1717 *
1718 * @see ubidi_getVisualMap
1719 * @see ubidi_getVisualIndex
1720 * @see ubidi_getProcessedLength
1721 * @see ubidi_getResultLength
1722 * @stable ICU 2.0
1723 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001724U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001725ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
1726
1727/**
1728 * Get a visual-to-logical index map (array) for the characters in the UBiDi
1729 * (paragraph or line) object.
1730 * <p>
1731 * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
1732 * corresponding text characters are Bidi marks inserted in the visual output
1733 * by the option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
1734 * <p>
1735 * When the visual output is altered by using options of
1736 * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
1737 * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
1738 * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
1739 * be correct. It is advised to use, when possible, reordering options
1740 * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
1741 *
1742 * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
1743 *
1744 * @param indexMap is a pointer to an array of <code>ubidi_getResultLength()</code>
1745 * indexes which will reflect the reordering of the characters.
1746 * If option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is set, the number
1747 * of elements allocated in <code>indexMap</code> must be no less than
1748 * <code>ubidi_getProcessedLength()</code>.
1749 * The array does not need to be initialized.<br><br>
1750 * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
1751 *
1752 * @param pErrorCode must be a valid pointer to an error code value.
1753 *
1754 * @see ubidi_getLogicalMap
1755 * @see ubidi_getLogicalIndex
1756 * @see ubidi_getProcessedLength
1757 * @see ubidi_getResultLength
1758 * @stable ICU 2.0
1759 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001760U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001761ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
1762
1763/**
1764 * This is a convenience function that does not use a UBiDi object.
1765 * It is intended for use when an application has determined the levels
1766 * of objects (character sequences) and just needs to have them reordered (L2).
1767 * This is equivalent to using <code>ubidi_getLogicalMap()</code> on a
1768 * <code>UBiDi</code> object.
1769 *
1770 * @param levels is an array with <code>length</code> levels that have been determined by
1771 * the application.
1772 *
1773 * @param length is the number of levels in the array, or, semantically,
1774 * the number of objects to be reordered.
1775 * It must be <code>length>0</code>.
1776 *
1777 * @param indexMap is a pointer to an array of <code>length</code>
1778 * indexes which will reflect the reordering of the characters.
1779 * The array does not need to be initialized.<p>
1780 * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
1781 * @stable ICU 2.0
1782 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001783U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001784ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
1785
1786/**
1787 * This is a convenience function that does not use a UBiDi object.
1788 * It is intended for use when an application has determined the levels
1789 * of objects (character sequences) and just needs to have them reordered (L2).
1790 * This is equivalent to using <code>ubidi_getVisualMap()</code> on a
1791 * <code>UBiDi</code> object.
1792 *
1793 * @param levels is an array with <code>length</code> levels that have been determined by
1794 * the application.
1795 *
1796 * @param length is the number of levels in the array, or, semantically,
1797 * the number of objects to be reordered.
1798 * It must be <code>length>0</code>.
1799 *
1800 * @param indexMap is a pointer to an array of <code>length</code>
1801 * indexes which will reflect the reordering of the characters.
1802 * The array does not need to be initialized.<p>
1803 * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
1804 * @stable ICU 2.0
1805 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001806U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001807ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
1808
1809/**
1810 * Invert an index map.
1811 * The index mapping of the first map is inverted and written to
1812 * the second one.
1813 *
1814 * @param srcMap is an array with <code>length</code> elements
1815 * which defines the original mapping from a source array containing
1816 * <code>length</code> elements to a destination array.
1817 * Some elements of the source array may have no mapping in the
1818 * destination array. In that case, their value will be
1819 * the special value <code>UBIDI_MAP_NOWHERE</code>.
1820 * All elements must be >=0 or equal to <code>UBIDI_MAP_NOWHERE</code>.
1821 * Some elements may have a value >= <code>length</code>, if the
1822 * destination array has more elements than the source array.
1823 * There must be no duplicate indexes (two or more elements with the
1824 * same value except <code>UBIDI_MAP_NOWHERE</code>).
1825 *
1826 * @param destMap is an array with a number of elements equal to 1 + the highest
1827 * value in <code>srcMap</code>.
1828 * <code>destMap</code> will be filled with the inverse mapping.
1829 * If element with index i in <code>srcMap</code> has a value k different
1830 * from <code>UBIDI_MAP_NOWHERE</code>, this means that element i of
1831 * the source array maps to element k in the destination array.
1832 * The inverse map will have value i in its k-th element.
1833 * For all elements of the destination array which do not map to
1834 * an element in the source array, the corresponding element in the
1835 * inverse map will have a value equal to <code>UBIDI_MAP_NOWHERE</code>.
1836 *
1837 * @param length is the number of elements in <code>srcMap</code> (<code>destMap</code> may be longer).
1838 * @see UBIDI_MAP_NOWHERE
1839 * @stable ICU 2.0
1840 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001841U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001842ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length);
1843
1844/** option flags for ubidi_writeReordered() */
1845
1846/**
1847 * option bit for ubidi_writeReordered():
1848 * keep combining characters after their base characters in RTL runs
1849 *
1850 * @see ubidi_writeReordered
1851 * @stable ICU 2.0
1852 */
1853#define UBIDI_KEEP_BASE_COMBINING 1
1854
1855/**
1856 * option bit for ubidi_writeReordered():
1857 * replace characters with the "mirrored" property in RTL runs
1858 * by their mirror-image mappings
1859 *
1860 * @see ubidi_writeReordered
1861 * @stable ICU 2.0
1862 */
1863#define UBIDI_DO_MIRRORING 2
1864
1865/**
1866 * option bit for ubidi_writeReordered():
1867 * surround the run with LRMs if necessary;
1868 * this is part of the approximate "inverse Bidi" algorithm
1869 *
1870 * <p>This option does not imply corresponding adjustment of the index
1871 * mappings.</p>
1872 *
1873 * @see ubidi_setInverse
1874 * @see ubidi_writeReordered
1875 * @stable ICU 2.0
1876 */
1877#define UBIDI_INSERT_LRM_FOR_NUMERIC 4
1878
1879/**
1880 * option bit for ubidi_writeReordered():
1881 * remove Bidi control characters
1882 * (this does not affect #UBIDI_INSERT_LRM_FOR_NUMERIC)
1883 *
1884 * <p>This option does not imply corresponding adjustment of the index
1885 * mappings.</p>
1886 *
1887 * @see ubidi_writeReordered
1888 * @stable ICU 2.0
1889 */
1890#define UBIDI_REMOVE_BIDI_CONTROLS 8
1891
1892/**
1893 * option bit for ubidi_writeReordered():
1894 * write the output in reverse order
1895 *
1896 * <p>This has the same effect as calling <code>ubidi_writeReordered()</code>
1897 * first without this option, and then calling
1898 * <code>ubidi_writeReverse()</code> without mirroring.
1899 * Doing this in the same step is faster and avoids a temporary buffer.
1900 * An example for using this option is output to a character terminal that
1901 * is designed for RTL scripts and stores text in reverse order.</p>
1902 *
1903 * @see ubidi_writeReordered
1904 * @stable ICU 2.0
1905 */
1906#define UBIDI_OUTPUT_REVERSE 16
1907
1908/**
1909 * Get the length of the source text processed by the last call to
1910 * <code>ubidi_setPara()</code>. This length may be different from the length
1911 * of the source text if option <code>#UBIDI_OPTION_STREAMING</code>
1912 * has been set.
1913 * <br>
1914 * Note that whenever the length of the text affects the execution or the
1915 * result of a function, it is the processed length which must be considered,
1916 * except for <code>ubidi_setPara</code> (which receives unprocessed source
1917 * text) and <code>ubidi_getLength</code> (which returns the original length
1918 * of the source text).<br>
1919 * In particular, the processed length is the one to consider in the following
1920 * cases:
1921 * <ul>
1922 * <li>maximum value of the <code>limit</code> argument of
1923 * <code>ubidi_setLine</code></li>
1924 * <li>maximum value of the <code>charIndex</code> argument of
1925 * <code>ubidi_getParagraph</code></li>
1926 * <li>maximum value of the <code>charIndex</code> argument of
1927 * <code>ubidi_getLevelAt</code></li>
1928 * <li>number of elements in the array returned by <code>ubidi_getLevels</code></li>
1929 * <li>maximum value of the <code>logicalPosition</code> argument of
1930 * <code>ubidi_getLogicalRun</code></li>
1931 * <li>maximum value of the <code>logicalIndex</code> argument of
1932 * <code>ubidi_getVisualIndex</code></li>
1933 * <li>number of elements filled in the <code>*indexMap</code> argument of
1934 * <code>ubidi_getLogicalMap</code></li>
1935 * <li>length of text processed by <code>ubidi_writeReordered</code></li>
1936 * </ul>
1937 *
1938 * @param pBiDi is the paragraph <code>UBiDi</code> object.
1939 *
1940 * @return The length of the part of the source text processed by
1941 * the last call to <code>ubidi_setPara</code>.
1942 * @see ubidi_setPara
1943 * @see UBIDI_OPTION_STREAMING
1944 * @stable ICU 3.6
1945 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001946U_CAPI int32_t U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001947ubidi_getProcessedLength(const UBiDi *pBiDi);
1948
1949/**
1950 * Get the length of the reordered text resulting from the last call to
1951 * <code>ubidi_setPara()</code>. This length may be different from the length
1952 * of the source text if option <code>#UBIDI_OPTION_INSERT_MARKS</code>
1953 * or option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> has been set.
1954 * <br>
1955 * This resulting length is the one to consider in the following cases:
1956 * <ul>
1957 * <li>maximum value of the <code>visualIndex</code> argument of
1958 * <code>ubidi_getLogicalIndex</code></li>
1959 * <li>number of elements of the <code>*indexMap</code> argument of
1960 * <code>ubidi_getVisualMap</code></li>
1961 * </ul>
1962 * Note that this length stays identical to the source text length if
1963 * Bidi marks are inserted or removed using option bits of
1964 * <code>ubidi_writeReordered</code>, or if option
1965 * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> has been set.
1966 *
1967 * @param pBiDi is the paragraph <code>UBiDi</code> object.
1968 *
1969 * @return The length of the reordered text resulting from
1970 * the last call to <code>ubidi_setPara</code>.
1971 * @see ubidi_setPara
1972 * @see UBIDI_OPTION_INSERT_MARKS
1973 * @see UBIDI_OPTION_REMOVE_CONTROLS
1974 * @stable ICU 3.6
1975 */
Victor Changce4bf3c2021-01-19 16:34:24 +00001976U_CAPI int32_t U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01001977ubidi_getResultLength(const UBiDi *pBiDi);
1978
1979U_CDECL_BEGIN
1980
1981#ifndef U_HIDE_DEPRECATED_API
1982/**
1983 * Value returned by <code>UBiDiClassCallback</code> callbacks when
1984 * there is no need to override the standard Bidi class for a given code point.
1985 *
1986 * This constant is deprecated; use u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1 instead.
1987 *
1988 * @see UBiDiClassCallback
1989 * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
1990 */
1991#define U_BIDI_CLASS_DEFAULT U_CHAR_DIRECTION_COUNT
1992#endif // U_HIDE_DEPRECATED_API
1993
1994/**
1995 * Callback type declaration for overriding default Bidi class values with
1996 * custom ones.
1997 * <p>Usually, the function pointer will be propagated to a <code>UBiDi</code>
1998 * object by calling the <code>ubidi_setClassCallback()</code> function;
1999 * then the callback will be invoked by the UBA implementation any time the
2000 * class of a character is to be determined.</p>
2001 *
2002 * @param context is a pointer to the callback private data.
2003 *
2004 * @param c is the code point to get a Bidi class for.
2005 *
2006 * @return The directional property / Bidi class for the given code point
2007 * <code>c</code> if the default class has been overridden, or
2008 * <code>u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>
2009 * if the standard Bidi class value for <code>c</code> is to be used.
2010 * @see ubidi_setClassCallback
2011 * @see ubidi_getClassCallback
2012 * @stable ICU 3.6
2013 */
2014typedef UCharDirection U_CALLCONV
2015UBiDiClassCallback(const void *context, UChar32 c);
2016
2017U_CDECL_END
2018
2019/**
2020 * Retrieve the Bidi class for a given code point.
2021 * <p>If a <code>#UBiDiClassCallback</code> callback is defined and returns a
2022 * value other than <code>u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>,
2023 * that value is used; otherwise the default class determination mechanism is invoked.</p>
2024 *
2025 * @param pBiDi is the paragraph <code>UBiDi</code> object.
2026 *
2027 * @param c is the code point whose Bidi class must be retrieved.
2028 *
2029 * @return The Bidi class for character <code>c</code> based
2030 * on the given <code>pBiDi</code> instance.
2031 * @see UBiDiClassCallback
2032 * @stable ICU 3.6
2033 */
Victor Changce4bf3c2021-01-19 16:34:24 +00002034U_CAPI UCharDirection U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01002035ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c);
2036
2037/**
2038 * Set the callback function and callback data used by the UBA
2039 * implementation for Bidi class determination.
2040 * <p>This may be useful for assigning Bidi classes to PUA characters, or
2041 * for special application needs. For instance, an application may want to
2042 * handle all spaces like L or R characters (according to the base direction)
2043 * when creating the visual ordering of logical lines which are part of a report
2044 * organized in columns: there should not be interaction between adjacent
2045 * cells.</p>
2046 *
2047 * @param pBiDi is the paragraph <code>UBiDi</code> object.
2048 *
2049 * @param newFn is the new callback function pointer.
2050 *
2051 * @param newContext is the new callback context pointer. This can be NULL.
2052 *
2053 * @param oldFn fillin: Returns the old callback function pointer. This can be
2054 * NULL.
2055 *
2056 * @param oldContext fillin: Returns the old callback's context. This can be
2057 * NULL.
2058 *
2059 * @param pErrorCode must be a valid pointer to an error code value.
2060 *
2061 * @see ubidi_getClassCallback
2062 * @stable ICU 3.6
2063 */
Victor Changce4bf3c2021-01-19 16:34:24 +00002064U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01002065ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
2066 const void *newContext, UBiDiClassCallback **oldFn,
2067 const void **oldContext, UErrorCode *pErrorCode);
2068
2069/**
2070 * Get the current callback function used for Bidi class determination.
2071 *
2072 * @param pBiDi is the paragraph <code>UBiDi</code> object.
2073 *
2074 * @param fn fillin: Returns the callback function pointer.
2075 *
2076 * @param context fillin: Returns the callback's private context.
2077 *
2078 * @see ubidi_setClassCallback
2079 * @stable ICU 3.6
2080 */
Victor Changce4bf3c2021-01-19 16:34:24 +00002081U_CAPI void U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01002082ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context);
2083
2084/**
2085 * Take a <code>UBiDi</code> object containing the reordering
2086 * information for a piece of text (one or more paragraphs) set by
2087 * <code>ubidi_setPara()</code> or for a line of text set by
2088 * <code>ubidi_setLine()</code> and write a reordered string to the
2089 * destination buffer.
2090 *
2091 * This function preserves the integrity of characters with multiple
2092 * code units and (optionally) combining characters.
2093 * Characters in RTL runs can be replaced by mirror-image characters
2094 * in the destination buffer. Note that "real" mirroring has
2095 * to be done in a rendering engine by glyph selection
2096 * and that for many "mirrored" characters there are no
2097 * Unicode characters as mirror-image equivalents.
2098 * There are also options to insert or remove Bidi control
2099 * characters; see the description of the <code>destSize</code>
2100 * and <code>options</code> parameters and of the option bit flags.
2101 *
2102 * @param pBiDi A pointer to a <code>UBiDi</code> object that
2103 * is set by <code>ubidi_setPara()</code> or
2104 * <code>ubidi_setLine()</code> and contains the reordering
2105 * information for the text that it was defined for,
2106 * as well as a pointer to that text.<br><br>
2107 * The text was aliased (only the pointer was stored
2108 * without copying the contents) and must not have been modified
2109 * since the <code>ubidi_setPara()</code> call.
2110 *
2111 * @param dest A pointer to where the reordered text is to be copied.
2112 * The source text and <code>dest[destSize]</code>
2113 * must not overlap.
2114 *
2115 * @param destSize The size of the <code>dest</code> buffer,
2116 * in number of UChars.
2117 * If the <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>
2118 * option is set, then the destination length could be
2119 * as large as
2120 * <code>ubidi_getProcessedLength(pBiDi)+2*ubidi_countRuns(pBiDi)</code>.
2121 * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
2122 * is set, then the destination length may be less than
2123 * <code>ubidi_getProcessedLength(pBiDi)</code>.
2124 * If none of these options is set, then the destination length
2125 * will be exactly <code>ubidi_getProcessedLength(pBiDi)</code>.
2126 *
2127 * @param options A bit set of options for the reordering that control
2128 * how the reordered text is written.
2129 * The options include mirroring the characters on a code
2130 * point basis and inserting LRM characters, which is used
2131 * especially for transforming visually stored text
2132 * to logically stored text (although this is still an
2133 * imperfect implementation of an "inverse Bidi" algorithm
2134 * because it uses the "forward Bidi" algorithm at its core).
2135 * The available options are:
2136 * <code>#UBIDI_DO_MIRRORING</code>,
2137 * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
2138 * <code>#UBIDI_KEEP_BASE_COMBINING</code>,
2139 * <code>#UBIDI_OUTPUT_REVERSE</code>,
2140 * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
2141 *
2142 * @param pErrorCode must be a valid pointer to an error code value.
2143 *
2144 * @return The length of the output string.
2145 *
2146 * @see ubidi_getProcessedLength
2147 * @stable ICU 2.0
2148 */
Victor Changce4bf3c2021-01-19 16:34:24 +00002149U_CAPI int32_t U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01002150ubidi_writeReordered(UBiDi *pBiDi,
2151 UChar *dest, int32_t destSize,
2152 uint16_t options,
2153 UErrorCode *pErrorCode);
2154
2155/**
2156 * Reverse a Right-To-Left run of Unicode text.
2157 *
2158 * This function preserves the integrity of characters with multiple
2159 * code units and (optionally) combining characters.
2160 * Characters can be replaced by mirror-image characters
2161 * in the destination buffer. Note that "real" mirroring has
2162 * to be done in a rendering engine by glyph selection
2163 * and that for many "mirrored" characters there are no
2164 * Unicode characters as mirror-image equivalents.
2165 * There are also options to insert or remove Bidi control
2166 * characters.
2167 *
2168 * This function is the implementation for reversing RTL runs as part
2169 * of <code>ubidi_writeReordered()</code>. For detailed descriptions
2170 * of the parameters, see there.
2171 * Since no Bidi controls are inserted here, the output string length
2172 * will never exceed <code>srcLength</code>.
2173 *
2174 * @see ubidi_writeReordered
2175 *
2176 * @param src A pointer to the RTL run text.
2177 *
2178 * @param srcLength The length of the RTL run.
2179 *
2180 * @param dest A pointer to where the reordered text is to be copied.
2181 * <code>src[srcLength]</code> and <code>dest[destSize]</code>
2182 * must not overlap.
2183 *
2184 * @param destSize The size of the <code>dest</code> buffer,
2185 * in number of UChars.
2186 * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
2187 * is set, then the destination length may be less than
2188 * <code>srcLength</code>.
2189 * If this option is not set, then the destination length
2190 * will be exactly <code>srcLength</code>.
2191 *
2192 * @param options A bit set of options for the reordering that control
2193 * how the reordered text is written.
2194 * See the <code>options</code> parameter in <code>ubidi_writeReordered()</code>.
2195 *
2196 * @param pErrorCode must be a valid pointer to an error code value.
2197 *
2198 * @return The length of the output string.
2199 * @stable ICU 2.0
2200 */
Victor Changce4bf3c2021-01-19 16:34:24 +00002201U_CAPI int32_t U_EXPORT2
Victor Chang73229502020-09-17 13:39:19 +01002202ubidi_writeReverse(const UChar *src, int32_t srcLength,
2203 UChar *dest, int32_t destSize,
2204 uint16_t options,
2205 UErrorCode *pErrorCode);
2206
2207/*#define BIDI_SAMPLE_CODE*/
2208/*@}*/
2209
2210#endif