blob: 97392b91d2a50085a6e79bdfb8894ffa93ef80ec [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Portions Copyright 2000-2004 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26/*
27 * (C) Copyright IBM Corp. 1999-2003 - All Rights Reserved
28 *
29 * The original version of this source code and documentation is
30 * copyrighted and owned by IBM. These materials are provided
31 * under terms of a License Agreement between IBM and Sun.
32 * This technology is protected by multiple US and International
33 * patents. This notice and attribution to IBM may not be removed.
34 */
35
36/*
37*
38******************************************************************************
39* file name: ubidi.c
40* encoding: US-ASCII
41* tab size: 8 (not used)
42* indentation:4
43*
44* created on: 1999jul27
45* created by: Markus W. Scherer
46*/
47
48/* set import/export definitions */
49#ifndef U_COMMON_IMPLEMENTATION
50# define U_COMMON_IMPLEMENTATION
51#endif
52
53#include "cmemory.h"
54#include "utypes.h"
55#include "uchardir.h"
56#include "ubidi.h"
57#include "ubidiimp.h"
58
59/*
60 * General implementation notes:
61 *
62 * Throughout the implementation, there are comments like (W2) that refer to
63 * rules of the BiDi algorithm in its version 5, in this example to the second
64 * rule of the resolution of weak types.
65 *
66 * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32)
67 * character according to UTF-16, the second UChar gets the directional property of
68 * the entire character assigned, while the first one gets a BN, a boundary
69 * neutral, type, which is ignored by most of the algorithm according to
70 * rule (X9) and the implementation suggestions of the BiDi algorithm.
71 *
72 * Later, adjustWSLevels() will set the level for each BN to that of the
73 * following character (UChar), which results in surrogate pairs getting the
74 * same level on each of their surrogates.
75 *
76 * In a UTF-8 implementation, the same thing could be done: the last byte of
77 * a multi-byte sequence would get the "real" property, while all previous
78 * bytes of that sequence would get BN.
79 *
80 * It is not possible to assign all those parts of a character the same real
81 * property because this would fail in the resolution of weak types with rules
82 * that look at immediately surrounding types.
83 *
84 * As a related topic, this implementation does not remove Boundary Neutral
85 * types from the input, but ignores them whereever this is relevant.
86 * For example, the loop for the resolution of the weak types reads
87 * types until it finds a non-BN.
88 * Also, explicit embedding codes are neither changed into BN nor removed.
89 * They are only treated the same way real BNs are.
90 * As stated before, adjustWSLevels() takes care of them at the end.
91 * For the purpose of conformance, the levels of all these codes
92 * do not matter.
93 *
94 * Note that this implementation never modifies the dirProps
95 * after the initial setup.
96 *
97 *
98 * In this implementation, the resolution of weak types (Wn),
99 * neutrals (Nn), and the assignment of the resolved level (In)
100 * are all done in one single loop, in resolveImplicitLevels().
101 * Changes of dirProp values are done on the fly, without writing
102 * them back to the dirProps array.
103 *
104 *
105 * This implementation contains code that allows to bypass steps of the
106 * algorithm that are not needed on the specific paragraph
107 * in order to speed up the most common cases considerably,
108 * like text that is entirely LTR, or RTL text without numbers.
109 *
110 * Most of this is done by setting a bit for each directional property
111 * in a flags variable and later checking for whether there are
112 * any LTR characters or any RTL characters, or both, whether
113 * there are any explicit embedding codes, etc.
114 *
115 * If the (Xn) steps are performed, then the flags are re-evaluated,
116 * because they will then not contain the embedding codes any more
117 * and will be adjusted for override codes, so that subsequently
118 * more bypassing may be possible than what the initial flags suggested.
119 *
120 * If the text is not mixed-directional, then the
121 * algorithm steps for the weak type resolution are not performed,
122 * and all levels are set to the paragraph level.
123 *
124 * If there are no explicit embedding codes, then the (Xn) steps
125 * are not performed.
126 *
127 * If embedding levels are supplied as a parameter, then all
128 * explicit embedding codes are ignored, and the (Xn) steps
129 * are not performed.
130 *
131 * White Space types could get the level of the run they belong to,
132 * and are checked with a test of (flags&MASK_EMBEDDING) to
133 * consider if the paragraph direction should be considered in
134 * the flags variable.
135 *
136 * If there are no White Space types in the paragraph, then
137 * (L1) is not necessary in adjustWSLevels().
138 */
139
140/* prototypes --------------------------------------------------------------- */
141
142static void
143getDirProps(UBiDi *pBiDi, const UChar *text);
144
145static UBiDiDirection
146resolveExplicitLevels(UBiDi *pBiDi);
147
148static UBiDiDirection
149checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
150
151static UBiDiDirection
152directionFromFlags(Flags flags);
153
154static void
155resolveImplicitLevels(UBiDi *pBiDi,
156 int32_t start, int32_t limit,
157 DirProp sor, DirProp eor);
158
159static void
160adjustWSLevels(UBiDi *pBiDi);
161
162/* to avoid some conditional statements, use tiny constant arrays */
163static const Flags flagLR[2]={ DIRPROP_FLAG(L), DIRPROP_FLAG(R) };
164static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) };
165static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) };
166
167#define DIRPROP_FLAG_LR(level) flagLR[(level)&1]
168#define DIRPROP_FLAG_E(level) flagE[(level)&1]
169#define DIRPROP_FLAG_O(level) flagO[(level)&1]
170
171/* UBiDi object management -------------------------------------------------- */
172
173U_CAPI UBiDi * U_EXPORT2
174ubidi_open(void)
175{
176 UErrorCode errorCode=U_ZERO_ERROR;
177 return ubidi_openSized(0, 0, &errorCode);
178}
179
180U_CAPI UBiDi * U_EXPORT2
181ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode) {
182 UBiDi *pBiDi;
183
184 /* check the argument values */
185 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
186 return NULL;
187 } else if(maxLength<0 || maxRunCount<0) {
188 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
189 return NULL; /* invalid arguments */
190 }
191
192 /* allocate memory for the object */
193 pBiDi=(UBiDi *)icu_malloc(sizeof(UBiDi));
194 if(pBiDi==NULL) {
195 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
196 return NULL;
197 }
198
199 /* reset the object, all pointers NULL, all flags FALSE, all sizes 0 */
200 icu_memset(pBiDi, 0, sizeof(UBiDi));
201
202 /* allocate memory for arrays as requested */
203 if(maxLength>0) {
204 if( !getInitialDirPropsMemory(pBiDi, maxLength) ||
205 !getInitialLevelsMemory(pBiDi, maxLength)
206 ) {
207 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
208 }
209 } else {
210 pBiDi->mayAllocateText=TRUE;
211 }
212
213 if(maxRunCount>0) {
214 if(maxRunCount==1) {
215 /* use simpleRuns[] */
216 pBiDi->runsSize=sizeof(Run);
217 } else if(!getInitialRunsMemory(pBiDi, maxRunCount)) {
218 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
219 }
220 } else {
221 pBiDi->mayAllocateRuns=TRUE;
222 }
223
224 if(U_SUCCESS(*pErrorCode)) {
225 return pBiDi;
226 } else {
227 ubidi_close(pBiDi);
228 return NULL;
229 }
230}
231
232/*
233 * We are allowed to allocate memory if memory==NULL or
234 * mayAllocate==TRUE for each array that we need.
235 * We also try to grow and shrink memory as needed if we
236 * allocate it.
237 *
238 * Assume sizeNeeded>0.
239 * If *pMemory!=NULL, then assume *pSize>0.
240 *
241 * ### this realloc() may unnecessarily copy the old data,
242 * which we know we don't need any more;
243 * is this the best way to do this??
244 */
245extern bool_t
246ubidi_getMemory(void **pMemory, int32_t *pSize, bool_t mayAllocate, int32_t sizeNeeded) {
247 /* check for existing memory */
248 if(*pMemory==NULL) {
249 /* we need to allocate memory */
250 if(mayAllocate && (*pMemory=icu_malloc(sizeNeeded))!=NULL) {
251 *pSize=sizeNeeded;
252 return TRUE;
253 } else {
254 return FALSE;
255 }
256 } else {
257 /* there is some memory, is it enough or too much? */
258 if(sizeNeeded>*pSize && !mayAllocate) {
259 /* not enough memory, and we must not allocate */
260 return FALSE;
261 } else if(sizeNeeded!=*pSize && mayAllocate) {
262 /* we may try to grow or shrink */
263 void *memory;
264
265 if((memory=icu_realloc(*pMemory, sizeNeeded))!=NULL) {
266 *pMemory=memory;
267 *pSize=sizeNeeded;
268 return TRUE;
269 } else {
270 /* we failed to grow */
271 return FALSE;
272 }
273 } else {
274 /* we have at least enough memory and must not allocate */
275 return TRUE;
276 }
277 }
278}
279
280U_CAPI void U_EXPORT2
281ubidi_close(UBiDi *pBiDi) {
282 if(pBiDi!=NULL) {
283 if(pBiDi->dirPropsMemory!=NULL) {
284 icu_free(pBiDi->dirPropsMemory);
285 }
286 if(pBiDi->levelsMemory!=NULL) {
287 icu_free(pBiDi->levelsMemory);
288 }
289 if(pBiDi->runsMemory!=NULL) {
290 icu_free(pBiDi->runsMemory);
291 }
292 icu_free(pBiDi);
293 }
294}
295
296/* set to approximate "inverse BiDi" ---------------------------------------- */
297
298U_CAPI void U_EXPORT2
299ubidi_setInverse(UBiDi *pBiDi, bool_t isInverse) {
300 if(pBiDi!=NULL) {
301 pBiDi->isInverse=isInverse;
302 }
303}
304
305U_CAPI bool_t U_EXPORT2
306ubidi_isInverse(UBiDi *pBiDi) {
307 if(pBiDi!=NULL) {
308 return pBiDi->isInverse;
309 } else {
310 return FALSE;
311 }
312}
313
314/* ubidi_setPara ------------------------------------------------------------ */
315
316U_CAPI void U_EXPORT2
317ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
318 UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
319 UErrorCode *pErrorCode) {
320 UBiDiDirection direction;
321
322 /* check the argument values */
323 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
324 return;
325 } else if(pBiDi==NULL || text==NULL ||
326 ((UBIDI_MAX_EXPLICIT_LEVEL<paraLevel) && !IS_DEFAULT_LEVEL(paraLevel)) ||
327 length<-1
328 ) {
329 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
330 return;
331 }
332
333 if(length==-1) {
334 // length=u_strlen(text);
335 const UChar *p = text - 1;
336 while(*++p);
337 length = p - text;
338 }
339
340 /* initialize the UBiDi structure */
341 pBiDi->text=text;
342 pBiDi->length=length;
343 pBiDi->paraLevel=paraLevel;
344 pBiDi->direction=UBIDI_LTR;
345 pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */
346
347 pBiDi->dirProps=NULL;
348 pBiDi->levels=NULL;
349 pBiDi->runs=NULL;
350
351 if(length==0) {
352 /*
353 * For an empty paragraph, create a UBiDi object with the paraLevel and
354 * the flags and the direction set but without allocating zero-length arrays.
355 * There is nothing more to do.
356 */
357 if(IS_DEFAULT_LEVEL(paraLevel)) {
358 pBiDi->paraLevel&=1;
359 }
360 if(paraLevel&1) {
361 pBiDi->flags=DIRPROP_FLAG(R);
362 pBiDi->direction=UBIDI_RTL;
363 } else {
364 pBiDi->flags=DIRPROP_FLAG(L);
365 pBiDi->direction=UBIDI_LTR;
366 }
367
368 pBiDi->runCount=0;
369 return;
370 }
371
372 pBiDi->runCount=-1;
373
374 /*
375 * Get the directional properties,
376 * the flags bit-set, and
377 * determine the partagraph level if necessary.
378 */
379 if(getDirPropsMemory(pBiDi, length)) {
380 pBiDi->dirProps=pBiDi->dirPropsMemory;
381 getDirProps(pBiDi, text);
382 } else {
383 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
384 return;
385 }
386
387 /* are explicit levels specified? */
388 if(embeddingLevels==NULL) {
389 /* no: determine explicit levels according to the (Xn) rules */\
390 if(getLevelsMemory(pBiDi, length)) {
391 pBiDi->levels=pBiDi->levelsMemory;
392 direction=resolveExplicitLevels(pBiDi);
393 } else {
394 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
395 return;
396 }
397 } else {
398 /* set BN for all explicit codes, check that all levels are paraLevel..UBIDI_MAX_EXPLICIT_LEVEL */
399 pBiDi->levels=embeddingLevels;
400 direction=checkExplicitLevels(pBiDi, pErrorCode);
401 if(U_FAILURE(*pErrorCode)) {
402 return;
403 }
404 }
405
406 /*
407 * The steps after (X9) in the UBiDi algorithm are performed only if
408 * the paragraph text has mixed directionality!
409 */
410 pBiDi->direction=direction;
411 switch(direction) {
412 case UBIDI_LTR:
413 /* make sure paraLevel is even */
414 pBiDi->paraLevel=(UBiDiLevel)((pBiDi->paraLevel+1)&~1);
415
416 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
417 pBiDi->trailingWSStart=0;
418 break;
419 case UBIDI_RTL:
420 /* make sure paraLevel is odd */
421 pBiDi->paraLevel|=1;
422
423 /* all levels are implicitly at paraLevel (important for ubidi_getLevels()) */
424 pBiDi->trailingWSStart=0;
425 break;
426 default:
427 /*
428 * If there are no external levels specified and there
429 * are no significant explicit level codes in the text,
430 * then we can treat the entire paragraph as one run.
431 * Otherwise, we need to perform the following rules on runs of
432 * the text with the same embedding levels. (X10)
433 * "Significant" explicit level codes are ones that actually
434 * affect non-BN characters.
435 * Examples for "insignificant" ones are empty embeddings
436 * LRE-PDF, LRE-RLE-PDF-PDF, etc.
437 */
438 if(embeddingLevels==NULL && !(pBiDi->flags&DIRPROP_FLAG_MULTI_RUNS)) {
439 resolveImplicitLevels(pBiDi, 0, length,
440 GET_LR_FROM_LEVEL(pBiDi->paraLevel),
441 GET_LR_FROM_LEVEL(pBiDi->paraLevel));
442 } else {
443 /* sor, eor: start and end types of same-level-run */
444 UBiDiLevel *levels=pBiDi->levels;
445 int32_t start, limit=0;
446 UBiDiLevel level, nextLevel;
447 DirProp sor, eor;
448
449 /* determine the first sor and set eor to it because of the loop body (sor=eor there) */
450 level=pBiDi->paraLevel;
451 nextLevel=levels[0];
452 if(level<nextLevel) {
453 eor=GET_LR_FROM_LEVEL(nextLevel);
454 } else {
455 eor=GET_LR_FROM_LEVEL(level);
456 }
457
458 do {
459 /* determine start and limit of the run (end points just behind the run) */
460
461 /* the values for this run's start are the same as for the previous run's end */
462 sor=eor;
463 start=limit;
464 level=nextLevel;
465
466 /* search for the limit of this run */
467 while(++limit<length && levels[limit]==level) {}
468
469 /* get the correct level of the next run */
470 if(limit<length) {
471 nextLevel=levels[limit];
472 } else {
473 nextLevel=pBiDi->paraLevel;
474 }
475
476 /* determine eor from max(level, nextLevel); sor is last run's eor */
477 if((level&~UBIDI_LEVEL_OVERRIDE)<(nextLevel&~UBIDI_LEVEL_OVERRIDE)) {
478 eor=GET_LR_FROM_LEVEL(nextLevel);
479 } else {
480 eor=GET_LR_FROM_LEVEL(level);
481 }
482
483 /* if the run consists of overridden directional types, then there
484 are no implicit types to be resolved */
485 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
486 resolveImplicitLevels(pBiDi, start, limit, sor, eor);
487 } else {
488 /* remove the UBIDI_LEVEL_OVERRIDE flags */
489 do {
490 levels[start++]&=~UBIDI_LEVEL_OVERRIDE;
491 } while(start<limit);
492 }
493 } while(limit<length);
494 }
495
496 /* reset the embedding levels for some non-graphic characters (L1), (X9) */
497 adjustWSLevels(pBiDi);
498
499 /* for "inverse BiDi", ubidi_getRuns() modifies the levels of numeric runs following RTL runs */
500 if(pBiDi->isInverse) {
501 if(!ubidi_getRuns(pBiDi)) {
502 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
503 return;
504 }
505 }
506 break;
507 }
508}
509
510/* perform (P2)..(P3) ------------------------------------------------------- */
511
512/*
513 * Get the directional properties for the text,
514 * calculate the flags bit-set, and
515 * determine the partagraph level if necessary.
516 */
517static void
518getDirProps(UBiDi *pBiDi, const UChar *text) {
519 DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */
520
521 int32_t i=0, i0, i1, length=pBiDi->length;
522 Flags flags=0; /* collect all directionalities in the text */
523 UChar uchar;
524 DirProp dirProp;
525
526 if(IS_DEFAULT_LEVEL(pBiDi->paraLevel)) {
527 /* determine the paragraph level (P2..P3) */
528 for(;;) {
529 uchar=text[i];
530 if(!IS_FIRST_SURROGATE(uchar) || i+1==length || !IS_SECOND_SURROGATE(text[i+1])) {
531 /* not a surrogate pair */
532 flags|=DIRPROP_FLAG(dirProps[i]=dirProp=u_charDirection(uchar));
533 } else {
534 /* a surrogate pair */
535 dirProps[i++]=BN; /* first surrogate in the pair gets the BN type */
536 flags|=DIRPROP_FLAG(dirProps[i]=dirProp=u_surrogatePairDirection(uchar, text[i]))|DIRPROP_FLAG(BN);
537 }
538 ++i;
539 if(dirProp==L) {
540 pBiDi->paraLevel=0;
541 break;
542 } else if(dirProp==R || dirProp==AL) {
543 pBiDi->paraLevel=1;
544 break;
545 } else if(i>=length) {
546 /*
547 * see comment in ubidi.h:
548 * the DEFAULT_XXX values are designed so that
549 * their bit 0 alone yields the intended default
550 */
551 pBiDi->paraLevel&=1;
552 break;
553 }
554 }
555 } else {
556 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
557 }
558
559 /* get the rest of the directional properties and the flags bits */
560 while(i<length) {
561 uchar=text[i];
562 if(!IS_FIRST_SURROGATE(uchar) || i+1==length || !IS_SECOND_SURROGATE(text[i+1])) {
563 /* not a surrogate pair */
564 flags|=DIRPROP_FLAG(dirProps[i]=u_charDirection(uchar));
565 } else {
566 /* a surrogate pair */
567 dirProps[i++]=BN; /* first surrogate in the pair gets the BN type */
568 flags|=DIRPROP_FLAG(dirProps[i]=dirProp=u_surrogatePairDirection(uchar, text[i]))|DIRPROP_FLAG(BN);
569 }
570 ++i;
571 }
572 if(flags&MASK_EMBEDDING) {
573 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
574 }
575
576 pBiDi->flags=flags;
577}
578
579/* perform (X1)..(X9) ------------------------------------------------------- */
580
581/*
582 * Resolve the explicit levels as specified by explicit embedding codes.
583 * Recalculate the flags to have them reflect the real properties
584 * after taking the explicit embeddings into account.
585 *
586 * The BiDi algorithm is designed to result in the same behavior whether embedding
587 * levels are externally specified (from "styled text", supposedly the preferred
588 * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text.
589 * That is why (X9) instructs to remove all explicit codes (and BN).
590 * However, in a real implementation, this removal of these codes and their index
591 * positions in the plain text is undesirable since it would result in
592 * reallocated, reindexed text.
593 * Instead, this implementation leaves the codes in there and just ignores them
594 * in the subsequent processing.
595 * In order to get the same reordering behavior, positions with a BN or an
596 * explicit embedding code just get the same level assigned as the last "real"
597 * character.
598 *
599 * Some implementations, not this one, then overwrite some of these
600 * directionality properties at "real" same-level-run boundaries by
601 * L or R codes so that the resolution of weak types can be performed on the
602 * entire paragraph at once instead of having to parse it once more and
603 * perform that resolution on same-level-runs.
604 * This limits the scope of the implicit rules in effectively
605 * the same way as the run limits.
606 *
607 * Instead, this implementation does not modify these codes.
608 * On one hand, the paragraph has to be scanned for same-level-runs, but
609 * on the other hand, this saves another loop to reset these codes,
610 * or saves making and modifying a copy of dirProps[].
611 *
612 *
613 * Note that (Pn) and (Xn) changed significantly from version 4 of the BiDi algorithm.
614 *
615 *
616 * Handling the stack of explicit levels (Xn):
617 *
618 * With the BiDi stack of explicit levels,
619 * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF,
620 * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61.
621 *
622 * In order to have a correct push-pop semantics even in the case of overflows,
623 * there are two overflow counters:
624 * - countOver60 is incremented with each LRx at level 60
625 * - from level 60, one RLx increases the level to 61
626 * - countOver61 is incremented with each LRx and RLx at level 61
627 *
628 * Popping levels with PDF must work in the opposite order so that level 61
629 * is correct at the correct point. Underflows (too many PDFs) must be checked.
630 *
631 * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
632 */
633
634static UBiDiDirection
635resolveExplicitLevels(UBiDi *pBiDi) {
636 const DirProp *dirProps=pBiDi->dirProps;
637 UBiDiLevel *levels=pBiDi->levels;
638
639 int32_t i=0, length=pBiDi->length;
640 Flags flags=pBiDi->flags; /* collect all directionalities in the text */
641 DirProp dirProp;
642 UBiDiLevel level=pBiDi->paraLevel;
643
644 UBiDiDirection direction;
645
646 /* determine if the text is mixed-directional or single-directional */
647 direction=directionFromFlags(flags);
648
649 /* we may not need to resolve any explicit levels */
650 if(direction!=UBIDI_MIXED) {
651 /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */
652 } else if(!(flags&MASK_EXPLICIT) || pBiDi->isInverse) {
653 /* mixed, but all characters are at the same embedding level */
654 /* or we are in "inverse BiDi" */
655 /* set all levels to the paragraph level */
656 for(i=0; i<length; ++i) {
657 levels[i]=level;
658 }
659 } else {
660 /* continue to perform (Xn) */
661
662 /* (X1) level is set for all codes, embeddingLevel keeps track of the push/pop operations */
663 /* both variables may carry the UBIDI_LEVEL_OVERRIDE flag to indicate the override status */
664 UBiDiLevel embeddingLevel=level, newLevel, stackTop=0;
665
666 UBiDiLevel stack[UBIDI_MAX_EXPLICIT_LEVEL]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL */
667 uint32_t countOver60=0, countOver61=0; /* count overflows of explicit levels */
668
669 /* recalculate the flags */
670 flags=0;
671
672 /* since we assume that this is a single paragraph, we ignore (X8) */
673 for(i=0; i<length; ++i) {
674 dirProp=dirProps[i];
675 switch(dirProp) {
676 case LRE:
677 case LRO:
678 /* (X3, X5) */
679 newLevel=(UBiDiLevel)((embeddingLevel+2)&~(UBIDI_LEVEL_OVERRIDE|1)); /* least greater even level */
680 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
681 stack[stackTop]=embeddingLevel;
682 ++stackTop;
683 embeddingLevel=newLevel;
684 if(dirProp==LRO) {
685 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
686 } else {
687 embeddingLevel&=~UBIDI_LEVEL_OVERRIDE;
688 }
689 } else if((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL) {
690 ++countOver61;
691 } else /* (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)==UBIDI_MAX_EXPLICIT_LEVEL-1 */ {
692 ++countOver60;
693 }
694 flags|=DIRPROP_FLAG(BN);
695 break;
696 case RLE:
697 case RLO:
698 /* (X2, X4) */
699 newLevel=(UBiDiLevel)(((embeddingLevel&~UBIDI_LEVEL_OVERRIDE)+1)|1); /* least greater odd level */
700 if(newLevel<=UBIDI_MAX_EXPLICIT_LEVEL) {
701 stack[stackTop]=embeddingLevel;
702 ++stackTop;
703 embeddingLevel=newLevel;
704 if(dirProp==RLO) {
705 embeddingLevel|=UBIDI_LEVEL_OVERRIDE;
706 } else {
707 embeddingLevel&=~UBIDI_LEVEL_OVERRIDE;
708 }
709 } else {
710 ++countOver61;
711 }
712 flags|=DIRPROP_FLAG(BN);
713 break;
714 case PDF:
715 /* (X7) */
716 /* handle all the overflow cases first */
717 if(countOver61>0) {
718 --countOver61;
719 } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) {
720 /* handle LRx overflows from level 60 */
721 --countOver60;
722 } else if(stackTop>0) {
723 /* this is the pop operation; it also pops level 61 while countOver60>0 */
724 --stackTop;
725 embeddingLevel=stack[stackTop];
726 /* } else { (underflow) */
727 }
728 flags|=DIRPROP_FLAG(BN);
729 break;
730 case B:
731 /*
732 * We do not really expect to see a paragraph separator (B),
733 * but we should do something reasonable with it,
734 * especially at the end of the text.
735 */
736 stackTop=0;
737 countOver60=countOver61=0;
738 embeddingLevel=level=pBiDi->paraLevel;
739 flags|=DIRPROP_FLAG(B);
740 break;
741 case BN:
742 /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */
743 /* they will get their levels set correctly in adjustWSLevels() */
744 flags|=DIRPROP_FLAG(BN);
745 break;
746 default:
747 /* all other types get the "real" level */
748 if(level!=embeddingLevel) {
749 level=embeddingLevel;
750 if(level&UBIDI_LEVEL_OVERRIDE) {
751 flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS;
752 } else {
753 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS;
754 }
755 }
756 if(!(level&UBIDI_LEVEL_OVERRIDE)) {
757 flags|=DIRPROP_FLAG(dirProp);
758 }
759 break;
760 }
761
762 /*
763 * We need to set reasonable levels even on BN codes and
764 * explicit codes because we will later look at same-level runs (X10).
765 */
766 levels[i]=level;
767 }
768 if(flags&MASK_EMBEDDING) {
769 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
770 }
771
772 /* subsequently, ignore the explicit codes and BN (X9) */
773
774 /* again, determine if the text is mixed-directional or single-directional */
775 pBiDi->flags=flags;
776 direction=directionFromFlags(flags);
777 }
778 return direction;
779}
780
781/*
782 * Use a pre-specified embedding levels array:
783 *
784 * Adjust the directional properties for overrides (->LEVEL_OVERRIDE),
785 * ignore all explicit codes (X9),
786 * and check all the preset levels.
787 *
788 * Recalculate the flags to have them reflect the real properties
789 * after taking the explicit embeddings into account.
790 */
791static UBiDiDirection
792checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) {
793 const DirProp *dirProps=pBiDi->dirProps;
794 UBiDiLevel *levels=pBiDi->levels;
795
796 int32_t i, length=pBiDi->length;
797 Flags flags=0; /* collect all directionalities in the text */
798 UBiDiLevel level, paraLevel=pBiDi->paraLevel;
799
800 for(i=0; i<length; ++i) {
801 // dlf: we special case levels array for java, 0 means base level, not actually 0
802 if (levels[i] == 0) {
803 levels[i] = paraLevel;
804 }
805 level=levels[i];
806 if(level&UBIDI_LEVEL_OVERRIDE) {
807 /* keep the override flag in levels[i] but adjust the flags */
808 level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */
809 flags|=DIRPROP_FLAG_O(level);
810 } else {
811 /* set the flags */
812 flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG(dirProps[i]);
813 }
814 if(level<paraLevel || UBIDI_MAX_EXPLICIT_LEVEL<level) {
815 /* level out of bounds */
816 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
817 return UBIDI_LTR;
818 }
819 }
820 if(flags&MASK_EMBEDDING) {
821 flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel);
822 }
823
824 /* determine if the text is mixed-directional or single-directional */
825 pBiDi->flags=flags;
826 return directionFromFlags(flags);
827}
828
829/* determine if the text is mixed-directional or single-directional */
830static UBiDiDirection
831directionFromFlags(Flags flags) {
832 /* if the text contains AN and neutrals, then some neutrals may become RTL */
833 if(!(flags&MASK_RTL || ((flags&DIRPROP_FLAG(AN)) && (flags&MASK_POSSIBLE_N)))) {
834 return UBIDI_LTR;
835 } else if(!(flags&MASK_LTR)) {
836 return UBIDI_RTL;
837 } else {
838 return UBIDI_MIXED;
839 }
840}
841
842/* perform rules (Wn), (Nn), and (In) on a run of the text ------------------ */
843
844/*
845 * This implementation of the (Wn) rules applies all rules in one pass.
846 * In order to do so, it needs a look-ahead of typically 1 character
847 * (except for W5: sequences of ET) and keeps track of changes
848 * in a rule Wp that affect a later Wq (p<q).
849 *
850 * historyOfEN is a variable-saver: it contains 4 boolean states;
851 * a bit in it set to 1 means:
852 * bit 0: the current code is an EN after W2
853 * bit 1: the current code is an EN after W4
854 * bit 2: the previous code was an EN after W2
855 * bit 3: the previous code was an EN after W4
856 * In other words, b0..1 have transitions of EN in the current iteration,
857 * while b2..3 have the transitions of EN in the previous iteration.
858 * A simple historyOfEN<<=2 suffices for the propagation.
859 *
860 * The (Nn) and (In) rules are also performed in that same single loop,
861 * but effectively one iteration behind for white space.
862 *
863 * Since all implicit rules are performed in one step, it is not necessary
864 * to actually store the intermediate directional properties in dirProps[].
865 */
866
867#define EN_SHIFT 2
868#define EN_AFTER_W2 1
869#define EN_AFTER_W4 2
870#define EN_ALL 3
871#define PREV_EN_AFTER_W2 4
872#define PREV_EN_AFTER_W4 8
873
874static void
875resolveImplicitLevels(UBiDi *pBiDi,
876 int32_t start, int32_t limit,
877 DirProp sor, DirProp eor) {
878 const DirProp *dirProps=pBiDi->dirProps;
879 UBiDiLevel *levels=pBiDi->levels;
880
881 int32_t i, next, neutralStart=-1;
882 DirProp prevDirProp, dirProp, nextDirProp, lastStrong, beforeNeutral=L;
883 UBiDiLevel numberLevel;
884 uint8_t historyOfEN;
885
886 /* initialize: current at sor, next at start (it is start<limit) */
887 next=start;
888 dirProp=lastStrong=sor;
889 nextDirProp=dirProps[next];
890 historyOfEN=0;
891
892 if(pBiDi->isInverse) {
893 /*
894 * For "inverse BiDi", we set the levels of numbers just like for
895 * regular L characters, plus a flag that ubidi_getRuns() will use
896 * to set a similar flag on the corresponding output run.
897 */
898 numberLevel=levels[start];
899 if(numberLevel&1) {
900 ++numberLevel;
901 }
902 } else {
903 /* normal BiDi: least greater even level */
904 numberLevel=(UBiDiLevel)((levels[start]+2)&~1);
905 }
906
907 /*
908 * In all steps of this implementation, BN and explicit embedding codes
909 * must be treated as if they didn't exist (X9).
910 * They will get levels set before a non-neutral character, and remain
911 * undefined before a neutral one, but adjustWSLevels() will take care
912 * of all of them.
913 */
914 while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT) {
915 if(++next<limit) {
916 nextDirProp=dirProps[next];
917 } else {
918 nextDirProp=eor;
919 break;
920 }
921 }
922
923 /*
924 * Note: at the end of this file, there is a prototype
925 * of a version of this function that uses a statetable
926 * at the core of this state machine.
927 * If you make changes to this state machine,
928 * please update that prototype as well.
929 */
930
931 /* loop for entire run */
932 while(next<limit) {
933 /* advance */
934 prevDirProp=dirProp;
935 dirProp=nextDirProp;
936 i=next;
937 do {
938 if(++next<limit) {
939 nextDirProp=dirProps[next];
940 } else {
941 nextDirProp=eor;
942 break;
943 }
944 } while(DIRPROP_FLAG(nextDirProp)&MASK_BN_EXPLICIT);
945 historyOfEN<<=EN_SHIFT;
946
947 /* (W1..W7) */
948 switch(dirProp) {
949 case L:
950 lastStrong=L;
951 break;
952 case R:
953 lastStrong=R;
954 break;
955 case AL:
956 /* (W3) */
957 lastStrong=AL;
958 dirProp=R;
959 break;
960 case EN:
961 /* we have to set historyOfEN correctly */
962 if(lastStrong==AL) {
963 /* (W2) */
964 dirProp=AN;
965 } else {
966 if(lastStrong==L) {
967 /* (W7) */
968 dirProp=L;
969 }
970 /* this EN stays after (W2) and (W4) - at least before (W7) */
971 historyOfEN|=EN_ALL;
972 }
973 break;
974 case ES:
975 if( historyOfEN&PREV_EN_AFTER_W2 && /* previous was EN before (W4) */
976 nextDirProp==EN && lastStrong!=AL /* next is EN and (W2) won't make it AN */
977 ) {
978 /* (W4) */
979 if(lastStrong!=L) {
980 dirProp=EN;
981 } else {
982 /* (W7) */
983 dirProp=L;
984 }
985 historyOfEN|=EN_AFTER_W4;
986 } else {
987 /* (W6) */
988 dirProp=ON;
989 }
990 break;
991 case CS:
992 if( historyOfEN&PREV_EN_AFTER_W2 && /* previous was EN before (W4) */
993 nextDirProp==EN && lastStrong!=AL /* next is EN and (W2) won't make it AN */
994 ) {
995 /* (W4) */
996 if(lastStrong!=L) {
997 dirProp=EN;
998 } else {
999 /* (W7) */
1000 dirProp=L;
1001 }
1002 historyOfEN|=EN_AFTER_W4;
1003 } else if(prevDirProp==AN && /* previous was AN */
1004 (nextDirProp==AN || /* next is AN */
1005 (nextDirProp==EN && lastStrong==AL)) /* or (W2) will make it one */
1006 ) {
1007 /* (W4) */
1008 dirProp=AN;
1009 } else {
1010 /* (W6) */
1011 dirProp=ON;
1012 }
1013 break;
1014 case ET:
1015 /* get sequence of ET; advance only next, not current, previous or historyOfEN */
1016 if(next<limit) {
1017 while(DIRPROP_FLAG(nextDirProp)&MASK_ET_NSM_BN /* (W1), (X9) */) {
1018 if(++next<limit) {
1019 nextDirProp=dirProps[next];
1020 } else {
1021 nextDirProp=eor;
1022 break;
1023 }
1024 }
1025 }
1026
1027 /* now process the sequence of ET like a single ET */
1028 if((historyOfEN&PREV_EN_AFTER_W4) || /* previous was EN before (W5) */
1029 (nextDirProp==EN && lastStrong!=AL) /* next is EN and (W2) won't make it AN */
1030 ) {
1031 /* (W5) */
1032 if(lastStrong!=L) {
1033 dirProp=EN;
1034 } else {
1035 /* (W7) */
1036 dirProp=L;
1037 }
1038 } else {
1039 /* (W6) */
1040 dirProp=ON;
1041 }
1042
1043 /* apply the result of (W1), (W5)..(W7) to the entire sequence of ET */
1044 break;
1045 case NSM:
1046 /* (W1) */
1047 dirProp=prevDirProp;
1048 /* set historyOfEN back to prevDirProp's historyOfEN */
1049 historyOfEN>>=EN_SHIFT;
1050 /*
1051 * Technically, this should be done before the switch() in the form
1052 * if(nextDirProp==NSM) {
1053 * dirProps[next]=nextDirProp=dirProp;
1054 * }
1055 *
1056 * - effectively one iteration ahead.
1057 * However, whether the next dirProp is NSM or is equal to the current dirProp
1058 * does not change the outcome of any condition in (W2)..(W7).
1059 */
1060 break;
1061 default:
1062 break;
1063 }
1064
1065 /* here, it is always [prev,this,next]dirProp!=BN; it may be next>i+1 */
1066
1067 /* perform (Nn) - here, only L, R, EN, AN, and neutrals are left */
1068 /* for "inverse BiDi", treat neutrals like L */
1069 /* this is one iteration late for the neutrals */
1070 if(DIRPROP_FLAG(dirProp)&MASK_N) {
1071 if(neutralStart<0) {
1072 /* start of a sequence of neutrals */
1073 neutralStart=i;
1074 beforeNeutral=prevDirProp;
1075 }
1076 } else /* not a neutral, can be only one of { L, R, EN, AN } */ {
1077 /*
1078 * Note that all levels[] values are still the same at this
1079 * point because this function is called for an entire
1080 * same-level run.
1081 * Therefore, we need to read only one actual level.
1082 */
1083 UBiDiLevel level=levels[i];
1084
1085 if(neutralStart>=0) {
1086 UBiDiLevel final;
1087 /* end of a sequence of neutrals (dirProp is "afterNeutral") */
1088 if(!(pBiDi->isInverse)) {
1089 if(beforeNeutral==L) {
1090 if(dirProp==L) {
1091 final=0; /* make all neutrals L (N1) */
1092 } else {
1093 final=level; /* make all neutrals "e" (N2) */
1094 }
1095 } else /* beforeNeutral is one of { R, EN, AN } */ {
1096 if(dirProp==L) {
1097 final=level; /* make all neutrals "e" (N2) */
1098 } else {
1099 final=1; /* make all neutrals R (N1) */
1100 }
1101 }
1102 } else {
1103 /* "inverse BiDi": collapse [before]dirProps L, EN, AN into L */
1104 if(beforeNeutral!=R) {
1105 if(dirProp!=R) {
1106 final=0; /* make all neutrals L (N1) */
1107 } else {
1108 final=level; /* make all neutrals "e" (N2) */
1109 }
1110 } else /* beforeNeutral is one of { R, EN, AN } */ {
1111 if(dirProp!=R) {
1112 final=level; /* make all neutrals "e" (N2) */
1113 } else {
1114 final=1; /* make all neutrals R (N1) */
1115 }
1116 }
1117 }
1118 /* perform (In) on the sequence of neutrals */
1119 if((level^final)&1) {
1120 /* do something only if we need to _change_ the level */
1121 do {
1122 ++levels[neutralStart];
1123 } while(++neutralStart<i);
1124 }
1125 neutralStart=-1;
1126 }
1127
1128 /* perform (In) on the non-neutral character */
1129 /*
1130 * in the cases of (W5), processing a sequence of ET,
1131 * and of (X9), skipping BN,
1132 * there may be multiple characters from i to <next
1133 * that all get (virtually) the same dirProp and (really) the same level
1134 */
1135 if(dirProp==L) {
1136 if(level&1) {
1137 ++level;
1138 } else {
1139 i=next; /* we keep the levels */
1140 }
1141 } else if(dirProp==R) {
1142 if(!(level&1)) {
1143 ++level;
1144 } else {
1145 i=next; /* we keep the levels */
1146 }
1147 } else /* EN or AN */ {
1148 /* this level depends on whether we do "inverse BiDi" */
1149 level=numberLevel;
1150 }
1151
1152 /* apply the new level to the sequence, if necessary */
1153 while(i<next) {
1154 levels[i++]=level;
1155 }
1156 }
1157 }
1158
1159 /* perform (Nn) - here,
1160 the character after the the neutrals is eor, which is either L or R */
1161 /* this is one iteration late for the neutrals */
1162 if(neutralStart>=0) {
1163 /*
1164 * Note that all levels[] values are still the same at this
1165 * point because this function is called for an entire
1166 * same-level run.
1167 * Therefore, we need to read only one actual level.
1168 */
1169 UBiDiLevel level=levels[neutralStart], final;
1170
1171 /* end of a sequence of neutrals (eor is "afterNeutral") */
1172 if(!(pBiDi->isInverse)) {
1173 if(beforeNeutral==L) {
1174 if(eor==L) {
1175 final=0; /* make all neutrals L (N1) */
1176 } else {
1177 final=level; /* make all neutrals "e" (N2) */
1178 }
1179 } else /* beforeNeutral is one of { R, EN, AN } */ {
1180 if(eor==L) {
1181 final=level; /* make all neutrals "e" (N2) */
1182 } else {
1183 final=1; /* make all neutrals R (N1) */
1184 }
1185 }
1186 } else {
1187 /* "inverse BiDi": collapse [before]dirProps L, EN, AN into L */
1188 if(beforeNeutral!=R) {
1189 if(eor!=R) {
1190 final=0; /* make all neutrals L (N1) */
1191 } else {
1192 final=level; /* make all neutrals "e" (N2) */
1193 }
1194 } else /* beforeNeutral is one of { R, EN, AN } */ {
1195 if(eor!=R) {
1196 final=level; /* make all neutrals "e" (N2) */
1197 } else {
1198 final=1; /* make all neutrals R (N1) */
1199 }
1200 }
1201 }
1202 /* perform (In) on the sequence of neutrals */
1203 if((level^final)&1) {
1204 /* do something only if we need to _change_ the level */
1205 do {
1206 ++levels[neutralStart];
1207 } while(++neutralStart<limit);
1208 }
1209 }
1210}
1211
1212/* perform (L1) and (X9) ---------------------------------------------------- */
1213
1214/*
1215 * Reset the embedding levels for some non-graphic characters (L1).
1216 * This function also sets appropriate levels for BN, and
1217 * explicit embedding types that are supposed to have been removed
1218 * from the paragraph in (X9).
1219 */
1220static void
1221adjustWSLevels(UBiDi *pBiDi) {
1222 const DirProp *dirProps=pBiDi->dirProps;
1223 UBiDiLevel *levels=pBiDi->levels;
1224 int32_t i;
1225
1226 if(pBiDi->flags&MASK_WS) {
1227 UBiDiLevel paraLevel=pBiDi->paraLevel;
1228 Flags flag;
1229
1230 i=pBiDi->trailingWSStart;
1231 while(i>0) {
1232 /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */
1233 while(i>0 && DIRPROP_FLAG(dirProps[--i])&MASK_WS) {
1234 levels[i]=paraLevel;
1235 }
1236
1237 /* reset BN to the next character's paraLevel until B/S, which restarts above loop */
1238 /* here, i+1 is guaranteed to be <length */
1239 while(i>0) {
1240 flag=DIRPROP_FLAG(dirProps[--i]);
1241 if(flag&MASK_BN_EXPLICIT) {
1242 levels[i]=levels[i+1];
1243 } else if(flag&MASK_B_S) {
1244 levels[i]=paraLevel;
1245 break;
1246 }
1247 }
1248 }
1249 }
1250
1251 /* now remove the UBIDI_LEVEL_OVERRIDE flags, if any */
1252 /* (a separate loop can be optimized more easily by a compiler) */
1253 if(pBiDi->flags&MASK_OVERRIDE) {
1254 for(i=pBiDi->trailingWSStart; i>0;) {
1255 levels[--i]&=~UBIDI_LEVEL_OVERRIDE;
1256 }
1257 }
1258}
1259
1260/* -------------------------------------------------------------------------- */
1261
1262U_CAPI UBiDiDirection U_EXPORT2
1263ubidi_getDirection(const UBiDi *pBiDi) {
1264 if(pBiDi!=NULL) {
1265 return pBiDi->direction;
1266 } else {
1267 return UBIDI_LTR;
1268 }
1269}
1270
1271U_CAPI const UChar * U_EXPORT2
1272ubidi_getText(const UBiDi *pBiDi) {
1273 if(pBiDi!=NULL) {
1274 return pBiDi->text;
1275 } else {
1276 return NULL;
1277 }
1278}
1279
1280U_CAPI int32_t U_EXPORT2
1281ubidi_getLength(const UBiDi *pBiDi) {
1282 if(pBiDi!=NULL) {
1283 return pBiDi->length;
1284 } else {
1285 return 0;
1286 }
1287}
1288
1289U_CAPI UBiDiLevel U_EXPORT2
1290ubidi_getParaLevel(const UBiDi *pBiDi) {
1291 if(pBiDi!=NULL) {
1292 return pBiDi->paraLevel;
1293 } else {
1294 return 0;
1295 }
1296}
1297
1298/* statetable prototype ----------------------------------------------------- */
1299
1300/*
1301 * This is here for possible future
1302 * performance work and is not compiled right now.
1303 */
1304
1305#if 0
1306/*
1307 * This is a piece of code that could be part of ubidi.c/resolveImplicitLevels().
1308 * It replaces in the (Wn) state machine the switch()-if()-cascade with
1309 * just a few if()s and a state table.
1310 */
1311
1312/* use the state table only for the following dirProp's */
1313#define MASK_W_TABLE (FLAG(L)|FLAG(R)|FLAG(AL)|FLAG(EN)|FLAG(ES)|FLAG(CS)|FLAG(ET)|FLAG(AN))
1314
1315/*
1316 * inputs:
1317 *
1318 * 0..1 historyOfEN - 2b
1319 * 2 prevDirProp==AN - 1b
1320 * 3..4 lastStrong, one of { L, R, AL, none } - 2b
1321 * 5..7 dirProp, one of { L, R, AL, EN, ES, CS, ET, AN } - 3b
1322 * 8..9 nextDirProp, one of { EN, AN, other }
1323 *
1324 * total: 10b=1024 states
1325 */
1326enum { _L, _R, _AL, _EN, _ES, _CS, _ET, _AN, _OTHER }; /* lastStrong, dirProp */
1327enum { __EN, __AN, __OTHER }; /* nextDirProp */
1328
1329#define LAST_STRONG_SHIFT 3
1330#define DIR_PROP_SHIFT 5
1331#define NEXT_DIR_PROP_SHIFT 8
1332
1333/* masks after shifting */
1334#define LAST_STRONG_MASK 3
1335#define DIR_PROP_MASK 7
1336#define STATE_MASK 0x1f
1337
1338/* convert dirProp into _dirProp (above enum) */
1339static DirProp inputDirProp[dirPropCount]={ _X<<DIR_PROP_SHIFT, ... };
1340
1341/* convert dirProp into __dirProp (above enum) */
1342static DirProp inputNextDirProp[dirPropCount]={ __X<<NEXT_DIR_PROP_SHIFT, ... };
1343
1344/*
1345 * outputs:
1346 *
1347 * dirProp, one of { L, R, EN, AN, ON } - 3b
1348 *
1349 * 0..1 historyOfEN - 2b
1350 * 2 prevDirProp==AN - 1b
1351 * 3..4 lastStrong, one of { L, R, AL, none } - 2b
1352 * 5..7 new dirProp, one of { L, R, EN, AN, ON }
1353 *
1354 * total: 8 bits=1 byte per state
1355 */
1356enum { ___L, ___R, ___EN, ___AN, ___ON, ___count };
1357
1358/* convert ___dirProp into dirProp (above enum) */
1359static DirProp outputDirProp[___count]={ X, ... };
1360
1361/* state table */
1362static uint8_t wnTable[1024]={ /* calculate with switch()-if()-cascade */ };
1363
1364static void
1365resolveImplicitLevels(BiDi *pBiDi,
1366 Index start, Index end,
1367 DirProp sor, DirProp eor) {
1368 /* new variable */
1369 uint8_t state;
1370
1371 /* remove variable lastStrong */
1372
1373 /* set initial state (set lastStrong, the rest is 0) */
1374 state= sor==L ? 0 : _R<<LAST_STRONG_SHIFT;
1375
1376 while(next<limit) {
1377 /* advance */
1378 prevDirProp=dirProp;
1379 dirProp=nextDirProp;
1380 i=next;
1381 do {
1382 if(++next<limit) {
1383 nextDirProp=dirProps[next];
1384 } else {
1385 nextDirProp=eor;
1386 break;
1387 }
1388 } while(FLAG(nextDirProp)&MASK_BN_EXPLICIT);
1389
1390 /* (W1..W7) */
1391 /* ### This may be more efficient with a switch(dirProp). */
1392 if(FLAG(dirProp)&MASK_W_TABLE) {
1393 state=wnTable[
1394 ((int)state)|
1395 inputDirProp[dirProp]|
1396 inputNextDirProp[nextDirProp]
1397 ];
1398 dirProp=outputDirProp[state>>DIR_PROP_SHIFT];
1399 state&=STATE_MASK;
1400 } else if(dirProp==ET) {
1401 /* get sequence of ET; advance only next, not current, previous or historyOfEN */
1402 while(next<limit && FLAG(nextDirProp)&MASK_ET_NSM_BN /* (W1), (X9) */) {
1403 if(++next<limit) {
1404 nextDirProp=dirProps[next];
1405 } else {
1406 nextDirProp=eor;
1407 break;
1408 }
1409 }
1410
1411 state=wnTable[
1412 ((int)state)|
1413 _ET<<DIR_PROP_SHIFT|
1414 inputNextDirProp[nextDirProp]
1415 ];
1416 dirProp=outputDirProp[state>>DIR_PROP_SHIFT];
1417 state&=STATE_MASK;
1418
1419 /* apply the result of (W1), (W5)..(W7) to the entire sequence of ET */
1420 } else if(dirProp==NSM) {
1421 /* (W1) */
1422 dirProp=prevDirProp;
1423 /* keep prevDirProp's EN and AN states! */
1424 } else /* other */ {
1425 /* set EN and AN states to 0 */
1426 state&=LAST_STRONG_MASK<<LAST_STRONG_SHIFT;
1427 }
1428
1429 /* perform (Nn) and (In) as usual */
1430 }
1431 /* perform (Nn) and (In) as usual */
1432}
1433#endif