blob: 80d7f6fa8f83ae5abbeaf77c06d03069aefeb0eb [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 2000-2003 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26/*
27 * (C) Copyright IBM Corp. 1999-2003 - All Rights Reserved
28 *
29 * The original version of this source code and documentation is
30 * copyrighted and owned by IBM. These materials are provided
31 * under terms of a License Agreement between IBM and Sun.
32 * This technology is protected by multiple US and International
33 * patents. This notice and attribution to IBM may not be removed.
34 */
35
36package java.text;
37
38import java.awt.Toolkit;
39import java.awt.font.TextAttribute;
40import java.awt.font.NumericShaper;
41import sun.text.CodePointIterator;
42
43/**
44 * This class implements the Unicode Bidirectional Algorithm.
45 * <p>
46 * A Bidi object provides information on the bidirectional reordering of the text
47 * used to create it. This is required, for example, to properly display Arabic
48 * or Hebrew text. These languages are inherently mixed directional, as they order
49 * numbers from left-to-right while ordering most other text from right-to-left.
50 * <p>
51 * Once created, a Bidi object can be queried to see if the text it represents is
52 * all left-to-right or all right-to-left. Such objects are very lightweight and
53 * this text is relatively easy to process.
54 * <p>
55 * If there are multiple runs of text, information about the runs can be accessed
56 * by indexing to get the start, limit, and level of a run. The level represents
57 * both the direction and the 'nesting level' of a directional run. Odd levels
58 * are right-to-left, while even levels are left-to-right. So for example level
59 * 0 represents left-to-right text, while level 1 represents right-to-left text, and
60 * level 2 represents left-to-right text embedded in a right-to-left run.
61 *
62 * @since 1.4
63 */
64public final class Bidi {
65 byte dir;
66 byte baselevel;
67 int length;
68 int[] runs;
69 int[] cws;
70
71 static {
72 sun.font.FontManagerNativeLibrary.load();
73 }
74
75 /** Constant indicating base direction is left-to-right. */
76 public static final int DIRECTION_LEFT_TO_RIGHT = 0;
77
78 /** Constant indicating base direction is right-to-left. */
79 public static final int DIRECTION_RIGHT_TO_LEFT = 1;
80
81 /**
82 * Constant indicating that the base direction depends on the first strong
83 * directional character in the text according to the Unicode
84 * Bidirectional Algorithm. If no strong directional character is present,
85 * the base direction is left-to-right.
86 */
87 public static final int DIRECTION_DEFAULT_LEFT_TO_RIGHT = -2;
88
89 /**
90 * Constant indicating that the base direction depends on the first strong
91 * directional character in the text according to the Unicode
92 * Bidirectional Algorithm. If no strong directional character is present,
93 * the base direction is right-to-left.
94 */
95 public static final int DIRECTION_DEFAULT_RIGHT_TO_LEFT = -1;
96
97 private static final int DIR_MIXED = 2;
98
99 /**
100 * Create Bidi from the given paragraph of text and base direction.
101 * @param paragraph a paragraph of text
102 * @param flags a collection of flags that control the algorithm. The
103 * algorithm understands the flags DIRECTION_LEFT_TO_RIGHT, DIRECTION_RIGHT_TO_LEFT,
104 * DIRECTION_DEFAULT_LEFT_TO_RIGHT, and DIRECTION_DEFAULT_RIGHT_TO_LEFT.
105 * Other values are reserved.
106 */
107 public Bidi(String paragraph, int flags) {
108 if (paragraph == null) {
109 throw new IllegalArgumentException("paragraph is null");
110 }
111
112 nativeBidiChars(this, paragraph.toCharArray(), 0, null, 0, paragraph.length(), flags);
113 }
114
115 /**
116 * Create Bidi from the given paragraph of text.
117 * <p>
118 * The RUN_DIRECTION attribute in the text, if present, determines the base
119 * direction (left-to-right or right-to-left). If not present, the base
120 * direction is computes using the Unicode Bidirectional Algorithm, defaulting to left-to-right
121 * if there are no strong directional characters in the text. This attribute, if
122 * present, must be applied to all the text in the paragraph.
123 * <p>
124 * The BIDI_EMBEDDING attribute in the text, if present, represents embedding level
125 * information. Negative values from -1 to -62 indicate overrides at the absolute value
126 * of the level. Positive values from 1 to 62 indicate embeddings. Where values are
127 * zero or not defined, the base embedding level as determined by the base direction
128 * is assumed.
129 * <p>
130 * The NUMERIC_SHAPING attribute in the text, if present, converts European digits to
131 * other decimal digits before running the bidi algorithm. This attribute, if present,
132 * must be applied to all the text in the paragraph.
133 *
134 * @param paragraph a paragraph of text with optional character and paragraph attribute information
135 *
136 * @see TextAttribute#BIDI_EMBEDDING
137 * @see TextAttribute#NUMERIC_SHAPING
138 * @see TextAttribute#RUN_DIRECTION
139 */
140 public Bidi(AttributedCharacterIterator paragraph) {
141 if (paragraph == null) {
142 throw new IllegalArgumentException("paragraph is null");
143 }
144
145 int flags = DIRECTION_DEFAULT_LEFT_TO_RIGHT;
146 byte[] embeddings = null;
147
148 int start = paragraph.getBeginIndex();
149 int limit = paragraph.getEndIndex();
150 int length = limit - start;
151 int n = 0;
152 char[] text = new char[length];
153 for (char c = paragraph.first(); c != paragraph.DONE; c = paragraph.next()) {
154 text[n++] = c;
155 }
156
157 paragraph.first();
158 try {
159 Boolean runDirection = (Boolean)paragraph.getAttribute(TextAttribute.RUN_DIRECTION);
160 if (runDirection != null) {
161 if (TextAttribute.RUN_DIRECTION_LTR.equals(runDirection)) {
162 flags = DIRECTION_LEFT_TO_RIGHT; // clears default setting
163 } else {
164 flags = DIRECTION_RIGHT_TO_LEFT;
165 }
166 }
167 }
168 catch (ClassCastException e) {
169 }
170
171 try {
172 NumericShaper shaper = (NumericShaper)paragraph.getAttribute(TextAttribute.NUMERIC_SHAPING);
173 if (shaper != null) {
174 shaper.shape(text, 0, text.length);
175 }
176 }
177 catch (ClassCastException e) {
178 }
179
180 int pos = start;
181 do {
182 paragraph.setIndex(pos);
183 Object embeddingLevel = paragraph.getAttribute(TextAttribute.BIDI_EMBEDDING);
184 int newpos = paragraph.getRunLimit(TextAttribute.BIDI_EMBEDDING);
185
186 if (embeddingLevel != null) {
187 try {
188 int intLevel = ((Integer)embeddingLevel).intValue();
189 if (intLevel >= -61 && intLevel < 61) {
190 byte level = (byte)(intLevel < 0 ? (-intLevel | 0x80) : intLevel);
191 if (embeddings == null) {
192 embeddings = new byte[length];
193 }
194 for (int i = pos - start; i < newpos - start; ++i) {
195 embeddings[i] = level;
196 }
197 }
198 }
199 catch (ClassCastException e) {
200 }
201 }
202 pos = newpos;
203 } while (pos < limit);
204
205 nativeBidiChars(this, text, 0, embeddings, 0, text.length, flags);
206 }
207
208 /**
209 * Create Bidi from the given text, embedding, and direction information.
210 * The embeddings array may be null. If present, the values represent embedding level
211 * information. Negative values from -1 to -61 indicate overrides at the absolute value
212 * of the level. Positive values from 1 to 61 indicate embeddings. Where values are
213 * zero, the base embedding level as determined by the base direction is assumed.
214 * @param text an array containing the paragraph of text to process.
215 * @param textStart the index into the text array of the start of the paragraph.
216 * @param embeddings an array containing embedding values for each character in the paragraph.
217 * This can be null, in which case it is assumed that there is no external embedding information.
218 * @param embStart the index into the embedding array of the start of the paragraph.
219 * @param paragraphLength the length of the paragraph in the text and embeddings arrays.
220 * @param flags a collection of flags that control the algorithm. The
221 * algorithm understands the flags DIRECTION_LEFT_TO_RIGHT, DIRECTION_RIGHT_TO_LEFT,
222 * DIRECTION_DEFAULT_LEFT_TO_RIGHT, and DIRECTION_DEFAULT_RIGHT_TO_LEFT.
223 * Other values are reserved.
224 */
225 public Bidi(char[] text, int textStart, byte[] embeddings, int embStart, int paragraphLength, int flags) {
226 if (text == null) {
227 throw new IllegalArgumentException("text is null");
228 }
229 if (paragraphLength < 0) {
230 throw new IllegalArgumentException("bad length: " + paragraphLength);
231 }
232 if (textStart < 0 || paragraphLength > text.length - textStart) {
233 throw new IllegalArgumentException("bad range: " + textStart +
234 " length: " + paragraphLength +
235 " for text of length: " + text.length);
236 }
237 if (embeddings != null && (embStart < 0 || paragraphLength > embeddings.length - embStart)) {
238 throw new IllegalArgumentException("bad range: " + embStart +
239 " length: " + paragraphLength +
240 " for embeddings of length: " + text.length);
241 }
242
243 if (embeddings != null) {
244 // native uses high bit to indicate override, not negative value, sigh
245
246 for (int i = embStart, embLimit = embStart + paragraphLength; i < embLimit; ++i) {
247 if (embeddings[i] < 0) {
248 byte[] temp = new byte[paragraphLength];
249 System.arraycopy(embeddings, embStart, temp, 0, paragraphLength);
250
251 for (i -= embStart; i < paragraphLength; ++i) {
252 if (temp[i] < 0) {
253 temp[i] = (byte)(-temp[i] | 0x80);
254 }
255 }
256
257 embeddings = temp;
258 embStart = 0;
259 break;
260 }
261 }
262 }
263
264 nativeBidiChars(this, text, textStart, embeddings, embStart, paragraphLength, flags);
265 }
266
267 /**
268 * Private constructor used by line bidi.
269 */
270 private Bidi(int dir, int baseLevel, int length, int[] data, int[] cws) {
271 reset(dir, baseLevel, length, data, cws);
272 }
273
274 /**
275 * Private mutator used by native code.
276 */
277 private void reset(int dir, int baselevel, int length, int[] data, int[] cws) {
278 this.dir = (byte)dir;
279 this.baselevel = (byte)baselevel;
280 this.length = length;
281 this.runs = data;
282 this.cws = cws;
283 }
284
285 /**
286 * Create a Bidi object representing the bidi information on a line of text within
287 * the paragraph represented by the current Bidi. This call is not required if the
288 * entire paragraph fits on one line.
289 * @param lineStart the offset from the start of the paragraph to the start of the line.
290 * @param lineLimit the offset from the start of the paragraph to the limit of the line.
291 */
292 public Bidi createLineBidi(int lineStart, int lineLimit) {
293 if (lineStart == 0 && lineLimit == length) {
294 return this;
295 }
296
297 int lineLength = lineLimit - lineStart;
298 if (lineStart < 0 ||
299 lineLimit < lineStart ||
300 lineLimit > length) {
301 throw new IllegalArgumentException("range " + lineStart +
302 " to " + lineLimit +
303 " is invalid for paragraph of length " + length);
304 }
305
306 if (runs == null) {
307 return new Bidi(dir, baselevel, lineLength, null, null);
308 } else {
309 int cwspos = -1;
310 int[] ncws = null;
311 if (cws != null) {
312 int cwss = 0;
313 int cwsl = cws.length;
314 while (cwss < cwsl) {
315 if (cws[cwss] >= lineStart) {
316 cwsl = cwss;
317 while (cwsl < cws.length && cws[cwsl] < lineLimit) {
318 cwsl++;
319 }
320 int ll = lineLimit-1;
321 while (cwsl > cwss && cws[cwsl-1] == ll) {
322 cwspos = ll; // record start of counter-directional whitespace
323 --cwsl;
324 --ll;
325 }
326
327 if (cwspos == lineStart) { // entire line is cws, so ignore
328 return new Bidi(dir, baselevel, lineLength, null, null);
329 }
330
331 int ncwslen = cwsl - cwss;
332 if (ncwslen > 0) {
333 ncws = new int[ncwslen];
334 for (int i = 0; i < ncwslen; ++i) {
335 ncws[i] = cws[cwss+i] - lineStart;
336 }
337 }
338 break;
339 }
340 ++cwss;
341 }
342 }
343
344 int[] nruns = null;
345 int nlevel = baselevel;
346 int limit = cwspos == -1 ? lineLimit : cwspos;
347 int rs = 0;
348 int rl = runs.length;
349 int ndir = dir;
350 for (; rs < runs.length; rs += 2) {
351 if (runs[rs] > lineStart) {
352 rl = rs;
353 while (rl < runs.length && runs[rl] < limit) {
354 rl += 2;
355 }
356 if ((rl > rs) || (runs[rs+1] != baselevel)) {
357 rl += 2;
358
359 if (cwspos != -1 && rl > rs && runs[rl-1] != baselevel) { // add level for cws
360 nruns = new int[rl - rs + 2];
361 nruns[rl - rs] = lineLength;
362 nruns[rl - rs + 1] = baselevel;
363 } else {
364 limit = lineLimit;
365 nruns = new int[rl - rs];
366 }
367
368 int n = 0;
369 for (int i = rs; i < rl; i += 2) {
370 nruns[n++] = runs[i] - lineStart;
371 nruns[n++] = runs[i+1];
372 }
373 nruns[n-2] = limit - lineStart;
374 } else {
375 ndir = (runs[rs+1] & 0x1) == 0 ? DIRECTION_LEFT_TO_RIGHT : DIRECTION_RIGHT_TO_LEFT;
376 }
377 break;
378 }
379 }
380
381 return new Bidi(ndir, baselevel, lineLength, nruns, ncws);
382 }
383 }
384
385 /**
386 * Return true if the line is not left-to-right or right-to-left. This means it either has mixed runs of left-to-right
387 * and right-to-left text, or the base direction differs from the direction of the only run of text.
388 * @return true if the line is not left-to-right or right-to-left.
389 */
390 public boolean isMixed() {
391 return dir == DIR_MIXED;
392 }
393
394 /**
395 * Return true if the line is all left-to-right text and the base direction is left-to-right.
396 * @return true if the line is all left-to-right text and the base direction is left-to-right
397 */
398 public boolean isLeftToRight() {
399 return dir == DIRECTION_LEFT_TO_RIGHT;
400 }
401
402 /**
403 * Return true if the line is all right-to-left text, and the base direction is right-to-left.
404 * @return true if the line is all right-to-left text, and the base direction is right-to-left
405 */
406 public boolean isRightToLeft() {
407 return dir == DIRECTION_RIGHT_TO_LEFT;
408 }
409
410 /**
411 * Return the length of text in the line.
412 * @return the length of text in the line
413 */
414 public int getLength() {
415 return length;
416 }
417
418 /**
419 * Return true if the base direction is left-to-right.
420 * @return true if the base direction is left-to-right
421 */
422 public boolean baseIsLeftToRight() {
423 return (baselevel & 0x1) == 0;
424 }
425
426 /**
427 * Return the base level (0 if left-to-right, 1 if right-to-left).
428 * @return the base level
429 */
430 public int getBaseLevel() {
431 return baselevel;
432 }
433
434 /**
435 * Return the resolved level of the character at offset. If offset is <0 or >=
436 * the length of the line, return the base direction level.
437 * @param offset the index of the character for which to return the level
438 * @return the resolved level of the character at offset
439 */
440 public int getLevelAt(int offset) {
441 if (runs == null || offset < 0 || offset >= length) {
442 return baselevel;
443 } else {
444 int i = 0;
445 do {
446 if (offset < runs[i]) {
447 return runs[i+1];
448 }
449 i += 2;
450 } while (true);
451 }
452 }
453
454 /**
455 * Return the number of level runs.
456 * @return the number of level runs
457 */
458 public int getRunCount() {
459 return runs == null ? 1 : runs.length / 2;
460 }
461
462 /**
463 * Return the level of the nth logical run in this line.
464 * @param run the index of the run, between 0 and <code>getRunCount()</code>
465 * @return the level of the run
466 */
467 public int getRunLevel(int run) {
468 return runs == null ? baselevel : runs[run * 2 + 1];
469 }
470
471 /**
472 * Return the index of the character at the start of the nth logical run in this line, as
473 * an offset from the start of the line.
474 * @param run the index of the run, between 0 and <code>getRunCount()</code>
475 * @return the start of the run
476 */
477 public int getRunStart(int run) {
478 return (runs == null || run == 0) ? 0 : runs[run * 2 - 2];
479 }
480
481 /**
482 * Return the index of the character past the end of the nth logical run in this line, as
483 * an offset from the start of the line. For example, this will return the length
484 * of the line for the last run on the line.
485 * @param run the index of the run, between 0 and <code>getRunCount()</code>
486 * @return limit the limit of the run
487 */
488 public int getRunLimit(int run) {
489 return runs == null ? length : runs[run * 2];
490 }
491
492 /**
493 * Return true if the specified text requires bidi analysis. If this returns false,
494 * the text will display left-to-right. Clients can then avoid constructing a Bidi object.
495 * Text in the Arabic Presentation Forms area of Unicode is presumed to already be shaped
496 * and ordered for display, and so will not cause this function to return true.
497 *
498 * @param text the text containing the characters to test
499 * @param start the start of the range of characters to test
500 * @param limit the limit of the range of characters to test
501 * @return true if the range of characters requires bidi analysis
502 */
503 public static boolean requiresBidi(char[] text, int start, int limit) {
504 CodePointIterator cpi = CodePointIterator.create(text, start, limit);
505 for (int cp = cpi.next(); cp != CodePointIterator.DONE; cp = cpi.next()) {
506 if (cp > 0x0590) {
507 int dc = nativeGetDirectionCode(cp);
508 if ((RMASK & (1 << dc)) != 0) {
509 return true;
510 }
511 }
512 }
513 return false;
514 }
515
516 /**
517 * Reorder the objects in the array into visual order based on their levels.
518 * This is a utility function to use when you have a collection of objects
519 * representing runs of text in logical order, each run containing text
520 * at a single level. The elements at <code>index</code> from
521 * <code>objectStart</code> up to <code>objectStart + count</code>
522 * in the objects array will be reordered into visual order assuming
523 * each run of text has the level indicated by the corresponding element
524 * in the levels array (at <code>index - objectStart + levelStart</code>).
525 *
526 * @param levels an array representing the bidi level of each object
527 * @param levelStart the start position in the levels array
528 * @param objects the array of objects to be reordered into visual order
529 * @param objectStart the start position in the objects array
530 * @param count the number of objects to reorder
531 */
532 public static void reorderVisually(byte[] levels, int levelStart, Object[] objects, int objectStart, int count) {
533
534 if (count < 0) {
535 throw new IllegalArgumentException("count " + count + " must be >= 0");
536 }
537 if (levelStart < 0 || levelStart + count > levels.length) {
538 throw new IllegalArgumentException("levelStart " + levelStart + " and count " + count +
539 " out of range [0, " + levels.length + "]");
540 }
541 if (objectStart < 0 || objectStart + count > objects.length) {
542 throw new IllegalArgumentException("objectStart " + objectStart + " and count " + count +
543 " out of range [0, " + objects.length + "]");
544 }
545
546 byte lowestOddLevel = (byte)(NUMLEVELS + 1);
547 byte highestLevel = 0;
548
549 // initialize mapping and levels
550
551 int levelLimit = levelStart + count;
552 for (int i = levelStart; i < levelLimit; i++) {
553 byte level = levels[i];
554 if (level > highestLevel) {
555 highestLevel = level;
556 }
557
558 if ((level & 0x01) != 0 && level < lowestOddLevel) {
559 lowestOddLevel = level;
560 }
561 }
562
563 int delta = objectStart - levelStart;
564
565 while (highestLevel >= lowestOddLevel) {
566 int i = levelStart;
567
568 for (;;) {
569 while (i < levelLimit && levels[i] < highestLevel) {
570 i++;
571 }
572 int begin = i++;
573
574 if (begin == levelLimit) {
575 break; // no more runs at this level
576 }
577
578 while (i < levelLimit && levels[i] >= highestLevel) {
579 i++;
580 }
581 int end = i - 1;
582
583 begin += delta;
584 end += delta;
585 while (begin < end) {
586 Object temp = objects[begin];
587 objects[begin] = objects[end];
588 objects[end] = temp;
589 ++begin;
590 --end;
591 }
592 }
593
594 --highestLevel;
595 }
596 }
597
598 private static final char NUMLEVELS = 62;
599
600 private static final int RMASK =
601 (1 << 1 /* U_RIGHT_TO_LEFT */) |
602 (1 << 5 /* U_ARABIC_NUMBER */) |
603 (1 << 13 /* U_RIGHT_TO_LEFT_ARABIC */) |
604 (1 << 14 /* U_RIGHT_TO_LEFT_EMBEDDING */) |
605 (1 << 15 /* U_RIGHT_TO_LEFT_OVERRIDE */);
606
607 /** Access native bidi implementation. */
608 private static native int nativeGetDirectionCode(int cp);
609
610 /** Access native bidi implementation. */
611 private static synchronized native void nativeBidiChars(Bidi bidi, char[] text, int textStart,
612 byte[] embeddings, int embeddingStart,
613 int length, int flags);
614
615 /**
616 * Display the bidi internal state, used in debugging.
617 */
618 public String toString() {
619 StringBuffer buf = new StringBuffer(super.toString());
620 buf.append("[dir: " + dir);
621 buf.append(" baselevel: " + baselevel);
622 buf.append(" length: " + length);
623 if (runs == null) {
624 buf.append(" runs: null");
625 } else {
626 buf.append(" runs: [");
627 for (int i = 0; i < runs.length; i += 2) {
628 if (i != 0) {
629 buf.append(' ');
630 }
631 buf.append(runs[i]); // limit
632 buf.append('/');
633 buf.append(runs[i+1]); // level
634 }
635 buf.append(']');
636 }
637 if (cws == null) {
638 buf.append(" cws: null");
639 } else {
640 buf.append(" cws: [");
641 for (int i = 0; i < cws.length; ++i) {
642 if (i != 0) {
643 buf.append(' ');
644 }
645 buf.append(Integer.toHexString(cws[i]));
646 }
647 buf.append(']');
648 }
649 buf.append(']');
650
651 return buf.toString();
652 }
653}