Add BidiFormatter utility See bug #7587797 Need BidiFormatter class in Android API Change-Id: I999282b9a4c76d8b4a3fc254b0c12ca95fa6ea4a

commit: 57a85740d721caf8dcd94a545b2dd920e8e84e01 [log] [tgz]
author: Fabrice Di Meglio <fdimeglio@google.com> Thu Jan 31 13:29:36 2013 -0800
committer: Fabrice Di Meglio <fdimeglio@google.com> Mon Feb 11 14:27:10 2013 -0800
tree: 4a7334850018c38d1c8f02db230e8edc3497de7a
parent: 981e60edc3ed91ea8b5e1dae98669b88b175e19b [diff]
diff --git a/api/current.txt b/api/current.txt
index c8f999f..5f6abbc 100644
--- a/api/current.txt
+++ b/api/current.txt

@@ -22265,6 +22265,21 @@
     method public int getTopPadding();
   }
 
+  public abstract interface TextDirectionHeuristic {
+    method public abstract boolean isRtl(char[], int, int);
+    method public abstract boolean isRtl(java.lang.CharSequence, int, int);
+  }
+
+  public class TextDirectionHeuristics {
+    ctor public TextDirectionHeuristics();
+    field public static final android.text.TextDirectionHeuristic ANYRTL_LTR;
+    field public static final android.text.TextDirectionHeuristic FIRSTSTRONG_LTR;
+    field public static final android.text.TextDirectionHeuristic FIRSTSTRONG_RTL;
+    field public static final android.text.TextDirectionHeuristic LOCALE;
+    field public static final android.text.TextDirectionHeuristic LTR;
+    field public static final android.text.TextDirectionHeuristic RTL;
+  }
+
   public class TextPaint extends android.graphics.Paint {
     ctor public TextPaint();
     ctor public TextPaint(int);
@@ -22356,6 +22371,48 @@
 
 }
 
+package android.text.bidi {
+
+  public final class BidiFormatter {
+    method public java.lang.String dirAttr(java.lang.String);
+    method public java.lang.String dirAttr(java.lang.String, android.text.TextDirectionHeuristic);
+    method public java.lang.String dirAttr(boolean);
+    method public java.lang.String dirAttrValue(java.lang.String);
+    method public java.lang.String dirAttrValue(java.lang.String, android.text.TextDirectionHeuristic);
+    method public java.lang.String dirAttrValue(boolean);
+    method public java.lang.String endEdge();
+    method public static android.text.bidi.BidiFormatter getInstance(boolean);
+    method public static android.text.bidi.BidiFormatter getInstance(java.util.Locale);
+    method public boolean getStereoReset();
+    method public boolean isRtl(java.lang.String);
+    method public boolean isRtlContext();
+    method public java.lang.String mark();
+    method public java.lang.String markAfter(java.lang.String);
+    method public java.lang.String markAfter(java.lang.String, android.text.TextDirectionHeuristic);
+    method public java.lang.String markBefore(java.lang.String);
+    method public java.lang.String markBefore(java.lang.String, android.text.TextDirectionHeuristic);
+    method public java.lang.String spanWrap(java.lang.String, android.text.TextDirectionHeuristic, boolean);
+    method public java.lang.String spanWrap(java.lang.String, android.text.TextDirectionHeuristic);
+    method public java.lang.String spanWrap(java.lang.String, boolean);
+    method public java.lang.String spanWrap(java.lang.String);
+    method public java.lang.String startEdge();
+    method public java.lang.String unicodeWrap(java.lang.String, android.text.TextDirectionHeuristic, boolean);
+    method public java.lang.String unicodeWrap(java.lang.String, android.text.TextDirectionHeuristic);
+    method public java.lang.String unicodeWrap(java.lang.String, boolean);
+    method public java.lang.String unicodeWrap(java.lang.String);
+  }
+
+  public static final class BidiFormatter.Builder {
+    ctor public BidiFormatter.Builder();
+    ctor public BidiFormatter.Builder(boolean);
+    ctor public BidiFormatter.Builder(java.util.Locale);
+    method public android.text.bidi.BidiFormatter build();
+    method public android.text.bidi.BidiFormatter.Builder setTextDirectionHeuristic(android.text.TextDirectionHeuristic);
+    method public android.text.bidi.BidiFormatter.Builder stereoReset(boolean);
+  }
+
+}
+
 package android.text.format {
 
   public class DateFormat {

diff --git a/core/java/android/text/TextDirectionHeuristic.java b/core/java/android/text/TextDirectionHeuristic.java
index 513e11c..8a4ba42 100644
--- a/core/java/android/text/TextDirectionHeuristic.java
+++ b/core/java/android/text/TextDirectionHeuristic.java

@@ -17,10 +17,30 @@
 package android.text;
 
 /**
- * Interface for objects that guess at the paragraph direction by examining text.
- *
- * @hide
+ * Interface for objects that use a heuristic for guessing at the paragraph direction by examining text.
  */
 public interface TextDirectionHeuristic {
-    boolean isRtl(char[] text, int start, int count);
+    /**
+     * Guess if a chars array is in the RTL direction or not.
+     *
+     * @param array the char array.
+     * @param start start index, inclusive.
+     * @param count the length to check, must not be negative and not greater than
+     *          {@code array.length - start}.
+     * @return true if all chars in the range are to be considered in a RTL direction,
+     *          false otherwise.
+     */
+    boolean isRtl(char[] array, int start, int count);
+
+    /**
+     * Guess if a {@code CharSequence} is in the RTL direction or not.
+     *
+     * @param cs the CharSequence.
+     * @param start start index, inclusive.
+     * @param count the length to check, must not be negative and not greater than
+     *            {@code CharSequence.length() - start}.
+     * @return true if all chars in the range are to be considered in a RTL direction,
+     *          false otherwise.
+     */
+    boolean isRtl(CharSequence cs, int start, int count);
 }

diff --git a/core/java/android/text/TextDirectionHeuristics.java b/core/java/android/text/TextDirectionHeuristics.java
index df8c4c6..7d7e3a9 100644
--- a/core/java/android/text/TextDirectionHeuristics.java
+++ b/core/java/android/text/TextDirectionHeuristics.java

@@ -19,43 +19,45 @@
 
 import android.view.View;
 
+import java.nio.CharBuffer;
+
 /**
  * Some objects that implement TextDirectionHeuristic.
  *
- * @hide
  */
 public class TextDirectionHeuristics {
 
-    /** Always decides that the direction is left to right. */
+    /**
+     * Always decides that the direction is left to right.
+     */
     public static final TextDirectionHeuristic LTR =
         new TextDirectionHeuristicInternal(null /* no algorithm */, false);
 
-    /** Always decides that the direction is right to left. */
+    /**
+     * Always decides that the direction is right to left.
+     */
     public static final TextDirectionHeuristic RTL =
         new TextDirectionHeuristicInternal(null /* no algorithm */, true);
 
     /**
-     * Determines the direction based on the first strong directional character,
-     * including bidi format chars, falling back to left to right if it
-     * finds none.  This is the default behavior of the Unicode Bidirectional
-     * Algorithm.
+     * Determines the direction based on the first strong directional character, including bidi
+     * format chars, falling back to left to right if it finds none. This is the default behavior
+     * of the Unicode Bidirectional Algorithm.
      */
     public static final TextDirectionHeuristic FIRSTSTRONG_LTR =
         new TextDirectionHeuristicInternal(FirstStrong.INSTANCE, false);
 
     /**
-     * Determines the direction based on the first strong directional character,
-     * including bidi format chars, falling back to right to left if it
-     * finds none.  This is similar to the default behavior of the Unicode
-     * Bidirectional Algorithm, just with different fallback behavior.
+     * Determines the direction based on the first strong directional character, including bidi
+     * format chars, falling back to right to left if it finds none. This is similar to the default
+     * behavior of the Unicode Bidirectional Algorithm, just with different fallback behavior.
      */
     public static final TextDirectionHeuristic FIRSTSTRONG_RTL =
         new TextDirectionHeuristicInternal(FirstStrong.INSTANCE, true);
 
     /**
-     * If the text contains any strong right to left non-format character, determines
-     * that the direction is right to left, falling back to left to right if it
-     * finds none.
+     * If the text contains any strong right to left non-format character, determines that the
+     * direction is right to left, falling back to left to right if it finds none.
      */
     public static final TextDirectionHeuristic ANYRTL_LTR =
         new TextDirectionHeuristicInternal(AnyStrong.INSTANCE_RTL, false);
@@ -65,8 +67,39 @@
      */
     public static final TextDirectionHeuristic LOCALE = TextDirectionHeuristicLocale.INSTANCE;
 
-    private static enum TriState {
-        TRUE, FALSE, UNKNOWN;
+    /**
+     * State constants for taking care about true / false / unknown
+     */
+    private static final int STATE_TRUE = 0;
+    private static final int STATE_FALSE = 1;
+    private static final int STATE_UNKNOWN = 2;
+
+    private static int isRtlText(int directionality) {
+        switch (directionality) {
+            case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
+                return STATE_FALSE;
+            case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
+            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
+                return STATE_TRUE;
+            default:
+                return STATE_UNKNOWN;
+        }
+    }
+
+    private static int isRtlTextOrFormat(int directionality) {
+        switch (directionality) {
+            case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
+            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
+            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
+                return STATE_FALSE;
+            case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
+            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
+            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
+            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
+                return STATE_TRUE;
+            default:
+                return STATE_UNKNOWN;
+        }
     }
 
     /**
@@ -87,21 +120,26 @@
         abstract protected boolean defaultIsRtl();
 
         @Override
-        public boolean isRtl(char[] chars, int start, int count) {
-            if (chars == null || start < 0 || count < 0 || chars.length - count < start) {
+        public boolean isRtl(char[] array, int start, int count) {
+            return isRtl(CharBuffer.wrap(array), start, count);
+        }
+
+        @Override
+        public boolean isRtl(CharSequence cs, int start, int count) {
+            if (cs == null || start < 0 || count < 0 || cs.length() - count < start) {
                 throw new IllegalArgumentException();
             }
             if (mAlgorithm == null) {
                 return defaultIsRtl();
             }
-            return doCheck(chars, start, count);
+            return doCheck(cs, start, count);
         }
 
-        private boolean doCheck(char[] chars, int start, int count) {
-            switch(mAlgorithm.checkRtl(chars, start, count)) {
-                case TRUE:
+        private boolean doCheck(CharSequence cs, int start, int count) {
+            switch(mAlgorithm.checkRtl(cs, start, count)) {
+                case STATE_TRUE:
                     return true;
-                case FALSE:
+                case STATE_FALSE:
                     return false;
                 default:
                     return defaultIsRtl();
@@ -124,58 +162,26 @@
         }
     }
 
-    private static TriState isRtlText(int directionality) {
-        switch (directionality) {
-            case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
-                return TriState.FALSE;
-            case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
-            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
-                return TriState.TRUE;
-            default:
-                return TriState.UNKNOWN;
-        }
-    }
-
-    private static TriState isRtlTextOrFormat(int directionality) {
-        switch (directionality) {
-            case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
-            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
-            case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
-                return TriState.FALSE;
-            case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
-            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
-            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
-            case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
-                return TriState.TRUE;
-            default:
-                return TriState.UNKNOWN;
-        }
-    }
-
     /**
      * Interface for an algorithm to guess the direction of a paragraph of text.
-     *
      */
     private static interface TextDirectionAlgorithm {
         /**
          * Returns whether the range of text is RTL according to the algorithm.
-         *
          */
-        TriState checkRtl(char[] text, int start, int count);
+        int checkRtl(CharSequence cs, int start, int count);
     }
 
     /**
-     * Algorithm that uses the first strong directional character to determine
-     * the paragraph direction.  This is the standard Unicode Bidirectional
-     * algorithm.
-     *
+     * Algorithm that uses the first strong directional character to determine the paragraph
+     * direction. This is the standard Unicode Bidirectional algorithm.
      */
     private static class FirstStrong implements TextDirectionAlgorithm {
         @Override
-        public TriState checkRtl(char[] text, int start, int count) {
-            TriState result = TriState.UNKNOWN;
-            for (int i = start, e = start + count; i < e && result == TriState.UNKNOWN; ++i) {
-                result = isRtlTextOrFormat(Character.getDirectionality(text[i]));
+        public int checkRtl(CharSequence cs, int start, int count) {
+            int result = STATE_UNKNOWN;
+            for (int i = start, e = start + count; i < e && result == STATE_UNKNOWN; ++i) {
+                result = isRtlTextOrFormat(Character.getDirectionality(cs.charAt(i)));
             }
             return result;
         }
@@ -190,25 +196,24 @@
      * Algorithm that uses the presence of any strong directional non-format
      * character (e.g. excludes LRE, LRO, RLE, RLO) to determine the
      * direction of text.
-     *
      */
     private static class AnyStrong implements TextDirectionAlgorithm {
         private final boolean mLookForRtl;
 
         @Override
-        public TriState checkRtl(char[] text, int start, int count) {
+        public int checkRtl(CharSequence cs, int start, int count) {
             boolean haveUnlookedFor = false;
             for (int i = start, e = start + count; i < e; ++i) {
-                switch (isRtlText(Character.getDirectionality(text[i]))) {
-                    case TRUE:
+                switch (isRtlText(Character.getDirectionality(cs.charAt(i)))) {
+                    case STATE_TRUE:
                         if (mLookForRtl) {
-                            return TriState.TRUE;
+                            return STATE_TRUE;
                         }
                         haveUnlookedFor = true;
                         break;
-                    case FALSE:
+                    case STATE_FALSE:
                         if (!mLookForRtl) {
-                            return TriState.FALSE;
+                            return STATE_FALSE;
                         }
                         haveUnlookedFor = true;
                         break;
@@ -217,9 +222,9 @@
                 }
             }
             if (haveUnlookedFor) {
-                return mLookForRtl ? TriState.FALSE : TriState.TRUE;
+                return mLookForRtl ? STATE_FALSE : STATE_TRUE;
             }
-            return TriState.UNKNOWN;
+            return STATE_UNKNOWN;
         }
 
         private AnyStrong(boolean lookForRtl) {

diff --git a/core/java/android/text/bidi/BidiFormatter.java b/core/java/android/text/bidi/BidiFormatter.java
new file mode 100644
index 0000000..3554751
--- /dev/null
+++ b/core/java/android/text/bidi/BidiFormatter.java

@@ -0,0 +1,1147 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package android.text.bidi;
+
+import android.text.TextDirectionHeuristic;
+import android.text.TextDirectionHeuristics;
+import android.text.TextUtils;
+import android.view.View;
+
+import static android.text.TextDirectionHeuristics.FIRSTSTRONG_LTR;
+
+import java.util.Locale;
+
+
+/**
+ * Utility class for formatting text for display in a potentially opposite-directionality context
+ * without garbling. The directionality of the context is set at formatter creation and the
+ * directionality of the text can be either estimated or passed in when known. Provides the
+ * following functionality:
+ * <p>
+ * 1. Bidi Wrapping
+ * When text in one language is mixed into a document in another, opposite-directionality language,
+ * e.g. when an English business name is embedded in a Hebrew web page, both the inserted string
+ * and the text surrounding it may be displayed incorrectly unless the inserted string is explicitly
+ * separated from the surrounding text in a "wrapper" that:
+ * <p>
+ * - Declares its directionality so that the string is displayed correctly. This can be done in HTML
+ *   markup (e.g. a 'span dir="rtl"' element) by {@link #spanWrap} and similar methods, or - only in
+ *   contexts where markup can't be used - in Unicode bidi formatting codes by {@link #unicodeWrap}
+ *   and similar methods.
+ * <p>
+ * - Isolates the string's directionality, so it does not unduly affect the surrounding content.
+ *   Currently, this can only be done using invisible Unicode characters of the same direction as
+ *   the context (LRM or RLM) in addition to the directionality declaration above, thus "resetting"
+ *   the directionality to that of the context. The "reset" may need to be done at both ends of the
+ *   string. Without "reset" after the string, the string will "stick" to a number or logically
+ *   separate opposite-direction text that happens to follow it in-line (even if separated by
+ *   neutral content like spaces and punctuation). Without "reset" before the string, the same can
+ *   happen there, but only with more opposite-direction text, not a number. One approach is to
+ *   "reset" the direction only after each string, on the theory that if the preceding opposite-
+ *   direction text is itself bidi-wrapped, the "reset" after it will prevent the sticking. (Doing
+ *   the "reset" only before each string definitely does not work because we do not want to require
+ *   bidi-wrapping numbers, and a bidi-wrapped opposite-direction string could be followed by a
+ *   number.) Still, the safest policy is to do the "reset" on both ends of each string, since RTL
+ *   message translations often contain untranslated Latin-script brand names and technical terms,
+ *   and one of these can be followed by a bidi-wrapped inserted value. On the other hand, when one
+ *   has such a message, it is best to do the "reset" manually in the message translation itself,
+ *   since the message's opposite-direction text could be followed by an inserted number, which we
+ *   would not bidi-wrap anyway. Thus, "reset" only after the string is the current default. In an
+ *   alternative to "reset", recent additions to the HTML, CSS, and Unicode standards allow the
+ *   isolation to be part of the directionality declaration. This form of isolation is better than
+ *   "reset" because it takes less space, does not require knowing the context directionality, has a
+ *   gentler effect than "reset", and protects both ends of the string. However, we do not yet allow
+ *   using it because required platforms do not yet support it.
+ * <p>
+ * Providing these wrapping services is the basic purpose of the bidi formatter.
+ * <p>
+ * 2. Directionality estimation
+ * How does one know whether a string about to be inserted into surrounding text has the same
+ * directionality? Well, in many cases, one knows that this must be the case when writing the code
+ * doing the insertion, e.g. when a localized message is inserted into a localized page. In such
+ * cases there is no need to involve the bidi formatter at all. In some other cases, it need not be
+ * the same as the context, but is either constant (e.g. urls are always LTR) or otherwise known.
+ * In the remaining cases, e.g. when the string is user-entered or comes from a database, the
+ * language of the string (and thus its directionality) is not known a priori, and must be
+ * estimated at run-time. The bidi formatter can do this automatically using the default
+ * first-strong estimation algorithm. It can also be configured to use a custom directionality
+ * estimation object.
+ * <p>
+ * 3. Escaping
+ * When wrapping plain text - i.e. text that is not already HTML or HTML-escaped - in HTML markup,
+ * the text must first be HTML-escaped to prevent XSS attacks and other nasty business. This of
+ * course is always true, but the escaping can not be done after the string has already been wrapped
+ * in markup, so the bidi formatter also serves as a last chance and includes escaping services.
+ * <p>
+ * Thus, in a single call, the formatter will escape the input string as specified, determine its
+ * directionality, and wrap it as necessary. It is then up to the caller to insert the return value
+ * in the output.
+ */
+public final class BidiFormatter {
+
+    /**
+     * The default text direction heuristic.
+     */
+    private static TextDirectionHeuristic DEFAULT_TEXT_DIRECTION_HEURISTIC = FIRSTSTRONG_LTR;
+
+    /**
+     * Unicode "Left-To-Right Embedding" (LRE) character.
+     */
+    private static final char LRE = '\u202A';
+
+    /**
+     * Unicode "Right-To-Left Embedding" (RLE) character.
+     */
+    private static final char RLE = '\u202B';
+
+    /**
+     * Unicode "Pop Directional Formatting" (PDF) character.
+     */
+    private static final char PDF = '\u202C';
+
+    /**
+     *  Unicode "Left-To-Right Mark" (LRM) character.
+     */
+    private static final char LRM = '\u200E';
+
+    /*
+     * Unicode "Right-To-Left Mark" (RLM) character.
+     */
+    private static final char RLM = '\u200F';
+
+    /*
+     * String representation of LRM
+     */
+    private static final String LRM_STRING = Character.toString(LRM);
+
+    /*
+     * String representation of RLM
+     */
+    private static final String RLM_STRING = Character.toString(RLM);
+
+    /**
+     * "ltr" string constant.
+     */
+    private static final String LTR_STRING = "ltr";
+
+    /**
+     * "rtl" string constant.
+     */
+    private static final String RTL_STRING = "rtl";
+
+    /**
+     * "dir=\"ltr\"" string constant.
+     */
+    private static final String DIR_LTR_STRING = "dir=\"ltr\"";
+
+    /**
+     * "dir=\"rtl\"" string constant.
+     */
+    private static final String DIR_RTL_STRING = "dir=\"rtl\"";
+
+    /**
+     * "right" string constant.
+     */
+    private static final String RIGHT = "right";
+
+    /**
+     * "left" string constant.
+     */
+    private static final String LEFT = "left";
+
+    /**
+     * Empty string constant.
+     */
+    private static final String EMPTY_STRING = "";
+
+    /**
+     * A class for building a BidiFormatter with non-default options.
+     */
+    public static final class Builder {
+        private boolean isRtlContext;
+        private int flags;
+        private TextDirectionHeuristic textDirectionHeuristic;
+
+        /**
+         * Constructor.
+         *
+         */
+        public Builder() {
+            initialize(isRtlLocale(Locale.getDefault()));
+        }
+
+        /**
+         * Constructor.
+         *
+         * @param rtlContext Whether the context directionality is RTL.
+         */
+        public Builder(boolean rtlContext) {
+            initialize(rtlContext);
+        }
+
+        /**
+         * Constructor.
+         *
+         * @param locale The context locale.
+         */
+        public Builder(Locale locale) {
+            initialize(isRtlLocale(locale));
+        }
+
+        /**
+         * Initializes the builder with the given context directionality and default options.
+         *
+         * @param isRtlContext Whether the context is RTL or not.
+         */
+        private void initialize(boolean isRtlContext) {
+            this.isRtlContext = isRtlContext;
+            textDirectionHeuristic = DEFAULT_TEXT_DIRECTION_HEURISTIC;
+            this.flags = DEFAULT_FLAGS;
+        }
+
+        /**
+         * Specifies whether the BidiFormatter to be built should also "reset" directionality before
+         * a string being bidi-wrapped, not just after it. The default is false.
+         */
+        public Builder stereoReset(boolean stereoReset) {
+            if (stereoReset) {
+                flags |= FLAG_STEREO_RESET;
+            } else {
+                flags &= ~FLAG_STEREO_RESET;
+            }
+            return this;
+        }
+
+        /**
+         * Specifies the default directionality estimation algorithm to be used by the BidiFormatter.
+         * By default, uses the first-strong heuristic.
+         *
+         * @param heuristic the {@code TextDirectionHeuristic} to use.
+         * @return the builder itself.
+         */
+        public Builder setTextDirectionHeuristic(TextDirectionHeuristic heuristic) {
+            this.textDirectionHeuristic = heuristic;
+            return this;
+        }
+
+        private static BidiFormatter getDefaultInstanceFromContext(boolean isRtlContext) {
+            return isRtlContext ? DEFAULT_RTL_INSTANCE : DEFAULT_LTR_INSTANCE;
+        }
+
+        /**
+         * @return A BidiFormatter with the specified options.
+         */
+        public BidiFormatter build() {
+            if (flags == DEFAULT_FLAGS &&
+                    textDirectionHeuristic == DEFAULT_TEXT_DIRECTION_HEURISTIC) {
+                return getDefaultInstanceFromContext(isRtlContext);
+            }
+            return new BidiFormatter(isRtlContext, flags, textDirectionHeuristic);
+        }
+    }
+
+    private static final int FLAG_STEREO_RESET = 2;
+    private static final int DEFAULT_FLAGS = FLAG_STEREO_RESET;
+
+    private static final BidiFormatter DEFAULT_LTR_INSTANCE =
+            new BidiFormatter(false /* LTR context */, DEFAULT_FLAGS, DEFAULT_TEXT_DIRECTION_HEURISTIC);
+    private static final BidiFormatter DEFAULT_RTL_INSTANCE =
+            new BidiFormatter(true /* RTL context */, DEFAULT_FLAGS, DEFAULT_TEXT_DIRECTION_HEURISTIC);
+
+    private final boolean isRtlContext;
+    private final int flags;
+    private final TextDirectionHeuristic defaultTextDirectionHeuristic;
+
+    /**
+     * Factory for creating an instance of BidiFormatter given the context directionality.
+     *
+     * @param rtlContext Whether the context directionality is RTL.
+     */
+    public static BidiFormatter getInstance(boolean rtlContext) {
+        return new Builder(rtlContext).build();
+    }
+
+    /**
+     * Factory for creating an instance of BidiFormatter given the context locale.
+     *
+     * @param locale The context locale.
+     */
+    public static BidiFormatter getInstance(Locale locale) {
+        return new Builder(locale).build();
+    }
+
+    /**
+     * @param isRtlContext Whether the context directionality is RTL or not.
+     * @param flags The option flags.
+     * @param heuristic The default text direction heuristic.
+     */
+    private BidiFormatter(boolean isRtlContext, int flags, TextDirectionHeuristic heuristic) {
+        this.isRtlContext = isRtlContext;
+        this.flags = flags;
+        this.defaultTextDirectionHeuristic = heuristic;
+    }
+
+    /**
+     * @return Whether the context directionality is RTL
+     */
+    public boolean isRtlContext() {
+        return isRtlContext;
+    }
+
+    /**
+     * @return Whether directionality "reset" should also be done before a string being
+     * bidi-wrapped, not just after it.
+     */
+    public boolean getStereoReset() {
+        return (flags & FLAG_STEREO_RESET) != 0;
+    }
+
+    /**
+     * Returns "rtl" if {@code str}'s estimated directionality is RTL, and "ltr" if it is LTR.
+     *
+     * @param str String whose directionality is to be estimated.
+     * @return "rtl" if {@code str}'s estimated directionality is RTL, and "ltr" otherwise.
+     */
+    public String dirAttrValue(String str) {
+        return dirAttrValue(isRtl(str));
+    }
+
+    /**
+     * Operates like {@link #dirAttrValue(String)}, but uses a given heuristic to estimate the
+     * {@code str}'s directionality.
+     *
+     * @param str String whose directionality is to be estimated.
+     * @param heuristic The text direction heuristic that will be used to estimate the {@code str}'s
+     *                  directionality.
+     * @return "rtl" if {@code str}'s estimated directionality is RTL, and "ltr" otherwise.
+     */
+    public String dirAttrValue(String str, TextDirectionHeuristic heuristic) {
+        return dirAttrValue(heuristic.isRtl(str, 0, str.length()));
+    }
+
+    /**
+     * Returns "rtl" if the given directionality is RTL, and "ltr" if it is LTR.
+     *
+     * @param isRtl Whether the directionality is RTL or not.
+     * @return "rtl" if the given directionality is RTL, and "ltr" otherwise.
+     */
+    public String dirAttrValue(boolean isRtl) {
+        return isRtl ? RTL_STRING : LTR_STRING;
+    }
+
+    /**
+     * Returns "dir=\"ltr\"" or "dir=\"rtl\"", depending on {@code str}'s estimated directionality,
+     * if it is not the same as the context directionality. Otherwise, returns the empty string.
+     *
+     * @param str String whose directionality is to be estimated.
+     * @return "dir=\"rtl\"" for RTL text in non-RTL context; "dir=\"ltr\"" for LTR text in non-LTR
+     *     context; else, the empty string.
+     */
+    public String dirAttr(String str) {
+        return dirAttr(isRtl(str));
+    }
+
+    /**
+     * Operates like {@link #dirAttr(String)}, but uses a given heuristic to estimate the
+     * {@code str}'s directionality.
+     *
+     * @param str String whose directionality is to be estimated.
+     * @param heuristic The text direction heuristic that will be used to estimate the {@code str}'s
+     *                  directionality.
+     * @return "dir=\"rtl\"" for RTL text in non-RTL context; "dir=\"ltr\"" for LTR text in non-LTR
+     *     context; else, the empty string.
+     */
+    public String dirAttr(String str, TextDirectionHeuristic heuristic) {
+        return dirAttr(heuristic.isRtl(str, 0, str.length()));
+    }
+
+    /**
+     * Returns "dir=\"ltr\"" or "dir=\"rtl\"", depending on the given directionality, if it is not
+     * the same as the context directionality. Otherwise, returns the empty string.
+     *
+     * @param isRtl Whether the directionality is RTL or not
+     * @return "dir=\"rtl\"" for RTL text in non-RTL context; "dir=\"ltr\"" for LTR text in non-LTR
+     *     context; else, the empty string.
+     */
+    public String dirAttr(boolean isRtl) {
+        return (isRtl != isRtlContext) ? (isRtl ? DIR_RTL_STRING :  DIR_LTR_STRING) : EMPTY_STRING;
+    }
+
+    /**
+     * Returns a Unicode bidi mark matching the context directionality (LRM or RLM) if either the
+     * overall or the exit directionality of a given string is opposite to the context directionality.
+     * Putting this after the string (including its directionality declaration wrapping) prevents it
+     * from "sticking" to other opposite-directionality text or a number appearing after it inline
+     * with only neutral content in between. Otherwise returns the empty string. While the exit
+     * directionality is determined by scanning the end of the string, the overall directionality is
+     * given explicitly in {@code dir}.
+     *
+     * @param str String after which the mark may need to appear.
+     * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context;
+     *     else, the empty string.
+     */
+    public String markAfter(String str) {
+        return markAfter(str, defaultTextDirectionHeuristic);
+    }
+
+    /**
+     * Operates like {@link #markAfter(String)}, but uses a given heuristic to estimate the
+     * {@code str}'s directionality.
+     *
+     * @param str String after which the mark may need to appear.
+     * @param heuristic The text direction heuristic that will be used to estimate the {@code str}'s
+     *                  directionality.
+     * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context;
+     *     else, the empty string.
+     */
+    public String markAfter(String str, TextDirectionHeuristic heuristic) {
+        final boolean isRtl = heuristic.isRtl(str, 0, str.length());
+        // getExitDir() is called only if needed (short-circuit).
+        if (!isRtlContext && (isRtl || getExitDir(str) == Dir.RTL)) {
+            return LRM_STRING;
+        }
+        if (isRtlContext && (!isRtl || getExitDir(str) == Dir.LTR)) {
+            return RLM_STRING;
+        }
+        return EMPTY_STRING;
+    }
+
+    /**
+     * Returns a Unicode bidi mark matching the context directionality (LRM or RLM) if either the
+     * overall or the entry directionality of a given string is opposite to the context
+     * directionality. Putting this before the string (including its directionality declaration
+     * wrapping) prevents it from "sticking" to other opposite-directionality text appearing before it
+     * inline with only neutral content in between. Otherwise returns the empty string. While the
+     * entry directionality is determined by scanning the beginning of the string, the overall
+     * directionality is given explicitly in {@code dir}.
+     *
+     * @param str String before which the mark may need to appear.
+     * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context;
+     *     else, the empty string.
+     */
+    public String markBefore(String str) {
+        return markBefore(str, defaultTextDirectionHeuristic);
+    }
+
+    /**
+     * Operates like {@link #markBefore(String)}, but uses a given heuristic to estimate the
+     * {@code str}'s directionality.
+     *
+     * @param str String before which the mark may need to appear.
+     * @param heuristic The text direction heuristic that will be used to estimate the {@code str}'s
+     *                  directionality.
+     * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context;
+     *     else, the empty string.
+     */
+    public String markBefore(String str, TextDirectionHeuristic heuristic) {
+        final boolean isRtl = heuristic.isRtl(str, 0, str.length());
+        // getEntryDir() is called only if needed (short-circuit).
+        if (!isRtlContext && (isRtl || getEntryDir(str) == Dir.RTL)) {
+            return LRM_STRING;
+        }
+        if (isRtlContext && (!isRtl || getEntryDir(str) == Dir.LTR)) {
+            return RLM_STRING;
+        }
+        return EMPTY_STRING;
+    }
+
+    /**
+     * Returns the Unicode bidi mark matching the context directionality (LRM for LTR context
+     * directionality, RLM for RTL context directionality).
+     */
+    public String mark() {
+        return isRtlContext ? RLM_STRING : LRM_STRING;
+    }
+
+    /**
+     * Returns "right" for RTL context directionality. Otherwise for LTR context directionality
+     * returns "left".
+     */
+    public String startEdge() {
+        return isRtlContext  ? RIGHT : LEFT;
+    }
+
+    /**
+     * Returns "left" for RTL context directionality. Otherwise for LTR context directionality
+     * returns "right".
+     */
+    public String endEdge() {
+        return isRtlContext ? LEFT : RIGHT;
+    }
+
+    /**
+     * Estimates the directionality of a string using the default text direction heuristic.
+     *
+     * @param str String whose directionality is to be estimated.
+     * @return true if {@code str}'s estimated overall directionality is RTL. Otherwise returns
+     *          false.
+     */
+    public boolean isRtl(String str) {
+        return defaultTextDirectionHeuristic.isRtl(str, 0, str.length());
+    }
+
+    /**
+     * Formats a given string of unknown directionality for use in HTML output of the context
+     * directionality, so an opposite-directionality string is neither garbled nor garbles its
+     * surroundings.
+     * <p>
+     * The algorithm: estimates the directionality of the given string using the given heuristic.
+     * If the directionality is known, pass TextDirectionHeuristics.LTR or RTL for heuristic.
+     * In case its directionality doesn't match the context directionality, wraps it with a 'span'
+     * element and adds a "dir" attribute (either 'dir=\"rtl\"' or 'dir=\"ltr\"').
+     * <p>
+     * If {@code isolate}, directionally isolates the string so that it does not garble its
+     * surroundings. Currently, this is done by "resetting" the directionality after the string by
+     * appending a trailing Unicode bidi mark matching the context directionality (LRM or RLM) when
+     * either the overall directionality or the exit directionality of the string is opposite to that
+     * of the context. If the formatter was built using {@link Builder#stereoReset(boolean)} and
+     * passing "true" as an argument, also prepends a Unicode bidi mark matching the context
+     * directionality when either the overall directionality or the entry directionality of the
+     * string is opposite to that of the context.
+     * <p>
+     *
+     * @param str The input string.
+     * @param heuristic The algorithm to be used to estimate the string's overall direction.
+     * @param isolate Whether to directionally isolate the string to prevent it from garbling the
+     *     content around it.
+     * @return Input string after applying the above processing.
+     */
+    public String spanWrap(String str, TextDirectionHeuristic heuristic, boolean isolate) {
+        final boolean isRtl = heuristic.isRtl(str, 0, str.length());
+        String origStr = str;
+        str = TextUtils.htmlEncode(str);
+
+        StringBuilder result = new StringBuilder();
+        if (getStereoReset() && isolate) {
+            result.append(markBefore(origStr,
+                    isRtl ? TextDirectionHeuristics.RTL : TextDirectionHeuristics.LTR));
+        }
+        if (isRtl != isRtlContext) {
+            result.append("<span ").append(dirAttr(isRtl)).append('>').append(str).append("</span>");
+        } else {
+            result.append(str);
+        }
+        if (isolate) {
+            result.append(markAfter(origStr,
+                    isRtl ? TextDirectionHeuristics.RTL : TextDirectionHeuristics.LTR));
+        }
+        return result.toString();
+    }
+
+    /**
+     * Operates like {@link #spanWrap(String, TextDirectionHeuristic, boolean)}, but assumes
+     * {@code isolate} is true.
+     *
+     * @param str The input string.
+     * @param heuristic The algorithm to be used to estimate the string's overall direction.
+     * @return Input string after applying the above processing.
+     */
+    public String spanWrap(String str, TextDirectionHeuristic heuristic) {
+        return spanWrap(str, heuristic, true /* isolate */);
+    }
+
+    /**
+     * Operates like {@link #spanWrap(String, TextDirectionHeuristic, boolean)}, but uses the
+     * formatter's default direction estimation algorithm.
+     *
+     * @param str The input string.
+     * @param isolate Whether to directionally isolate the string to prevent it from garbling the
+     *     content around it
+     * @return Input string after applying the above processing.
+     */
+    public String spanWrap(String str, boolean isolate) {
+        return spanWrap(str, defaultTextDirectionHeuristic, isolate);
+    }
+
+    /**
+     * Operates like {@link #spanWrap(String, TextDirectionHeuristic, boolean)}, but uses the
+     * formatter's default direction estimation algorithm and assumes {@code isolate} is true.
+     *
+     * @param str The input string.
+     * @return Input string after applying the above processing.
+     */
+    public String spanWrap(String str) {
+        return spanWrap(str, defaultTextDirectionHeuristic, true /* isolate */);
+    }
+
+    /**
+     * Formats a string of given directionality for use in plain-text output of the context
+     * directionality, so an opposite-directionality string is neither garbled nor garbles its
+     * surroundings. As opposed to {@link #spanWrap}, this makes use of Unicode bidi
+     * formatting characters. In HTML, its *only* valid use is inside of elements that do not allow
+     * markup, e.g. the 'option' and 'title' elements.
+     * <p>
+     * The algorithm: In case the given directionality doesn't match the context directionality, wraps
+     * the string with Unicode bidi formatting characters: RLE+{@code str}+PDF for RTL text, or
+     * LRE+{@code str}+PDF for LTR text.
+     * <p>
+     * If {@code isolate}, directionally isolates the string so that it does not garble its
+     * surroundings. Currently, this is done by "resetting" the directionality after the string by
+     * appending a trailing Unicode bidi mark matching the context directionality (LRM or RLM) when
+     * either the overall directionality or the exit directionality of the string is opposite to that
+     * of the context. If the formatter was built using {@link Builder#stereoReset(boolean)} and
+     * passing "true" as an argument, also prepends a Unicode bidi mark matching the context
+     * directionality when either the overall directionality or the entry directionality of the
+     * string is opposite to that of the context. Note that as opposed to the overall
+     * directionality, the entry and exit directionalities are determined from the string itself.
+     * <p>
+     * Does *not* do HTML-escaping.
+     *
+     * @param str The input string.
+     * @param heuristic The algorithm to be used to estimate the string's overall direction.
+     * @param isolate Whether to directionally isolate the string to prevent it from garbling the
+     *     content around it
+     * @return Input string after applying the above processing.
+     */
+    public String unicodeWrap(String str, TextDirectionHeuristic heuristic, boolean isolate) {
+        final boolean isRtl = heuristic.isRtl(str, 0, str.length());
+        StringBuilder result = new StringBuilder();
+        if (getStereoReset() && isolate) {
+            result.append(markBefore(str,
+                    isRtl ? TextDirectionHeuristics.RTL : TextDirectionHeuristics.LTR));
+        }
+        if (isRtl != isRtlContext) {
+            result.append(isRtl ? RLE : LRE);
+            result.append(str);
+            result.append(PDF);
+        } else {
+            result.append(str);
+        }
+        if (isolate) {
+            result.append(markAfter(str,
+                    isRtl ? TextDirectionHeuristics.RTL : TextDirectionHeuristics.LTR));
+        }
+        return result.toString();
+    }
+
+    /**
+     * Operates like {@link #unicodeWrap(String, TextDirectionHeuristic, boolean)}, but assumes
+     * {@code isolate} is true.
+     *
+     * @param str The input string.
+     * @param heuristic The algorithm to be used to estimate the string's overall direction.
+     * @return Input string after applying the above processing.
+     */
+    public String unicodeWrap(String str, TextDirectionHeuristic heuristic) {
+        return unicodeWrap(str, heuristic, true /* isolate */);
+    }
+
+    /**
+     * Operates like {@link #unicodeWrap(String, TextDirectionHeuristic, boolean)}, but uses the
+     * formatter's default direction estimation algorithm.
+     *
+     * @param str The input string.
+     * @param isolate Whether to directionally isolate the string to prevent it from garbling the
+     *     content around it
+     * @return Input string after applying the above processing.
+     */
+    public String unicodeWrap(String str, boolean isolate) {
+        return unicodeWrap(str, defaultTextDirectionHeuristic, isolate);
+    }
+
+    /**
+     * Operates like {@link #unicodeWrap(String, TextDirectionHeuristic, boolean)}, but uses the
+     * formatter's default direction estimation algorithm and assumes {@code isolate} is true.
+     *
+     * @param str The input string.
+     * @return Input string after applying the above processing.
+     */
+    public String unicodeWrap(String str) {
+        return unicodeWrap(str, defaultTextDirectionHeuristic, true /* isolate */);
+    }
+
+    /**
+     * Helper method to return true if the Locale directionality is RTL.
+     *
+     * @param locale The Locale whose directionality will be checked to be RTL or LTR
+     * @return true if the {@code locale} directionality is RTL. False otherwise.
+     */
+    private static boolean isRtlLocale(Locale locale) {
+        return (TextUtils.getLayoutDirectionFromLocale(locale) == View.LAYOUT_DIRECTION_RTL);
+    }
+
+    /**
+     * Enum for directionality type.
+     */
+    private enum Dir {
+        LTR     (1),
+        UNKNOWN (0),
+        RTL     (-1);
+
+        public final int ord;
+
+        Dir(int ord) {this.ord = ord; }
+
+        /**
+         * Interprets numeric representation of directionality: positive values are
+         * interpreted as RTL, negative values as LTR, and zero as UNKNOWN.
+         */
+        public static Dir valueOf(int dir) {
+            return dir > 0 ? LTR : dir < 0 ? RTL : UNKNOWN;
+        }
+
+        /**
+         * Interprets boolean representation of directionality: false is interpreted
+         * as LTR and true as RTL.
+         */
+        public static Dir valueOf(boolean dir) {
+            return dir ? RTL : LTR;
+        }
+
+        /**
+         * Returns whether this directionality is opposite to the given
+         * directionality.
+         */
+        public boolean isOppositeTo(Dir dir) {
+            return this.ord * dir.ord < 0;
+        }
+    }
+
+    /**
+     * Returns the directionality of the last character with strong directionality in the string, or
+     * Dir.UNKNOWN if none was encountered. For efficiency, actually scans backwards from the end of
+     * the string. Treats a non-BN character between an LRE/RLE/LRO/RLO and its matching PDF as a
+     * strong character, LTR after LRE/LRO, and RTL after RLE/RLO. The results are undefined for a
+     * string containing unbalanced LRE/RLE/LRO/RLO/PDF characters. The intended use is to check
+     * whether a logically separate item that starts with a number or a character of the string's
+     * exit directionality and follows this string inline (not counting any neutral characters in
+     * between) would "stick" to it in an opposite-directionality context, thus being displayed in
+     * an incorrect position. An LRM or RLM character (the one of the context's directionality)
+     * between the two will prevent such sticking.
+     *
+     * @param str the string to check.
+     */
+    private static Dir getExitDir(String str) {
+        return new DirectionalityEstimator(str, false /* isHtml */).getExitDir();
+    }
+
+    /**
+     * Returns the directionality of the first character with strong directionality in the string,
+     * or Dir.UNKNOWN if none was encountered. Treats a non-BN character between an
+     * LRE/RLE/LRO/RLO and its matching PDF as a strong character, LTR after LRE/LRO, and RTL after
+     * RLE/RLO. The results are undefined for a string containing unbalanced LRE/RLE/LRO/RLO/PDF
+     * characters. The intended use is to check whether a logically separate item that ends with a
+     * character of the string's entry directionality and precedes the string inline (not counting
+     * any neutral characters in between) would "stick" to it in an opposite-directionality context,
+     * thus being displayed in an incorrect position. An LRM or RLM character (the one of the
+     * context's directionality) between the two will prevent such sticking.
+     *
+     * @param str the string to check.
+     */
+    private static Dir getEntryDir(String str) {
+        return new DirectionalityEstimator(str, false /* isHtml */).getEntryDir();
+    }
+
+    /**
+     * An object that estimates the directionality of a given string by various methods.
+     *
+     */
+    private static class DirectionalityEstimator {
+
+        // Internal static variables and constants.
+
+        /**
+         * Size of the bidi character class cache. The results of the Character.getDirectionality()
+         * calls on the lowest DIR_TYPE_CACHE_SIZE codepoints are kept in an array for speed.
+         * The 0x700 value is designed to leave all the European and Near Eastern languages in the
+         * cache. It can be reduced to 0x180, restricting the cache to the Western European
+         * languages.
+         */
+        private static final int DIR_TYPE_CACHE_SIZE = 0x700;
+
+        /**
+         * The bidi character class cache.
+         */
+        private static final byte DIR_TYPE_CACHE[];
+
+        static {
+            DIR_TYPE_CACHE = new byte[DIR_TYPE_CACHE_SIZE];
+            for (int i = 0; i < DIR_TYPE_CACHE_SIZE; i++) {
+                DIR_TYPE_CACHE[i] = Character.getDirectionality(i);
+            }
+        }
+
+        // Internal instance variables.
+
+        /**
+         * The text to be scanned.
+         */
+        private final String text;
+
+        /**
+         * Whether the text to be scanned is to be treated as HTML, i.e. skipping over tags and
+         * entities when looking for the next / preceding dir type.
+         */
+        private final boolean isHtml;
+
+        /**
+         * The length of the text in chars.
+         */
+        private final int length;
+
+        /**
+         * The current position in the text.
+         */
+        private int charIndex;
+
+        /**
+         * The char encountered by the last dirTypeForward or dirTypeBackward call. If it
+         * encountered a supplementary codepoint, this contains a char that is not a valid
+         * codepoint. This is ok, because this member is only used to detect some well-known ASCII
+         * syntax, e.g. "http://" and the beginning of an HTML tag or entity.
+         */
+        private char lastChar;
+
+        /**
+         * Constructor.
+         *
+         * @param text The string to scan.
+         * @param isHtml Whether the text to be scanned is to be treated as HTML, i.e. skipping over
+         *     tags and entities.
+         */
+        DirectionalityEstimator(String text, boolean isHtml) {
+            this.text = text;
+            this.isHtml = isHtml;
+            length = text.length();
+        }
+
+        /**
+         * Returns the directionality of the first character with strong directionality in the
+         * string, or Dir.UNKNOWN if none was encountered. Treats a non-BN character between an
+         * LRE/RLE/LRO/RLO and its matching PDF as a strong character, LTR after LRE/LRO, and RTL
+         * after RLE/RLO. The results are undefined for a string containing unbalanced
+         * LRE/RLE/LRO/RLO/PDF characters.
+         */
+        Dir getEntryDir() {
+            // The reason for this method name, as opposed to getFirstStrongDir(), is that
+            // "first strong" is a commonly used description of Unicode's estimation algorithm,
+            // but the two must treat formatting characters quite differently. Thus, we are staying
+            // away from both "first" and "last" in these method names to avoid confusion.
+            charIndex = 0;
+            int embeddingLevel = 0;
+            Dir embeddingLevelDir = Dir.UNKNOWN;
+            int firstNonEmptyEmbeddingLevel = 0;
+            while (charIndex < length && firstNonEmptyEmbeddingLevel == 0) {
+                switch (dirTypeForward()) {
+                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
+                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
+                        ++embeddingLevel;
+                        embeddingLevelDir = Dir.LTR;
+                        break;
+                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
+                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
+                        ++embeddingLevel;
+                        embeddingLevelDir = Dir.RTL;
+                        break;
+                    case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
+                        --embeddingLevel;
+                        // To restore embeddingLevelDir to its previous value, we would need a
+                        // stack, which we want to avoid. Thus, at this point we do not know the
+                        // current embedding's directionality.
+                        embeddingLevelDir = Dir.UNKNOWN;
+                        break;
+                    case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL:
+                        break;
+                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
+                        if (embeddingLevel == 0) {
+                            return Dir.LTR;
+                        }
+                        firstNonEmptyEmbeddingLevel = embeddingLevel;
+                        break;
+                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
+                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
+                        if (embeddingLevel == 0) {
+                            return Dir.RTL;
+                        }
+                        firstNonEmptyEmbeddingLevel = embeddingLevel;
+                        break;
+                    default:
+                        firstNonEmptyEmbeddingLevel = embeddingLevel;
+                        break;
+                }
+            }
+
+            // We have either found a non-empty embedding or scanned the entire string finding
+            // neither a non-empty embedding nor a strong character outside of an embedding.
+            if (firstNonEmptyEmbeddingLevel == 0) {
+                // We have not found a non-empty embedding. Thus, the string contains neither a
+                // non-empty embedding nor a strong character outside of an embedding.
+                return Dir.UNKNOWN;
+            }
+
+            // We have found a non-empty embedding.
+            if (embeddingLevelDir != Dir.UNKNOWN) {
+                // We know the directionality of the non-empty embedding.
+                return embeddingLevelDir;
+            }
+
+            // We do not remember the directionality of the non-empty embedding we found. So, we go
+            // backwards to find the start of the non-empty embedding and get its directionality.
+            while (charIndex > 0) {
+                switch (dirTypeBackward()) {
+                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
+                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
+                        if (firstNonEmptyEmbeddingLevel == embeddingLevel) {
+                            return Dir.LTR;
+                        }
+                        --embeddingLevel;
+                        break;
+                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
+                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
+                        if (firstNonEmptyEmbeddingLevel == embeddingLevel) {
+                            return Dir.RTL;
+                        }
+                        --embeddingLevel;
+                        break;
+                    case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
+                        ++embeddingLevel;
+                        break;
+                }
+            }
+            // We should never get here.
+            return Dir.UNKNOWN;
+        }
+
+        /**
+         * Returns the directionality of the last character with strong directionality in the
+         * string, or Dir.UNKNOWN if none was encountered. For efficiency, actually scans backwards
+         * from the end of the string. Treats a non-BN character between an LRE/RLE/LRO/RLO and its
+         * matching PDF as a strong character, LTR after LRE/LRO, and RTL after RLE/RLO. The results
+         * are undefined for a string containing unbalanced LRE/RLE/LRO/RLO/PDF characters.
+         */
+        Dir getExitDir() {
+            // The reason for this method name, as opposed to getLastStrongDir(), is that "last
+            // strong" sounds like the exact opposite of "first strong", which is a commonly used
+            // description of Unicode's estimation algorithm (getUnicodeDir() above), but the two
+            // must treat formatting characters quite differently. Thus, we are staying away from
+            // both "first" and "last" in these method names to avoid confusion.
+            charIndex = length;
+            int embeddingLevel = 0;
+            int lastNonEmptyEmbeddingLevel = 0;
+            while (charIndex > 0) {
+                switch (dirTypeBackward()) {
+                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT:
+                        if (embeddingLevel == 0) {
+                            return Dir.LTR;
+                        }
+                        if (lastNonEmptyEmbeddingLevel == 0) {
+                            lastNonEmptyEmbeddingLevel = embeddingLevel;
+                        }
+                        break;
+                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING:
+                    case Character.DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE:
+                        if (lastNonEmptyEmbeddingLevel == embeddingLevel) {
+                            return Dir.LTR;
+                        }
+                        --embeddingLevel;
+                        break;
+                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT:
+                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC:
+                        if (embeddingLevel == 0) {
+                            return Dir.RTL;
+                        }
+                        if (lastNonEmptyEmbeddingLevel == 0) {
+                            lastNonEmptyEmbeddingLevel = embeddingLevel;
+                        }
+                        break;
+                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING:
+                    case Character.DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE:
+                        if (lastNonEmptyEmbeddingLevel == embeddingLevel) {
+                            return Dir.RTL;
+                        }
+                        --embeddingLevel;
+                        break;
+                    case Character.DIRECTIONALITY_POP_DIRECTIONAL_FORMAT:
+                        ++embeddingLevel;
+                        break;
+                    case Character.DIRECTIONALITY_BOUNDARY_NEUTRAL:
+                        break;
+                    default:
+                        if (lastNonEmptyEmbeddingLevel == 0) {
+                            lastNonEmptyEmbeddingLevel = embeddingLevel;
+                        }
+                        break;
+                }
+            }
+            return Dir.UNKNOWN;
+        }
+
+        // Internal methods
+
+        /**
+         * Gets the bidi character class, i.e. Character.getDirectionality(), of a given char, using
+         * a cache for speed. Not designed for supplementary codepoints, whose results we do not
+         * cache.
+         */
+        private static byte getCachedDirectionality(char c) {
+            return c < DIR_TYPE_CACHE_SIZE ? DIR_TYPE_CACHE[c] : Character.getDirectionality(c);
+        }
+
+        /**
+         * Returns the Character.DIRECTIONALITY_... value of the next codepoint and advances
+         * charIndex. If isHtml, and the codepoint is '<' or '&', advances through the tag/entity,
+         * and returns Character.DIRECTIONALITY_WHITESPACE. For an entity, it would be best to
+         * figure out the actual character, and return its dirtype, but treating it as whitespace is
+         * good enough for our purposes.
+         *
+         * @throws java.lang.IndexOutOfBoundsException if called when charIndex >= length or < 0.
+         */
+        byte dirTypeForward() {
+            lastChar = text.charAt(charIndex);
+            if (Character.isHighSurrogate(lastChar)) {
+                int codePoint = Character.codePointAt(text, charIndex);
+                charIndex += Character.charCount(codePoint);
+                return Character.getDirectionality(codePoint);
+            }
+            charIndex++;
+            byte dirType = getCachedDirectionality(lastChar);
+            if (isHtml) {
+                // Process tags and entities.
+                if (lastChar == '<') {
+                    dirType = skipTagForward();
+                } else if (lastChar == '&') {
+                    dirType = skipEntityForward();
+                }
+            }
+            return dirType;
+        }
+
+        /**
+         * Returns the Character.DIRECTIONALITY_... value of the preceding codepoint and advances
+         * charIndex backwards. If isHtml, and the codepoint is the end of a complete HTML tag or
+         * entity, advances over the whole tag/entity and returns
+         * Character.DIRECTIONALITY_WHITESPACE. For an entity, it would be best to figure out the
+         * actual character, and return its dirtype, but treating it as whitespace is good enough
+         * for our purposes.
+         *
+         * @throws java.lang.IndexOutOfBoundsException if called when charIndex > length or <= 0.
+         */
+        byte dirTypeBackward() {
+            lastChar = text.charAt(charIndex - 1);
+            if (Character.isLowSurrogate(lastChar)) {
+                int codePoint = Character.codePointBefore(text, charIndex);
+                charIndex -= Character.charCount(codePoint);
+                return Character.getDirectionality(codePoint);
+            }
+            charIndex--;
+            byte dirType = getCachedDirectionality(lastChar);
+            if (isHtml) {
+                // Process tags and entities.
+                if (lastChar == '>') {
+                    dirType = skipTagBackward();
+                } else if (lastChar == ';') {
+                    dirType = skipEntityBackward();
+                }
+            }
+            return dirType;
+        }
+
+        /**
+         * Advances charIndex forward through an HTML tag (after the opening &lt; has already been
+         * read) and returns Character.DIRECTIONALITY_WHITESPACE. If there is no matching &gt;,
+         * does not change charIndex and returns Character.DIRECTIONALITY_OTHER_NEUTRALS (for the
+         * &lt; that hadn't been part of a tag after all).
+         */
+        private byte skipTagForward() {
+            int initialCharIndex = charIndex;
+            while (charIndex < length) {
+                lastChar = text.charAt(charIndex++);
+                if (lastChar == '>') {
+                    // The end of the tag.
+                    return Character.DIRECTIONALITY_WHITESPACE;
+                }
+                if (lastChar == '"' || lastChar == '\'') {
+                    // Skip over a quoted attribute value inside the tag.
+                    char quote = lastChar;
+                    while (charIndex < length && (lastChar = text.charAt(charIndex++)) != quote) {}
+                }
+            }
+            // The original '<' wasn't the start of a tag after all.
+            charIndex = initialCharIndex;
+            lastChar = '<';
+            return Character.DIRECTIONALITY_OTHER_NEUTRALS;
+        }
+
+        /**
+         * Advances charIndex backward through an HTML tag (after the closing &gt; has already been
+         * read) and returns Character.DIRECTIONALITY_WHITESPACE. If there is no matching &lt;, does
+         * not change charIndex and returns Character.DIRECTIONALITY_OTHER_NEUTRALS (for the &gt;
+         * that hadn't been part of a tag after all). Nevertheless, the running time for calling
+         * skipTagBackward() in a loop remains linear in the size of the text, even for a text like
+         * "&gt;&gt;&gt;&gt;", because skipTagBackward() also stops looking for a matching &lt;
+         * when it encounters another &gt;.
+         */
+        private byte skipTagBackward() {
+            int initialCharIndex = charIndex;
+            while (charIndex > 0) {
+                lastChar = text.charAt(--charIndex);
+                if (lastChar == '<') {
+                    // The start of the tag.
+                    return Character.DIRECTIONALITY_WHITESPACE;
+                }
+                if (lastChar == '>') {
+                    break;
+                }
+                if (lastChar == '"' || lastChar == '\'') {
+                    // Skip over a quoted attribute value inside the tag.
+                    char quote = lastChar;
+                    while (charIndex > 0 && (lastChar = text.charAt(--charIndex)) != quote) {}
+                }
+            }
+            // The original '>' wasn't the end of a tag after all.
+            charIndex = initialCharIndex;
+            lastChar = '>';
+            return Character.DIRECTIONALITY_OTHER_NEUTRALS;
+        }
+
+        /**
+         * Advances charIndex forward through an HTML character entity tag (after the opening
+         * &amp; has already been read) and returns Character.DIRECTIONALITY_WHITESPACE. It would be
+         * best to figure out the actual character and return its dirtype, but this is good enough.
+         */
+        private byte skipEntityForward() {
+            while (charIndex < length && (lastChar = text.charAt(charIndex++)) != ';') {}
+            return Character.DIRECTIONALITY_WHITESPACE;
+        }
+
+        /**
+         * Advances charIndex backward through an HTML character entity tag (after the closing ;
+         * has already been read) and returns Character.DIRECTIONALITY_WHITESPACE. It would be best
+         * to figure out the actual character and return its dirtype, but this is good enough.
+         * If there is no matching &amp;, does not change charIndex and returns
+         * Character.DIRECTIONALITY_OTHER_NEUTRALS (for the ';' that did not start an entity after
+         * all). Nevertheless, the running time for calling skipEntityBackward() in a loop remains
+         * linear in the size of the text, even for a text like ";;;;;;;", because skipTagBackward()
+         * also stops looking for a matching &amp; when it encounters another ;.
+         */
+        private byte skipEntityBackward() {
+            int initialCharIndex = charIndex;
+            while (charIndex > 0) {
+                lastChar = text.charAt(--charIndex);
+                if (lastChar == '&') {
+                    return Character.DIRECTIONALITY_WHITESPACE;
+                }
+                if (lastChar == ';') {
+                    break;
+                }
+            }
+            charIndex = initialCharIndex;
+            lastChar = ';';
+            return Character.DIRECTIONALITY_OTHER_NEUTRALS;
+        }
+    }
+}
\ No newline at end of file
commit	57a85740d721caf8dcd94a545b2dd920e8e84e01	[log] [tgz]
author	Fabrice Di Meglio <fdimeglio@google.com>	Thu Jan 31 13:29:36 2013 -0800
committer	Fabrice Di Meglio <fdimeglio@google.com>	Mon Feb 11 14:27:10 2013 -0800
tree	4a7334850018c38d1c8f02db230e8edc3497de7a
parent	981e60edc3ed91ea8b5e1dae98669b88b175e19b [diff]