Implement line breaking using ICU break iterator

Change-Id: I4ad98757aa2eab5dbc2ae44c0391e900ef20c4d0
diff --git a/core/java/android/text/StaticLayout.java b/core/java/android/text/StaticLayout.java
index 0db00f0..fb4fd59 100644
--- a/core/java/android/text/StaticLayout.java
+++ b/core/java/android/text/StaticLayout.java
@@ -161,6 +161,9 @@
                         float spacingadd, boolean includepad,
                         boolean trackpad, float ellipsizedWidth,
                         TextUtils.TruncateAt ellipsize) {
+        int[] breakOpp = null;
+        final String localeLanguageTag = paint.getTextLocale().toLanguageTag();
+
         mLineCount = 0;
 
         int v = 0;
@@ -175,8 +178,6 @@
         if (source instanceof Spanned)
             spanned = (Spanned) source;
 
-        int DEFAULT_DIR = DIR_LEFT_TO_RIGHT; // XXX
-
         int paraEnd;
         for (int paraStart = bufStart; paraStart <= bufEnd; paraStart = paraEnd) {
             paraEnd = TextUtils.indexOf(source, CHAR_NEW_LINE, paraStart, bufEnd);
@@ -242,6 +243,9 @@
             int dir = measured.mDir;
             boolean easy = measured.mEasy;
 
+            breakOpp = nLineBreakOpportunities(localeLanguageTag, chs, paraEnd - paraStart, breakOpp);
+            int breakOppIndex = 0;
+
             int width = firstWidth;
 
             float w = 0;
@@ -354,15 +358,12 @@
                         if (fmBottom > fitBottom)
                             fitBottom = fmBottom;
 
-                        // From the Unicode Line Breaking Algorithm (at least approximately)
-                        boolean isLineBreak = isSpaceOrTab ||
-                                // / is class SY and - is class HY, except when followed by a digit
-                                ((c == CHAR_SLASH || c == CHAR_HYPHEN) &&
-                                (j + 1 >= spanEnd || !Character.isDigit(chs[j + 1 - paraStart]))) ||
-                                // Ideographs are class ID: breakpoints when adjacent, except for NS
-                                // (non-starters), which can be broken after but not before
-                                (c >= CHAR_FIRST_CJK && isIdeographic(c, true) &&
-                                j + 1 < spanEnd && isIdeographic(chs[j + 1 - paraStart], false));
+                        while (breakOpp[breakOppIndex] != -1
+                                && breakOpp[breakOppIndex] < j - paraStart + 1) {
+                            breakOppIndex++;
+                        }
+                        boolean isLineBreak = breakOppIndex < breakOpp.length &&
+                                breakOpp[breakOppIndex] == j - paraStart + 1;
 
                         if (isLineBreak) {
                             okWidth = w;
@@ -490,97 +491,6 @@
         }
     }
 
-    /**
-     * Returns true if the specified character is one of those specified
-     * as being Ideographic (class ID) by the Unicode Line Breaking Algorithm
-     * (http://www.unicode.org/unicode/reports/tr14/), and is therefore OK
-     * to break between a pair of.
-     *
-     * @param includeNonStarters also return true for category NS
-     *                           (non-starters), which can be broken
-     *                           after but not before.
-     */
-    private static final boolean isIdeographic(char c, boolean includeNonStarters) {
-        if (c >= '\u2E80' && c <= '\u2FFF') {
-            return true; // CJK, KANGXI RADICALS, DESCRIPTION SYMBOLS
-        }
-        if (c == '\u3000') {
-            return true; // IDEOGRAPHIC SPACE
-        }
-        if (c >= '\u3040' && c <= '\u309F') {
-            if (!includeNonStarters) {
-                switch (c) {
-                case '\u3041': //  # HIRAGANA LETTER SMALL A
-                case '\u3043': //  # HIRAGANA LETTER SMALL I
-                case '\u3045': //  # HIRAGANA LETTER SMALL U
-                case '\u3047': //  # HIRAGANA LETTER SMALL E
-                case '\u3049': //  # HIRAGANA LETTER SMALL O
-                case '\u3063': //  # HIRAGANA LETTER SMALL TU
-                case '\u3083': //  # HIRAGANA LETTER SMALL YA
-                case '\u3085': //  # HIRAGANA LETTER SMALL YU
-                case '\u3087': //  # HIRAGANA LETTER SMALL YO
-                case '\u308E': //  # HIRAGANA LETTER SMALL WA
-                case '\u3095': //  # HIRAGANA LETTER SMALL KA
-                case '\u3096': //  # HIRAGANA LETTER SMALL KE
-                case '\u309B': //  # KATAKANA-HIRAGANA VOICED SOUND MARK
-                case '\u309C': //  # KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
-                case '\u309D': //  # HIRAGANA ITERATION MARK
-                case '\u309E': //  # HIRAGANA VOICED ITERATION MARK
-                    return false;
-                }
-            }
-            return true; // Hiragana (except small characters)
-        }
-        if (c >= '\u30A0' && c <= '\u30FF') {
-            if (!includeNonStarters) {
-                switch (c) {
-                case '\u30A0': //  # KATAKANA-HIRAGANA DOUBLE HYPHEN
-                case '\u30A1': //  # KATAKANA LETTER SMALL A
-                case '\u30A3': //  # KATAKANA LETTER SMALL I
-                case '\u30A5': //  # KATAKANA LETTER SMALL U
-                case '\u30A7': //  # KATAKANA LETTER SMALL E
-                case '\u30A9': //  # KATAKANA LETTER SMALL O
-                case '\u30C3': //  # KATAKANA LETTER SMALL TU
-                case '\u30E3': //  # KATAKANA LETTER SMALL YA
-                case '\u30E5': //  # KATAKANA LETTER SMALL YU
-                case '\u30E7': //  # KATAKANA LETTER SMALL YO
-                case '\u30EE': //  # KATAKANA LETTER SMALL WA
-                case '\u30F5': //  # KATAKANA LETTER SMALL KA
-                case '\u30F6': //  # KATAKANA LETTER SMALL KE
-                case '\u30FB': //  # KATAKANA MIDDLE DOT
-                case '\u30FC': //  # KATAKANA-HIRAGANA PROLONGED SOUND MARK
-                case '\u30FD': //  # KATAKANA ITERATION MARK
-                case '\u30FE': //  # KATAKANA VOICED ITERATION MARK
-                    return false;
-                }
-            }
-            return true; // Katakana (except small characters)
-        }
-        if (c >= '\u3400' && c <= '\u4DB5') {
-            return true; // CJK UNIFIED IDEOGRAPHS EXTENSION A
-        }
-        if (c >= '\u4E00' && c <= '\u9FBB') {
-            return true; // CJK UNIFIED IDEOGRAPHS
-        }
-        if (c >= '\uF900' && c <= '\uFAD9') {
-            return true; // CJK COMPATIBILITY IDEOGRAPHS
-        }
-        if (c >= '\uA000' && c <= '\uA48F') {
-            return true; // YI SYLLABLES
-        }
-        if (c >= '\uA490' && c <= '\uA4CF') {
-            return true; // YI RADICALS
-        }
-        if (c >= '\uFE62' && c <= '\uFE66') {
-            return true; // SMALL PLUS SIGN to SMALL EQUALS SIGN
-        }
-        if (c >= '\uFF10' && c <= '\uFF19') {
-            return true; // WIDE DIGITS
-        }
-
-        return false;
-    }
-
     private int out(CharSequence text, int start, int end,
                       int above, int below, int top, int bottom, int v,
                       float spacingmult, float spacingadd,
@@ -929,6 +839,11 @@
         mMeasured = MeasuredText.recycle(mMeasured);
     }
 
+    // Returns the offsets of the line break opportunities in text, terminated by a -1
+    // sentinel. The end is marked by the sentinel rather than by the array length so that
+    // the recycle array can be reused instead of allocating a new array on every call.
+    private static native int[] nLineBreakOpportunities(String locale, char[] text, int length, int[] recycle);
+
     private int mLineCount;
     private int mTopPadding, mBottomPadding;
     private int mColumns;
@@ -954,13 +869,9 @@
 
     private static final int TAB_INCREMENT = 20; // same as Layout, but that's private
 
-    private static final char CHAR_FIRST_CJK = '\u2E80';
-
     private static final char CHAR_NEW_LINE = '\n';
     private static final char CHAR_TAB = '\t';
     private static final char CHAR_SPACE = ' ';
-    private static final char CHAR_SLASH = '/';
-    private static final char CHAR_HYPHEN = '-';
     private static final char CHAR_ZWSP = '\u200B';
 
     private static final double EXTRA_ROUNDING = 0.5;