Implement line breaking using ICU break iterator

Change-Id: I4ad98757aa2eab5dbc2ae44c0391e900ef20c4d0
diff --git a/core/java/android/text/StaticLayout.java b/core/java/android/text/StaticLayout.java
index 0db00f0..fb4fd59 100644
--- a/core/java/android/text/StaticLayout.java
+++ b/core/java/android/text/StaticLayout.java
@@ -161,6 +161,9 @@
                         float spacingadd, boolean includepad,
                         boolean trackpad, float ellipsizedWidth,
                         TextUtils.TruncateAt ellipsize) {
+        int[] breakOpp = null;
+        final String localeLanguageTag = paint.getTextLocale().toLanguageTag();
+
         mLineCount = 0;
 
         int v = 0;
@@ -175,8 +178,6 @@
         if (source instanceof Spanned)
             spanned = (Spanned) source;
 
-        int DEFAULT_DIR = DIR_LEFT_TO_RIGHT; // XXX
-
         int paraEnd;
         for (int paraStart = bufStart; paraStart <= bufEnd; paraStart = paraEnd) {
             paraEnd = TextUtils.indexOf(source, CHAR_NEW_LINE, paraStart, bufEnd);
@@ -242,6 +243,9 @@
             int dir = measured.mDir;
             boolean easy = measured.mEasy;
 
+            breakOpp = nLineBreakOpportunities(localeLanguageTag, chs, paraEnd - paraStart, breakOpp);
+            int breakOppIndex = 0;
+
             int width = firstWidth;
 
             float w = 0;
@@ -354,15 +358,12 @@
                         if (fmBottom > fitBottom)
                             fitBottom = fmBottom;
 
-                        // From the Unicode Line Breaking Algorithm (at least approximately)
-                        boolean isLineBreak = isSpaceOrTab ||
-                                // / is class SY and - is class HY, except when followed by a digit
-                                ((c == CHAR_SLASH || c == CHAR_HYPHEN) &&
-                                (j + 1 >= spanEnd || !Character.isDigit(chs[j + 1 - paraStart]))) ||
-                                // Ideographs are class ID: breakpoints when adjacent, except for NS
-                                // (non-starters), which can be broken after but not before
-                                (c >= CHAR_FIRST_CJK && isIdeographic(c, true) &&
-                                j + 1 < spanEnd && isIdeographic(chs[j + 1 - paraStart], false));
+                        while (breakOpp[breakOppIndex] != -1
+                                && breakOpp[breakOppIndex] < j - paraStart + 1) {
+                            breakOppIndex++;
+                        }
+                        boolean isLineBreak = breakOppIndex < breakOpp.length &&
+                                breakOpp[breakOppIndex] == j - paraStart + 1;
 
                         if (isLineBreak) {
                             okWidth = w;
@@ -490,97 +491,6 @@
         }
     }
 
-    /**
-     * Returns true if the specified character is one of those specified
-     * as being Ideographic (class ID) by the Unicode Line Breaking Algorithm
-     * (http://www.unicode.org/unicode/reports/tr14/), and is therefore OK
-     * to break between a pair of.
-     *
-     * @param includeNonStarters also return true for category NS
-     *                           (non-starters), which can be broken
-     *                           after but not before.
-     */
-    private static final boolean isIdeographic(char c, boolean includeNonStarters) {
-        if (c >= '\u2E80' && c <= '\u2FFF') {
-            return true; // CJK, KANGXI RADICALS, DESCRIPTION SYMBOLS
-        }
-        if (c == '\u3000') {
-            return true; // IDEOGRAPHIC SPACE
-        }
-        if (c >= '\u3040' && c <= '\u309F') {
-            if (!includeNonStarters) {
-                switch (c) {
-                case '\u3041': //  # HIRAGANA LETTER SMALL A
-                case '\u3043': //  # HIRAGANA LETTER SMALL I
-                case '\u3045': //  # HIRAGANA LETTER SMALL U
-                case '\u3047': //  # HIRAGANA LETTER SMALL E
-                case '\u3049': //  # HIRAGANA LETTER SMALL O
-                case '\u3063': //  # HIRAGANA LETTER SMALL TU
-                case '\u3083': //  # HIRAGANA LETTER SMALL YA
-                case '\u3085': //  # HIRAGANA LETTER SMALL YU
-                case '\u3087': //  # HIRAGANA LETTER SMALL YO
-                case '\u308E': //  # HIRAGANA LETTER SMALL WA
-                case '\u3095': //  # HIRAGANA LETTER SMALL KA
-                case '\u3096': //  # HIRAGANA LETTER SMALL KE
-                case '\u309B': //  # KATAKANA-HIRAGANA VOICED SOUND MARK
-                case '\u309C': //  # KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
-                case '\u309D': //  # HIRAGANA ITERATION MARK
-                case '\u309E': //  # HIRAGANA VOICED ITERATION MARK
-                    return false;
-                }
-            }
-            return true; // Hiragana (except small characters)
-        }
-        if (c >= '\u30A0' && c <= '\u30FF') {
-            if (!includeNonStarters) {
-                switch (c) {
-                case '\u30A0': //  # KATAKANA-HIRAGANA DOUBLE HYPHEN
-                case '\u30A1': //  # KATAKANA LETTER SMALL A
-                case '\u30A3': //  # KATAKANA LETTER SMALL I
-                case '\u30A5': //  # KATAKANA LETTER SMALL U
-                case '\u30A7': //  # KATAKANA LETTER SMALL E
-                case '\u30A9': //  # KATAKANA LETTER SMALL O
-                case '\u30C3': //  # KATAKANA LETTER SMALL TU
-                case '\u30E3': //  # KATAKANA LETTER SMALL YA
-                case '\u30E5': //  # KATAKANA LETTER SMALL YU
-                case '\u30E7': //  # KATAKANA LETTER SMALL YO
-                case '\u30EE': //  # KATAKANA LETTER SMALL WA
-                case '\u30F5': //  # KATAKANA LETTER SMALL KA
-                case '\u30F6': //  # KATAKANA LETTER SMALL KE
-                case '\u30FB': //  # KATAKANA MIDDLE DOT
-                case '\u30FC': //  # KATAKANA-HIRAGANA PROLONGED SOUND MARK
-                case '\u30FD': //  # KATAKANA ITERATION MARK
-                case '\u30FE': //  # KATAKANA VOICED ITERATION MARK
-                    return false;
-                }
-            }
-            return true; // Katakana (except small characters)
-        }
-        if (c >= '\u3400' && c <= '\u4DB5') {
-            return true; // CJK UNIFIED IDEOGRAPHS EXTENSION A
-        }
-        if (c >= '\u4E00' && c <= '\u9FBB') {
-            return true; // CJK UNIFIED IDEOGRAPHS
-        }
-        if (c >= '\uF900' && c <= '\uFAD9') {
-            return true; // CJK COMPATIBILITY IDEOGRAPHS
-        }
-        if (c >= '\uA000' && c <= '\uA48F') {
-            return true; // YI SYLLABLES
-        }
-        if (c >= '\uA490' && c <= '\uA4CF') {
-            return true; // YI RADICALS
-        }
-        if (c >= '\uFE62' && c <= '\uFE66') {
-            return true; // SMALL PLUS SIGN to SMALL EQUALS SIGN
-        }
-        if (c >= '\uFF10' && c <= '\uFF19') {
-            return true; // WIDE DIGITS
-        }
-
-        return false;
-    }
-
     private int out(CharSequence text, int start, int end,
                       int above, int below, int top, int bottom, int v,
                       float spacingmult, float spacingadd,
@@ -929,6 +839,11 @@
         mMeasured = MeasuredText.recycle(mMeasured);
     }
 
+    // Returns the line break offsets within text[0..length), terminated by the sentinel -1.
+    // The result is "recycle" itself when it is large enough, else a new array; the sentinel
+    // (not the array length) marks the end, so arrays can be recycled rather than reallocated.
+    private static native int[] nLineBreakOpportunities(String locale, char[] text, int length, int[] recycle);
+
     private int mLineCount;
     private int mTopPadding, mBottomPadding;
     private int mColumns;
@@ -954,13 +869,9 @@
 
     private static final int TAB_INCREMENT = 20; // same as Layout, but that's private
 
-    private static final char CHAR_FIRST_CJK = '\u2E80';
-
     private static final char CHAR_NEW_LINE = '\n';
     private static final char CHAR_TAB = '\t';
     private static final char CHAR_SPACE = ' ';
-    private static final char CHAR_SLASH = '/';
-    private static final char CHAR_HYPHEN = '-';
     private static final char CHAR_ZWSP = '\u200B';
 
     private static final double EXTRA_ROUNDING = 0.5;
diff --git a/core/jni/Android.mk b/core/jni/Android.mk
index f287fca..38a7fa5 100644
--- a/core/jni/Android.mk
+++ b/core/jni/Android.mk
@@ -67,6 +67,7 @@
 	android_view_VelocityTracker.cpp \
 	android_text_AndroidCharacter.cpp \
 	android_text_AndroidBidi.cpp \
+	android_text_StaticLayout.cpp \
 	android_os_Debug.cpp \
 	android_os_MemoryFile.cpp \
 	android_os_MessageQueue.cpp \
diff --git a/core/jni/AndroidRuntime.cpp b/core/jni/AndroidRuntime.cpp
index 598d6c1..4798f58 100644
--- a/core/jni/AndroidRuntime.cpp
+++ b/core/jni/AndroidRuntime.cpp
@@ -157,6 +157,7 @@
 extern int register_android_net_NetworkUtils(JNIEnv* env);
 extern int register_android_net_TrafficStats(JNIEnv* env);
 extern int register_android_text_AndroidCharacter(JNIEnv *env);
+extern int register_android_text_StaticLayout(JNIEnv *env);
 extern int register_android_text_AndroidBidi(JNIEnv *env);
 extern int register_android_opengl_classes(JNIEnv *env);
 extern int register_android_server_fingerprint_FingerprintService(JNIEnv* env);
@@ -1228,6 +1229,7 @@
     REG_JNI(register_android_content_XmlBlock),
     REG_JNI(register_android_emoji_EmojiFactory),
     REG_JNI(register_android_text_AndroidCharacter),
+    REG_JNI(register_android_text_StaticLayout),
     REG_JNI(register_android_text_AndroidBidi),
     REG_JNI(register_android_view_InputDevice),
     REG_JNI(register_android_view_KeyCharacterMap),
diff --git a/core/jni/android_text_StaticLayout.cpp b/core/jni/android_text_StaticLayout.cpp
new file mode 100644
index 0000000..696926c
--- /dev/null
+++ b/core/jni/android_text_StaticLayout.cpp
@@ -0,0 +1,110 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#define LOG_TAG "StaticLayout"
+
+#include "ScopedIcuLocale.h"
+#include "unicode/locid.h"
+#include "unicode/brkiter.h"
+#include "utils/misc.h"
+#include "utils/Log.h"
+#include "ScopedPrimitiveArray.h"
+#include "JNIHelp.h"
+#include <android_runtime/AndroidRuntime.h>
+#include <vector>
+
+namespace android {
+
+// Scoped owner of an ICU BreakIterator and of the UText it reads inputText through. The
+// destructor closes the UText and deletes the iterator, so callers must not delete it again.
+class ScopedBreakIterator {
+    public:
+        ScopedBreakIterator(JNIEnv* env, BreakIterator* breakIterator, jcharArray inputText,
+                            jint length) : mBreakIterator(breakIterator), mChars(env, inputText) {
+            UErrorCode errorCode = U_ZERO_ERROR;
+            mUText = utext_openUChars(NULL, mChars.get(), length, &errorCode);
+            if (mUText != NULL) { // text is only attached when the UText could be opened
+                mBreakIterator->setText(mUText, errorCode);
+            }
+        }
+
+        ~ScopedBreakIterator() {
+            utext_close(mUText);
+            delete mBreakIterator;
+        }
+
+        // Forwards member access to the wrapped iterator, e.g. scoped->next().
+        BreakIterator* operator->() { return mBreakIterator; }
+    private:
+        // Copying and assignment are disabled: declared here but never defined.
+        ScopedBreakIterator(const ScopedBreakIterator&);
+        void operator=(const ScopedBreakIterator&);
+
+        // mChars provides the UChar buffer that mUText wraps (see the constructor).
+        BreakIterator* mBreakIterator;
+        ScopedCharArrayRO mChars;
+        UText* mUText;
+};
+
+// Returns the ICU line break offsets for the first "length" chars of inputText, followed by a
+// -1 sentinel. "recycle" is reused as the result when it is long enough (entries past the
+// sentinel are left untouched); otherwise a new array is allocated, or NULL if that fails.
+static jintArray nLineBreakOpportunities(JNIEnv* env, jclass, jstring javaLocaleName,
+                                        jcharArray inputText, jint length,
+                                        jintArray recycle) {
+    // Start empty and only reserve capacity: sizing the vector at construction would insert
+    // that many bogus zero offsets ahead of the real ones.
+    std::vector<jint> breaks;
+    breaks.reserve(16);
+
+    ScopedIcuLocale icuLocale(env, javaLocaleName);
+    if (icuLocale.valid()) {
+        UErrorCode status = U_ZERO_ERROR;
+        BreakIterator* it = BreakIterator::createLineInstance(icuLocale.locale(), status);
+        if (!U_SUCCESS(status) || it == NULL) {
+            delete it; // deleting NULL is a no-op
+        } else {
+            // breakIterator takes ownership of "it" and deletes it when leaving this scope.
+            ScopedBreakIterator breakIterator(env, it, inputText, length);
+            for (int loc = breakIterator->first(); loc != BreakIterator::DONE;
+                    loc = breakIterator->next()) {
+                breaks.push_back(loc);
+            }
+        }
+    }
+    breaks.push_back(-1); // sentinel terminal value; the only entry if ICU setup failed
+
+    // jsize is signed; convert once so the length comparison below is not signed vs. unsigned.
+    const jsize count = static_cast<jsize>(breaks.size());
+    const bool reuse = recycle != NULL && env->GetArrayLength(recycle) >= count;
+    jintArray ret = reuse ? recycle : env->NewIntArray(count);
+    if (ret != NULL) { // NewIntArray returns NULL when the array cannot be allocated
+        env->SetIntArrayRegion(ret, 0, count, &breaks.front());
+    }
+    return ret;
+}
+
+// JNI method table (Java name, JNI signature, native function) and its registration hook.
+static JNINativeMethod gMethods[] = {
+    {"nLineBreakOpportunities", "(Ljava/lang/String;[CI[I)[I", (void*) nLineBreakOpportunities}
+};
+
+int register_android_text_StaticLayout(JNIEnv* env) {
+    return AndroidRuntime::registerNativeMethods(
+            env, "android/text/StaticLayout", gMethods, NELEM(gMethods));
+}
+
+}