Customizable minimum suffix/prefix length for hyphenation

With this change, each language can specify its own minimum prefix and
suffix lengths for hyphenation. Previously, the English defaults (a
minimum prefix of 2 and a minimum suffix of 3) were used for every
language.

Bug: 35712376
Test: Manual: German text can now break after two characters
Change-Id: Ia12d448a42bf2fab7c0bf5e85f8e27a4fb7f77d8
diff --git a/core/java/android/text/Hyphenator.java b/core/java/android/text/Hyphenator.java
index 80ec03e..c2508a6 100644
--- a/core/java/android/text/Hyphenator.java
+++ b/core/java/android/text/Hyphenator.java
@@ -42,13 +42,24 @@
 
     private static String TAG = "Hyphenator";
 
+    // TODO: Confirm that these are the best values. Various sources suggest (1, 1), but
+    // that appears too small.
+    private static final int INDIC_MIN_PREFIX = 2;
+    private static final int INDIC_MIN_SUFFIX = 2;
+
     private final static Object sLock = new Object();
 
     @GuardedBy("sLock")
     final static HashMap<Locale, Hyphenator> sMap = new HashMap<Locale, Hyphenator>();
 
+    // Reasonable enough values for cases where we have no hyphenation patterns but may be able to
+    // do some automatic hyphenation based on characters. These values would be used very rarely.
+    private static final int DEFAULT_MIN_PREFIX = 2;
+    private static final int DEFAULT_MIN_SUFFIX = 2;
     final static Hyphenator sEmptyHyphenator =
-            new Hyphenator(StaticLayout.nLoadHyphenator(null, 0), null);
+            new Hyphenator(StaticLayout.nLoadHyphenator(
+                                   null, 0, DEFAULT_MIN_PREFIX, DEFAULT_MIN_SUFFIX),
+                           null);
 
     final private long mNativePtr;
 
@@ -111,15 +122,26 @@
         return sEmptyHyphenator;
     }
 
-    private static Hyphenator loadHyphenator(String languageTag) {
-        String patternFilename = "hyph-" + languageTag.toLowerCase(Locale.US) + ".hyb";
+    private static class HyphenationData { // Per-language settings consumed by loadHyphenator().
+        final String mLanguageTag; // Locale tag; also names the "hyph-<tag>.hyb" pattern file.
+        final int mMinPrefix, mMinSuffix; // Forwarded to StaticLayout.nLoadHyphenator().
+        HyphenationData(String languageTag, int minPrefix, int minSuffix) {
+            this.mLanguageTag = languageTag;
+            this.mMinPrefix = minPrefix;
+            this.mMinSuffix = minSuffix;
+        }
+    }
+
+    private static Hyphenator loadHyphenator(HyphenationData data) {
+        String patternFilename = "hyph-" + data.mLanguageTag.toLowerCase(Locale.US) + ".hyb";
         File patternFile = new File(getSystemHyphenatorLocation(), patternFilename);
         try {
             RandomAccessFile f = new RandomAccessFile(patternFile, "r");
             try {
                 FileChannel fc = f.getChannel();
                 MappedByteBuffer buf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
-                long nativePtr = StaticLayout.nLoadHyphenator(buf, 0);
+                long nativePtr = StaticLayout.nLoadHyphenator(
+                        buf, 0, data.mMinPrefix, data.mMinSuffix);
                 return new Hyphenator(nativePtr, buf);
             } finally {
                 f.close();
@@ -176,6 +198,46 @@
         {"wal", "und-Ethi"}, // Wolaytta
     };
 
+    private static final HyphenationData[] AVAILABLE_LANGUAGES = { // (tag, minPrefix, minSuffix)
+        new HyphenationData("as", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Assamese
+        new HyphenationData("bg", 2, 2), // Bulgarian
+        new HyphenationData("bn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Bengali
+        new HyphenationData("cu", 1, 2), // Church Slavonic
+        new HyphenationData("cy", 2, 3), // Welsh
+        new HyphenationData("da", 2, 2), // Danish
+        new HyphenationData("de-1901", 2, 2), // German 1901 orthography
+        new HyphenationData("de-1996", 2, 2), // German 1996 orthography
+        new HyphenationData("de-CH-1901", 2, 2), // Swiss High German 1901 orthography
+        new HyphenationData("en-GB", 2, 3), // British English
+        new HyphenationData("en-US", 2, 3), // American English
+        new HyphenationData("es", 2, 2), // Spanish
+        new HyphenationData("et", 2, 3), // Estonian
+        new HyphenationData("eu", 2, 2), // Basque
+        new HyphenationData("fr", 2, 3), // French
+        new HyphenationData("ga", 2, 3), // Irish
+        new HyphenationData("gu", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Gujarati
+        new HyphenationData("hi", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Hindi
+        new HyphenationData("hr", 2, 2), // Croatian
+        new HyphenationData("hu", 2, 2), // Hungarian
+        // texhyphen sources say Armenian may be (1, 2), but that it needs confirmation.
+        // Going with a more conservative value of (2, 2) for now.
+        new HyphenationData("hy", 2, 2), // Armenian
+        new HyphenationData("kn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Kannada
+        new HyphenationData("ml", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Malayalam
+        new HyphenationData("mn-Cyrl", 2, 2), // Mongolian in Cyrillic script
+        new HyphenationData("mr", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Marathi
+        new HyphenationData("nb", 2, 2), // Norwegian Bokmål
+        new HyphenationData("nn", 2, 2), // Norwegian Nynorsk
+        new HyphenationData("or", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Oriya
+        new HyphenationData("pa", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Punjabi
+        new HyphenationData("pt", 2, 3), // Portuguese
+        new HyphenationData("sl", 2, 2), // Slovenian
+        new HyphenationData("ta", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Tamil
+        new HyphenationData("te", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Telugu
+        new HyphenationData("tk", 2, 2), // Turkmen
+        new HyphenationData("und-Ethi", 1, 1), // Any language in Ethiopic script
+    }; // init() attempts to load one Hyphenator per entry above.
+
     /**
      * Load hyphenation patterns at initialization time. We want to have patterns
      * for all locales loaded and ready to use so we don't have to do any file IO
@@ -186,46 +248,11 @@
     public static void init() {
         sMap.put(null, null);
 
-        // TODO: replace this with a discovery-based method that looks into /system/usr/hyphen-data
-        String[] availableLanguages = {
-            "as",
-            "bg",
-            "bn",
-            "cu",
-            "cy",
-            "da",
-            "de-1901", "de-1996", "de-CH-1901",
-            "en-GB", "en-US",
-            "es",
-            "et",
-            "eu",
-            "fr",
-            "ga",
-            "gu",
-            "hi",
-            "hr",
-            "hu",
-            "hy",
-            "kn",
-            "ml",
-            "mn-Cyrl",
-            "mr",
-            "nb",
-            "nn",
-            "or",
-            "pa",
-            "pt",
-            "sl",
-            "ta",
-            "te",
-            "tk",
-            "und-Ethi",
-        };
-        for (int i = 0; i < availableLanguages.length; i++) {
-            String languageTag = availableLanguages[i];
-            Hyphenator h = loadHyphenator(languageTag);
+        for (int i = 0; i < AVAILABLE_LANGUAGES.length; i++) {
+            HyphenationData data = AVAILABLE_LANGUAGES[i];
+            Hyphenator h = loadHyphenator(data);
             if (h != null) {
-                sMap.put(Locale.forLanguageTag(languageTag), h);
+                sMap.put(Locale.forLanguageTag(data.mLanguageTag), h);
             }
         }
 
diff --git a/core/java/android/text/StaticLayout.java b/core/java/android/text/StaticLayout.java
index cb5b073..94c463c 100644
--- a/core/java/android/text/StaticLayout.java
+++ b/core/java/android/text/StaticLayout.java
@@ -1290,7 +1290,8 @@
     private static native void nFreeBuilder(long nativePtr);
     private static native void nFinishBuilder(long nativePtr);
 
-    /* package */ static native long nLoadHyphenator(ByteBuffer buf, int offset);
+    /* package */ static native long nLoadHyphenator(ByteBuffer buf, int offset,
+            int minPrefix, int minSuffix);
 
     private static native void nSetLocale(long nativePtr, String locale, long nativeHyphenator);
 
diff --git a/core/jni/android_text_StaticLayout.cpp b/core/jni/android_text_StaticLayout.cpp
index 90ed6eb..4a445d8 100644
--- a/core/jni/android_text_StaticLayout.cpp
+++ b/core/jni/android_text_StaticLayout.cpp
@@ -121,7 +121,8 @@
     b->finish();
 }
 
-static jlong nLoadHyphenator(JNIEnv* env, jclass, jobject buffer, jint offset) {
+static jlong nLoadHyphenator(JNIEnv* env, jclass, jobject buffer, jint offset,
+        jint minPrefix, jint minSuffix) {
     const uint8_t* bytebuf = nullptr;
     if (buffer != nullptr) {
         void* rawbuf = env->GetDirectBufferAddress(buffer);
@@ -131,7 +132,8 @@
             ALOGE("failed to get direct buffer address");
         }
     }
-    minikin::Hyphenator* hyphenator = minikin::Hyphenator::loadBinary(bytebuf);
+    minikin::Hyphenator* hyphenator = minikin::Hyphenator::loadBinary(
+            bytebuf, minPrefix, minSuffix);
     return reinterpret_cast<jlong>(hyphenator);
 }
 
@@ -191,7 +193,7 @@
     {"nNewBuilder", "()J", (void*) nNewBuilder},
     {"nFreeBuilder", "(J)V", (void*) nFreeBuilder},
     {"nFinishBuilder", "(J)V", (void*) nFinishBuilder},
-    {"nLoadHyphenator", "(Ljava/nio/ByteBuffer;I)J", (void*) nLoadHyphenator},
+    {"nLoadHyphenator", "(Ljava/nio/ByteBuffer;III)J", (void*) nLoadHyphenator},
     {"nSetLocale", "(JLjava/lang/String;J)V", (void*) nSetLocale},
     {"nSetupParagraph", "(J[CIFIF[IIIIZ)V", (void*) nSetupParagraph},
     {"nSetIndents", "(J[I)V", (void*) nSetIndents},