Customizable minimum suffix/prefix length for hyphenation
With this change, each language can have its own minimum prefix and
suffix lengths when hyphenating. Previously, the English defaults
(minimum prefix 2, minimum suffix 3) were used for every language.
Bug: 35712376
Test: Manual: German text can now break after two characters
Change-Id: Ia12d448a42bf2fab7c0bf5e85f8e27a4fb7f77d8
diff --git a/core/java/android/text/Hyphenator.java b/core/java/android/text/Hyphenator.java
index 80ec03e..c2508a6 100644
--- a/core/java/android/text/Hyphenator.java
+++ b/core/java/android/text/Hyphenator.java
@@ -42,13 +42,24 @@
private static String TAG = "Hyphenator";
+ // TODO: Confirm that these are the best values. Various sources suggest (1, 1), but
+ // that appears too small.
+ private static final int INDIC_MIN_PREFIX = 2;
+ private static final int INDIC_MIN_SUFFIX = 2;
+
private final static Object sLock = new Object();
@GuardedBy("sLock")
final static HashMap<Locale, Hyphenator> sMap = new HashMap<Locale, Hyphenator>();
+ // Reasonable enough values for cases where we have no hyphenation patterns but may be able to
+ // do some automatic hyphenation based on characters. These values would be used very rarely.
+ private static final int DEFAULT_MIN_PREFIX = 2;
+ private static final int DEFAULT_MIN_SUFFIX = 2;
final static Hyphenator sEmptyHyphenator =
- new Hyphenator(StaticLayout.nLoadHyphenator(null, 0), null);
+ new Hyphenator(StaticLayout.nLoadHyphenator(
+ null, 0, DEFAULT_MIN_PREFIX, DEFAULT_MIN_SUFFIX),
+ null);
final private long mNativePtr;
@@ -111,15 +122,26 @@
return sEmptyHyphenator;
}
- private static Hyphenator loadHyphenator(String languageTag) {
- String patternFilename = "hyph-" + languageTag.toLowerCase(Locale.US) + ".hyb";
+ private static class HyphenationData {
+ final String mLanguageTag;
+ final int mMinPrefix, mMinSuffix;
+ HyphenationData(String languageTag, int minPrefix, int minSuffix) {
+ this.mLanguageTag = languageTag;
+ this.mMinPrefix = minPrefix;
+ this.mMinSuffix = minSuffix;
+ }
+ }
+
+ private static Hyphenator loadHyphenator(HyphenationData data) {
+ String patternFilename = "hyph-" + data.mLanguageTag.toLowerCase(Locale.US) + ".hyb";
File patternFile = new File(getSystemHyphenatorLocation(), patternFilename);
try {
RandomAccessFile f = new RandomAccessFile(patternFile, "r");
try {
FileChannel fc = f.getChannel();
MappedByteBuffer buf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size());
- long nativePtr = StaticLayout.nLoadHyphenator(buf, 0);
+ long nativePtr = StaticLayout.nLoadHyphenator(
+ buf, 0, data.mMinPrefix, data.mMinSuffix);
return new Hyphenator(nativePtr, buf);
} finally {
f.close();
@@ -176,6 +198,46 @@
{"wal", "und-Ethi"}, // Wolaytta
};
+ private static final HyphenationData[] AVAILABLE_LANGUAGES = {
+ new HyphenationData("as", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Assamese
+ new HyphenationData("bg", 2, 2), // Bulgarian
+ new HyphenationData("bn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Bengali
+ new HyphenationData("cu", 1, 2), // Church Slavonic
+ new HyphenationData("cy", 2, 3), // Welsh
+ new HyphenationData("da", 2, 2), // Danish
+ new HyphenationData("de-1901", 2, 2), // German 1901 orthography
+ new HyphenationData("de-1996", 2, 2), // German 1996 orthography
+ new HyphenationData("de-CH-1901", 2, 2), // Swiss High German 1901 orthography
+ new HyphenationData("en-GB", 2, 3), // British English
+ new HyphenationData("en-US", 2, 3), // American English
+ new HyphenationData("es", 2, 2), // Spanish
+ new HyphenationData("et", 2, 3), // Estonian
+ new HyphenationData("eu", 2, 2), // Basque
+ new HyphenationData("fr", 2, 3), // French
+ new HyphenationData("ga", 2, 3), // Irish
+ new HyphenationData("gu", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Gujarati
+ new HyphenationData("hi", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Hindi
+ new HyphenationData("hr", 2, 2), // Croatian
+ new HyphenationData("hu", 2, 2), // Hungarian
+ // texhyphen sources say Armenian may be (1, 2), but that it needs confirmation.
+ // Going with a more conservative value of (2, 2) for now.
+ new HyphenationData("hy", 2, 2), // Armenian
+ new HyphenationData("kn", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Kannada
+ new HyphenationData("ml", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Malayalam
+ new HyphenationData("mn-Cyrl", 2, 2), // Mongolian in Cyrillic script
+ new HyphenationData("mr", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Marathi
+ new HyphenationData("nb", 2, 2), // Norwegian Bokmål
+ new HyphenationData("nn", 2, 2), // Norwegian Nynorsk
+ new HyphenationData("or", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Oriya
+ new HyphenationData("pa", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Punjabi
+ new HyphenationData("pt", 2, 3), // Portuguese
+ new HyphenationData("sl", 2, 2), // Slovenian
+ new HyphenationData("ta", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Tamil
+ new HyphenationData("te", INDIC_MIN_PREFIX, INDIC_MIN_SUFFIX), // Telugu
+ new HyphenationData("tk", 2, 2), // Turkmen
+ new HyphenationData("und-Ethi", 1, 1), // Any language in Ethiopic script
+ };
+
/**
* Load hyphenation patterns at initialization time. We want to have patterns
* for all locales loaded and ready to use so we don't have to do any file IO
@@ -186,46 +248,11 @@
public static void init() {
sMap.put(null, null);
- // TODO: replace this with a discovery-based method that looks into /system/usr/hyphen-data
- String[] availableLanguages = {
- "as",
- "bg",
- "bn",
- "cu",
- "cy",
- "da",
- "de-1901", "de-1996", "de-CH-1901",
- "en-GB", "en-US",
- "es",
- "et",
- "eu",
- "fr",
- "ga",
- "gu",
- "hi",
- "hr",
- "hu",
- "hy",
- "kn",
- "ml",
- "mn-Cyrl",
- "mr",
- "nb",
- "nn",
- "or",
- "pa",
- "pt",
- "sl",
- "ta",
- "te",
- "tk",
- "und-Ethi",
- };
- for (int i = 0; i < availableLanguages.length; i++) {
- String languageTag = availableLanguages[i];
- Hyphenator h = loadHyphenator(languageTag);
+ for (int i = 0; i < AVAILABLE_LANGUAGES.length; i++) {
+ HyphenationData data = AVAILABLE_LANGUAGES[i];
+ Hyphenator h = loadHyphenator(data);
if (h != null) {
- sMap.put(Locale.forLanguageTag(languageTag), h);
+ sMap.put(Locale.forLanguageTag(data.mLanguageTag), h);
}
}
diff --git a/core/java/android/text/StaticLayout.java b/core/java/android/text/StaticLayout.java
index cb5b073..94c463c 100644
--- a/core/java/android/text/StaticLayout.java
+++ b/core/java/android/text/StaticLayout.java
@@ -1290,7 +1290,8 @@
private static native void nFreeBuilder(long nativePtr);
private static native void nFinishBuilder(long nativePtr);
- /* package */ static native long nLoadHyphenator(ByteBuffer buf, int offset);
+ /* package */ static native long nLoadHyphenator(ByteBuffer buf, int offset,
+ int minPrefix, int minSuffix);
private static native void nSetLocale(long nativePtr, String locale, long nativeHyphenator);
diff --git a/core/jni/android_text_StaticLayout.cpp b/core/jni/android_text_StaticLayout.cpp
index 90ed6eb..4a445d8 100644
--- a/core/jni/android_text_StaticLayout.cpp
+++ b/core/jni/android_text_StaticLayout.cpp
@@ -121,7 +121,8 @@
b->finish();
}
-static jlong nLoadHyphenator(JNIEnv* env, jclass, jobject buffer, jint offset) {
+static jlong nLoadHyphenator(JNIEnv* env, jclass, jobject buffer, jint offset,
+ jint minPrefix, jint minSuffix) {
const uint8_t* bytebuf = nullptr;
if (buffer != nullptr) {
void* rawbuf = env->GetDirectBufferAddress(buffer);
@@ -131,7 +132,8 @@
ALOGE("failed to get direct buffer address");
}
}
- minikin::Hyphenator* hyphenator = minikin::Hyphenator::loadBinary(bytebuf);
+ minikin::Hyphenator* hyphenator = minikin::Hyphenator::loadBinary(
+ bytebuf, minPrefix, minSuffix);
return reinterpret_cast<jlong>(hyphenator);
}
@@ -191,7 +193,7 @@
{"nNewBuilder", "()J", (void*) nNewBuilder},
{"nFreeBuilder", "(J)V", (void*) nFreeBuilder},
{"nFinishBuilder", "(J)V", (void*) nFinishBuilder},
- {"nLoadHyphenator", "(Ljava/nio/ByteBuffer;I)J", (void*) nLoadHyphenator},
+ {"nLoadHyphenator", "(Ljava/nio/ByteBuffer;III)J", (void*) nLoadHyphenator},
{"nSetLocale", "(JLjava/lang/String;J)V", (void*) nSetLocale},
{"nSetupParagraph", "(J[CIFIF[IIIIZ)V", (void*) nSetupParagraph},
{"nSetIndents", "(J[I)V", (void*) nSetIndents},