Raph Levien | 26d443a | 2015-03-30 14:18:32 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2015 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | package android.text; |
| 18 | |
Roozbeh Pournader | 4d4202b | 2015-05-22 20:43:00 -0700 | [diff] [blame] | 19 | import android.annotation.Nullable; |
Raph Levien | 26d443a | 2015-03-30 14:18:32 -0700 | [diff] [blame] | 20 | import android.util.Log; |
| 21 | |
Raph Levien | 091dba2 | 2015-08-31 16:21:20 -0700 | [diff] [blame] | 22 | import com.android.internal.annotations.GuardedBy; |
Raph Levien | c3f2f7b | 2015-07-06 09:01:36 -0700 | [diff] [blame] | 23 | |
Raph Levien | 26d443a | 2015-03-30 14:18:32 -0700 | [diff] [blame] | 24 | import java.io.File; |
| 25 | import java.io.IOException; |
Raph Levien | 091dba2 | 2015-08-31 16:21:20 -0700 | [diff] [blame] | 26 | import java.io.RandomAccessFile; |
| 27 | import java.nio.ByteBuffer; |
| 28 | import java.nio.MappedByteBuffer; |
| 29 | import java.nio.channels.FileChannel; |
Raph Levien | 26d443a | 2015-03-30 14:18:32 -0700 | [diff] [blame] | 30 | import java.util.HashMap; |
| 31 | import java.util.Locale; |
| 32 | |
| 33 | /** |
| 34 | * Hyphenator is a wrapper class for a native implementation of automatic hyphenation, |
| 35 | * in essence finding valid hyphenation opportunities in a word. |
| 36 | * |
| 37 | * @hide |
| 38 | */ |
Raph Levien | c3dd1c1 | 2015-04-06 10:37:57 -0700 | [diff] [blame] | 39 | public class Hyphenator { |
Raph Levien | 26d443a | 2015-03-30 14:18:32 -0700 | [diff] [blame] | 40 | // This class has deliberately simple lifetime management (no finalizer) because in |
| 41 | // the common case a process will use a very small number of locales. |
| 42 | |
| 43 | private static String TAG = "Hyphenator"; |
| 44 | |
Roozbeh Pournader | 4d4202b | 2015-05-22 20:43:00 -0700 | [diff] [blame] | 45 | private final static Object sLock = new Object(); |
| 46 | |
| 47 | @GuardedBy("sLock") |
Raph Levien | c3f2f7b | 2015-07-06 09:01:36 -0700 | [diff] [blame] | 48 | final static HashMap<Locale, Hyphenator> sMap = new HashMap<Locale, Hyphenator>(); |
Raph Levien | 26d443a | 2015-03-30 14:18:32 -0700 | [diff] [blame] | 49 | |
Raph Levien | 091dba2 | 2015-08-31 16:21:20 -0700 | [diff] [blame] | 50 | final static Hyphenator sEmptyHyphenator = |
| 51 | new Hyphenator(StaticLayout.nLoadHyphenator(null, 0), null); |
Raph Levien | a008961 | 2015-07-27 13:06:24 -0700 | [diff] [blame] | 52 | |
Raph Levien | c3f2f7b | 2015-07-06 09:01:36 -0700 | [diff] [blame] | 53 | final private long mNativePtr; |
Raph Levien | 26d443a | 2015-03-30 14:18:32 -0700 | [diff] [blame] | 54 | |
Raph Levien | 091dba2 | 2015-08-31 16:21:20 -0700 | [diff] [blame] | 55 | // We retain a reference to the buffer to keep the memory mapping valid |
| 56 | @SuppressWarnings("unused") |
| 57 | final private ByteBuffer mBuffer; |
| 58 | |
| 59 | private Hyphenator(long nativePtr, ByteBuffer b) { |
Raph Levien | 26d443a | 2015-03-30 14:18:32 -0700 | [diff] [blame] | 60 | mNativePtr = nativePtr; |
Raph Levien | 091dba2 | 2015-08-31 16:21:20 -0700 | [diff] [blame] | 61 | mBuffer = b; |
Raph Levien | 26d443a | 2015-03-30 14:18:32 -0700 | [diff] [blame] | 62 | } |
| 63 | |
Roozbeh Pournader | e7eac6f | 2015-08-07 15:13:30 -0700 | [diff] [blame] | 64 | public long getNativePtr() { |
| 65 | return mNativePtr; |
| 66 | } |
| 67 | |
| 68 | public static Hyphenator get(@Nullable Locale locale) { |
Roozbeh Pournader | 4d4202b | 2015-05-22 20:43:00 -0700 | [diff] [blame] | 69 | synchronized (sLock) { |
Raph Levien | a008961 | 2015-07-27 13:06:24 -0700 | [diff] [blame] | 70 | Hyphenator result = sMap.get(locale); |
| 71 | if (result != null) { |
Roozbeh Pournader | e7eac6f | 2015-08-07 15:13:30 -0700 | [diff] [blame] | 72 | return result; |
Roozbeh Pournader | 4d4202b | 2015-05-22 20:43:00 -0700 | [diff] [blame] | 73 | } |
| 74 | |
| 75 | // TODO: Convert this a proper locale-fallback system |
| 76 | |
| 77 | // Fall back to language-only, if available |
| 78 | Locale languageOnlyLocale = new Locale(locale.getLanguage()); |
Raph Levien | a008961 | 2015-07-27 13:06:24 -0700 | [diff] [blame] | 79 | result = sMap.get(languageOnlyLocale); |
| 80 | if (result != null) { |
Roozbeh Pournader | 4d4202b | 2015-05-22 20:43:00 -0700 | [diff] [blame] | 81 | sMap.put(locale, result); |
Roozbeh Pournader | e7eac6f | 2015-08-07 15:13:30 -0700 | [diff] [blame] | 82 | return result; |
Roozbeh Pournader | 4d4202b | 2015-05-22 20:43:00 -0700 | [diff] [blame] | 83 | } |
| 84 | |
| 85 | // Fall back to script-only, if available |
| 86 | String script = locale.getScript(); |
| 87 | if (!script.equals("")) { |
| 88 | Locale scriptOnlyLocale = new Locale.Builder() |
| 89 | .setLanguage("und") |
| 90 | .setScript(script) |
| 91 | .build(); |
Raph Levien | a008961 | 2015-07-27 13:06:24 -0700 | [diff] [blame] | 92 | result = sMap.get(scriptOnlyLocale); |
| 93 | if (result != null) { |
Roozbeh Pournader | 4d4202b | 2015-05-22 20:43:00 -0700 | [diff] [blame] | 94 | sMap.put(locale, result); |
Roozbeh Pournader | e7eac6f | 2015-08-07 15:13:30 -0700 | [diff] [blame] | 95 | return result; |
Roozbeh Pournader | 4d4202b | 2015-05-22 20:43:00 -0700 | [diff] [blame] | 96 | } |
| 97 | } |
| 98 | |
Raph Levien | a008961 | 2015-07-27 13:06:24 -0700 | [diff] [blame] | 99 | sMap.put(locale, sEmptyHyphenator); // To remember we found nothing. |
Roozbeh Pournader | 4d4202b | 2015-05-22 20:43:00 -0700 | [diff] [blame] | 100 | } |
Roozbeh Pournader | e7eac6f | 2015-08-07 15:13:30 -0700 | [diff] [blame] | 101 | return sEmptyHyphenator; |
Raph Levien | 26d443a | 2015-03-30 14:18:32 -0700 | [diff] [blame] | 102 | } |
| 103 | |
Roozbeh Pournader | 4d4202b | 2015-05-22 20:43:00 -0700 | [diff] [blame] | 104 | private static Hyphenator loadHyphenator(String languageTag) { |
Raph Levien | 091dba2 | 2015-08-31 16:21:20 -0700 | [diff] [blame] | 105 | String patternFilename = "hyph-" + languageTag.toLowerCase(Locale.US) + ".hyb"; |
Roozbeh Pournader | 4d4202b | 2015-05-22 20:43:00 -0700 | [diff] [blame] | 106 | File patternFile = new File(getSystemHyphenatorLocation(), patternFilename); |
| 107 | try { |
Raph Levien | 091dba2 | 2015-08-31 16:21:20 -0700 | [diff] [blame] | 108 | RandomAccessFile f = new RandomAccessFile(patternFile, "r"); |
| 109 | try { |
| 110 | FileChannel fc = f.getChannel(); |
| 111 | MappedByteBuffer buf = fc.map(FileChannel.MapMode.READ_ONLY, 0, fc.size()); |
| 112 | long nativePtr = StaticLayout.nLoadHyphenator(buf, 0); |
| 113 | return new Hyphenator(nativePtr, buf); |
| 114 | } finally { |
| 115 | f.close(); |
| 116 | } |
Roozbeh Pournader | 4d4202b | 2015-05-22 20:43:00 -0700 | [diff] [blame] | 117 | } catch (IOException e) { |
| 118 | Log.e(TAG, "error loading hyphenation " + patternFile, e); |
Raph Levien | c3f2f7b | 2015-07-06 09:01:36 -0700 | [diff] [blame] | 119 | return null; |
Raph Levien | 26d443a | 2015-03-30 14:18:32 -0700 | [diff] [blame] | 120 | } |
Raph Levien | 26d443a | 2015-03-30 14:18:32 -0700 | [diff] [blame] | 121 | } |
Raph Levien | c3dd1c1 | 2015-04-06 10:37:57 -0700 | [diff] [blame] | 122 | |
| 123 | private static File getSystemHyphenatorLocation() { |
Raph Levien | c3dd1c1 | 2015-04-06 10:37:57 -0700 | [diff] [blame] | 124 | return new File("/system/usr/hyphen-data"); |
| 125 | } |
| 126 | |
Roozbeh Pournader | 4d4202b | 2015-05-22 20:43:00 -0700 | [diff] [blame] | 127 | // This array holds pairs of language tags that are used to prefill the map from locale to |
| 128 | // hyphenation data: The hyphenation data for the first field will be prefilled from the |
| 129 | // hyphenation data for the second field. |
| 130 | // |
| 131 | // The aliases that are computable by the get() method above are not included. |
| 132 | private static final String[][] LOCALE_FALLBACK_DATA = { |
| 133 | // English locales that fall back to en-US. The data is |
| 134 | // from CLDR. It's all English locales, minus the locales whose |
| 135 | // parent is en-001 (from supplementalData.xml, under <parentLocales>). |
| 136 | // TODO: Figure out how to get this from ICU. |
| 137 | {"en-AS", "en-US"}, // English (American Samoa) |
| 138 | {"en-GU", "en-US"}, // English (Guam) |
| 139 | {"en-MH", "en-US"}, // English (Marshall Islands) |
| 140 | {"en-MP", "en-US"}, // English (Northern Mariana Islands) |
| 141 | {"en-PR", "en-US"}, // English (Puerto Rico) |
| 142 | {"en-UM", "en-US"}, // English (United States Minor Outlying Islands) |
| 143 | {"en-VI", "en-US"}, // English (Virgin Islands) |
| 144 | |
| 145 | // Norwegian is very probably Norwegian Bokmål. |
| 146 | {"no", "nb"}, |
| 147 | |
| 148 | // Fall back to Ethiopic script for languages likely to be written in Ethiopic. |
| 149 | // Data is from CLDR's likelySubtags.xml. |
| 150 | // TODO: Convert this to a mechanism using ICU4J's ULocale#addLikelySubtags(). |
| 151 | {"am", "und-Ethi"}, // Amharic |
| 152 | {"byn", "und-Ethi"}, // Blin |
| 153 | {"gez", "und-Ethi"}, // Geʻez |
| 154 | {"ti", "und-Ethi"}, // Tigrinya |
| 155 | {"wal", "und-Ethi"}, // Wolaytta |
| 156 | }; |
| 157 | |
Raph Levien | c3dd1c1 | 2015-04-06 10:37:57 -0700 | [diff] [blame] | 158 | /** |
| 159 | * Load hyphenation patterns at initialization time. We want to have patterns |
| 160 | * for all locales loaded and ready to use so we don't have to do any file IO |
| 161 | * on the UI thread when drawing text in different locales. |
| 162 | * |
| 163 | * @hide |
| 164 | */ |
| 165 | public static void init() { |
Roozbeh Pournader | 4d4202b | 2015-05-22 20:43:00 -0700 | [diff] [blame] | 166 | sMap.put(null, null); |
| 167 | |
| 168 | // TODO: replace this with a discovery-based method that looks into /system/usr/hyphen-data |
Raph Levien | 091dba2 | 2015-08-31 16:21:20 -0700 | [diff] [blame] | 169 | String[] availableLanguages = {"en-US", "eu", "hu", "hy", "nb", "nn", "und-Ethi"}; |
Roozbeh Pournader | 4d4202b | 2015-05-22 20:43:00 -0700 | [diff] [blame] | 170 | for (int i = 0; i < availableLanguages.length; i++) { |
| 171 | String languageTag = availableLanguages[i]; |
| 172 | Hyphenator h = loadHyphenator(languageTag); |
| 173 | if (h != null) { |
| 174 | sMap.put(Locale.forLanguageTag(languageTag), h); |
| 175 | } |
| 176 | } |
| 177 | |
| 178 | for (int i = 0; i < LOCALE_FALLBACK_DATA.length; i++) { |
| 179 | String language = LOCALE_FALLBACK_DATA[i][0]; |
| 180 | String fallback = LOCALE_FALLBACK_DATA[i][1]; |
| 181 | sMap.put(Locale.forLanguageTag(language), sMap.get(Locale.forLanguageTag(fallback))); |
| 182 | } |
Raph Levien | c3dd1c1 | 2015-04-06 10:37:57 -0700 | [diff] [blame] | 183 | } |
Raph Levien | 26d443a | 2015-03-30 14:18:32 -0700 | [diff] [blame] | 184 | } |