Merge "Speak auto-corrections for accessibility"
diff --git a/dictionaries/cs_wordlist.combined.gz b/dictionaries/cs_wordlist.combined.gz
index d69ef64..7829d65 100644
--- a/dictionaries/cs_wordlist.combined.gz
+++ b/dictionaries/cs_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/da_wordlist.combined.gz b/dictionaries/da_wordlist.combined.gz
index 919d28e..e714019 100644
--- a/dictionaries/da_wordlist.combined.gz
+++ b/dictionaries/da_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/de_wordlist.combined.gz b/dictionaries/de_wordlist.combined.gz
index f5cce9d..6a4bd44 100644
--- a/dictionaries/de_wordlist.combined.gz
+++ b/dictionaries/de_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/en_GB_wordlist.combined.gz b/dictionaries/en_GB_wordlist.combined.gz
index afef676..839f3ef 100644
--- a/dictionaries/en_GB_wordlist.combined.gz
+++ b/dictionaries/en_GB_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/en_US_wordlist.combined.gz b/dictionaries/en_US_wordlist.combined.gz
index eafbc9d..5595c75 100644
--- a/dictionaries/en_US_wordlist.combined.gz
+++ b/dictionaries/en_US_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/en_wordlist.combined.gz b/dictionaries/en_wordlist.combined.gz
index 9cbca0b..69c39d5 100644
--- a/dictionaries/en_wordlist.combined.gz
+++ b/dictionaries/en_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/es_wordlist.combined.gz b/dictionaries/es_wordlist.combined.gz
index 53b8607..0a48b6d 100644
--- a/dictionaries/es_wordlist.combined.gz
+++ b/dictionaries/es_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/fi_wordlist.combined.gz b/dictionaries/fi_wordlist.combined.gz
index 2720116..eefbfe5 100644
--- a/dictionaries/fi_wordlist.combined.gz
+++ b/dictionaries/fi_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/fr_wordlist.combined.gz b/dictionaries/fr_wordlist.combined.gz
index 1815e47..1a18320 100644
--- a/dictionaries/fr_wordlist.combined.gz
+++ b/dictionaries/fr_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/hr_wordlist.combined.gz b/dictionaries/hr_wordlist.combined.gz
index 7694a2a..864f676 100644
--- a/dictionaries/hr_wordlist.combined.gz
+++ b/dictionaries/hr_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/it_wordlist.combined.gz b/dictionaries/it_wordlist.combined.gz
index 3b84cd7..dfb1752 100644
--- a/dictionaries/it_wordlist.combined.gz
+++ b/dictionaries/it_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/lt_wordlist.combined.gz b/dictionaries/lt_wordlist.combined.gz
index 316a5af..029722d 100644
--- a/dictionaries/lt_wordlist.combined.gz
+++ b/dictionaries/lt_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/lv_wordlist.combined.gz b/dictionaries/lv_wordlist.combined.gz
index b036ac2..41e1c28 100644
--- a/dictionaries/lv_wordlist.combined.gz
+++ b/dictionaries/lv_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/nb_wordlist.combined.gz b/dictionaries/nb_wordlist.combined.gz
index b6e0d42..b699912 100644
--- a/dictionaries/nb_wordlist.combined.gz
+++ b/dictionaries/nb_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/nl_wordlist.combined.gz b/dictionaries/nl_wordlist.combined.gz
index 48ab0f4..89c2388 100644
--- a/dictionaries/nl_wordlist.combined.gz
+++ b/dictionaries/nl_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/pl_wordlist.combined.gz b/dictionaries/pl_wordlist.combined.gz
index bf02298..2b53f69 100644
--- a/dictionaries/pl_wordlist.combined.gz
+++ b/dictionaries/pl_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/pt_BR_wordlist.combined.gz b/dictionaries/pt_BR_wordlist.combined.gz
index 876eb71..2d22447 100644
--- a/dictionaries/pt_BR_wordlist.combined.gz
+++ b/dictionaries/pt_BR_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/pt_PT_wordlist.combined.gz b/dictionaries/pt_PT_wordlist.combined.gz
index 4068690..1504165 100644
--- a/dictionaries/pt_PT_wordlist.combined.gz
+++ b/dictionaries/pt_PT_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/sl_wordlist.combined.gz b/dictionaries/sl_wordlist.combined.gz
index 41a576b..55e1bb1 100644
--- a/dictionaries/sl_wordlist.combined.gz
+++ b/dictionaries/sl_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/sr_wordlist.combined.gz b/dictionaries/sr_wordlist.combined.gz
index dec6ae8..8488a08 100644
--- a/dictionaries/sr_wordlist.combined.gz
+++ b/dictionaries/sr_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/sv_wordlist.combined.gz b/dictionaries/sv_wordlist.combined.gz
index 0471772..6342520 100644
--- a/dictionaries/sv_wordlist.combined.gz
+++ b/dictionaries/sv_wordlist.combined.gz
Binary files differ
diff --git a/dictionaries/tr_wordlist.combined.gz b/dictionaries/tr_wordlist.combined.gz
index fae79ca..0251778 100644
--- a/dictionaries/tr_wordlist.combined.gz
+++ b/dictionaries/tr_wordlist.combined.gz
Binary files differ
diff --git a/java/res/raw/main_de.dict b/java/res/raw/main_de.dict
index 5d35e64..69796bb 100644
--- a/java/res/raw/main_de.dict
+++ b/java/res/raw/main_de.dict
Binary files differ
diff --git a/java/res/raw/main_en.dict b/java/res/raw/main_en.dict
index 8660c28..bef6b10 100644
--- a/java/res/raw/main_en.dict
+++ b/java/res/raw/main_en.dict
Binary files differ
diff --git a/java/res/raw/main_es.dict b/java/res/raw/main_es.dict
index f5906c2..261ab8c 100644
--- a/java/res/raw/main_es.dict
+++ b/java/res/raw/main_es.dict
Binary files differ
diff --git a/java/res/raw/main_fr.dict b/java/res/raw/main_fr.dict
index 0d2e518..18f5298 100644
--- a/java/res/raw/main_fr.dict
+++ b/java/res/raw/main_fr.dict
Binary files differ
diff --git a/java/res/raw/main_it.dict b/java/res/raw/main_it.dict
index 523f645..e161c24 100644
--- a/java/res/raw/main_it.dict
+++ b/java/res/raw/main_it.dict
Binary files differ
diff --git a/java/res/raw/main_pt_br.dict b/java/res/raw/main_pt_br.dict
index 98a27c7..21bbe7c 100644
--- a/java/res/raw/main_pt_br.dict
+++ b/java/res/raw/main_pt_br.dict
Binary files differ
diff --git a/java/res/values-land/dimens.xml b/java/res/values-land/dimens.xml
index 42a746b..b874d48 100644
--- a/java/res/values-land/dimens.xml
+++ b/java/res/values-land/dimens.xml
@@ -77,7 +77,7 @@
<!-- Emoji keyboard -->
<fraction name="emoji_keyboard_key_width">10%p</fraction>
<fraction name="emoji_keyboard_row_height">50%p</fraction>
- <fraction name="emoji_keyboard_key_letter_size">60%p</fraction>
+ <fraction name="emoji_keyboard_key_letter_size">54%p</fraction>
<integer name="emoji_keyboard_max_key_count">20</integer>
</resources>
diff --git a/java/res/values-sw600dp-land/dimens.xml b/java/res/values-sw600dp-land/dimens.xml
index 730b7d8..d067265 100644
--- a/java/res/values-sw600dp-land/dimens.xml
+++ b/java/res/values-sw600dp-land/dimens.xml
@@ -66,7 +66,7 @@
<!-- Emoji keyboard -->
<fraction name="emoji_keyboard_key_width">10%p</fraction>
<fraction name="emoji_keyboard_row_height">33%p</fraction>
- <fraction name="emoji_keyboard_key_letter_size">95%p</fraction>
+ <fraction name="emoji_keyboard_key_letter_size">85%p</fraction>
<integer name="emoji_keyboard_max_key_count">30</integer>
</resources>
diff --git a/java/res/values-sw600dp/dimens.xml b/java/res/values-sw600dp/dimens.xml
index 2bcf2fa..591355d 100644
--- a/java/res/values-sw600dp/dimens.xml
+++ b/java/res/values-sw600dp/dimens.xml
@@ -92,7 +92,7 @@
<!-- Emoji keyboard -->
<fraction name="emoji_keyboard_key_width">12.5%p</fraction>
<fraction name="emoji_keyboard_row_height">33%p</fraction>
- <fraction name="emoji_keyboard_key_letter_size">85%p</fraction>
+ <fraction name="emoji_keyboard_key_letter_size">76%p</fraction>
<integer name="emoji_keyboard_max_key_count">24</integer>
</resources>
diff --git a/java/res/values-sw768dp-land/dimens.xml b/java/res/values-sw768dp-land/dimens.xml
index 1e2e1c6..664630b 100644
--- a/java/res/values-sw768dp-land/dimens.xml
+++ b/java/res/values-sw768dp-land/dimens.xml
@@ -67,7 +67,7 @@
<!-- Emoji keyboard -->
<fraction name="emoji_keyboard_key_width">7.69%p</fraction>
<fraction name="emoji_keyboard_row_height">33%p</fraction>
- <fraction name="emoji_keyboard_key_letter_size">75%p</fraction>
+ <fraction name="emoji_keyboard_key_letter_size">68%p</fraction>
<integer name="emoji_keyboard_max_key_count">39</integer>
</resources>
diff --git a/java/res/values-sw768dp/dimens.xml b/java/res/values-sw768dp/dimens.xml
index f62a536..1fd933a 100644
--- a/java/res/values-sw768dp/dimens.xml
+++ b/java/res/values-sw768dp/dimens.xml
@@ -92,7 +92,7 @@
<!-- Emoji keyboard -->
<fraction name="emoji_keyboard_key_width">10%p</fraction>
<fraction name="emoji_keyboard_row_height">33%p</fraction>
- <fraction name="emoji_keyboard_key_letter_size">85%p</fraction>
+ <fraction name="emoji_keyboard_key_letter_size">76%p</fraction>
<integer name="emoji_keyboard_max_key_count">30</integer>
</resources>
diff --git a/java/res/values/dimens.xml b/java/res/values/dimens.xml
index 4e3b2f5..2d626db 100644
--- a/java/res/values/dimens.xml
+++ b/java/res/values/dimens.xml
@@ -118,7 +118,7 @@
<!-- Emoji keyboard -->
<fraction name="emoji_keyboard_key_width">14.2857%p</fraction>
<fraction name="emoji_keyboard_row_height">33%p</fraction>
- <fraction name="emoji_keyboard_key_letter_size">90%p</fraction>
+ <fraction name="emoji_keyboard_key_letter_size">81%p</fraction>
<integer name="emoji_keyboard_max_key_count">21</integer>
<dimen name="emoji_category_page_id_height">3dp</dimen>
diff --git a/java/src/com/android/inputmethod/keyboard/EmojiKeyboardView.java b/java/src/com/android/inputmethod/keyboard/EmojiKeyboardView.java
index 61dc56e..eb48d01 100644
--- a/java/src/com/android/inputmethod/keyboard/EmojiKeyboardView.java
+++ b/java/src/com/android/inputmethod/keyboard/EmojiKeyboardView.java
@@ -718,12 +718,14 @@
@Override
public void run() {
+ int repeatCount = 1;
int timeCount = 0;
while (timeCount < MAX_REPEAT_COUNT_TIME && !mAborted) {
if (timeCount > mKeyRepeatStartTimeout) {
- pressDelete();
+ pressDelete(repeatCount);
}
timeCount += mKeyRepeatInterval;
+ ++repeatCount;
try {
Thread.sleep(mKeyRepeatInterval);
} catch (InterruptedException e) {
@@ -736,9 +738,9 @@
}
}
- public void pressDelete() {
+ public void pressDelete(int repeatCount) {
mKeyboardActionListener.onPressKey(
- Constants.CODE_DELETE, 0 /* repeatCount */, true /* isSinglePointer */);
+ Constants.CODE_DELETE, repeatCount, true /* isSinglePointer */);
mKeyboardActionListener.onCodeInput(
Constants.CODE_DELETE, NOT_A_COORDINATE, NOT_A_COORDINATE);
mKeyboardActionListener.onReleaseKey(
@@ -754,7 +756,7 @@
switch(event.getAction()) {
case MotionEvent.ACTION_DOWN:
v.setBackgroundColor(mDeleteKeyPressedBackgroundColor);
- pressDelete();
+ pressDelete(0 /* repeatCount */);
startRepeat();
return true;
case MotionEvent.ACTION_UP:
diff --git a/java/src/com/android/inputmethod/keyboard/KeyboardSwitcher.java b/java/src/com/android/inputmethod/keyboard/KeyboardSwitcher.java
index cc1ffd1..74edd87 100644
--- a/java/src/com/android/inputmethod/keyboard/KeyboardSwitcher.java
+++ b/java/src/com/android/inputmethod/keyboard/KeyboardSwitcher.java
@@ -315,7 +315,7 @@
}
public boolean isShowingEmojiKeyboard() {
- return mEmojiKeyboardView.getVisibility() == View.VISIBLE;
+ return mEmojiKeyboardView != null && mEmojiKeyboardView.getVisibility() == View.VISIBLE;
}
public boolean isShowingMoreKeysPanel() {
diff --git a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java
index 845a9b9..4a0ce37 100644
--- a/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java
+++ b/java/src/com/android/inputmethod/latin/AbstractDictionaryWriter.java
@@ -25,6 +25,7 @@
import java.io.File;
import java.io.IOException;
+import java.util.Map;
// TODO: Quit extending Dictionary after implementing dynamic binary dictionary.
abstract public class AbstractDictionaryWriter extends Dictionary {
@@ -50,16 +51,16 @@
abstract public void removeBigramWords(final String word0, final String word1);
- abstract protected void writeDictionary(final DictEncoder dictEncoder)
- throws IOException, UnsupportedFormatException;
+ abstract protected void writeDictionary(final DictEncoder dictEncoder,
+ final Map<String, String> attributeMap) throws IOException, UnsupportedFormatException;
- public void write(final String fileName) {
+ public void write(final String fileName, final Map<String, String> attributeMap) {
final String tempFileName = fileName + ".temp";
final File file = new File(mContext.getFilesDir(), fileName);
final File tempFile = new File(mContext.getFilesDir(), tempFileName);
try {
final DictEncoder dictEncoder = new Ver3DictEncoder(tempFile);
- writeDictionary(dictEncoder);
+ writeDictionary(dictEncoder, attributeMap);
tempFile.renameTo(file);
} catch (IOException e) {
Log.e(TAG, "IO exception while writing file", e);
diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionary.java b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
index a463651..29c6c04 100644
--- a/java/src/com/android/inputmethod/latin/BinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/BinaryDictionary.java
@@ -48,6 +48,11 @@
// TODO: Remove this heuristic.
private static final int SPACE_COUNT_FOR_AUTO_COMMIT = 3;
+ @UsedForTesting
+ public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
+ @UsedForTesting
+ public static final String BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
+
private long mNativeDict;
private final Locale mLocale;
private final long mDictSize;
@@ -110,7 +115,7 @@
private static native long openNative(String sourceDir, long dictOffset, long dictSize,
boolean isUpdatable);
private static native void flushNative(long dict, String filePath);
- private static native boolean needsToRunGCNative(long dict);
+ private static native boolean needsToRunGCNative(long dict, boolean mindsBlockByGC);
private static native void flushWithGCNative(long dict, String filePath);
private static native void closeNative(long dict);
private static native int getProbabilityNative(long dict, int[] word);
@@ -129,6 +134,7 @@
private static native void removeBigramWordsNative(long dict, int[] word0, int[] word1);
private static native int calculateProbabilityNative(long dict, int unigramProbability,
int bigramProbability);
+ private static native String getPropertyNative(long dict, String query);
@UsedForTesting
public static boolean createEmptyDictFile(final String filePath, final long dictVersion,
@@ -264,7 +270,7 @@
}
private void runGCIfRequired() {
- if (needsToRunGCNative(mNativeDict)) {
+ if (needsToRunGC(true /* mindsBlockByGC */)) {
flushWithGC();
}
}
@@ -320,9 +326,15 @@
reopen();
}
- public boolean needsToRunGC() {
+ /**
+ * Checks whether GC is needed to run or not.
+ * @param mindsBlockByGC Whether to mind operations blocked by GC. We don't need to care about
+ * the blocking in some situations such as in idle time or just before closing.
+ * @return whether GC is needed to run or not.
+ */
+ public boolean needsToRunGC(final boolean mindsBlockByGC) {
if (!isValidDictionary()) return false;
- return needsToRunGCNative(mNativeDict);
+ return needsToRunGCNative(mNativeDict, mindsBlockByGC);
}
@UsedForTesting
@@ -331,6 +343,12 @@
return calculateProbabilityNative(mNativeDict, unigramProbability, bigramProbability);
}
+ @UsedForTesting
+ public String getPropertyForTests(String query) {
+ if (!isValidDictionary()) return "";
+ return getPropertyNative(mNativeDict, query);
+ }
+
@Override
public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
// TODO: actually use the confidence rather than use this completely broken heuristic
diff --git a/java/src/com/android/inputmethod/latin/DictionaryFactory.java b/java/src/com/android/inputmethod/latin/DictionaryFactory.java
index 3721132..828e54f 100644
--- a/java/src/com/android/inputmethod/latin/DictionaryFactory.java
+++ b/java/src/com/android/inputmethod/latin/DictionaryFactory.java
@@ -51,7 +51,7 @@
if (null == locale) {
Log.e(TAG, "No locale defined for dictionary");
return new DictionaryCollection(Dictionary.TYPE_MAIN,
- createBinaryDictionary(context, locale));
+ createReadOnlyBinaryDictionary(context, locale));
}
final LinkedList<Dictionary> dictList = CollectionUtils.newLinkedList();
@@ -59,11 +59,11 @@
BinaryDictionaryGetter.getDictionaryFiles(locale, context);
if (null != assetFileList) {
for (final AssetFileAddress f : assetFileList) {
- final BinaryDictionary binaryDictionary = new BinaryDictionary(f.mFilename,
- f.mOffset, f.mLength, useFullEditDistance, locale, Dictionary.TYPE_MAIN,
- false /* isUpdatable */);
- if (binaryDictionary.isValidDictionary()) {
- dictList.add(binaryDictionary);
+ final ReadOnlyBinaryDictionary readOnlyBinaryDictionary =
+ new ReadOnlyBinaryDictionary(f.mFilename, f.mOffset, f.mLength,
+ useFullEditDistance, locale, Dictionary.TYPE_MAIN);
+ if (readOnlyBinaryDictionary.isValidDictionary()) {
+ dictList.add(readOnlyBinaryDictionary);
}
}
}
@@ -89,12 +89,12 @@
}
/**
- * Initializes a dictionary from a raw resource file
+ * Initializes a read-only binary dictionary from a raw resource file
* @param context application context for reading resources
* @param locale the locale to use for the resource
- * @return an initialized instance of BinaryDictionary
+ * @return an initialized instance of ReadOnlyBinaryDictionary
*/
- protected static BinaryDictionary createBinaryDictionary(final Context context,
+ protected static ReadOnlyBinaryDictionary createReadOnlyBinaryDictionary(final Context context,
final Locale locale) {
AssetFileDescriptor afd = null;
try {
@@ -113,9 +113,8 @@
Log.e(TAG, "sourceDir is not a file: " + sourceDir);
return null;
}
- return new BinaryDictionary(sourceDir, afd.getStartOffset(), afd.getLength(),
- false /* useFullEditDistance */, locale, Dictionary.TYPE_MAIN,
- false /* isUpdatable */);
+ return new ReadOnlyBinaryDictionary(sourceDir, afd.getStartOffset(), afd.getLength(),
+ false /* useFullEditDistance */, locale, Dictionary.TYPE_MAIN);
} catch (android.content.res.Resources.NotFoundException e) {
Log.e(TAG, "Could not find the resource");
return null;
@@ -142,10 +141,10 @@
final DictionaryCollection dictionaryCollection =
new DictionaryCollection(Dictionary.TYPE_MAIN);
for (final AssetFileAddress address : dictionaryList) {
- final BinaryDictionary binaryDictionary = new BinaryDictionary(address.mFilename,
- address.mOffset, address.mLength, useFullEditDistance, locale,
- Dictionary.TYPE_MAIN, false /* isUpdatable */);
- dictionaryCollection.addDictionary(binaryDictionary);
+ final ReadOnlyBinaryDictionary readOnlyBinaryDictionary = new ReadOnlyBinaryDictionary(
+ address.mFilename, address.mOffset, address.mLength, useFullEditDistance,
+ locale, Dictionary.TYPE_MAIN);
+ dictionaryCollection.addDictionary(readOnlyBinaryDictionary);
}
return dictionaryCollection;
}
diff --git a/java/src/com/android/inputmethod/latin/DictionaryWriter.java b/java/src/com/android/inputmethod/latin/DictionaryWriter.java
index 5a453dd..84abfa6 100644
--- a/java/src/com/android/inputmethod/latin/DictionaryWriter.java
+++ b/java/src/com/android/inputmethod/latin/DictionaryWriter.java
@@ -31,6 +31,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.Map;
/**
* An in memory dictionary for memorizing entries and writing a binary dictionary.
@@ -84,8 +85,11 @@
}
@Override
- protected void writeDictionary(final DictEncoder dictEncoder)
- throws IOException, UnsupportedFormatException {
+ protected void writeDictionary(final DictEncoder dictEncoder,
+ final Map<String, String> attributeMap) throws IOException, UnsupportedFormatException {
+ for (final Map.Entry<String, String> entry : attributeMap.entrySet()) {
+ mFusionDictionary.addOptionAttribute(entry.getKey(), entry.getValue());
+ }
dictEncoder.writeDictionary(mFusionDictionary, FORMAT_OPTIONS);
}
diff --git a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
index 99859de..2d1ca51 100644
--- a/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
+++ b/java/src/com/android/inputmethod/latin/ExpandableBinaryDictionary.java
@@ -34,6 +34,7 @@
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
/**
@@ -69,14 +70,15 @@
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE;
/**
- * A static map of time recorders, each of which records the time of accesses to a single binary
- * dictionary file. The key for this map is the filename and the value is the shared dictionary
- * time recorder associated with that filename.
+ * A static map of update controllers, each of which records the time of accesses to a single
+ * binary dictionary file and tracks whether the file is regenerating. The key for this map is
+ * the filename and the value is the shared dictionary time recorder associated with that
+ * filename.
*/
- private static volatile ConcurrentHashMap<String, DictionaryTimeRecorder>
- sFilenameDictionaryTimeRecorderMap = CollectionUtils.newConcurrentHashMap();
+ private static final ConcurrentHashMap<String, DictionaryUpdateController>
+ sFilenameDictionaryUpdateControllerMap = CollectionUtils.newConcurrentHashMap();
- private static volatile ConcurrentHashMap<String, PrioritizedSerialExecutor>
+ private static final ConcurrentHashMap<String, PrioritizedSerialExecutor>
sFilenameExecutorMap = CollectionUtils.newConcurrentHashMap();
/** The application context. */
@@ -103,13 +105,13 @@
private final boolean mIsUpdatable;
// TODO: remove, once dynamic operations is serialized
- /** Records access to the shared binary dictionary file across multiple instances. */
- private final DictionaryTimeRecorder mFilenameDictionaryTimeRecorder;
+ /** Controls updating the shared binary dictionary file across multiple instances. */
+ private final DictionaryUpdateController mFilenameDictionaryUpdateController;
// TODO: remove, once dynamic operations is serialized
- /** Records access to the local binary dictionary for this instance. */
- private final DictionaryTimeRecorder mPerInstanceDictionaryTimeRecorder =
- new DictionaryTimeRecorder();
+ /** Controls updating the local binary dictionary for this instance. */
+ private final DictionaryUpdateController mPerInstanceDictionaryUpdateController =
+ new DictionaryUpdateController();
/* A extension for a binary dictionary file. */
public static final String DICT_FILE_EXTENSION = ".dict";
@@ -131,15 +133,15 @@
protected abstract boolean hasContentChanged();
/**
- * Gets the dictionary time recorder for the given filename.
+ * Gets the dictionary update controller for the given filename.
*/
- private static DictionaryTimeRecorder getDictionaryTimeRecorder(
+ private static DictionaryUpdateController getDictionaryUpdateController(
String filename) {
- DictionaryTimeRecorder recorder = sFilenameDictionaryTimeRecorderMap.get(filename);
+ DictionaryUpdateController recorder = sFilenameDictionaryUpdateControllerMap.get(filename);
if (recorder == null) {
- synchronized(sFilenameDictionaryTimeRecorderMap) {
- recorder = new DictionaryTimeRecorder();
- sFilenameDictionaryTimeRecorderMap.put(filename, recorder);
+ synchronized(sFilenameDictionaryUpdateControllerMap) {
+ recorder = new DictionaryUpdateController();
+ sFilenameDictionaryUpdateControllerMap.put(filename, recorder);
}
}
return recorder;
@@ -189,7 +191,7 @@
mContext = context;
mIsUpdatable = isUpdatable;
mBinaryDictionary = null;
- mFilenameDictionaryTimeRecorder = getDictionaryTimeRecorder(filename);
+ mFilenameDictionaryUpdateController = getDictionaryUpdateController(filename);
// Currently, only dynamic personalization dictionary is updatable.
mDictionaryWriter = getDictionaryWriter(context, dictType, isUpdatable);
}
@@ -234,6 +236,7 @@
HashMap<String, String> attributeMap = new HashMap<String, String>();
attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
SUPPORTS_DYNAMIC_UPDATE);
+ attributeMap.put(FormatSpec.FileHeader.DICTIONARY_ID_ATTRIBUTE, mFilename);
return attributeMap;
}
@@ -347,6 +350,9 @@
final boolean blockOffensiveWords, final int[] additionalFeaturesOptions,
final int sessionId) {
reloadDictionaryIfRequired();
+ if (isRegenerating()) {
+ return null;
+ }
final ArrayList<SuggestedWordInfo> suggestions = CollectionUtils.newArrayList();
final AsyncResultHolder<ArrayList<SuggestedWordInfo>> holder =
new AsyncResultHolder<ArrayList<SuggestedWordInfo>>();
@@ -407,6 +413,9 @@
}
protected boolean isValidWordInner(final String word) {
+ if (isRegenerating()) {
+ return false;
+ }
final AsyncResultHolder<Boolean> holder = new AsyncResultHolder<Boolean>();
getExecutor(mFilename).executePrioritized(new Runnable() {
@Override
@@ -432,7 +441,7 @@
* dictionary exists, this method will generate one.
*/
protected void loadDictionary() {
- mPerInstanceDictionaryTimeRecorder.mLastUpdateRequestTime = SystemClock.uptimeMillis();
+ mPerInstanceDictionaryUpdateController.mLastUpdateRequestTime = SystemClock.uptimeMillis();
reloadDictionaryIfRequired();
}
@@ -443,8 +452,8 @@
private void loadBinaryDictionary() {
if (DEBUG) {
Log.d(TAG, "Loading binary dictionary: " + mFilename + " request="
- + mFilenameDictionaryTimeRecorder.mLastUpdateRequestTime + " update="
- + mFilenameDictionaryTimeRecorder.mLastUpdateTime);
+ + mFilenameDictionaryUpdateController.mLastUpdateRequestTime + " update="
+ + mFilenameDictionaryUpdateController.mLastUpdateTime);
}
final File file = new File(mContext.getFilesDir(), mFilename);
@@ -482,13 +491,13 @@
private void writeBinaryDictionary() {
if (DEBUG) {
Log.d(TAG, "Generating binary dictionary: " + mFilename + " request="
- + mFilenameDictionaryTimeRecorder.mLastUpdateRequestTime + " update="
- + mFilenameDictionaryTimeRecorder.mLastUpdateTime);
+ + mFilenameDictionaryUpdateController.mLastUpdateRequestTime + " update="
+ + mFilenameDictionaryUpdateController.mLastUpdateTime);
}
if (needsToReloadBeforeWriting()) {
mDictionaryWriter.clear();
loadDictionaryAsync();
- mDictionaryWriter.write(mFilename);
+ mDictionaryWriter.write(mFilename, getHeaderAttributeMap());
} else {
if (ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE) {
if (mBinaryDictionary == null || !mBinaryDictionary.isValidDictionary()) {
@@ -496,14 +505,14 @@
BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
DICTIONARY_FORMAT_VERSION, getHeaderAttributeMap());
} else {
- if (mBinaryDictionary.needsToRunGC()) {
+ if (mBinaryDictionary.needsToRunGC(false /* mindsBlockByGC */)) {
mBinaryDictionary.flushWithGC();
} else {
mBinaryDictionary.flush();
}
}
} else {
- mDictionaryWriter.write(mFilename);
+ mDictionaryWriter.write(mFilename, getHeaderAttributeMap());
}
}
}
@@ -517,11 +526,11 @@
*/
protected void setRequiresReload(final boolean requiresRebuild) {
final long time = SystemClock.uptimeMillis();
- mPerInstanceDictionaryTimeRecorder.mLastUpdateRequestTime = time;
- mFilenameDictionaryTimeRecorder.mLastUpdateRequestTime = time;
+ mPerInstanceDictionaryUpdateController.mLastUpdateRequestTime = time;
+ mFilenameDictionaryUpdateController.mLastUpdateRequestTime = time;
if (DEBUG) {
Log.d(TAG, "Reload request: " + mFilename + ": request=" + time + " update="
- + mFilenameDictionaryTimeRecorder.mLastUpdateTime);
+ + mFilenameDictionaryUpdateController.mLastUpdateTime);
}
}
@@ -530,14 +539,26 @@
*/
public final void reloadDictionaryIfRequired() {
if (!isReloadRequired()) return;
- reloadDictionary();
+ if (setIsRegeneratingIfNotRegenerating()) {
+ reloadDictionary();
+ }
}
/**
* Returns whether a dictionary reload is required.
*/
private boolean isReloadRequired() {
- return mBinaryDictionary == null || mPerInstanceDictionaryTimeRecorder.isOutOfDate();
+ return mBinaryDictionary == null || mPerInstanceDictionaryUpdateController.isOutOfDate();
+ }
+
+ private boolean isRegenerating() {
+ return mFilenameDictionaryUpdateController.mIsRegenerating.get();
+ }
+
+ // Returns whether the dictionary can be regenerated.
+ private boolean setIsRegeneratingIfNotRegenerating() {
+ return mFilenameDictionaryUpdateController.mIsRegenerating.compareAndSet(
+ false /* expect */ , true /* update */);
}
/**
@@ -550,39 +571,44 @@
getExecutor(mFilename).execute(new Runnable() {
@Override
public void run() {
- final long time = SystemClock.uptimeMillis();
- final boolean dictionaryFileExists = dictionaryFileExists();
- if (mFilenameDictionaryTimeRecorder.isOutOfDate() || !dictionaryFileExists) {
- // If the shared dictionary file does not exist or is out of date, the first
- // instance that acquires the lock will generate a new one.
- if (hasContentChanged() || !dictionaryFileExists) {
- // If the source content has changed or the dictionary does not exist,
- // rebuild the binary dictionary. Empty dictionaries are supported (in the
- // case where loadDictionaryAsync() adds nothing) in order to provide a
- // uniform framework.
- mFilenameDictionaryTimeRecorder.mLastUpdateTime = time;
+ try {
+ final long time = SystemClock.uptimeMillis();
+ final boolean dictionaryFileExists = dictionaryFileExists();
+ if (mFilenameDictionaryUpdateController.isOutOfDate()
+ || !dictionaryFileExists) {
+ // If the shared dictionary file does not exist or is out of date, the
+ // first instance that acquires the lock will generate a new one.
+ if (hasContentChanged() || !dictionaryFileExists) {
+ // If the source content has changed or the dictionary does not exist,
+ // rebuild the binary dictionary. Empty dictionaries are supported (in
+ // the case where loadDictionaryAsync() adds nothing) in order to
+ // provide a uniform framework.
+ mFilenameDictionaryUpdateController.mLastUpdateTime = time;
+ writeBinaryDictionary();
+ loadBinaryDictionary();
+ } else {
+ // If not, the reload request was unnecessary so revert
+ // LastUpdateRequestTime to LastUpdateTime.
+ mFilenameDictionaryUpdateController.mLastUpdateRequestTime =
+ mFilenameDictionaryUpdateController.mLastUpdateTime;
+ }
+ } else if (mBinaryDictionary == null ||
+ mPerInstanceDictionaryUpdateController.mLastUpdateTime
+ < mFilenameDictionaryUpdateController.mLastUpdateTime) {
+ // Otherwise, if the local dictionary is older than the shared dictionary,
+ // load the shared dictionary.
+ loadBinaryDictionary();
+ }
+ if (mBinaryDictionary != null && !mBinaryDictionary.isValidDictionary()) {
+ // Binary dictionary is not valid. Regenerate the dictionary file.
+ mFilenameDictionaryUpdateController.mLastUpdateTime = time;
writeBinaryDictionary();
loadBinaryDictionary();
- } else {
- // If not, the reload request was unnecessary so revert
- // LastUpdateRequestTime to LastUpdateTime.
- mFilenameDictionaryTimeRecorder.mLastUpdateRequestTime =
- mFilenameDictionaryTimeRecorder.mLastUpdateTime;
}
- } else if (mBinaryDictionary == null ||
- mPerInstanceDictionaryTimeRecorder.mLastUpdateTime
- < mFilenameDictionaryTimeRecorder.mLastUpdateTime) {
- // Otherwise, if the local dictionary is older than the shared dictionary, load
- // the shared dictionary.
- loadBinaryDictionary();
+ mPerInstanceDictionaryUpdateController.mLastUpdateTime = time;
+ } finally {
+ mFilenameDictionaryUpdateController.mIsRegenerating.set(false);
}
- if (mBinaryDictionary != null && !mBinaryDictionary.isValidDictionary()) {
- // Binary dictionary is not valid. Regenerate the dictionary file.
- mFilenameDictionaryTimeRecorder.mLastUpdateTime = time;
- writeBinaryDictionary();
- loadBinaryDictionary();
- }
- mPerInstanceDictionaryTimeRecorder.mLastUpdateTime = time;
}
});
}
@@ -622,59 +648,19 @@
}
/**
- * Time recorder for tracking whether the dictionary is out of date.
+ * For tracking whether the dictionary is out of date and the dictionary is regenerating.
* Can be shared across multiple dictionary instances that access the same filename.
*/
- private static class DictionaryTimeRecorder {
- private volatile long mLastUpdateTime = 0;
- private volatile long mLastUpdateRequestTime = 0;
+ private static class DictionaryUpdateController {
+ public volatile long mLastUpdateTime = 0;
+ public volatile long mLastUpdateRequestTime = 0;
+ public volatile AtomicBoolean mIsRegenerating = new AtomicBoolean();
- private boolean isOutOfDate() {
+ public boolean isOutOfDate() {
return (mLastUpdateRequestTime > mLastUpdateTime);
}
}
- /**
- * Dynamically adds a word unigram to the dictionary for testing with blocking-lock.
- */
- @UsedForTesting
- protected void addWordDynamicallyForTests(final String word, final String shortcutTarget,
- final int frequency, final boolean isNotAWord) {
- getExecutor(mFilename).executePrioritized(new Runnable() {
- @Override
- public void run() {
- addWordDynamically(word, shortcutTarget, frequency, isNotAWord);
- }
- });
- }
-
- /**
- * Dynamically adds a word bigram in the dictionary for testing with blocking-lock.
- */
- @UsedForTesting
- protected void addBigramDynamicallyForTests(final String word0, final String word1,
- final int frequency, final boolean isValid) {
- getExecutor(mFilename).executePrioritized(new Runnable() {
- @Override
- public void run() {
- addBigramDynamically(word0, word1, frequency, isValid);
- }
- });
- }
-
- /**
- * Dynamically remove a word bigram in the dictionary for testing with blocking-lock.
- */
- @UsedForTesting
- protected void removeBigramDynamicallyForTests(final String word0, final String word1) {
- getExecutor(mFilename).executePrioritized(new Runnable() {
- @Override
- public void run() {
- removeBigramDynamically(word0, word1);
- }
- });
- }
-
// TODO: Implement native binary methods once the dynamic dictionary implementation is done.
@UsedForTesting
public boolean isInDictionaryForTests(final String word) {
@@ -687,7 +673,7 @@
holder.set(mBinaryDictionary.isValidWord(word));
} else {
holder.set(((DynamicPersonalizationDictionaryWriter) mDictionaryWriter)
- .isInDictionaryForTests(word));
+ .isInBigramListForTests(word));
}
}
}
diff --git a/java/src/com/android/inputmethod/latin/InputAttributes.java b/java/src/com/android/inputmethod/latin/InputAttributes.java
index 21b103e..8caf6f1 100644
--- a/java/src/com/android/inputmethod/latin/InputAttributes.java
+++ b/java/src/com/android/inputmethod/latin/InputAttributes.java
@@ -103,6 +103,10 @@
}
}
+ public boolean isTypeNull() {
+ return InputType.TYPE_NULL == mInputType;
+ }
+
public boolean isSameInputType(final EditorInfo editorInfo) {
return editorInfo.inputType == mInputType;
}
diff --git a/java/src/com/android/inputmethod/latin/LatinIME.java b/java/src/com/android/inputmethod/latin/LatinIME.java
index c383f3e..96e16de 100644
--- a/java/src/com/android/inputmethod/latin/LatinIME.java
+++ b/java/src/com/android/inputmethod/latin/LatinIME.java
@@ -81,7 +81,7 @@
import com.android.inputmethod.latin.personalization.PersonalizationDictionarySessionRegister;
import com.android.inputmethod.latin.personalization.PersonalizationHelper;
import com.android.inputmethod.latin.personalization.PersonalizationPredictionDictionary;
-import com.android.inputmethod.latin.personalization.UserHistoryPredictionDictionary;
+import com.android.inputmethod.latin.personalization.UserHistoryDictionary;
import com.android.inputmethod.latin.settings.Settings;
import com.android.inputmethod.latin.settings.SettingsActivity;
import com.android.inputmethod.latin.settings.SettingsValues;
@@ -179,7 +179,7 @@
private boolean mIsMainDictionaryAvailable;
private UserBinaryDictionary mUserDictionary;
- private UserHistoryPredictionDictionary mUserHistoryPredictionDictionary;
+ private UserHistoryDictionary mUserHistoryDictionary;
private PersonalizationPredictionDictionary mPersonalizationPredictionDictionary;
private PersonalizationDictionary mPersonalizationDictionary;
private boolean mIsUserDictionaryAvailable;
@@ -623,9 +623,9 @@
final SharedPreferences prefs = PreferenceManager.getDefaultSharedPreferences(this);
- mUserHistoryPredictionDictionary = PersonalizationHelper
- .getUserHistoryPredictionDictionary(this, localeStr, prefs);
- newSuggest.setUserHistoryPredictionDictionary(mUserHistoryPredictionDictionary);
+ mUserHistoryDictionary = PersonalizationHelper.getUserHistoryDictionary(
+ this, localeStr, prefs);
+ newSuggest.setUserHistoryDictionary(mUserHistoryDictionary);
mPersonalizationDictionary = PersonalizationHelper
.getPersonalizationDictionary(this, localeStr, prefs);
newSuggest.setPersonalizationDictionary(mPersonalizationDictionary);
@@ -1521,7 +1521,7 @@
mSubtypeState.switchSubtype(token, mRichImm);
}
- private void sendDownUpKeyEventForBackwardCompatibility(final int code) {
+ private void sendDownUpKeyEvent(final int code) {
final long eventTime = SystemClock.uptimeMillis();
mConnection.sendKeyEvent(new KeyEvent(eventTime, eventTime,
KeyEvent.ACTION_DOWN, code, 0, 0, KeyCharacterMap.VIRTUAL_KEYBOARD, 0,
@@ -1538,7 +1538,7 @@
// TODO: Remove this special handling of digit letters.
// For backward compatibility. See {@link InputMethodService#sendKeyChar(char)}.
if (code >= '0' && code <= '9') {
- sendDownUpKeyEventForBackwardCompatibility(code - '0' + KeyEvent.KEYCODE_0);
+ sendDownUpKeyEvent(code - '0' + KeyEvent.KEYCODE_0);
return;
}
@@ -1547,7 +1547,7 @@
// a hardware keyboard event on pressing enter or delete. This is bad for many
// reasons (there are race conditions with commits) but some applications are
// relying on this behavior so we continue to support it for older apps.
- sendDownUpKeyEventForBackwardCompatibility(KeyEvent.KEYCODE_ENTER);
+ sendDownUpKeyEvent(KeyEvent.KEYCODE_ENTER);
} else {
final String text = new String(new int[] { code }, 0, 1);
mConnection.commitText(text, text.length());
@@ -2104,12 +2104,16 @@
}
final int lengthToDelete = Character.isSupplementaryCodePoint(
mConnection.getCodePointBeforeCursor()) ? 2 : 1;
- if (mAppWorkAroundsUtils.isBeforeJellyBean()) {
- // Backward compatibility mode. Before Jelly bean, the keyboard would simulate
- // a hardware keyboard event on pressing enter or delete. This is bad for many
- // reasons (there are race conditions with commits) but some applications are
- // relying on this behavior so we continue to support it for older apps.
- sendDownUpKeyEventForBackwardCompatibility(KeyEvent.KEYCODE_DEL);
+ if (mAppWorkAroundsUtils.isBeforeJellyBean() ||
+ currentSettings.mInputAttributes.isTypeNull()) {
+ // There are two possible reasons to send a key event: either the field has
+ // type TYPE_NULL, in which case the keyboard should send events, or we are
+ // running in backward compatibility mode. Before Jelly bean, the keyboard
+ // would simulate a hardware keyboard event on pressing enter or delete. This
+ // is bad for many reasons (there are race conditions with commits) but some
+ // applications are relying on this behavior so we continue to support it for
+ // older apps, so we retain this behavior if the app has target SDK < JellyBean.
+ sendDownUpKeyEvent(KeyEvent.KEYCODE_DEL);
} else {
mConnection.deleteSurroundingText(lengthToDelete, 0);
}
@@ -2751,9 +2755,8 @@
final SettingsValues currentSettings = mSettings.getCurrent();
if (!currentSettings.mCorrectionEnabled) return null;
- final UserHistoryPredictionDictionary userHistoryPredictionDictionary =
- mUserHistoryPredictionDictionary;
- if (userHistoryPredictionDictionary == null) return null;
+ final UserHistoryDictionary userHistoryDictionary = mUserHistoryDictionary;
+ if (userHistoryDictionary == null) return null;
final String prevWord = mConnection.getNthPreviousWord(currentSettings.mWordSeparators, 2);
final String secondWord;
@@ -2767,8 +2770,7 @@
final int maxFreq = AutoCorrectionUtils.getMaxFrequency(
suggest.getUnigramDictionaries(), suggestion);
if (maxFreq == 0) return null;
- userHistoryPredictionDictionary
- .addToPersonalizationPredictionDictionary(prevWord, secondWord, maxFreq > 0);
+ userHistoryDictionary.addToDictionary(prevWord, secondWord, maxFreq > 0);
return prevWord;
}
@@ -2954,7 +2956,7 @@
}
mConnection.deleteSurroundingText(deleteLength, 0);
if (!TextUtils.isEmpty(previousWord) && !TextUtils.isEmpty(committedWord)) {
- mUserHistoryPredictionDictionary.cancelAddingUserHistory(previousWord, committedWord);
+ mUserHistoryDictionary.cancelAddingUserHistory(previousWord, committedWord);
}
final String stringToCommit = originallyTypedWord + mLastComposedWord.mSeparatorString;
if (mSettings.getCurrent().mCurrentLanguageHasSpaces) {
diff --git a/java/src/com/android/inputmethod/latin/ReadOnlyBinaryDictionary.java b/java/src/com/android/inputmethod/latin/ReadOnlyBinaryDictionary.java
new file mode 100644
index 0000000..68505ce
--- /dev/null
+++ b/java/src/com/android/inputmethod/latin/ReadOnlyBinaryDictionary.java
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin;
+
+import com.android.inputmethod.keyboard.ProximityInfo;
+import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
+
+import java.util.ArrayList;
+import java.util.Locale;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+/**
+ * This class provides binary dictionary reading operations with locking. An instance of this class
+ * can be used by multiple threads. Note that different session IDs must be used when multiple
+ * threads get suggestions using this class.
+ */
+public final class ReadOnlyBinaryDictionary extends Dictionary {
+ /**
+ * A lock for accessing binary dictionary. Only closing binary dictionary is the operation
+ * that change the state of dictionary.
+ */
+ private final ReentrantReadWriteLock mLock = new ReentrantReadWriteLock();
+
+ private final BinaryDictionary mBinaryDictionary;
+
+ public ReadOnlyBinaryDictionary(final String filename, final long offset, final long length,
+ final boolean useFullEditDistance, final Locale locale, final String dictType) {
+ super(dictType);
+ mBinaryDictionary = new BinaryDictionary(filename, offset, length, useFullEditDistance,
+ locale, dictType, false /* isUpdatable */);
+ }
+
+ public boolean isValidDictionary() {
+ return mBinaryDictionary.isValidDictionary();
+ }
+
+ @Override
+ public ArrayList<SuggestedWordInfo> getSuggestions(final WordComposer composer,
+ final String prevWord, final ProximityInfo proximityInfo,
+ final boolean blockOffensiveWords, final int[] additionalFeaturesOptions) {
+ return getSuggestionsWithSessionId(composer, prevWord, proximityInfo, blockOffensiveWords,
+ additionalFeaturesOptions, 0 /* sessionId */);
+ }
+
+ @Override
+ public ArrayList<SuggestedWordInfo> getSuggestionsWithSessionId(final WordComposer composer,
+ final String prevWord, final ProximityInfo proximityInfo,
+ final boolean blockOffensiveWords, final int[] additionalFeaturesOptions,
+ final int sessionId) {
+ if (mLock.readLock().tryLock()) {
+ try {
+ return mBinaryDictionary.getSuggestions(composer, prevWord, proximityInfo,
+ blockOffensiveWords, additionalFeaturesOptions);
+ } finally {
+ mLock.readLock().unlock();
+ }
+ }
+ return null;
+ }
+
+ @Override
+ public boolean isValidWord(final String word) {
+ if (mLock.readLock().tryLock()) {
+ try {
+ return mBinaryDictionary.isValidWord(word);
+ } finally {
+ mLock.readLock().unlock();
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public boolean shouldAutoCommit(final SuggestedWordInfo candidate) {
+ if (mLock.readLock().tryLock()) {
+ try {
+ return mBinaryDictionary.shouldAutoCommit(candidate);
+ } finally {
+ mLock.readLock().unlock();
+ }
+ }
+ return false;
+ }
+
+ @Override
+ public int getFrequency(final String word) {
+ if (mLock.readLock().tryLock()) {
+ try {
+ return mBinaryDictionary.getFrequency(word);
+ } finally {
+ mLock.readLock().unlock();
+ }
+ }
+ return NOT_A_PROBABILITY;
+ }
+
+ @Override
+ public void close() {
+ mLock.writeLock().lock();
+ try {
+ mBinaryDictionary.close();
+ } finally {
+ mLock.writeLock().unlock();
+ }
+ }
+}
diff --git a/java/src/com/android/inputmethod/latin/Suggest.java b/java/src/com/android/inputmethod/latin/Suggest.java
index 6c18c94..9fd1f53 100644
--- a/java/src/com/android/inputmethod/latin/Suggest.java
+++ b/java/src/com/android/inputmethod/latin/Suggest.java
@@ -26,7 +26,7 @@
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.personalization.PersonalizationDictionary;
import com.android.inputmethod.latin.personalization.PersonalizationPredictionDictionary;
-import com.android.inputmethod.latin.personalization.UserHistoryPredictionDictionary;
+import com.android.inputmethod.latin.personalization.UserHistoryDictionary;
import com.android.inputmethod.latin.settings.Settings;
import com.android.inputmethod.latin.utils.AutoCorrectionUtils;
import com.android.inputmethod.latin.utils.BoundedTreeSet;
@@ -190,10 +190,8 @@
addOrReplaceDictionaryInternal(Dictionary.TYPE_CONTACTS, contactsDictionary);
}
- public void setUserHistoryPredictionDictionary(
- final UserHistoryPredictionDictionary userHistoryPredictionDictionary) {
- addOrReplaceDictionaryInternal(Dictionary.TYPE_USER_HISTORY,
- userHistoryPredictionDictionary);
+ public void setUserHistoryDictionary(final UserHistoryDictionary userHistoryDictionary) {
+ addOrReplaceDictionaryInternal(Dictionary.TYPE_USER_HISTORY, userHistoryDictionary);
}
public void setPersonalizationPredictionDictionary(
diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
index 6cc0bfb..af61f29 100644
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoderUtils.java
@@ -385,12 +385,14 @@
nodeSize + size, ptNode.mChildren));
}
nodeSize += getShortcutListSize(ptNode.mShortcutTargets);
- if (null != ptNode.mBigrams) {
- for (WeightedString bigram : ptNode.mBigrams) {
- final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray,
- nodeSize + size + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE,
- FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord));
- nodeSize += getByteSize(offset) + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE;
+ if (formatOptions.mVersion < FormatSpec.FIRST_VERSION_WITH_TERMINAL_ID) {
+ if (null != ptNode.mBigrams) {
+ for (WeightedString bigram : ptNode.mBigrams) {
+ final int offset = getOffsetToTargetPtNodeDuringUpdate(ptNodeArray,
+ nodeSize + size + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE,
+ FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord));
+ nodeSize += getByteSize(offset) + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE;
+ }
}
}
ptNode.mCachedSize = nodeSize;
diff --git a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java
index bf3d191..411e265 100644
--- a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java
+++ b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java
@@ -77,7 +77,7 @@
* @param newParentAddress the absolute address of the parent.
* @param formatOptions file format options.
*/
- public static void updateParentAddress(final DictBuffer dictBuffer,
+ private static void updateParentAddress(final DictBuffer dictBuffer,
final int ptNodeOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) {
final int originalPosition = dictBuffer.position();
@@ -109,7 +109,7 @@
* @param newParentAddress the address to be written.
* @param formatOptions file format options.
*/
- public static void updateParentAddresses(final DictBuffer dictBuffer,
+ private static void updateParentAddresses(final DictBuffer dictBuffer,
final int ptNodeOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) {
final int originalPosition = dictBuffer.position();
@@ -136,7 +136,7 @@
* @param newChildrenAddress the absolute address of the child.
* @param formatOptions file format options.
*/
- public static void updateChildrenAddress(final DictBuffer dictBuffer,
+ private static void updateChildrenAddress(final DictBuffer dictBuffer,
final int ptNodeOriginAddress, final int newChildrenAddress,
final FormatOptions formatOptions) {
final int originalPosition = dictBuffer.position();
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index 849bff0..9481a8c 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -265,8 +265,12 @@
static final String FREQ_FILE_EXTENSION = ".freq";
// tat = Terminal Address Table
static final String TERMINAL_ADDRESS_TABLE_FILE_EXTENSION = ".tat";
+ static final String BIGRAM_FILE_EXTENSION = ".bigram";
+ static final String BIGRAM_LOOKUP_TABLE_FILE_EXTENSION = ".bigram_lookup";
+ static final String BIGRAM_ADDRESS_TABLE_FILE_EXTENSION = ".bigram_index";
static final int FREQUENCY_AND_FLAGS_SIZE = 2;
static final int TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE = 3;
+ static final int BIGRAM_ADDRESS_TABLE_BLOCK_SIZE = 4;
static final int NO_CHILDREN_ADDRESS = Integer.MIN_VALUE;
static final int NO_PARENT_ADDRESS = 0;
@@ -331,9 +335,9 @@
public static final String USES_FORGETTING_CURVE_ATTRIBUTE = "USES_FORGETTING_CURVE";
public static final String ATTRIBUTE_VALUE_TRUE = "1";
- private static final String DICTIONARY_VERSION_ATTRIBUTE = "version";
- private static final String DICTIONARY_LOCALE_ATTRIBUTE = "locale";
- private static final String DICTIONARY_ID_ATTRIBUTE = "dictionary";
+ public static final String DICTIONARY_VERSION_ATTRIBUTE = "version";
+ public static final String DICTIONARY_LOCALE_ATTRIBUTE = "locale";
+ public static final String DICTIONARY_ID_ATTRIBUTE = "dictionary";
private static final String DICTIONARY_DESCRIPTION_ATTRIBUTE = "description";
public FileHeader(final int headerSize, final DictionaryOptions dictionaryOptions,
final FormatOptions formatOptions) {
diff --git a/java/src/com/android/inputmethod/latin/makedict/SparseTable.java b/java/src/com/android/inputmethod/latin/makedict/SparseTable.java
index 0b9cf91..96d057a 100644
--- a/java/src/com/android/inputmethod/latin/makedict/SparseTable.java
+++ b/java/src/com/android/inputmethod/latin/makedict/SparseTable.java
@@ -18,6 +18,9 @@
import com.android.inputmethod.annotations.UsedForTesting;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
@@ -147,4 +150,45 @@
BinaryDictEncoderUtils.writeUIntToStream(contentOutStream, index, 4);
}
}
+
+ @UsedForTesting
+ public void writeToFiles(final File lookupTableFile, final File contentFile)
+ throws IOException {
+ FileOutputStream lookupTableOutStream = null;
+ FileOutputStream contentOutStream = null;
+ try {
+ lookupTableOutStream = new FileOutputStream(lookupTableFile);
+ contentOutStream = new FileOutputStream(contentFile);
+ write(lookupTableOutStream, contentOutStream);
+ } finally {
+ if (lookupTableOutStream != null) {
+ lookupTableOutStream.close();
+ }
+ if (contentOutStream != null) {
+ contentOutStream.close();
+ }
+ }
+ }
+
+ private static byte[] readFileToByteArray(final File file) throws IOException {
+ final byte[] contents = new byte[(int) file.length()];
+ FileInputStream inStream = null;
+ try {
+ inStream = new FileInputStream(file);
+ inStream.read(contents);
+ } finally {
+ if (inStream != null) {
+ inStream.close();
+ }
+ }
+ return contents;
+ }
+
+ @UsedForTesting
+ public static SparseTable readFromFiles(final File lookupTableFile, final File contentFile,
+ final int blockSize) throws IOException {
+ final byte[] lookupTable = readFileToByteArray(lookupTableFile);
+ final byte[] content = readFileToByteArray(contentFile);
+ return new SparseTable(lookupTable, content, blockSize);
+ }
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
index 4c8ff8e..0aa4319 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java
@@ -42,12 +42,15 @@
private static final int FILETYPE_TRIE = 1;
private static final int FILETYPE_FREQUENCY = 2;
private static final int FILETYPE_TERMINAL_ADDRESS_TABLE = 3;
+ private static final int FILETYPE_BIGRAM = 4;
private final File mDictDirectory;
private final DictionaryBufferFactory mBufferFactory;
private DictBuffer mDictBuffer;
private DictBuffer mFrequencyBuffer;
private DictBuffer mTerminalAddressTableBuffer;
+ private DictBuffer mBigramBuffer;
+ private SparseTable mBigramAddressTable;
@UsedForTesting
/* package */ Ver4DictDecoder(final File dictDirectory, final int factoryFlag) {
@@ -82,6 +85,9 @@
} else if (fileType == FILETYPE_TERMINAL_ADDRESS_TABLE) {
return new File(mDictDirectory,
mDictDirectory.getName() + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
+ } else if (fileType == FILETYPE_BIGRAM) {
+ return new File(mDictDirectory,
+ mDictDirectory.getName() + FormatSpec.BIGRAM_FILE_EXTENSION);
} else {
throw new RuntimeException("Unsupported kind of file : " + fileType);
}
@@ -94,6 +100,8 @@
mFrequencyBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_FREQUENCY));
mTerminalAddressTableBuffer = mBufferFactory.getDictionaryBuffer(
getFile(FILETYPE_TERMINAL_ADDRESS_TABLE));
+ mBigramBuffer = mBufferFactory.getDictionaryBuffer(getFile(FILETYPE_BIGRAM));
+ loadBigramAddressSparseTable();
}
@Override
@@ -118,6 +126,15 @@
return header;
}
+ private void loadBigramAddressSparseTable() throws IOException {
+ final File lookupIndexFile = new File(mDictDirectory,
+ mDictDirectory.getName() + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION);
+ final File contentFile = new File(mDictDirectory,
+ mDictDirectory.getName() + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION);
+ mBigramAddressTable = SparseTable.readFromFiles(lookupIndexFile, contentFile,
+ FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE);
+ }
+
protected static class PtNodeReader extends DictDecoder.PtNodeReader {
protected static int readFrequency(final DictBuffer frequencyBuffer, final int terminalId) {
frequencyBuffer.position(terminalId * FormatSpec.FREQUENCY_AND_FLAGS_SIZE + 1);
@@ -191,8 +208,21 @@
final ArrayList<PendingAttribute> bigrams;
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
bigrams = new ArrayList<PendingAttribute>();
- addressPointer += PtNodeReader.readBigramAddresses(mDictBuffer, bigrams,
- addressPointer);
+ final int posOfBigrams = mBigramAddressTable.get(terminalId);
+ mBigramBuffer.position(posOfBigrams);
+ while (bigrams.size() < FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
+ // If bigrams.size() reaches FormatSpec.MAX_BIGRAMS_IN_A_PTNODE,
+ // remaining bigram entries are ignored.
+ final int bigramFlags = mBigramBuffer.readUnsignedByte();
+ final int targetTerminalId = mBigramBuffer.readUnsignedInt24();
+ mTerminalAddressTableBuffer.position(
+ targetTerminalId * FormatSpec.TERMINAL_ADDRESS_TABLE_ADDRESS_SIZE);
+ final int targetAddress = mTerminalAddressTableBuffer.readUnsignedInt24();
+ bigrams.add(new PendingAttribute(
+ bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_FREQUENCY,
+ targetAddress));
+ if (0 == (bigramFlags & FormatSpec.FLAG_BIGRAM_SHORTCUT_ATTR_HAS_NEXT)) break;
+ }
if (bigrams.size() >= FormatSpec.MAX_BIGRAMS_IN_A_PTNODE) {
MakedictLog.d("too many bigrams in a node.");
}
diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
index 4fb8967..4c25faf 100644
--- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
+++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java
@@ -26,6 +26,7 @@
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
+import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
@@ -43,9 +44,13 @@
private byte[] mTrieBuf;
private int mTriePos;
private int mHeaderSize;
+ private SparseTable mBigramAddressTable;
private OutputStream mTrieOutStream;
private OutputStream mFreqOutStream;
private OutputStream mTerminalAddressTableOutStream;
+ private OutputStream mBigramOutStream;
+ private File mDictDir;
+ private String mBaseFilename;
@UsedForTesting
public Ver4DictEncoder(final File dictPlacedDir) {
@@ -55,12 +60,14 @@
private void openStreams(final FormatOptions formatOptions, final DictionaryOptions dictOptions)
throws FileNotFoundException, IOException {
final FileHeader header = new FileHeader(0, dictOptions, formatOptions);
- final String filename = header.getId() + "." + header.getVersion();
- final File mDictDir = new File(mDictPlacedDir, filename);
- final File trieFile = new File(mDictDir, filename + FormatSpec.TRIE_FILE_EXTENSION);
- final File freqFile = new File(mDictDir, filename + FormatSpec.FREQ_FILE_EXTENSION);
+ mBaseFilename = header.getId() + "." + header.getVersion();
+ mDictDir = new File(mDictPlacedDir, mBaseFilename);
+ final File trieFile = new File(mDictDir, mBaseFilename + FormatSpec.TRIE_FILE_EXTENSION);
+ final File freqFile = new File(mDictDir, mBaseFilename + FormatSpec.FREQ_FILE_EXTENSION);
final File terminalAddressTableFile = new File(mDictDir,
- filename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
+ mBaseFilename + FormatSpec.TERMINAL_ADDRESS_TABLE_FILE_EXTENSION);
+ final File bigramFile = new File(mDictDir,
+ mBaseFilename + FormatSpec.BIGRAM_FILE_EXTENSION);
if (!mDictDir.isDirectory()) {
if (mDictDir.exists()) mDictDir.delete();
mDictDir.mkdirs();
@@ -71,6 +78,7 @@
mTrieOutStream = new FileOutputStream(trieFile);
mFreqOutStream = new FileOutputStream(freqFile);
mTerminalAddressTableOutStream = new FileOutputStream(terminalAddressTableFile);
+ mBigramOutStream = new FileOutputStream(bigramFile);
}
private void close() throws IOException {
@@ -84,10 +92,14 @@
if (mTerminalAddressTableOutStream != null) {
mTerminalAddressTableOutStream.close();
}
+ if (mBigramOutStream != null) {
+ mBigramOutStream.close();
+ }
} finally {
mTrieOutStream = null;
mFreqOutStream = null;
mTerminalAddressTableOutStream = null;
+ mBigramOutStream = null;
}
}
@@ -123,6 +135,10 @@
if (MakedictLog.DBG) BinaryDictEncoderUtils.checkFlatPtNodeArrayList(flatNodes);
writeTerminalData(flatNodes, terminalCount);
+ mBigramAddressTable = new SparseTable(terminalCount,
+ FormatSpec.BIGRAM_ADDRESS_TABLE_BLOCK_SIZE);
+ writeBigrams(flatNodes, dict);
+ writeBigramAddressSparseTable();
final PtNodeArray lastNodeArray = flatNodes.get(flatNodes.size() - 1);
final int bufferSize = lastNodeArray.mCachedAddressAfterUpdate + lastNodeArray.mCachedSize;
@@ -230,24 +246,41 @@
shortcutByteSize, FormatSpec.PTNODE_SHORTCUT_LIST_SIZE_SIZE);
}
- private void writeBigrams(ArrayList<WeightedString> bigrams, FusionDictionary dict) {
- if (bigrams == null) return;
+ private void writeBigrams(final ArrayList<PtNodeArray> flatNodes, final FusionDictionary dict)
+ throws IOException {
+ final ByteArrayOutputStream bigramBuffer = new ByteArrayOutputStream();
- final Iterator<WeightedString> bigramIterator = bigrams.iterator();
- while (bigramIterator.hasNext()) {
- final WeightedString bigram = bigramIterator.next();
- final PtNode target =
- FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
- final int addressOfBigram = target.mCachedAddressAfterUpdate;
- final int unigramFrequencyForThisWord = target.mFrequency;
- final int offset = addressOfBigram
- - (mTriePos + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
- int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
- offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord);
- mTrieBuf[mTriePos++] = (byte) bigramFlags;
- mTriePos += BinaryDictEncoderUtils.writeChildrenPosition(mTrieBuf,
- mTriePos, Math.abs(offset));
+ for (final PtNodeArray nodeArray : flatNodes) {
+ for (final PtNode ptNode : nodeArray.mData) {
+ if (ptNode.mBigrams != null) {
+ final int startPos = bigramBuffer.size();
+ mBigramAddressTable.set(ptNode.mTerminalId, startPos);
+ final Iterator<WeightedString> bigramIterator = ptNode.mBigrams.iterator();
+ while (bigramIterator.hasNext()) {
+ final WeightedString bigram = bigramIterator.next();
+ final PtNode target =
+ FusionDictionary.findWordInTree(dict.mRootNodeArray, bigram.mWord);
+ final int unigramFrequencyForThisWord = target.mFrequency;
+ final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(
+ bigramIterator.hasNext(), 0, bigram.mFrequency,
+ unigramFrequencyForThisWord, bigram.mWord);
+ BinaryDictEncoderUtils.writeUIntToStream(bigramBuffer, bigramFlags,
+ FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
+ BinaryDictEncoderUtils.writeUIntToStream(bigramBuffer, target.mTerminalId,
+ FormatSpec.PTNODE_ATTRIBUTE_MAX_ADDRESS_SIZE);
+ }
+ }
+ }
}
+ bigramBuffer.writeTo(mBigramOutStream);
+ }
+
+ private void writeBigramAddressSparseTable() throws IOException {
+ final File lookupIndexFile =
+ new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_LOOKUP_TABLE_FILE_EXTENSION);
+ final File contentFile =
+ new File(mDictDir, mBaseFilename + FormatSpec.BIGRAM_ADDRESS_TABLE_FILE_EXTENSION);
+ mBigramAddressTable.writeToFiles(lookupIndexFile, contentFile);
}
@Override
@@ -267,7 +300,6 @@
}
writeChildrenPosition(ptNode, formatOptions);
writeShortcuts(ptNode.mShortcutTargets);
- writeBigrams(ptNode.mBigrams, dict);
}
private void writeTerminalData(final ArrayList<PtNodeArray> flatNodes,
diff --git a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java
index 66517a8..7cf4f0c 100644
--- a/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java
+++ b/java/src/com/android/inputmethod/latin/personalization/DecayingExpandableBinaryDictionaryBase.java
@@ -22,6 +22,7 @@
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants;
+import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
import com.android.inputmethod.latin.LatinImeLogger;
import com.android.inputmethod.latin.makedict.DictDecoder;
@@ -50,6 +51,9 @@
/** Any pair being typed or picked */
public static final int FREQUENCY_FOR_TYPED = 2;
+ public static final int FREQUENCY_FOR_WORDS_IN_DICTS = FREQUENCY_FOR_TYPED;
+ public static final int FREQUENCY_FOR_WORDS_NOT_IN_DICTS = Dictionary.NOT_A_PROBABILITY;
+
/** Locale for which this user history dictionary is storing words */
private final String mLocale;
@@ -94,6 +98,8 @@
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
+ attributeMap.put(FormatSpec.FileHeader.DICTIONARY_ID_ATTRIBUTE, mFileName);
+ attributeMap.put(FormatSpec.FileHeader.DICTIONARY_LOCALE_ATTRIBUTE, mLocale);
return attributeMap;
}
@@ -117,27 +123,29 @@
}
/**
- * Pair will be added to the personalization prediction dictionary.
+ * Pair will be added to the decaying dictionary.
*
* The first word may be null. That means we don't know the context, in other words,
* it's only a unigram. The first word may also be an empty string : this means start
* context, as in beginning of a sentence for example.
* The second word may not be null (a NullPointerException would be thrown).
*/
- public void addToPersonalizationPredictionDictionary(
- final String word0, final String word1, final boolean isValid) {
+ public void addToDictionary(final String word0, final String word1, final boolean isValid) {
if (word1.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH ||
(word0 != null && word0.length() >= Constants.DICTIONARY_MAX_WORD_LENGTH)) {
return;
}
- addWordDynamically(word1, null /* the "shortcut" parameter is null */, FREQUENCY_FOR_TYPED,
+ final int frequency = ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE ?
+ (isValid ? FREQUENCY_FOR_WORDS_IN_DICTS : FREQUENCY_FOR_WORDS_NOT_IN_DICTS) :
+ FREQUENCY_FOR_TYPED;
+ addWordDynamically(word1, null /* the "shortcut" parameter is null */, frequency,
false /* isNotAWord */);
// Do not insert a word as a bigram of itself
if (word1.equals(word0)) {
return;
}
if (null != word0) {
- addBigramDynamically(word0, word1, FREQUENCY_FOR_TYPED, isValid);
+ addBigramDynamically(word0, word1, frequency, isValid);
}
}
diff --git a/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java b/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java
index 0af028a..039b253 100644
--- a/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java
+++ b/java/src/com/android/inputmethod/latin/personalization/DynamicPersonalizationDictionaryWriter.java
@@ -36,6 +36,7 @@
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Map;
// Currently this class is used to implement dynamic prodiction dictionary.
// TODO: Move to native code.
@@ -79,7 +80,7 @@
public void addUnigramWord(final String word, final String shortcutTarget, final int frequency,
final boolean isNotAWord) {
if (mBigramList.size() > mMaxHistoryBigrams * 2) {
- // Too many entries: just stop adding new vocabrary and wait next refresh.
+ // Too many entries: just stop adding new vocabulary and wait next refresh.
return;
}
mExpandableDictionary.addWord(word, shortcutTarget, frequency);
@@ -90,7 +91,7 @@
public void addBigramWords(final String word0, final String word1, final int frequency,
final boolean isValid, final long lastModifiedTime) {
if (mBigramList.size() > mMaxHistoryBigrams * 2) {
- // Too many entries: just stop adding new vocabrary and wait next refresh.
+ // Too many entries: just stop adding new vocabulary and wait next refresh.
return;
}
if (lastModifiedTime > 0) {
@@ -113,8 +114,8 @@
}
@Override
- protected void writeDictionary(final DictEncoder dictEncoder)
- throws IOException, UnsupportedFormatException {
+ protected void writeDictionary(final DictEncoder dictEncoder,
+ final Map<String, String> attributeMap) throws IOException, UnsupportedFormatException {
UserHistoryDictIOUtils.writeDictionary(dictEncoder,
new FrequencyProvider(mBigramList, mExpandableDictionary, mMaxHistoryBigrams),
mBigramList, FORMAT_OPTIONS);
@@ -176,8 +177,8 @@
}
@UsedForTesting
- public boolean isInDictionaryForTests(final String word) {
+ public boolean isInBigramListForTests(final String word) {
// TODO: Use native method to determine whether the word is in dictionary or not
- return mBigramList.containsKey(word);
+ return mBigramList.containsKey(word) || mBigramList.getBigrams(null).containsKey(word);
}
}
diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java
index c616a29..a86f6e5 100644
--- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java
+++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationDictionaryUpdateSession.java
@@ -110,7 +110,7 @@
if (dictionary == null) {
return;
}
- dictionary.addToPersonalizationPredictionDictionary(word0, word1, isValid);
+ dictionary.addToDictionary(word0, word1, isValid);
}
// Bulk import
@@ -122,8 +122,7 @@
return;
}
for (final PersonalizationLanguageModelParam lmParam : lmParams) {
- dictionary.addToPersonalizationPredictionDictionary(
- lmParam.mWord0, lmParam.mWord1, lmParam.mIsValid);
+ dictionary.addToDictionary(lmParam.mWord0, lmParam.mWord1, lmParam.mIsValid);
}
}
}
diff --git a/java/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java b/java/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java
index 5f702ee..8c9484b 100644
--- a/java/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java
+++ b/java/src/com/android/inputmethod/latin/personalization/PersonalizationHelper.java
@@ -30,7 +30,7 @@
private static final String TAG = PersonalizationHelper.class.getSimpleName();
private static final boolean DEBUG = false;
- private static final ConcurrentHashMap<String, SoftReference<UserHistoryPredictionDictionary>>
+ private static final ConcurrentHashMap<String, SoftReference<UserHistoryDictionary>>
sLangUserHistoryDictCache = CollectionUtils.newConcurrentHashMap();
private static final ConcurrentHashMap<String, SoftReference<PersonalizationDictionary>>
@@ -41,25 +41,23 @@
sLangPersonalizationPredictionDictCache =
CollectionUtils.newConcurrentHashMap();
- public static UserHistoryPredictionDictionary getUserHistoryPredictionDictionary(
+ public static UserHistoryDictionary getUserHistoryDictionary(
final Context context, final String locale, final SharedPreferences sp) {
synchronized (sLangUserHistoryDictCache) {
if (sLangUserHistoryDictCache.containsKey(locale)) {
- final SoftReference<UserHistoryPredictionDictionary> ref =
+ final SoftReference<UserHistoryDictionary> ref =
sLangUserHistoryDictCache.get(locale);
- final UserHistoryPredictionDictionary dict = ref == null ? null : ref.get();
+ final UserHistoryDictionary dict = ref == null ? null : ref.get();
if (dict != null) {
if (DEBUG) {
- Log.w(TAG, "Use cached UserHistoryPredictionDictionary for " + locale);
+ Log.w(TAG, "Use cached UserHistoryDictionary for " + locale);
}
dict.reloadDictionaryIfRequired();
return dict;
}
}
- final UserHistoryPredictionDictionary dict =
- new UserHistoryPredictionDictionary(context, locale, sp);
- sLangUserHistoryDictCache.put(
- locale, new SoftReference<UserHistoryPredictionDictionary>(dict));
+ final UserHistoryDictionary dict = new UserHistoryDictionary(context, locale, sp);
+ sLangUserHistoryDictCache.put(locale, new SoftReference<UserHistoryDictionary>(dict));
return dict;
}
}
diff --git a/java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java
similarity index 84%
rename from java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java
rename to java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java
index 38e308a..a60226d 100644
--- a/java/src/com/android/inputmethod/latin/personalization/UserHistoryPredictionDictionary.java
+++ b/java/src/com/android/inputmethod/latin/personalization/UserHistoryDictionary.java
@@ -26,10 +26,10 @@
* Locally gathers stats about the words user types and various other signals like auto-correction
* cancellation or manual picks. This allows the keyboard to adapt to the typist over time.
*/
-public class UserHistoryPredictionDictionary extends DecayingExpandableBinaryDictionaryBase {
+public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBase {
/* package for tests */ static final String NAME =
- UserHistoryPredictionDictionary.class.getSimpleName();
- /* package */ UserHistoryPredictionDictionary(final Context context, final String locale,
+ UserHistoryDictionary.class.getSimpleName();
+ /* package */ UserHistoryDictionary(final Context context, final String locale,
final SharedPreferences sp) {
super(context, locale, sp, Dictionary.TYPE_USER_HISTORY, getDictionaryFileName(locale));
}
diff --git a/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java b/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java
index 5dc0b58..201a70d 100644
--- a/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java
+++ b/java/src/com/android/inputmethod/latin/utils/PrioritizedSerialExecutor.java
@@ -16,8 +16,11 @@
package com.android.inputmethod.latin.utils;
-import java.util.ArrayDeque;
import java.util.Queue;
+import java.util.concurrent.ArrayBlockingQueue;
+import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.ThreadPoolExecutor;
+import java.util.concurrent.TimeUnit;
/**
* An object that executes submitted tasks using a thread.
@@ -27,19 +30,20 @@
private final Object mLock = new Object();
- // The default value of capacities of task queues.
- private static final int TASK_QUEUE_CAPACITY = 1000;
private final Queue<Runnable> mTasks;
private final Queue<Runnable> mPrioritizedTasks;
private boolean mIsShutdown;
+ private final ThreadPoolExecutor mThreadPoolExecutor;
// The task which is running now.
private Runnable mActive;
public PrioritizedSerialExecutor() {
- mTasks = new ArrayDeque<Runnable>(TASK_QUEUE_CAPACITY);
- mPrioritizedTasks = new ArrayDeque<Runnable>(TASK_QUEUE_CAPACITY);
+ mTasks = new ConcurrentLinkedQueue<Runnable>();
+ mPrioritizedTasks = new ConcurrentLinkedQueue<Runnable>();
mIsShutdown = false;
+ mThreadPoolExecutor = new ThreadPoolExecutor(1 /* corePoolSize */, 1 /* maximumPoolSize */,
+ 0 /* keepAliveTime */, TimeUnit.MILLISECONDS, new ArrayBlockingQueue<Runnable>(1));
}
/**
@@ -59,7 +63,16 @@
public void execute(final Runnable r) {
synchronized(mLock) {
if (!mIsShutdown) {
- mTasks.offer(r);
+ mTasks.offer(new Runnable() {
+ @Override
+ public void run() {
+ try {
+ r.run();
+ } finally {
+ scheduleNext();
+ }
+ }
+ });
if (mActive == null) {
scheduleNext();
}
@@ -74,45 +87,36 @@
public void executePrioritized(final Runnable r) {
synchronized(mLock) {
if (!mIsShutdown) {
- mPrioritizedTasks.offer(r);
- if (mActive == null) {
+ mPrioritizedTasks.offer(new Runnable() {
+ @Override
+ public void run() {
+ try {
+ r.run();
+ } finally {
+ scheduleNext();
+ }
+ }
+ });
+ if (mActive == null) {
scheduleNext();
}
}
}
}
- private boolean fetchNextTasks() {
- synchronized(mLock) {
- mActive = mPrioritizedTasks.poll();
- if (mActive == null) {
- mActive = mTasks.poll();
- }
- return mActive != null;
+ private boolean fetchNextTasksLocked() {
+ mActive = mPrioritizedTasks.poll();
+ if (mActive == null) {
+ mActive = mTasks.poll();
}
+ return mActive != null;
}
private void scheduleNext() {
synchronized(mLock) {
- if (!fetchNextTasks()) {
- return;
+ if (fetchNextTasksLocked()) {
+ mThreadPoolExecutor.execute(mActive);
}
- new Thread(new Runnable() {
- @Override
- public void run() {
- try {
- do {
- synchronized(mLock) {
- if (mActive != null) {
- mActive.run();
- }
- }
- } while (fetchNextTasks());
- } finally {
- scheduleNext();
- }
- }
- }).start();
}
}
diff --git a/native/jni/Android.mk b/native/jni/Android.mk
index 0594ddf..36afea5 100644
--- a/native/jni/Android.mk
+++ b/native/jni/Android.mk
@@ -85,6 +85,7 @@
$(addprefix suggest/policyimpl/dictionary/utils/, \
buffer_with_extendable_buffer.cpp \
byte_array_utils.cpp \
+ decaying_utils.cpp \
dict_file_writing_utils.cpp \
format_utils.cpp) \
suggest/policyimpl/gesture/gesture_suggest_policy_factory.cpp \
diff --git a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
index 7761ec4..c5ef264 100644
--- a/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
+++ b/native/jni/com_android_inputmethod_latin_BinaryDictionary.cpp
@@ -113,10 +113,10 @@
}
static bool latinime_BinaryDictionary_needsToRunGC(JNIEnv *env, jclass clazz,
- jlong dict) {
+ jlong dict, jboolean mindsBlockByGC) {
Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
if (!dictionary) return false;
- return dictionary->needsToRunGC();
+ return dictionary->needsToRunGC(mindsBlockByGC == JNI_TRUE);
}
static void latinime_BinaryDictionary_flushWithGC(JNIEnv *env, jclass clazz, jlong dict,
@@ -323,6 +323,24 @@
bigramProbability);
}
+static jstring latinime_BinaryDictionary_getProperty(JNIEnv *env, jclass clazz, jlong dict,
+ jstring query) {
+ Dictionary *dictionary = reinterpret_cast<Dictionary *>(dict);
+ if (!dictionary) {
+ return env->NewStringUTF("");
+ }
+ const jsize queryUtf8Length = env->GetStringUTFLength(query);
+ char queryChars[queryUtf8Length + 1];
+ env->GetStringUTFRegion(query, 0, env->GetStringLength(query), queryChars);
+ queryChars[queryUtf8Length] = '\0';
+ static const int GET_PROPERTY_RESULT_LENGTH = 100;
+ char resultChars[GET_PROPERTY_RESULT_LENGTH];
+ resultChars[0] = '\0';
+ dictionary->getDictionaryStructurePolicy()->getProperty(queryChars, resultChars,
+ GET_PROPERTY_RESULT_LENGTH);
+ return env->NewStringUTF(resultChars);
+}
+
static const JNINativeMethod sMethods[] = {
{
const_cast<char *>("createEmptyDictFileNative"),
@@ -346,7 +364,7 @@
},
{
const_cast<char *>("needsToRunGCNative"),
- const_cast<char *>("(J)Z"),
+ const_cast<char *>("(JZ)Z"),
reinterpret_cast<void *>(latinime_BinaryDictionary_needsToRunGC)
},
{
@@ -398,6 +416,11 @@
const_cast<char *>("calculateProbabilityNative"),
const_cast<char *>("(JII)I"),
reinterpret_cast<void *>(latinime_BinaryDictionary_calculateProbabilityNative)
+ },
+ {
+ const_cast<char *>("getPropertyNative"),
+ const_cast<char *>("(JLjava/lang/String;)Ljava/lang/String;"),
+ reinterpret_cast<void *>(latinime_BinaryDictionary_getProperty)
}
};
diff --git a/native/jni/src/defines.h b/native/jni/src/defines.h
index 89dfa39..c2aa8ba 100644
--- a/native/jni/src/defines.h
+++ b/native/jni/src/defines.h
@@ -375,7 +375,7 @@
CT_TERMINAL,
CT_TERMINAL_INSERTION,
// Create new word with space omission
- CT_NEW_WORD_SPACE_OMITTION,
+ CT_NEW_WORD_SPACE_OMISSION,
// Create new word with space substitution
CT_NEW_WORD_SPACE_SUBSTITUTION,
} CorrectionType;
diff --git a/native/jni/src/suggest/core/dicnode/dic_node.h b/native/jni/src/suggest/core/dicnode/dic_node.h
index 41ef9d2..9099e82 100644
--- a/native/jni/src/suggest/core/dicnode/dic_node.h
+++ b/native/jni/src/suggest/core/dicnode/dic_node.h
@@ -38,10 +38,10 @@
INTS_TO_CHARS(mDicNodeState.mDicNodeStatePrevWord.mPrevWord, \
mDicNodeState.mDicNodeStatePrevWord.getPrevWordLength(), prevWordCharBuf, \
NELEMS(prevWordCharBuf)); \
- AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d,,", header, \
+ AKLOGI("#%8s, %5f, %5f, %5f, %5f, %s, %s, %d, %5f,", header, \
getSpatialDistanceForScoring(), getLanguageDistanceForScoring(), \
getNormalizedCompoundDistance(), getRawLength(), prevWordCharBuf, charBuf, \
- getInputIndex(0)); \
+ getInputIndex(0), getNormalizedCompoundDistanceAfterFirstWord()); \
} while (0)
#else
#define LOGI_SHOW_ADD_COST_PROP
@@ -434,6 +434,13 @@
return mDicNodeState.mDicNodeStateScoring.getLanguageDistance();
}
+ // For space-aware gestures, we store the normalized distance at the char index
+ // that ends the first word of the suggestion. We call this the distance after
+ // first word.
+ float getNormalizedCompoundDistanceAfterFirstWord() const {
+ return mDicNodeState.mDicNodeStateScoring.getNormalizedCompoundDistanceAfterFirstWord();
+ }
+
float getLanguageDistanceRatePerWordForScoring() const {
const float langDist = getLanguageDistanceForScoring();
const float totalWordCount =
@@ -565,6 +572,12 @@
inputSize, getTotalInputIndex(), errorType);
}
+ // Saves the current normalized compound distance for space-aware gestures.
+ // See getNormalizedCompoundDistanceAfterFirstWord for details.
+ AK_FORCE_INLINE void saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet() {
+ mDicNodeState.mDicNodeStateScoring.saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet();
+ }
+
// Caveat: Must not be called outside Weighting
// This restriction is guaranteed by "friend"
AK_FORCE_INLINE void forwardInputIndex(const int pointerId, const int count,
diff --git a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
index 4c88422..3c85d0e 100644
--- a/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
+++ b/native/jni/src/suggest/core/dicnode/internal/dic_node_state_scoring.h
@@ -31,7 +31,8 @@
mDigraphIndex(DigraphUtils::NOT_A_DIGRAPH_INDEX),
mEditCorrectionCount(0), mProximityCorrectionCount(0),
mNormalizedCompoundDistance(0.0f), mSpatialDistance(0.0f), mLanguageDistance(0.0f),
- mRawLength(0.0f), mExactMatch(true) {
+ mRawLength(0.0f), mExactMatch(true),
+ mNormalizedCompoundDistanceAfterFirstWord(MAX_VALUE_FOR_WEIGHTING) {
}
virtual ~DicNodeStateScoring() {}
@@ -45,6 +46,7 @@
mRawLength = 0.0f;
mDoubleLetterLevel = NOT_A_DOUBLE_LETTER;
mDigraphIndex = DigraphUtils::NOT_A_DIGRAPH_INDEX;
+ mNormalizedCompoundDistanceAfterFirstWord = MAX_VALUE_FOR_WEIGHTING;
mExactMatch = true;
}
@@ -58,6 +60,8 @@
mDoubleLetterLevel = scoring->mDoubleLetterLevel;
mDigraphIndex = scoring->mDigraphIndex;
mExactMatch = scoring->mExactMatch;
+ mNormalizedCompoundDistanceAfterFirstWord =
+ scoring->mNormalizedCompoundDistanceAfterFirstWord;
}
void addCost(const float spatialCost, const float languageCost, const bool doNormalization,
@@ -86,6 +90,17 @@
}
}
+ // Saves the current normalized distance for space-aware gestures.
+ // See getNormalizedCompoundDistanceAfterFirstWord for details.
+ void saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet() {
+ // We get called here after each word. We only want to store the distance after
+ // the first word, so if we already have a distance we skip saving -- hence "IfNoneYet"
+ // in the method name.
+ if (mNormalizedCompoundDistanceAfterFirstWord >= MAX_VALUE_FOR_WEIGHTING) {
+ mNormalizedCompoundDistanceAfterFirstWord = getNormalizedCompoundDistance();
+ }
+ }
+
void addRawLength(const float rawLength) {
mRawLength += rawLength;
}
@@ -102,6 +117,13 @@
return mNormalizedCompoundDistance;
}
+ // For space-aware gestures, we store the normalized distance at the char index
+ // that ends the first word of the suggestion. We call this the distance after
+ // first word.
+ float getNormalizedCompoundDistanceAfterFirstWord() const {
+ return mNormalizedCompoundDistanceAfterFirstWord;
+ }
+
float getSpatialDistance() const {
return mSpatialDistance;
}
@@ -178,6 +200,7 @@
float mLanguageDistance;
float mRawLength;
bool mExactMatch;
+ float mNormalizedCompoundDistanceAfterFirstWord;
AK_FORCE_INLINE void addDistance(float spatialDistance, float languageDistance,
bool doNormalization, int inputSize, int totalInputIndex) {
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.cpp b/native/jni/src/suggest/core/dictionary/dictionary.cpp
index ec1b63a..b1d01ed 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.cpp
+++ b/native/jni/src/suggest/core/dictionary/dictionary.cpp
@@ -33,6 +33,8 @@
namespace latinime {
+const int Dictionary::HEADER_ATTRIBUTE_BUFFER_SIZE = 32;
+
Dictionary::Dictionary(JNIEnv *env,
DictionaryStructureWithBufferPolicy *const dictionaryStructureWithBufferPolicy)
: mDictionaryStructureWithBufferPolicy(dictionaryStructureWithBufferPolicy),
@@ -121,32 +123,37 @@
mDictionaryStructureWithBufferPolicy->flushWithGC(filePath);
}
-bool Dictionary::needsToRunGC() {
- return mDictionaryStructureWithBufferPolicy->needsToRunGC();
+bool Dictionary::needsToRunGC(const bool mindsBlockByGC) {
+ return mDictionaryStructureWithBufferPolicy->needsToRunGC(mindsBlockByGC);
+}
+
+void Dictionary::getProperty(const char *const query, char *const outResult,
+ const int maxResultLength) const {
+ return mDictionaryStructureWithBufferPolicy->getProperty(query, outResult, maxResultLength);
}
void Dictionary::logDictionaryInfo(JNIEnv *const env) const {
- const int BUFFER_SIZE = 16;
- int dictionaryIdCodePointBuffer[BUFFER_SIZE];
- int versionStringCodePointBuffer[BUFFER_SIZE];
- int dateStringCodePointBuffer[BUFFER_SIZE];
+ int dictionaryIdCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
+ int versionStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
+ int dateStringCodePointBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
const DictionaryHeaderStructurePolicy *const headerPolicy =
getDictionaryStructurePolicy()->getHeaderStructurePolicy();
headerPolicy->readHeaderValueOrQuestionMark("dictionary", dictionaryIdCodePointBuffer,
- BUFFER_SIZE);
+ HEADER_ATTRIBUTE_BUFFER_SIZE);
headerPolicy->readHeaderValueOrQuestionMark("version", versionStringCodePointBuffer,
- BUFFER_SIZE);
- headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer, BUFFER_SIZE);
+ HEADER_ATTRIBUTE_BUFFER_SIZE);
+ headerPolicy->readHeaderValueOrQuestionMark("date", dateStringCodePointBuffer,
+ HEADER_ATTRIBUTE_BUFFER_SIZE);
- char dictionaryIdCharBuffer[BUFFER_SIZE];
- char versionStringCharBuffer[BUFFER_SIZE];
- char dateStringCharBuffer[BUFFER_SIZE];
- intArrayToCharArray(dictionaryIdCodePointBuffer, BUFFER_SIZE,
- dictionaryIdCharBuffer, BUFFER_SIZE);
- intArrayToCharArray(versionStringCodePointBuffer, BUFFER_SIZE,
- versionStringCharBuffer, BUFFER_SIZE);
- intArrayToCharArray(dateStringCodePointBuffer, BUFFER_SIZE,
- dateStringCharBuffer, BUFFER_SIZE);
+ char dictionaryIdCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
+ char versionStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
+ char dateStringCharBuffer[HEADER_ATTRIBUTE_BUFFER_SIZE];
+ intArrayToCharArray(dictionaryIdCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
+ dictionaryIdCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
+ intArrayToCharArray(versionStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
+ versionStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
+ intArrayToCharArray(dateStringCodePointBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE,
+ dateStringCharBuffer, HEADER_ATTRIBUTE_BUFFER_SIZE);
LogUtils::logToJava(env,
"Dictionary info: dictionary = %s ; version = %s ; date = %s",
diff --git a/native/jni/src/suggest/core/dictionary/dictionary.h b/native/jni/src/suggest/core/dictionary/dictionary.h
index 9744474..d8a0f3e 100644
--- a/native/jni/src/suggest/core/dictionary/dictionary.h
+++ b/native/jni/src/suggest/core/dictionary/dictionary.h
@@ -81,7 +81,10 @@
void flushWithGC(const char *const filePath);
- bool needsToRunGC();
+ bool needsToRunGC(const bool mindsBlockByGC);
+
+ void getProperty(const char *const query, char *const outResult,
+ const int maxResultLength) const;
const DictionaryStructureWithBufferPolicy *getDictionaryStructurePolicy() const {
return mDictionaryStructureWithBufferPolicy;
@@ -92,6 +95,8 @@
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(Dictionary);
+ static const int HEADER_ATTRIBUTE_BUFFER_SIZE;
+
DictionaryStructureWithBufferPolicy *const mDictionaryStructureWithBufferPolicy;
const BigramDictionary *const mBigramDictionary;
const SuggestInterface *const mGestureSuggest;
diff --git a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
index b95488e..c7ffef0 100644
--- a/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
+++ b/native/jni/src/suggest/core/policy/dictionary_structure_with_buffer_policy.h
@@ -78,7 +78,10 @@
virtual void flushWithGC(const char *const filePath) = 0;
- virtual bool needsToRunGC() const = 0;
+ virtual bool needsToRunGC(const bool mindsBlockByGC) const = 0;
+
+ virtual void getProperty(const char *const query, char *const outResult,
+ const int maxResultLength) const = 0;
protected:
DictionaryStructureWithBufferPolicy() {}
diff --git a/native/jni/src/suggest/core/policy/weighting.cpp b/native/jni/src/suggest/core/policy/weighting.cpp
index f9b777d..0c40168 100644
--- a/native/jni/src/suggest/core/policy/weighting.cpp
+++ b/native/jni/src/suggest/core/policy/weighting.cpp
@@ -38,7 +38,7 @@
case CT_SUBSTITUTION:
PROF_SUBSTITUTION(node->mProfiler);
return;
- case CT_NEW_WORD_SPACE_OMITTION:
+ case CT_NEW_WORD_SPACE_OMISSION:
PROF_NEW_WORD(node->mProfiler);
return;
case CT_MATCH:
@@ -93,6 +93,11 @@
}
dicNode->addCost(spatialCost, languageCost, weighting->needsToNormalizeCompoundDistance(),
inputSize, errorType);
+ if (CT_NEW_WORD_SPACE_OMISSION == correctionType) {
+ // When we are on a terminal, we save the current distance for evaluating
+ // when to auto-commit partial suggestions.
+ dicNode->saveNormalizedCompoundDistanceAfterFirstWordIfNoneYet();
+ }
}
/* static */ float Weighting::getSpatialCost(const Weighting *const weighting,
@@ -108,7 +113,7 @@
case CT_SUBSTITUTION:
// only used for typing
return weighting->getSubstitutionCost();
- case CT_NEW_WORD_SPACE_OMITTION:
+ case CT_NEW_WORD_SPACE_OMISSION:
return weighting->getNewWordSpatialCost(traverseSession, dicNode, inputStateG);
case CT_MATCH:
return weighting->getMatchedCost(traverseSession, dicNode, inputStateG);
@@ -138,7 +143,7 @@
return 0.0f;
case CT_SUBSTITUTION:
return 0.0f;
- case CT_NEW_WORD_SPACE_OMITTION:
+ case CT_NEW_WORD_SPACE_OMISSION:
return weighting->getNewWordBigramLanguageCost(
traverseSession, parentDicNode, multiBigramMap);
case CT_MATCH:
@@ -173,7 +178,7 @@
return 0; /* 0 because CT_MATCH will be called */
case CT_SUBSTITUTION:
return 0; /* 0 because CT_MATCH will be called */
- case CT_NEW_WORD_SPACE_OMITTION:
+ case CT_NEW_WORD_SPACE_OMISSION:
return 0;
case CT_MATCH:
return 1;
diff --git a/native/jni/src/suggest/core/suggest.cpp b/native/jni/src/suggest/core/suggest.cpp
index b1340e1..e20bc49 100644
--- a/native/jni/src/suggest/core/suggest.cpp
+++ b/native/jni/src/suggest/core/suggest.cpp
@@ -574,7 +574,7 @@
DicNodeUtils::initAsRootWithPreviousWord(
traverseSession->getDictionaryStructurePolicy(), dicNode, &newDicNode);
const CorrectionType correctionType = spaceSubstitution ?
- CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMITTION;
+ CT_NEW_WORD_SPACE_SUBSTITUTION : CT_NEW_WORD_SPACE_OMISSION;
Weighting::addCostAndForwardInputIndex(WEIGHTING, correctionType, traverseSession, dicNode,
&newDicNode, traverseSession->getMultiBigramMap());
if (newDicNode.getCompoundDistance() < static_cast<float>(MAX_VALUE_FOR_WEIGHTING)) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
index 29307b5..67a085d 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.cpp
@@ -17,10 +17,10 @@
#include "suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h"
#include "suggest/core/policy/dictionary_shortcuts_structure_policy.h"
-#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h"
+#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
namespace latinime {
@@ -41,9 +41,14 @@
if (usesAdditionalBuffer && originalBigramPos != NOT_A_DICT_POS) {
originalBigramPos += mBuffer->getOriginalBufferSize();
}
- *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
*outProbability = BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags);
*outHasNext = BigramListReadWriteUtils::hasNext(bigramFlags);
+ if (mIsDecayingDict && !DecayingUtils::isValidBigram(*outProbability)) {
+ // This bigram is too weak to output.
+ *outBigramPos = NOT_A_DICT_POS;
+ } else {
+ *outBigramPos = followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
+ }
if (usesAdditionalBuffer) {
*bigramEntryPos += mBuffer->getOriginalBufferSize();
}
@@ -119,7 +124,7 @@
// Finding useless bigram entries and remove them. Bigram entry is useless when the target PtNode
// has been deleted or is not a valid terminal.
bool DynamicBigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(
- int *const bigramListPos) {
+ int *const bigramListPos, int *const outValidBigramEntryCount) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
if (usesAdditionalBuffer) {
*bigramListPos -= mBuffer->getOriginalBufferSize();
@@ -153,14 +158,22 @@
const int bigramTargetNodePos =
followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos);
nodeReader.fetchNodeInfoInBufferFromPtNodePos(bigramTargetNodePos);
- // TODO: Update probability for supporting probability decaying.
if (nodeReader.isDeleted() || !nodeReader.isTerminal()
|| bigramTargetNodePos == NOT_A_DICT_POS) {
// The target is no longer valid terminal. Invalidate the current bigram entry.
if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
- NOT_A_DICT_POS /* targetOffset */, &bigramEntryPos)) {
+ NOT_A_DICT_POS /* targetPtNodePos */, &bigramEntryPos)) {
return false;
}
+ continue;
+ }
+ bool isRemoved = false;
+ if (!updateProbabilityForDecay(bigramFlags, bigramTargetNodePos, &bigramEntryPos,
+ &isRemoved)) {
+ return false;
+ }
+ if (!isRemoved) {
+ (*outValidBigramEntryCount) += 1;
}
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
return true;
@@ -169,7 +182,7 @@
// Updates bigram target PtNode positions in the list after the placing step in GC.
bool DynamicBigramListPolicy::updateAllBigramTargetPtNodePositions(int *const bigramListPos,
const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
- ptNodePositionRelocationMap) {
+ ptNodePositionRelocationMap, int *const outBigramEntryCount) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
if (usesAdditionalBuffer) {
*bigramListPos -= mBuffer->getOriginalBufferSize();
@@ -211,11 +224,12 @@
return false;
}
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
+ (*outBigramEntryCount) = bigramEntryCount;
return true;
}
bool DynamicBigramListPolicy::addNewBigramEntryToBigramList(const int bigramTargetPos,
- const int probability, int *const bigramListPos) {
+ const int probability, int *const bigramListPos, bool *const outAddedNewBigram) {
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(*bigramListPos);
if (usesAdditionalBuffer) {
*bigramListPos -= mBuffer->getOriginalBufferSize();
@@ -243,8 +257,15 @@
}
if (followBigramLinkAndGetCurrentBigramPtNodePos(originalBigramPos) == bigramTargetPos) {
// Update this bigram entry.
+ *outAddedNewBigram = false;
+ const int originalProbability = BigramListReadWriteUtils::getProbabilityFromFlags(
+ bigramFlags);
+ const int probabilityToWrite = mIsDecayingDict ?
+ DecayingUtils::getUpdatedBigramProbabilityDelta(
+ originalProbability, probability) : probability;
const BigramListReadWriteUtils::BigramFlags updatedFlags =
- BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags, probability);
+ BigramListReadWriteUtils::setProbabilityInFlags(bigramFlags,
+ probabilityToWrite);
return BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedFlags,
originalBigramPos, &entryPos);
}
@@ -254,12 +275,14 @@
// The current last entry is found.
// First, update the flags of the last entry.
if (!BigramListReadWriteUtils::setHasNextFlag(mBuffer, true /* hasNext */, entryPos)) {
+ *outAddedNewBigram = false;
return false;
}
if (usesAdditionalBuffer) {
*bigramListPos += mBuffer->getOriginalBufferSize();
}
// Then, add a new entry after the last entry.
+ *outAddedNewBigram = true;
return writeNewBigramEntry(bigramTargetPos, probability, bigramListPos);
} while(BigramListReadWriteUtils::hasNext(bigramFlags));
// We return directly from the while loop.
@@ -270,8 +293,11 @@
bool DynamicBigramListPolicy::writeNewBigramEntry(const int bigramTargetPos, const int probability,
int *const writingPos) {
// hasNext is false because we are adding a new bigram entry at the end of the bigram list.
+ const int probabilityToWrite = mIsDecayingDict ?
+ DecayingUtils::getUpdatedBigramProbabilityDelta(NOT_A_PROBABILITY, probability) :
+ probability;
return BigramListReadWriteUtils::createAndWriteBigramEntry(mBuffer, bigramTargetPos,
- probability, false /* hasNext */, writingPos);
+ probabilityToWrite, false /* hasNext */, writingPos);
}
bool DynamicBigramListPolicy::removeBigram(const int bigramListPos, const int bigramTargetPos) {
@@ -333,4 +359,33 @@
return currentPos;
}
+bool DynamicBigramListPolicy::updateProbabilityForDecay(
+ BigramListReadWriteUtils::BigramFlags bigramFlags, const int targetPtNodePos,
+ int *const bigramEntryPos, bool *const outRemoved) const {
+ *outRemoved = false;
+ if (mIsDecayingDict) {
+ // Update bigram probability for decaying.
+ const int newProbability = DecayingUtils::getBigramProbabilityDeltaToSave(
+ BigramListReadWriteUtils::getProbabilityFromFlags(bigramFlags));
+ if (DecayingUtils::isValidBigram(newProbability)) {
+ // Write new probability.
+ const BigramListReadWriteUtils::BigramFlags updatedBigramFlags =
+ BigramListReadWriteUtils::setProbabilityInFlags(
+ bigramFlags, newProbability);
+ if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, updatedBigramFlags,
+ targetPtNodePos, bigramEntryPos)) {
+ return false;
+ }
+ } else {
+ // Remove current bigram entry.
+ *outRemoved = true;
+ if (!BigramListReadWriteUtils::writeBigramEntry(mBuffer, bigramFlags,
+ NOT_A_DICT_POS /* targetPtNodePos */, bigramEntryPos)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
index 8ea318a..b358b4e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/bigram/dynamic_bigram_list_policy.h
@@ -21,6 +21,7 @@
#include "defines.h"
#include "suggest/core/policy/dictionary_bigrams_structure_policy.h"
+#include "suggest/policyimpl/dictionary/bigram/bigram_list_read_write_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
namespace latinime {
@@ -34,8 +35,9 @@
class DynamicBigramListPolicy : public DictionaryBigramsStructurePolicy {
public:
DynamicBigramListPolicy(BufferWithExtendableBuffer *const buffer,
- const DictionaryShortcutsStructurePolicy *const shortcutPolicy)
- : mBuffer(buffer), mShortcutPolicy(shortcutPolicy) {}
+ const DictionaryShortcutsStructurePolicy *const shortcutPolicy,
+ const bool isDecayingDict)
+ : mBuffer(buffer), mShortcutPolicy(shortcutPolicy), mIsDecayingDict(isDecayingDict) {}
~DynamicBigramListPolicy() {}
@@ -50,19 +52,20 @@
bool copyAllBigrams(BufferWithExtendableBuffer *const bufferToWrite, int *const fromPos,
int *const toPos, int *const outBigramsCount) const;
- bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos);
+ bool updateAllBigramEntriesAndDeleteUselessEntries(int *const bigramListPos,
+ int *const outBigramEntryCount);
bool updateAllBigramTargetPtNodePositions(int *const bigramListPos,
const DynamicPatriciaTrieWritingHelper::PtNodePositionRelocationMap *const
- ptNodePositionRelocationMap);
+ ptNodePositionRelocationMap, int *const outValidBigramEntryCount);
bool addNewBigramEntryToBigramList(const int bigramTargetPos, const int probability,
- int *const bigramListPos);
+ int *const bigramListPos, bool *const outAddedNewBigram);
bool writeNewBigramEntry(const int bigramTargetPos, const int probability,
int *const writingPos);
- // Return if targetBigramPos is found or not.
+ // Return whether or not targetBigramPos is found.
bool removeBigram(const int bigramListPos, const int bigramTargetPos);
private:
@@ -73,9 +76,13 @@
BufferWithExtendableBuffer *const mBuffer;
const DictionaryShortcutsStructurePolicy *const mShortcutPolicy;
+ const bool mIsDecayingDict;
// Follow bigram link and return the position of bigram target PtNode that is currently valid.
int followBigramLinkAndGetCurrentBigramPtNodePos(const int originalBigramPos) const;
+
+ bool updateProbabilityForDecay(BigramListReadWriteUtils::BigramFlags bigramFlags,
+ const int targetPtNodePos, int *const bigramEntryPos, bool *const outRemoved) const;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_BIGRAM_LIST_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
index c60e458..081163a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.cpp
@@ -16,6 +16,8 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h"
+#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
+
namespace latinime {
bool DynamicPatriciaTrieGcEventListeners
@@ -25,6 +27,19 @@
// PtNode is useless when the PtNode is not a terminal and doesn't have any not useless
// children.
bool isUselessPtNode = !node->isTerminal();
+ if (node->isTerminal() && mIsDecayingDict) {
+ const int newProbability =
+ DecayingUtils::getUnigramProbabilityToSave(node->getProbability());
+ int writingPos = node->getProbabilityFieldPos();
+ // Update probability.
+ if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(
+ mBuffer, newProbability, &writingPos)) {
+ return false;
+ }
+ if (!DecayingUtils::isValidUnigram(newProbability)) {
+ isUselessPtNode = false;
+ }
+ }
if (mChildrenValue > 0) {
isUselessPtNode = false;
} else if (node->isTerminal()) {
@@ -41,7 +56,27 @@
return false;
}
} else {
- valueStack.back() += 1;
+ mValueStack.back() += 1;
+ if (node->isTerminal()) {
+ mValidUnigramCount += 1;
+ }
+ }
+ return true;
+}
+
+bool DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
+ ::onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
+ const int *const nodeCodePoints) {
+ if (!node->isDeleted()) {
+ int pos = node->getBigramsPos();
+ if (pos != NOT_A_DICT_POS) {
+ int bigramEntryCount = 0;
+ if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos,
+ &bigramEntryCount)) {
+ return false;
+ }
+ mValidBigramEntryCount += bigramEntryCount;
+ }
}
return true;
}
@@ -137,10 +172,15 @@
// Updates bigram target PtNode positions in the bigram list.
int bigramsPos = node->getBigramsPos();
if (bigramsPos != NOT_A_DICT_POS) {
+ int bigramEntryCount;
if (!mBigramPolicy->updateAllBigramTargetPtNodePositions(&bigramsPos,
- &mDictPositionRelocationMap->mPtNodePositionRelocationMap)) {
+ &mDictPositionRelocationMap->mPtNodePositionRelocationMap, &bigramEntryCount)) {
return false;
}
+ mBigramCount += bigramEntryCount;
+ }
+ if (node->isTerminal()) {
+ mUnigramCount++;
}
return true;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
index 4256f22..463715a 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_gc_event_listeners.h
@@ -39,23 +39,23 @@
public:
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
DynamicPatriciaTrieWritingHelper *const writingHelper,
- BufferWithExtendableBuffer *const buffer)
- : mWritingHelper(writingHelper), mBuffer(buffer), valueStack(),
- mChildrenValue(0) {}
+ BufferWithExtendableBuffer *const buffer, const bool isDecayingDict)
+ : mWritingHelper(writingHelper), mBuffer(buffer), mIsDecayingDict(isDecayingDict),
+ mValueStack(), mChildrenValue(0), mValidUnigramCount(0) {}
~TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted() {};
bool onAscend() {
- if (valueStack.empty()) {
+ if (mValueStack.empty()) {
return false;
}
- mChildrenValue = valueStack.back();
- valueStack.pop_back();
+ mChildrenValue = mValueStack.back();
+ mValueStack.pop_back();
return true;
}
bool onDescend(const int ptNodeArrayPos) {
- valueStack.push_back(0);
+ mValueStack.push_back(0);
return true;
}
@@ -64,14 +64,20 @@
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints);
+ int getValidUnigramCount() const {
+ return mValidUnigramCount;
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(
TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted);
DynamicPatriciaTrieWritingHelper *const mWritingHelper;
BufferWithExtendableBuffer *const mBuffer;
- std::vector<int> valueStack;
+ const int mIsDecayingDict;
+ std::vector<int> mValueStack;
int mChildrenValue;
+ int mValidUnigramCount;
};
// Updates all bigram entries that are held by valid PtNodes. This removes useless bigram
@@ -80,7 +86,7 @@
: public DynamicPatriciaTrieReadingHelper::TraversingEventListener {
public:
TraversePolicyToUpdateBigramProbability(DynamicBigramListPolicy *const bigramPolicy)
- : mBigramPolicy(bigramPolicy) {}
+ : mBigramPolicy(bigramPolicy), mValidBigramEntryCount(0) {}
bool onAscend() { return true; }
@@ -89,22 +95,17 @@
bool onReadingPtNodeArrayTail() { return true; }
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
- const int *const nodeCodePoints) {
- if (!node->isDeleted()) {
- int pos = node->getBigramsPos();
- if (pos != NOT_A_DICT_POS) {
- if (!mBigramPolicy->updateAllBigramEntriesAndDeleteUselessEntries(&pos)) {
- return false;
- }
- }
- }
- return true;
+ const int *const nodeCodePoints);
+
+ int getValidBigramEntryCount() const {
+ return mValidBigramEntryCount;
}
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateBigramProbability);
DynamicBigramListPolicy *const mBigramPolicy;
+ int mValidBigramEntryCount;
};
class TraversePolicyToPlaceAndWriteValidPtNodesToBuffer
@@ -150,7 +151,8 @@
dictPositionRelocationMap)
: mWritingHelper(writingHelper), mBigramPolicy(bigramPolicy),
mBufferToWrite(bufferToWrite),
- mDictPositionRelocationMap(dictPositionRelocationMap) {};
+ mDictPositionRelocationMap(dictPositionRelocationMap), mUnigramCount(0),
+ mBigramCount(0) {};
bool onAscend() { return true; }
@@ -161,6 +163,14 @@
bool onVisitingPtNode(const DynamicPatriciaTrieNodeReader *const node,
const int *const nodeCodePoints);
+ int getUnigramCount() const {
+ return mUnigramCount;
+ }
+
+ int getBigramCount() const {
+ return mBigramCount;
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToUpdateAllPositionFields);
@@ -169,6 +179,8 @@
BufferWithExtendableBuffer *const mBufferToWrite;
const DynamicPatriciaTrieWritingHelper::DictPositionRelocationMap *const
mDictPositionRelocationMap;
+ int mUnigramCount;
+ int mBigramCount;
};
private:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
index 456352c..2fa3111 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_node_reader.cpp
@@ -26,7 +26,8 @@
void DynamicPatriciaTrieNodeReader::fetchPtNodeInfoFromBufferAndProcessMovedPtNode(
const int ptNodePos, const int maxCodePointCount, int *const outCodePoints) {
if (ptNodePos < 0 || ptNodePos >= mBuffer->getTailPosition()) {
- AKLOGE("Fetching PtNode info form invalid dictionary position: %d, dictionary size: %d",
+ // Reading invalid position because of bug or broken dictionary.
+ AKLOGE("Fetching PtNode info from invalid dictionary position: %d, dictionary size: %d",
ptNodePos, mBuffer->getTailPosition());
ASSERT(false);
invalidatePtNodeInfo();
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
index 42397c1..0d8c927 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.cpp
@@ -16,6 +16,10 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h"
+#include <cstdio>
+#include <cstring>
+#include <ctime>
+
#include "defines.h"
#include "suggest/core/dicnode/dic_node.h"
#include "suggest/core/dicnode/dic_node_vector.h"
@@ -24,10 +28,18 @@
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
+#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
namespace latinime {
+const char *const DynamicPatriciaTriePolicy::UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
+const char *const DynamicPatriciaTriePolicy::BIGRAM_COUNT_QUERY = "BIGRAM_COUNT";
+const int DynamicPatriciaTriePolicy::MAX_DICT_EXTENDED_REGION_SIZE = 1024 * 1024;
+const int DynamicPatriciaTriePolicy::MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS =
+ DynamicPatriciaTrieWritingHelper::MAX_DICTIONARY_SIZE - 1024;
+const int DynamicPatriciaTriePolicy::MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING = 2 * 60 * 60;
+
void DynamicPatriciaTriePolicy::createAndGetAllChildNodes(const DicNode *const dicNode,
DicNodeVector *const childDicNodes) const {
if (!dicNode->hasChildren()) {
@@ -137,14 +149,17 @@
int DynamicPatriciaTriePolicy::getProbability(const int unigramProbability,
const int bigramProbability) const {
- // TODO: check mHeaderPolicy.usesForgettingCurve();
- if (unigramProbability == NOT_A_PROBABILITY) {
- return NOT_A_PROBABILITY;
- } else if (bigramProbability == NOT_A_PROBABILITY) {
- return ProbabilityUtils::backoff(unigramProbability);
+ if (mHeaderPolicy.isDecayingDict()) {
+ return DecayingUtils::getProbability(unigramProbability, bigramProbability);
} else {
- return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
- bigramProbability);
+ if (unigramProbability == NOT_A_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else if (bigramProbability == NOT_A_PROBABILITY) {
+ return ProbabilityUtils::backoff(unigramProbability);
+ } else {
+ return ProbabilityUtils::computeProbabilityForBigram(unigramProbability,
+ bigramProbability);
+ }
}
}
@@ -193,12 +208,26 @@
AKLOGI("Warning: addUnigramWord() is called for non-updatable dictionary.");
return false;
}
+ if (mBufferWithExtendableBuffer.getTailPosition()
+ >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+ AKLOGE("The dictionary is too large to dynamically update.");
+ return false;
+ }
DynamicPatriciaTrieReadingHelper readingHelper(&mBufferWithExtendableBuffer,
getBigramsStructurePolicy(), getShortcutsStructurePolicy());
readingHelper.initWithPtNodeArrayPos(getRootPosition());
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy);
- return writingHelper.addUnigramWord(&readingHelper, word, length, probability);
+ &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
+ bool addedNewUnigram = false;
+ if (writingHelper.addUnigramWord(&readingHelper, word, length, probability,
+ &addedNewUnigram)) {
+ if (addedNewUnigram) {
+ mUnigramCount++;
+ }
+ return true;
+ } else {
+ return false;
+ }
}
bool DynamicPatriciaTriePolicy::addBigramWords(const int *const word0, const int length0,
@@ -207,6 +236,11 @@
AKLOGI("Warning: addBigramWords() is called for non-updatable dictionary.");
return false;
}
+ if (mBufferWithExtendableBuffer.getTailPosition()
+ >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+ AKLOGE("The dictionary is too large to dynamically update.");
+ return false;
+ }
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
false /* forceLowerCaseSearch */);
if (word0Pos == NOT_A_DICT_POS) {
@@ -218,8 +252,16 @@
return false;
}
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy);
- return writingHelper.addBigramWords(word0Pos, word1Pos, probability);
+ &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
+ bool addedNewBigram = false;
+ if (writingHelper.addBigramWords(word0Pos, word1Pos, probability, &addedNewBigram)) {
+ if (addedNewBigram) {
+ mBigramCount++;
+ }
+ return true;
+ } else {
+ return false;
+ }
}
bool DynamicPatriciaTriePolicy::removeBigramWords(const int *const word0, const int length0,
@@ -228,6 +270,11 @@
AKLOGI("Warning: removeBigramWords() is called for non-updatable dictionary.");
return false;
}
+ if (mBufferWithExtendableBuffer.getTailPosition()
+ >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS) {
+ AKLOGE("The dictionary is too large to dynamically update.");
+ return false;
+ }
const int word0Pos = getTerminalNodePositionOfWord(word0, length0,
false /* forceLowerCaseSearch */);
if (word0Pos == NOT_A_DICT_POS) {
@@ -239,8 +286,13 @@
return false;
}
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy);
- return writingHelper.removeBigramWords(word0Pos, word1Pos);
+ &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
+ if (writingHelper.removeBigramWords(word0Pos, word1Pos)) {
+ mBigramCount--;
+ return true;
+ } else {
+ return false;
+ }
}
void DynamicPatriciaTriePolicy::flush(const char *const filePath) {
@@ -249,8 +301,8 @@
return;
}
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy);
- writingHelper.writeToDictFile(filePath, &mHeaderPolicy);
+ &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
+ writingHelper.writeToDictFile(filePath, &mHeaderPolicy, mUnigramCount, mBigramCount);
}
void DynamicPatriciaTriePolicy::flushWithGC(const char *const filePath) {
@@ -259,17 +311,51 @@
return;
}
DynamicPatriciaTrieWritingHelper writingHelper(&mBufferWithExtendableBuffer,
- &mBigramListPolicy, &mShortcutListPolicy);
+ &mBigramListPolicy, &mShortcutListPolicy, mHeaderPolicy.isDecayingDict());
writingHelper.writeToDictFileWithGC(getRootPosition(), filePath, &mHeaderPolicy);
}
-bool DynamicPatriciaTriePolicy::needsToRunGC() const {
+bool DynamicPatriciaTriePolicy::needsToRunGC(const bool mindsBlockByGC) const {
if (!mBuffer->isUpdatable()) {
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false;
}
- // TODO: Implement more properly.
- return mBufferWithExtendableBuffer.isNearSizeLimit();
+ if (mBufferWithExtendableBuffer.isNearSizeLimit()) {
+ // Additional buffer size is near the limit.
+ return true;
+ } else if (mHeaderPolicy.getExtendedRegionSize()
+ + mBufferWithExtendableBuffer.getUsedAdditionalBufferSize()
+ > MAX_DICT_EXTENDED_REGION_SIZE) {
+ // Total extended region size exceeds the limit.
+ return true;
+ } else if (mBufferWithExtendableBuffer.getTailPosition()
+ >= MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS
+ && mBufferWithExtendableBuffer.getUsedAdditionalBufferSize() > 0) {
+ // Needs to reduce dictionary size.
+ return true;
+ } else if (mHeaderPolicy.isDecayingDict()) {
+ if (mUnigramCount >= DecayingUtils::MAX_UNIGRAM_COUNT) {
+ // Unigram count exceeds the limit.
+ return true;
+ } else if (mBigramCount >= DecayingUtils::MAX_BIGRAM_COUNT) {
+ // Bigram count exceeds the limit.
+ return true;
+ } else if (mindsBlockByGC && mHeaderPolicy.getLastUpdatedTime()
+ + MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING < time(0)) {
+ // Time to update probabilities for decaying.
+ return true;
+ }
+ }
+ return false;
+}
+
+void DynamicPatriciaTriePolicy::getProperty(const char *const query, char *const outResult,
+ const int maxResultLength) const {
+ if (strncmp(query, UNIGRAM_COUNT_QUERY, maxResultLength) == 0) {
+ snprintf(outResult, maxResultLength, "%d", mUnigramCount);
+ } else if (strncmp(query, BIGRAM_COUNT_QUERY, maxResultLength) == 0) {
+ snprintf(outResult, maxResultLength, "%d", mBigramCount);
+ }
}
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
index 06d8095..d3150c6 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_policy.h
@@ -37,7 +37,10 @@
mBufferWithExtendableBuffer(mBuffer->getBuffer() + mHeaderPolicy.getSize(),
mBuffer->getBufferSize() - mHeaderPolicy.getSize()),
mShortcutListPolicy(&mBufferWithExtendableBuffer),
- mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy) {}
+ mBigramListPolicy(&mBufferWithExtendableBuffer, &mShortcutListPolicy,
+ mHeaderPolicy.isDecayingDict()),
+ mUnigramCount(mHeaderPolicy.getUnigramCount()),
+ mBigramCount(mHeaderPolicy.getBigramCount()) {}
~DynamicPatriciaTriePolicy() {
delete mBuffer;
@@ -89,16 +92,27 @@
void flushWithGC(const char *const filePath);
- bool needsToRunGC() const;
+ bool needsToRunGC(const bool mindsBlockByGC) const;
+
+ void getProperty(const char *const query, char *const outResult,
+ const int maxResultLength) const;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTriePolicy);
+ static const char*const UNIGRAM_COUNT_QUERY;
+ static const char*const BIGRAM_COUNT_QUERY;
+ static const int MAX_DICT_EXTENDED_REGION_SIZE;
+ static const int MIN_DICT_SIZE_TO_REFUSE_DYNAMIC_OPERATIONS;
+ static const int MIN_SECONDS_TO_REQUIRE_GC_WHEN_WRITING;
+
const MmappedBuffer *const mBuffer;
const HeaderPolicy mHeaderPolicy;
BufferWithExtendableBuffer mBufferWithExtendableBuffer;
DynamicShortcutListPolicy mShortcutListPolicy;
DynamicBigramListPolicy mBigramListPolicy;
+ int mUnigramCount;
+ int mBigramCount;
};
} // namespace latinime
#endif // LATINIME_DYNAMIC_PATRICIA_TRIE_POLICY_H
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
index f4a2ef3..601ee66 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.cpp
@@ -155,6 +155,15 @@
// Read node array size and process empty node arrays. Nodes and arrays are counted up in this
// method to avoid an infinite loop.
void DynamicPatriciaTrieReadingHelper::nextPtNodeArray() {
+ if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) {
+ // Reading invalid position because of a bug or a broken dictionary.
+ AKLOGE("Reading PtNode array info from invalid dictionary position: %d, dict size: %d",
+ mReadingState.mPos, mBuffer->getTailPosition());
+ ASSERT(false);
+ mIsError = true;
+ mReadingState.mPos = NOT_A_DICT_POS;
+ return;
+ }
mReadingState.mPosOfLastPtNodeArrayHead = mReadingState.mPos;
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
@@ -191,6 +200,15 @@
// Follow the forward link and read the next node array if exists.
void DynamicPatriciaTrieReadingHelper::followForwardLink() {
+ if (mReadingState.mPos < 0 || mReadingState.mPos >= mBuffer->getTailPosition()) {
+ // Reading invalid position because of bug or broken dictionary.
+ AKLOGE("Reading forward link from invalid dictionary position: %d, dict size: %d",
+ mReadingState.mPos, mBuffer->getTailPosition());
+ ASSERT(false);
+ mIsError = true;
+ mReadingState.mPos = NOT_A_DICT_POS;
+ return;
+ }
const bool usesAdditionalBuffer = mBuffer->isInAdditionalBuffer(mReadingState.mPos);
const uint8_t *const dictBuf = mBuffer->getBuffer(usesAdditionalBuffer);
if (usesAdditionalBuffer) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
index c6d8ddc..512a4d8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_reading_helper.h
@@ -240,6 +240,7 @@
static const int MAX_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP;
static const size_t MAX_READING_STATE_STACK_SIZE;
+ // TODO: Introduce error code to track what caused the error.
bool mIsError;
ReadingState mReadingState;
const BufferWithExtendableBuffer *const mBuffer;
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
index 578645c..28124d2 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.cpp
@@ -25,6 +25,7 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h"
#include "suggest/policyimpl/dictionary/patricia_trie_reading_utils.h"
#include "suggest/policyimpl/dictionary/shortcut/dynamic_shortcut_list_policy.h"
+#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
#include "suggest/policyimpl/dictionary/utils/dict_file_writing_utils.h"
#include "utils/hash_map_compat.h"
@@ -36,7 +37,8 @@
bool DynamicPatriciaTrieWritingHelper::addUnigramWord(
DynamicPatriciaTrieReadingHelper *const readingHelper,
- const int *const wordCodePoints, const int codePointCount, const int probability) {
+ const int *const wordCodePoints, const int codePointCount, const int probability,
+ bool *const outAddedNewUnigram) {
int parentPos = NOT_A_DICT_POS;
while (!readingHelper->isEnd()) {
const int matchedCodePointCount = readingHelper->getPrevTotalCodePointCount();
@@ -54,8 +56,11 @@
const int nextIndex = matchedCodePointCount + j;
if (nextIndex >= codePointCount || !readingHelper->isMatchedCodePoint(j,
wordCodePoints[matchedCodePointCount + j])) {
+ *outAddedNewUnigram = true;
return reallocatePtNodeAndAddNewPtNodes(nodeReader,
- readingHelper->getMergedNodeCodePoints(), j, probability,
+ readingHelper->getMergedNodeCodePoints(), j,
+ getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */,
+ probability),
wordCodePoints + matchedCodePointCount,
codePointCount - matchedCodePointCount);
}
@@ -63,10 +68,12 @@
// All characters are matched.
if (codePointCount == readingHelper->getTotalCodePointCount()) {
return setPtNodeProbability(nodeReader, probability,
- readingHelper->getMergedNodeCodePoints());
+ readingHelper->getMergedNodeCodePoints(), outAddedNewUnigram);
}
if (!nodeReader->hasChildren()) {
- return createChildrenPtNodeArrayAndAChildPtNode(nodeReader, probability,
+ *outAddedNewUnigram = true;
+ return createChildrenPtNodeArrayAndAChildPtNode(nodeReader,
+ getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
wordCodePoints + readingHelper->getTotalCodePointCount(),
codePointCount - readingHelper->getTotalCodePointCount());
}
@@ -79,14 +86,15 @@
return false;
}
int pos = readingHelper->getPosOfLastForwardLinkField();
+ *outAddedNewUnigram = true;
return createAndInsertNodeIntoPtNodeArray(parentPos,
wordCodePoints + readingHelper->getPrevTotalCodePointCount(),
codePointCount - readingHelper->getPrevTotalCodePointCount(),
- probability, &pos);
+ getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability), &pos);
}
bool DynamicPatriciaTrieWritingHelper::addBigramWords(const int word0Pos, const int word1Pos,
- const int probability) {
+ const int probability, bool *const outAddedNewBigram) {
int mMergedNodeCodePoints[MAX_WORD_LENGTH];
DynamicPatriciaTrieNodeReader nodeReader(mBuffer, mBigramPolicy, mShortcutPolicy);
nodeReader.fetchNodeInfoInBufferFromPtNodePosAndGetNodeCodePoints(word0Pos, MAX_WORD_LENGTH,
@@ -107,9 +115,11 @@
if (nodeReader.getBigramsPos() != NOT_A_DICT_POS) {
// Insert a new bigram entry into the existing bigram list.
int bigramListPos = nodeReader.getBigramsPos();
- return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos);
+ return mBigramPolicy->addNewBigramEntryToBigramList(word1Pos, probability, &bigramListPos,
+ outAddedNewBigram);
} else {
// The PtNode doesn't have a bigram list.
+ *outAddedNewBigram = true;
// First, Write a bigram entry at the tail position of the PtNode.
if (!mBigramPolicy->writeNewBigramEntry(word1Pos, probability, &writingPos)) {
return false;
@@ -138,9 +148,12 @@
}
void DynamicPatriciaTrieWritingHelper::writeToDictFile(const char *const fileName,
- const HeaderPolicy *const headerPolicy) {
+ const HeaderPolicy *const headerPolicy, const int unigramCount, const int bigramCount) {
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
- if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */)) {
+ const int extendedRegionSize = headerPolicy->getExtendedRegionSize() +
+ mBuffer->getUsedAdditionalBufferSize();
+ if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, false /* updatesLastUpdatedTime */,
+ unigramCount, bigramCount, extendedRegionSize)) {
return;
}
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, mBuffer);
@@ -148,13 +161,16 @@
void DynamicPatriciaTrieWritingHelper::writeToDictFileWithGC(const int rootPtNodeArrayPos,
const char *const fileName, const HeaderPolicy *const headerPolicy) {
- BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
- if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */)) {
- return;
- }
BufferWithExtendableBuffer newDictBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */,
MAX_DICTIONARY_SIZE);
- if (!runGC(rootPtNodeArrayPos, &newDictBuffer)) {
+ int unigramCount = 0;
+ int bigramCount = 0;
+ if (!runGC(rootPtNodeArrayPos, &newDictBuffer, &unigramCount, &bigramCount)) {
+ return;
+ }
+ BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
+ if (!headerPolicy->writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
+ unigramCount, bigramCount, 0 /* extendedRegionSize */)) {
return;
}
DictFileWritingUtils::flushAllHeaderAndBodyToFile(fileName, &headerBuffer, &newDictBuffer);
@@ -335,23 +351,28 @@
bool DynamicPatriciaTrieWritingHelper::setPtNodeProbability(
const DynamicPatriciaTrieNodeReader *const originalPtNode, const int probability,
- const int *const codePoints) {
+ const int *const codePoints, bool *const outAddedNewUnigram) {
if (originalPtNode->isTerminal()) {
// Overwrites the probability.
+ *outAddedNewUnigram = false;
+ const int probabilityToWrite = getUpdatedProbability(originalPtNode->getProbability(),
+ probability);
int probabilityFieldPos = originalPtNode->getProbabilityFieldPos();
if (!DynamicPatriciaTrieWritingUtils::writeProbabilityAndAdvancePosition(mBuffer,
- probability, &probabilityFieldPos)) {
+ probabilityToWrite, &probabilityFieldPos)) {
return false;
}
} else {
// Make the node terminal and write the probability.
+ *outAddedNewUnigram = true;
int movedPos = mBuffer->getTailPosition();
if (!markNodeAsMovedAndSetPosition(originalPtNode, movedPos, movedPos)) {
return false;
}
if (!writePtNodeToBufferByCopyingPtNodeInfo(mBuffer, originalPtNode,
originalPtNode->getParentPos(), codePoints, originalPtNode->getCodePointCount(),
- probability, &movedPos)) {
+ getUpdatedProbability(NOT_A_PROBABILITY /* originalProbability */, probability),
+ &movedPos)) {
return false;
}
}
@@ -460,17 +481,22 @@
}
bool DynamicPatriciaTrieWritingHelper::runGC(const int rootPtNodeArrayPos,
- BufferWithExtendableBuffer *const bufferToWrite) {
+ BufferWithExtendableBuffer *const bufferToWrite, int *const outUnigramCount,
+ int *const outBigramCount) {
DynamicPatriciaTrieReadingHelper readingHelper(mBuffer, mBigramPolicy, mShortcutPolicy);
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners
::TraversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted(
- this, mBuffer);
+ this, mBuffer, mIsDecayingDict);
if (!readingHelper.traverseAllPtNodesInPostorderDepthFirstManner(
&traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted)) {
return false;
}
+ if (mIsDecayingDict && traversePolicyToUpdateUnigramProbabilityAndMarkUselessPtNodesAsDeleted
+ .getValidUnigramCount() > DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC) {
+ // TODO: Remove more unigrams.
+ }
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
DynamicPatriciaTrieGcEventListeners::TraversePolicyToUpdateBigramProbability
@@ -480,6 +506,11 @@
return false;
}
+ if (mIsDecayingDict && traversePolicyToUpdateBigramProbability.getValidBigramEntryCount()
+ > DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC) {
+ // TODO: Remove more bigrams.
+ }
+
// Mapping from positions in mBuffer to positions in bufferToWrite.
DictPositionRelocationMap dictPositionRelocationMap;
readingHelper.initWithPtNodeArrayPos(rootPtNodeArrayPos);
@@ -493,7 +524,8 @@
// Create policy instance for the GCed dictionary.
DynamicShortcutListPolicy newDictShortcutPolicy(bufferToWrite);
- DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy);
+ DynamicBigramListPolicy newDictBigramPolicy(bufferToWrite, &newDictShortcutPolicy,
+ mIsDecayingDict);
// Create reading helper for the GCed dictionary.
DynamicPatriciaTrieReadingHelper newDictReadingHelper(bufferToWrite, &newDictBigramPolicy,
&newDictShortcutPolicy);
@@ -505,7 +537,18 @@
&traversePolicyToUpdateAllPositionFields)) {
return false;
}
+ *outUnigramCount = traversePolicyToUpdateAllPositionFields.getUnigramCount();
+ *outBigramCount = traversePolicyToUpdateAllPositionFields.getBigramCount();
return true;
}
+int DynamicPatriciaTrieWritingHelper::getUpdatedProbability(const int originalProbability,
+ const int newProbability) {
+ if (mIsDecayingDict) {
+ return DecayingUtils::getUpdatedUnigramProbability(originalProbability, newProbability);
+ } else {
+ return newProbability;
+ }
+}
+
} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
index fe1b243..ecee2cd 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/dynamic_patricia_trie_writing_helper.h
@@ -47,24 +47,30 @@
DISALLOW_COPY_AND_ASSIGN(DictPositionRelocationMap);
};
+ static const size_t MAX_DICTIONARY_SIZE;
+
DynamicPatriciaTrieWritingHelper(BufferWithExtendableBuffer *const buffer,
DynamicBigramListPolicy *const bigramPolicy,
- DynamicShortcutListPolicy *const shortcutPolicy)
- : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy) {}
+ DynamicShortcutListPolicy *const shortcutPolicy, const bool isDecayingDict)
+ : mBuffer(buffer), mBigramPolicy(bigramPolicy), mShortcutPolicy(shortcutPolicy),
+ mIsDecayingDict(isDecayingDict) {}
~DynamicPatriciaTrieWritingHelper() {}
// Add a word to the dictionary. If the word already exists, update the probability.
bool addUnigramWord(DynamicPatriciaTrieReadingHelper *const readingHelper,
- const int *const wordCodePoints, const int codePointCount, const int probability);
+ const int *const wordCodePoints, const int codePointCount, const int probability,
+ bool *const outAddedNewUnigram);
// Add a bigram relation from word0Pos to word1Pos.
- bool addBigramWords(const int word0Pos, const int word1Pos, const int probability);
+ bool addBigramWords(const int word0Pos, const int word1Pos, const int probability,
+ bool *const outAddedNewBigram);
// Remove a bigram relation from word0Pos to word1Pos.
bool removeBigramWords(const int word0Pos, const int word1Pos);
- void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy);
+ void writeToDictFile(const char *const fileName, const HeaderPolicy *const headerPolicy,
+ const int unigramCount, const int bigramCount);
void writeToDictFileWithGC(const int rootPtNodeArrayPos, const char *const fileName,
const HeaderPolicy *const headerPolicy);
@@ -84,11 +90,11 @@
DISALLOW_IMPLICIT_CONSTRUCTORS(DynamicPatriciaTrieWritingHelper);
static const int CHILDREN_POSITION_FIELD_SIZE;
- static const size_t MAX_DICTIONARY_SIZE;
BufferWithExtendableBuffer *const mBuffer;
DynamicBigramListPolicy *const mBigramPolicy;
DynamicShortcutListPolicy *const mShortcutPolicy;
+ const bool mIsDecayingDict;
bool markNodeAsMovedAndSetPosition(const DynamicPatriciaTrieNodeReader *const nodeToUpdate,
const int movedPos, const int bigramLinkedNodePos);
@@ -107,7 +113,7 @@
const int nodeCodePointCount, const int probability, int *const forwardLinkFieldPos);
bool setPtNodeProbability(const DynamicPatriciaTrieNodeReader *const originalNode,
- const int probability, const int *const codePoints);
+ const int probability, const int *const codePoints, bool *const outAddedNewUnigram);
bool createChildrenPtNodeArrayAndAChildPtNode(
const DynamicPatriciaTrieNodeReader *const parentNode, const int probability,
@@ -122,7 +128,10 @@
const int probabilityOfNewPtNode, const int *const newNodeCodePoints,
const int newNodeCodePointCount);
- bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite);
+ bool runGC(const int rootPtNodeArrayPos, BufferWithExtendableBuffer *const bufferToWrite,
+ int *const outUnigramCount, int *const outBigramCount);
+
+ int getUpdatedProbability(const int originalProbability, const int newProbability);
};
} // namespace latinime
#endif /* LATINIME_DYNAMIC_PATRICIA_TRIE_WRITING_HELPER_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
index 7bbeaca..9ce9994 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.cpp
@@ -16,17 +16,16 @@
#include "suggest/policyimpl/dictionary/header/header_policy.h"
-#include <cstddef>
-#include <cstdio>
-#include <ctime>
-
namespace latinime {
-
// Note that these are corresponding definitions in Java side in FormatSpec.FileHeader.
const char *const HeaderPolicy::MULTIPLE_WORDS_DEMOTION_RATE_KEY = "MULTIPLE_WORDS_DEMOTION_RATE";
-const char *const HeaderPolicy::USES_FORGETTING_CURVE_KEY = "USES_FORGETTING_CURVE";
+// TODO: Change attribute string to "IS_DECAYING_DICT".
+const char *const HeaderPolicy::IS_DECAYING_DICT_KEY = "USES_FORGETTING_CURVE";
const char *const HeaderPolicy::LAST_UPDATED_TIME_KEY = "date";
+const char *const HeaderPolicy::UNIGRAM_COUNT_KEY = "UNIGRAM_COUNT";
+const char *const HeaderPolicy::BIGRAM_COUNT_KEY = "BIGRAM_COUNT";
+const char *const HeaderPolicy::EXTENDED_REGION_SIZE_KEY = "EXTENDED_REGION_SIZE";
const int HeaderPolicy::DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE = 100;
const float HeaderPolicy::MULTIPLE_WORD_COST_MULTIPLIER_SCALE = 100.0f;
@@ -55,33 +54,17 @@
}
float HeaderPolicy::readMultipleWordCostMultiplier() const {
- std::vector<int> keyVector;
- HeaderReadWriteUtils::insertCharactersIntoVector(MULTIPLE_WORDS_DEMOTION_RATE_KEY, &keyVector);
const int demotionRate = HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
- &keyVector, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE);
+ MULTIPLE_WORDS_DEMOTION_RATE_KEY, DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE);
if (demotionRate <= 0) {
return static_cast<float>(MAX_VALUE_FOR_WEIGHTING);
}
return MULTIPLE_WORD_COST_MULTIPLIER_SCALE / static_cast<float>(demotionRate);
}
-bool HeaderPolicy::readUsesForgettingCurveFlag() const {
- std::vector<int> keyVector;
- HeaderReadWriteUtils::insertCharactersIntoVector(USES_FORGETTING_CURVE_KEY, &keyVector);
- return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector,
- false /* defaultValue */);
-}
-
-// Returns current time when the key is not found or the value is invalid.
-int HeaderPolicy::readLastUpdatedTime() const {
- std::vector<int> keyVector;
- HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &keyVector);
- return HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap, &keyVector,
- time(0) /* defaultValue */);
-}
-
bool HeaderPolicy::writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
- const bool updatesLastUpdatedTime) const {
+ const bool updatesLastUpdatedTime, const int unigramCount, const int bigramCount,
+ const int extendedRegionSize) const {
int writingPos = 0;
if (!HeaderReadWriteUtils::writeDictionaryVersion(bufferToWrite, mDictFormatVersion,
&writingPos)) {
@@ -97,21 +80,19 @@
&writingPos)) {
return false;
}
+ HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap);
+ HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, UNIGRAM_COUNT_KEY, unigramCount);
+ HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, BIGRAM_COUNT_KEY, bigramCount);
+ HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, EXTENDED_REGION_SIZE_KEY,
+ extendedRegionSize);
if (updatesLastUpdatedTime) {
// Set current time as a last updated time.
- HeaderReadWriteUtils::AttributeMap attributeMapTowrite(mAttributeMap);
- std::vector<int> updatedTimekey;
- HeaderReadWriteUtils::insertCharactersIntoVector(LAST_UPDATED_TIME_KEY, &updatedTimekey);
- HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, &updatedTimekey, time(0));
- if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite,
- &writingPos)) {
- return false;
- }
- } else {
- if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &mAttributeMap,
- &writingPos)) {
- return false;
- }
+ HeaderReadWriteUtils::setIntAttribute(&attributeMapTowrite, LAST_UPDATED_TIME_KEY,
+ time(0));
+ }
+ if (!HeaderReadWriteUtils::writeHeaderAttributes(bufferToWrite, &attributeMapTowrite,
+ &writingPos)) {
+ return false;
}
// Writes an actual header size.
if (!HeaderReadWriteUtils::writeDictionaryHeaderSize(bufferToWrite, writingPos,
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index e97c08c..4261667 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -17,6 +17,7 @@
#ifndef LATINIME_HEADER_POLICY_H
#define LATINIME_HEADER_POLICY_H
+#include <ctime>
#include <stdint.h>
#include "defines.h"
@@ -35,8 +36,16 @@
mSize(HeaderReadWriteUtils::getHeaderSize(dictBuf)),
mAttributeMap(createAttributeMapAndReadAllAttributes(dictBuf)),
mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
- mUsesForgettingCurve(readUsesForgettingCurveFlag()),
- mLastUpdatedTime(readLastUpdatedTime()) {}
+ mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
+ IS_DECAYING_DICT_KEY, false /* defaultValue */)),
+ mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
+ mUnigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ UNIGRAM_COUNT_KEY, 0 /* defaultValue */)),
+ mBigramCount(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ BIGRAM_COUNT_KEY, 0 /* defaultValue */)),
+ mExtendedRegionSize(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ EXTENDED_REGION_SIZE_KEY, 0 /* defaultValue */)) {}
// Constructs header information using an attribute map.
HeaderPolicy(const FormatUtils::FORMAT_VERSION dictFormatVersion,
@@ -44,9 +53,12 @@
: mDictFormatVersion(dictFormatVersion),
mDictionaryFlags(HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
attributeMap)), mSize(0), mAttributeMap(*attributeMap),
- mMultiWordCostMultiplier(readUsesForgettingCurveFlag()),
- mUsesForgettingCurve(readUsesForgettingCurveFlag()),
- mLastUpdatedTime(readLastUpdatedTime()) {}
+ mMultiWordCostMultiplier(readMultipleWordCostMultiplier()),
+ mIsDecayingDict(HeaderReadWriteUtils::readBoolAttributeValue(&mAttributeMap,
+ IS_DECAYING_DICT_KEY, false /* defaultValue */)),
+ mLastUpdatedTime(HeaderReadWriteUtils::readIntAttributeValue(&mAttributeMap,
+ LAST_UPDATED_TIME_KEY, time(0) /* defaultValue */)),
+ mUnigramCount(0), mBigramCount(0), mExtendedRegionSize(0) {}
~HeaderPolicy() {}
@@ -70,26 +82,42 @@
return mMultiWordCostMultiplier;
}
- AK_FORCE_INLINE bool usesForgettingCurve() const {
- return mUsesForgettingCurve;
+ AK_FORCE_INLINE bool isDecayingDict() const {
+ return mIsDecayingDict;
}
AK_FORCE_INLINE int getLastUpdatedTime() const {
return mLastUpdatedTime;
}
+ AK_FORCE_INLINE int getUnigramCount() const {
+ return mUnigramCount;
+ }
+
+ AK_FORCE_INLINE int getBigramCount() const {
+ return mBigramCount;
+ }
+
+ AK_FORCE_INLINE int getExtendedRegionSize() const {
+ return mExtendedRegionSize;
+ }
+
void readHeaderValueOrQuestionMark(const char *const key,
int *outValue, int outValueSize) const;
bool writeHeaderToBuffer(BufferWithExtendableBuffer *const bufferToWrite,
- const bool updatesLastUpdatedTime) const;
+ const bool updatesLastUpdatedTime, const int unigramCount,
+ const int bigramCount, const int extendedRegionSize) const;
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(HeaderPolicy);
static const char *const MULTIPLE_WORDS_DEMOTION_RATE_KEY;
- static const char *const USES_FORGETTING_CURVE_KEY;
+ static const char *const IS_DECAYING_DICT_KEY;
static const char *const LAST_UPDATED_TIME_KEY;
+ static const char *const UNIGRAM_COUNT_KEY;
+ static const char *const BIGRAM_COUNT_KEY;
+ static const char *const EXTENDED_REGION_SIZE_KEY;
static const int DEFAULT_MULTIPLE_WORDS_DEMOTION_RATE;
static const float MULTIPLE_WORD_COST_MULTIPLIER_SCALE;
@@ -98,15 +126,14 @@
const int mSize;
HeaderReadWriteUtils::AttributeMap mAttributeMap;
const float mMultiWordCostMultiplier;
- const bool mUsesForgettingCurve;
+ const bool mIsDecayingDict;
const int mLastUpdatedTime;
+ const int mUnigramCount;
+ const int mBigramCount;
+ const int mExtendedRegionSize;
float readMultipleWordCostMultiplier() const;
- bool readUsesForgettingCurveFlag() const;
-
- int readLastUpdatedTime() const;
-
static HeaderReadWriteUtils::AttributeMap createAttributeMapAndReadAllAttributes(
const uint8_t *const dictBuf);
};
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index 3b1c780..2694ce8 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -68,18 +68,12 @@
/* static */ HeaderReadWriteUtils::DictionaryFlags
HeaderReadWriteUtils::createAndGetDictionaryFlagsUsingAttributeMap(
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
- AttributeMap::key_type key;
- insertCharactersIntoVector(REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, &key);
- const bool requiresGermanUmlautProcessing = readBoolAttributeValue(attributeMap, &key,
- false /* defaultValue */);
- key.clear();
- insertCharactersIntoVector(REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, &key);
- const bool requiresFrenchLigatureProcessing = readBoolAttributeValue(attributeMap, &key,
- false /* defaultValue */);
- key.clear();
- insertCharactersIntoVector(SUPPORTS_DYNAMIC_UPDATE_KEY, &key);
- const bool supportsDynamicUpdate = readBoolAttributeValue(attributeMap, &key,
- false /* defaultValue */);
+ const bool requiresGermanUmlautProcessing = readBoolAttributeValue(attributeMap,
+ REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY, false /* defaultValue */);
+ const bool requiresFrenchLigatureProcessing = readBoolAttributeValue(attributeMap,
+ REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY, false /* defaultValue */);
+ const bool supportsDynamicUpdate = readBoolAttributeValue(attributeMap,
+ SUPPORTS_DYNAMIC_UPDATE_KEY, false /* defaultValue */);
DictionaryFlags dictflags = NO_FLAGS;
dictflags |= requiresGermanUmlautProcessing ? GERMAN_UMLAUT_PROCESSING_FLAG : 0;
dictflags |= requiresFrenchLigatureProcessing ? FRENCH_LIGATURE_PROCESSING_FLAG : 0;
@@ -160,11 +154,18 @@
}
/* static */ void HeaderReadWriteUtils::setBoolAttribute(AttributeMap *const headerAttributes,
- const AttributeMap::key_type *const key, const bool value) {
+ const char *const key, const bool value) {
setIntAttribute(headerAttributes, key, value ? 1 : 0);
}
/* static */ void HeaderReadWriteUtils::setIntAttribute(AttributeMap *const headerAttributes,
+ const char *const key, const int value) {
+ AttributeMap::key_type keyVector;
+ insertCharactersIntoVector(key, &keyVector);
+ setIntAttributeInner(headerAttributes, &keyVector, value);
+}
+
+/* static */ void HeaderReadWriteUtils::setIntAttributeInner(AttributeMap *const headerAttributes,
const AttributeMap::key_type *const key, const int value) {
AttributeMap::mapped_type valueVector;
char charBuf[LARGEST_INT_DIGIT_COUNT + 1];
@@ -174,7 +175,7 @@
}
/* static */ bool HeaderReadWriteUtils::readBoolAttributeValue(
- const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key,
+ const AttributeMap *const headerAttributes, const char *const key,
const bool defaultValue) {
const int intDefaultValue = defaultValue ? 1 : 0;
const int intValue = readIntAttributeValue(headerAttributes, key, intDefaultValue);
@@ -182,6 +183,14 @@
}
/* static */ int HeaderReadWriteUtils::readIntAttributeValue(
+ const AttributeMap *const headerAttributes, const char *const key,
+ const int defaultValue) {
+ AttributeMap::key_type keyVector;
+ insertCharactersIntoVector(key, &keyVector);
+ return readIntAttributeValueInner(headerAttributes, &keyVector, defaultValue);
+}
+
+/* static */ int HeaderReadWriteUtils::readIntAttributeValueInner(
const AttributeMap *const headerAttributes, const AttributeMap::key_type *const key,
const int defaultValue) {
AttributeMap::const_iterator it = headerAttributes->find(*key);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
index caa5097..2259683 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.h
@@ -76,16 +76,16 @@
* Methods for header attributes.
*/
static void setBoolAttribute(AttributeMap *const headerAttributes,
- const AttributeMap::key_type *const key, const bool value);
+ const char *const key, const bool value);
static void setIntAttribute(AttributeMap *const headerAttributes,
- const AttributeMap::key_type *const key, const int value);
+ const char *const key, const int value);
static bool readBoolAttributeValue(const AttributeMap *const headerAttributes,
- const AttributeMap::key_type *const key, const bool defaultValue);
+ const char *const key, const bool defaultValue);
static int readIntAttributeValue(const AttributeMap *const headerAttributes,
- const AttributeMap::key_type *const key, const int defaultValue);
+ const char *const key, const int defaultValue);
static void insertCharactersIntoVector(const char *const characters,
AttributeMap::key_type *const key);
@@ -112,6 +112,12 @@
static const char *const SUPPORTS_DYNAMIC_UPDATE_KEY;
static const char *const REQUIRES_GERMAN_UMLAUT_PROCESSING_KEY;
static const char *const REQUIRES_FRENCH_LIGATURE_PROCESSING_KEY;
+
+ static void setIntAttributeInner(AttributeMap *const headerAttributes,
+ const AttributeMap::key_type *const key, const int value);
+
+ static int readIntAttributeValueInner(const AttributeMap *const headerAttributes,
+ const AttributeMap::key_type *const key, const int defaultValue);
};
}
#endif /* LATINIME_HEADER_READ_WRITE_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
index f1de914..8d88c68 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/patricia_trie_policy.h
@@ -107,12 +107,20 @@
AKLOGI("Warning: flushWithGC() is called for non-updatable dictionary.");
}
- bool needsToRunGC() const {
+ bool needsToRunGC(const bool mindsBlockByGC) const {
// This method should not be called for non-updatable dictionary.
AKLOGI("Warning: needsToRunGC() is called for non-updatable dictionary.");
return false;
}
+ void getProperty(const char *const query, char *const outResult,
+ const int maxResultLength) const {
+ // getProperty is not supported for this class.
+ if (maxResultLength > 0) {
+ outResult[0] = '\0';
+ }
+ }
+
private:
DISALLOW_IMPLICIT_CONSTRUCTORS(PatriciaTriePolicy);
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
index 17d2e39..9dc3482 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/buffer_with_extendable_buffer.h
@@ -42,6 +42,10 @@
return mOriginalBufferSize + mUsedAdditionalBufferSize;
}
+ AK_FORCE_INLINE int getUsedAdditionalBufferSize() const {
+ return mUsedAdditionalBufferSize;
+ }
+
/**
* For reading.
*/
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp
new file mode 100644
index 0000000..942a742
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "suggest/policyimpl/dictionary/utils/decaying_utils.h"
+
+#include "suggest/policyimpl/dictionary/utils/probability_utils.h"
+
+namespace latinime {
+
+const int DecayingUtils::MAX_UNIGRAM_COUNT = 12000;
+const int DecayingUtils::MAX_UNIGRAM_COUNT_AFTER_GC = 10000;
+const int DecayingUtils::MAX_BIGRAM_COUNT = 12000;
+const int DecayingUtils::MAX_BIGRAM_COUNT_AFTER_GC = 10000;
+
+const int DecayingUtils::MAX_COMPUTED_PROBABILITY = 127;
+const int DecayingUtils::MAX_UNIGRAM_PROBABILITY = 120;
+const int DecayingUtils::MIN_VALID_UNIGRAM_PROBABILITY = 24;
+const int DecayingUtils::UNIGRAM_PROBABILITY_STEP = 8;
+const int DecayingUtils::MAX_BIGRAM_PROBABILITY_DELTA = 15;
+const int DecayingUtils::MIN_VALID_BIGRAM_PROBABILITY_DELTA = 3;
+const int DecayingUtils::BIGRAM_PROBABILITY_DELTA_STEP = 1;
+
+/* static */ int DecayingUtils::getProbability(const int encodedUnigramProbability,
+ const int encodedBigramProbabilityDelta) {
+ if (encodedUnigramProbability == NOT_A_PROBABILITY) {
+ return NOT_A_PROBABILITY;
+ } else if (encodedBigramProbabilityDelta == NOT_A_PROBABILITY) {
+ const int rawProbability = ProbabilityUtils::backoff(decodeUnigramProbability(
+ encodedUnigramProbability));
+ return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
+ } else {
+ const int rawProbability = ProbabilityUtils::computeProbabilityForBigram(
+ decodeUnigramProbability(encodedUnigramProbability),
+ decodeBigramProbabilityDelta(encodedBigramProbabilityDelta));
+ return min(getDecayedProbability(rawProbability), MAX_COMPUTED_PROBABILITY);
+ }
+}
+
+/* static */ int DecayingUtils::getUpdatedUnigramProbability(const int originalEncodedProbability,
+ const int newProbability) {
+ if (originalEncodedProbability == NOT_A_PROBABILITY) {
+ // The unigram is not in this dictionary.
+ if (newProbability == NOT_A_PROBABILITY) {
+ // The unigram is not in other dictionaries.
+ return 0;
+ } else {
+ return MIN_VALID_UNIGRAM_PROBABILITY;
+ }
+ } else {
+ if (newProbability != NOT_A_PROBABILITY
+ && originalEncodedProbability < MIN_VALID_UNIGRAM_PROBABILITY) {
+ return MIN_VALID_UNIGRAM_PROBABILITY;
+ }
+ return min(originalEncodedProbability + UNIGRAM_PROBABILITY_STEP, MAX_UNIGRAM_PROBABILITY);
+ }
+}
+
+/* static */ int DecayingUtils::getUnigramProbabilityToSave(const int encodedProbability) {
+ return max(encodedProbability - UNIGRAM_PROBABILITY_STEP, 0);
+}
+
+/* static */ int DecayingUtils::getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta) {
+ return max(encodedProbabilityDelta - BIGRAM_PROBABILITY_DELTA_STEP, 0);
+}
+
+/* static */ int DecayingUtils::getUpdatedBigramProbabilityDelta(
+ const int originalEncodedProbabilityDelta, const int newProbability) {
+ if (originalEncodedProbabilityDelta == NOT_A_PROBABILITY) {
+ // The bigram relation is not in this dictionary.
+ if (newProbability == NOT_A_PROBABILITY) {
+ // The bigram target is not in other dictionaries.
+ return 0;
+ } else {
+ return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
+ }
+ } else {
+ if (newProbability != NOT_A_PROBABILITY
+ && originalEncodedProbabilityDelta < MIN_VALID_BIGRAM_PROBABILITY_DELTA) {
+ return MIN_VALID_BIGRAM_PROBABILITY_DELTA;
+ }
+ return min(originalEncodedProbabilityDelta + BIGRAM_PROBABILITY_DELTA_STEP,
+ MAX_BIGRAM_PROBABILITY_DELTA);
+ }
+}
+
+/* static */ int DecayingUtils::isValidUnigram(const int encodedUnigramProbability) {
+ return encodedUnigramProbability >= MIN_VALID_UNIGRAM_PROBABILITY;
+}
+
+/* static */ int DecayingUtils::isValidBigram(const int encodedBigramProbabilityDelta) {
+ return encodedBigramProbabilityDelta >= MIN_VALID_BIGRAM_PROBABILITY_DELTA;
+}
+
+/* static */ int DecayingUtils::decodeUnigramProbability(const int encodedProbability) {
+ const int probability = encodedProbability - MIN_VALID_UNIGRAM_PROBABILITY;
+ if (probability < 0) {
+ return NOT_A_PROBABILITY;
+ } else {
+ return min(probability, MAX_UNIGRAM_PROBABILITY);
+ }
+}
+
+/* static */ int DecayingUtils::decodeBigramProbabilityDelta(const int encodedProbabilityDelta) {
+ const int probabilityDelta = encodedProbabilityDelta - MIN_VALID_BIGRAM_PROBABILITY_DELTA;
+ if (probabilityDelta < 0) {
+ return NOT_A_PROBABILITY;
+ } else {
+ return min(probabilityDelta, MAX_BIGRAM_PROBABILITY_DELTA);
+ }
+}
+
+/* static */ int DecayingUtils::getDecayedProbability(const int rawProbability) {
+ return rawProbability;
+}
+
+} // namespace latinime
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h
new file mode 100644
index 0000000..1ca0391
--- /dev/null
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/decaying_utils.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (C) 2013, The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DECAYING_UTILS_H
+#define LATINIME_DECAYING_UTILS_H
+
+#include "defines.h"
+
+namespace latinime {
+
+// TODO: Check the elapsed time and decrease the probability depending on the time. Time field is
+// required to introduced to each terminal PtNode and bigram entry.
+// TODO: Quit using bigram probability to indicate the delta.
+// TODO: Quit using bigram probability delta.
+class DecayingUtils {
+ public:
+ static const int MAX_UNIGRAM_COUNT;
+ static const int MAX_UNIGRAM_COUNT_AFTER_GC;
+ static const int MAX_BIGRAM_COUNT;
+ static const int MAX_BIGRAM_COUNT_AFTER_GC;
+
+ static int getProbability(const int encodedUnigramProbability,
+ const int encodedBigramProbabilityDelta);
+
+ static int getUpdatedUnigramProbability(const int originalEncodedProbability,
+ const int newProbability);
+
+ static int getUpdatedBigramProbabilityDelta(const int originalEncodedProbabilityDelta,
+ const int newProbability);
+
+ static int isValidUnigram(const int encodedUnigramProbability);
+
+ static int isValidBigram(const int encodedProbabilityDelta);
+
+ static int getUnigramProbabilityToSave(const int encodedProbability);
+
+ static int getBigramProbabilityDeltaToSave(const int encodedProbabilityDelta);
+
+ private:
+ DISALLOW_IMPLICIT_CONSTRUCTORS(DecayingUtils);
+
+ static const int MAX_COMPUTED_PROBABILITY;
+ static const int MAX_UNIGRAM_PROBABILITY;
+ static const int MIN_VALID_UNIGRAM_PROBABILITY;
+ static const int UNIGRAM_PROBABILITY_STEP;
+ static const int MAX_BIGRAM_PROBABILITY_DELTA;
+ static const int MIN_VALID_BIGRAM_PROBABILITY_DELTA;
+ static const int BIGRAM_PROBABILITY_DELTA_STEP;
+
+ static int decodeUnigramProbability(const int encodedProbability);
+
+ static int decodeBigramProbabilityDelta(const int encodedProbability);
+
+ static int getDecayedProbability(const int rawProbability);
+};
+} // namespace latinime
+#endif /* LATINIME_DECAYING_UTILS_H */
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
index 2e4ec2e..f22e94c 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/dict_file_writing_utils.cpp
@@ -43,7 +43,8 @@
const HeaderReadWriteUtils::AttributeMap *const attributeMap) {
BufferWithExtendableBuffer headerBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
HeaderPolicy headerPolicy(FormatUtils::VERSION_3, attributeMap);
- headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */);
+ headerPolicy.writeHeaderToBuffer(&headerBuffer, true /* updatesLastUpdatedTime */,
+ 0 /* unigramCount */, 0 /* bigramCount */, 0 /* extendedRegionSize */);
BufferWithExtendableBuffer bodyBuffer(0 /* originalBuffer */, 0 /* originalBufferSize */);
if (!DynamicPatriciaTrieWritingUtils::writeEmptyDictionary(&bodyBuffer, 0 /* rootPos */)) {
return false;
@@ -95,7 +96,7 @@
fclose(file);
return false;
}
- const int additionalBufSize = buffer->getTailPosition() - buffer->getOriginalBufferSize();
+ const int additionalBufSize = buffer->getUsedAdditionalBufferSize();
if (additionalBufSize > 0 && fwrite(buffer->getBuffer(true /* usesAdditionalBuffer */),
additionalBufSize, 1, file) < 1) {
fclose(file);
diff --git a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
index 408b12a..5b6b5e8 100644
--- a/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
+++ b/native/jni/src/suggest/policyimpl/typing/typing_weighting.cpp
@@ -47,7 +47,7 @@
case CT_TERMINAL_INSERTION:
case CT_TRANSPOSITION:
return ET_EDIT_CORRECTION;
- case CT_NEW_WORD_SPACE_OMITTION:
+ case CT_NEW_WORD_SPACE_OMISSION:
case CT_NEW_WORD_SPACE_SUBSTITUTION:
return ET_NEW_WORD;
case CT_TERMINAL:
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
new file mode 100644
index 0000000..cf85d97
--- /dev/null
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryDecayingTests.java
@@ -0,0 +1,171 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.android.inputmethod.latin;
+
+import android.test.AndroidTestCase;
+import android.test.suitebuilder.annotation.LargeTest;
+
+import com.android.inputmethod.latin.makedict.FormatSpec;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+@LargeTest
+public class BinaryDictionaryDecayingTests extends AndroidTestCase {
+ private static final String TEST_DICT_FILE_EXTENSION = ".testDict";
+ private static final String TEST_LOCALE = "test";
+
+ private static final int DUMMY_PROBABILITY = 0;
+
+ @Override
+ protected void setUp() throws Exception {
+ super.setUp();
+ }
+
+ @Override
+ protected void tearDown() throws Exception {
+ super.tearDown();
+ }
+
+ private void forcePassingShortTime(final BinaryDictionary binaryDictionary) {
+ binaryDictionary.flushWithGC();
+ }
+
+ private void forcePassingLongTime(final BinaryDictionary binaryDictionary) {
+ // Currently, probabilities are decayed when GC is run. All entries that have never been
+ // typed in 32 GCs are removed.
+ final int count = 32;
+ for (int i = 0; i < count; i++) {
+ binaryDictionary.flushWithGC();
+ }
+ }
+
+ private File createEmptyDictionaryAndGetFile(final String filename) throws IOException {
+ final File file = File.createTempFile(filename, TEST_DICT_FILE_EXTENSION,
+ getContext().getCacheDir());
+ Map<String, String> attributeMap = new HashMap<String, String>();
+ attributeMap.put(FormatSpec.FileHeader.SUPPORTS_DYNAMIC_UPDATE_ATTRIBUTE,
+ FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
+ attributeMap.put(FormatSpec.FileHeader.USES_FORGETTING_CURVE_ATTRIBUTE,
+ FormatSpec.FileHeader.ATTRIBUTE_VALUE_TRUE);
+ if (BinaryDictionary.createEmptyDictFile(file.getAbsolutePath(),
+ 3 /* dictVersion */, attributeMap)) {
+ return file;
+ } else {
+ throw new IOException("Empty dictionary cannot be created.");
+ }
+ }
+
+ public void testAddValidAndInvalidWords() {
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+ binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY);
+ assertFalse(binaryDictionary.isValidWord("a"));
+ binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY);
+ assertFalse(binaryDictionary.isValidWord("a"));
+ binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY);
+ assertFalse(binaryDictionary.isValidWord("a"));
+ binaryDictionary.addUnigramWord("a", Dictionary.NOT_A_PROBABILITY);
+ assertTrue(binaryDictionary.isValidWord("a"));
+
+ binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
+ assertTrue(binaryDictionary.isValidWord("b"));
+
+ final int unigramProbability = binaryDictionary.getFrequency("a");
+ binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
+ assertFalse(binaryDictionary.isValidBigram("a", "b"));
+ binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
+ assertFalse(binaryDictionary.isValidBigram("a", "b"));
+ binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
+ assertFalse(binaryDictionary.isValidBigram("a", "b"));
+ binaryDictionary.addBigramWords("a", "b", Dictionary.NOT_A_PROBABILITY);
+ assertTrue(binaryDictionary.isValidBigram("a", "b"));
+
+ binaryDictionary.addUnigramWord("c", DUMMY_PROBABILITY);
+ binaryDictionary.addBigramWords("a", "c", DUMMY_PROBABILITY);
+ assertTrue(binaryDictionary.isValidBigram("a", "c"));
+
+ binaryDictionary.close();
+ dictFile.delete();
+ }
+
+ // TODO: Add large tests.
+ public void testDecayingProbability() {
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+ BinaryDictionary binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+
+ binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
+ assertTrue(binaryDictionary.isValidWord("a"));
+ forcePassingShortTime(binaryDictionary);
+ assertFalse(binaryDictionary.isValidWord("a"));
+
+ binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
+ binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
+ binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
+ binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
+ forcePassingShortTime(binaryDictionary);
+ assertTrue(binaryDictionary.isValidWord("a"));
+ forcePassingLongTime(binaryDictionary);
+ assertFalse(binaryDictionary.isValidWord("a"));
+
+ binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
+ binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
+ binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY);
+ assertTrue(binaryDictionary.isValidBigram("a", "b"));
+ forcePassingShortTime(binaryDictionary);
+ assertFalse(binaryDictionary.isValidBigram("a", "b"));
+
+ binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
+ binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
+ binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY);
+ binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
+ binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
+ binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY);
+ binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
+ binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
+ binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY);
+ binaryDictionary.addUnigramWord("a", DUMMY_PROBABILITY);
+ binaryDictionary.addUnigramWord("b", DUMMY_PROBABILITY);
+ binaryDictionary.addBigramWords("a", "b", DUMMY_PROBABILITY);
+ assertTrue(binaryDictionary.isValidBigram("a", "b"));
+ forcePassingShortTime(binaryDictionary);
+ assertTrue(binaryDictionary.isValidBigram("a", "b"));
+ forcePassingLongTime(binaryDictionary);
+ assertFalse(binaryDictionary.isValidBigram("a", "b"));
+
+ binaryDictionary.close();
+ dictFile.delete();
+ }
+}
diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
index 7ed3ee1..6a21522 100644
--- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java
@@ -27,6 +27,7 @@
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
@@ -605,7 +606,7 @@
binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
- while(!binaryDictionary.needsToRunGC()) {
+ while(!binaryDictionary.needsToRunGC(true /* mindsBlockByGC */)) {
final String word = CodePointUtils.generateWord(random, codePointSet);
words.add(word);
final int unigramProbability = random.nextInt(0xFF);
@@ -625,4 +626,57 @@
dictFile.delete();
}
+
+ public void testUnigramAndBigramCount() {
+ final int flashWithGCIterationCount = 10;
+ final int codePointSetSize = 50;
+ final int unigramCountPerIteration = 1000;
+ final int bigramCountPerIteration = 2000;
+ final int seed = 1123581321;
+
+ final Random random = new Random(seed);
+
+ File dictFile = null;
+ try {
+ dictFile = createEmptyDictionaryAndGetFile("TestBinaryDictionary");
+ } catch (IOException e) {
+ fail("IOException while writing an initial dictionary : " + e);
+ }
+
+ final ArrayList<String> words = new ArrayList<String>();
+ final HashSet<Pair<String, String>> bigrams = new HashSet<Pair<String, String>>();
+ final int[] codePointSet = CodePointUtils.generateCodePointSet(codePointSetSize, random);
+
+ BinaryDictionary binaryDictionary;
+ for (int i = 0; i < flashWithGCIterationCount; i++) {
+ binaryDictionary = new BinaryDictionary(dictFile.getAbsolutePath(),
+ 0 /* offset */, dictFile.length(), true /* useFullEditDistance */,
+ Locale.getDefault(), TEST_LOCALE, true /* isUpdatable */);
+ for (int j = 0; j < unigramCountPerIteration; j++) {
+ final String word = CodePointUtils.generateWord(random, codePointSet);
+ words.add(word);
+ final int unigramProbability = random.nextInt(0xFF);
+ binaryDictionary.addUnigramWord(word, unigramProbability);
+ }
+ for (int j = 0; j < bigramCountPerIteration; j++) {
+ final String word0 = words.get(random.nextInt(words.size()));
+ final String word1 = words.get(random.nextInt(words.size()));
+ bigrams.add(new Pair<String, String>(word0, word1));
+ final int bigramProbability = random.nextInt(0xF);
+ binaryDictionary.addBigramWords(word0, word1, bigramProbability);
+ }
+ assertEquals(new HashSet<String>(words).size(), Integer.parseInt(
+ binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
+ assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt(
+ binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY)));
+ binaryDictionary.flushWithGC();
+ assertEquals(new HashSet<String>(words).size(), Integer.parseInt(
+ binaryDictionary.getPropertyForTests(BinaryDictionary.UNIGRAM_COUNT_QUERY)));
+ assertEquals(new HashSet<Pair<String, String>>(bigrams).size(), Integer.parseInt(
+ binaryDictionary.getPropertyForTests(BinaryDictionary.BIGRAM_COUNT_QUERY)));
+ binaryDictionary.close();
+ }
+
+ dictFile.delete();
+ }
}
diff --git a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java
index d605cdb..ddc9546 100644
--- a/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java
+++ b/tests/src/com/android/inputmethod/latin/personalization/UserHistoryDictionaryTests.java
@@ -75,38 +75,33 @@
return new ArrayList<String>(wordSet);
}
- private void addToDict(final UserHistoryPredictionDictionary dict, final List<String> words) {
+ private void addToDict(final UserHistoryDictionary dict, final List<String> words) {
String prevWord = null;
for (String word : words) {
- dict.addToPersonalizationPredictionDictionary(prevWord, word, true);
+ dict.addToDictionary(prevWord, word, true);
prevWord = word;
}
}
/**
- * @param checksContents if true, checks whether written words are actually in the dictionary
+ * @param checkContents if true, checks whether written words are actually in the dictionary
* or not.
*/
private void addAndWriteRandomWords(final String testFilenameSuffix, final int numberOfWords,
- final Random random, final boolean checksContents) {
+ final Random random, final boolean checkContents) {
final List<String> words = generateWords(numberOfWords, random);
- final UserHistoryPredictionDictionary dict =
- PersonalizationHelper.getUserHistoryPredictionDictionary(getContext(),
+ final UserHistoryDictionary dict =
+ PersonalizationHelper.getUserHistoryDictionary(getContext(),
testFilenameSuffix /* locale */, mPrefs);
// Add random words to the user history dictionary.
addToDict(dict, words);
- if (checksContents) {
+ if (checkContents) {
try {
Thread.sleep(TimeUnit.MILLISECONDS.convert(5L, TimeUnit.SECONDS));
} catch (InterruptedException e) {
}
- // Limit word count to check when using a Java on memory dictionary.
- final int wordCountToCheck =
- ExpandableBinaryDictionary.ENABLE_BINARY_DICTIONARY_DYNAMIC_UPDATE ?
- numberOfWords : 10;
- for (int i = 0; i < wordCountToCheck; ++i) {
+ for (int i = 0; i < numberOfWords; ++i) {
final String word = words.get(i);
- // This may fail as long as we use tryLock on inserting the bigram words
assertTrue(dict.isInDictionaryForTests(word));
}
}
@@ -127,8 +122,8 @@
true /* checksContents */);
} finally {
try {
- final UserHistoryPredictionDictionary dict =
- PersonalizationHelper.getUserHistoryPredictionDictionary(getContext(),
+ final UserHistoryDictionary dict =
+ PersonalizationHelper.getUserHistoryDictionary(getContext(),
testFilenameSuffix, mPrefs);
Log.d(TAG, "waiting for writing ...");
dict.shutdownExecutorForTests();
@@ -139,7 +134,7 @@
Log.d(TAG, "InterruptedException: " + e);
}
- final String fileName = UserHistoryPredictionDictionary.NAME + "." + testFilenameSuffix
+ final String fileName = UserHistoryDictionary.NAME + "." + testFilenameSuffix
+ ExpandableBinaryDictionary.DICT_FILE_EXTENSION;
dictFile = new File(getContext().getFilesDir(), fileName);
@@ -164,7 +159,7 @@
// Create filename suffixes for this test.
for (int i = 0; i < numberOfLanguages; i++) {
testFilenameSuffixes[i] = "testSwitchingLanguages" + i;
- final String fileName = UserHistoryPredictionDictionary.NAME + "." +
+ final String fileName = UserHistoryDictionary.NAME + "." +
testFilenameSuffixes[i] + ExpandableBinaryDictionary.DICT_FILE_EXTENSION;
dictFiles[i] = new File(getContext().getFilesDir(), fileName);
}
@@ -186,8 +181,8 @@
try {
Log.d(TAG, "waiting for writing ...");
for (int i = 0; i < numberOfLanguages; i++) {
- final UserHistoryPredictionDictionary dict =
- PersonalizationHelper.getUserHistoryPredictionDictionary(getContext(),
+ final UserHistoryDictionary dict =
+ PersonalizationHelper.getUserHistoryDictionary(getContext(),
testFilenameSuffixes[i], mPrefs);
dict.shutdownExecutorForTests();
while (!dict.isTerminatedForTests()) {
@@ -215,8 +210,8 @@
10000 : 1000;
final Random random = new Random(123456);
- UserHistoryPredictionDictionary dict =
- PersonalizationHelper.getUserHistoryPredictionDictionary(getContext(),
+ UserHistoryDictionary dict =
+ PersonalizationHelper.getUserHistoryDictionary(getContext(),
testFilenameSuffix, mPrefs);
try {
addAndWriteRandomWords(testFilenameSuffix, numberOfWords, random,
@@ -232,7 +227,7 @@
} catch (InterruptedException e) {
Log.d(TAG, "InterruptedException: ", e);
}
- final String fileName = UserHistoryPredictionDictionary.NAME + "." + testFilenameSuffix
+ final String fileName = UserHistoryDictionary.NAME + "." + testFilenameSuffix
+ ExpandableBinaryDictionary.DICT_FILE_EXTENSION;
dictFile = new File(getContext().getFilesDir(), fileName);
if (dictFile != null) {
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
index 6c4cbcf..bd06e9f 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java
@@ -80,17 +80,17 @@
}
/**
- * Returns a decrypted/uncompressed binary dictionary.
+ * Returns a decrypted/uncompressed dictionary.
*
- * This will decrypt/uncompress any number of times as necessary until it finds the binary
+ * This will decrypt/uncompress any number of times as necessary until it finds the
* dictionary signature, and copy the decoded file to a temporary place.
- * If this is not a binary dictionary, the method returns null.
+ * If this is not a dictionary, the method returns null.
*/
- public static DecoderChainSpec getRawBinaryDictionaryOrNull(final File src) {
- return getRawBinaryDictionaryOrNullInternal(new DecoderChainSpec(), src, 0);
+ public static DecoderChainSpec getRawDictionaryOrNull(final File src) {
+ return getRawDictionaryOrNullInternal(new DecoderChainSpec(), src, 0);
}
- private static DecoderChainSpec getRawBinaryDictionaryOrNullInternal(
+ private static DecoderChainSpec getRawDictionaryOrNullInternal(
final DecoderChainSpec spec, final File src, final int depth) {
// Unfortunately the decoding scheme we use can consider any data to be encrypted
// and will product some output, meaning it's not possible to reliably detect encrypted
@@ -98,7 +98,8 @@
// over and over, ending in a stack overflow. Hence we limit the depth at which we try
// decoding the file.
if (depth > MAX_DECODE_DEPTH) return null;
- if (BinaryDictDecoderUtils.isBinaryDictionary(src)) {
+ if (BinaryDictDecoderUtils.isBinaryDictionary(src)
+ || CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) {
spec.mFile = src;
return spec;
}
@@ -106,7 +107,7 @@
final File uncompressedFile = tryGetUncompressedFile(src);
if (null != uncompressedFile) {
final DecoderChainSpec newSpec =
- getRawBinaryDictionaryOrNullInternal(spec, uncompressedFile, depth + 1);
+ getRawDictionaryOrNullInternal(spec, uncompressedFile, depth + 1);
if (null == newSpec) return null;
return newSpec.addStep(COMPRESSION);
}
@@ -114,7 +115,7 @@
final File decryptedFile = tryGetDecryptedFile(src);
if (null != decryptedFile) {
final DecoderChainSpec newSpec =
- getRawBinaryDictionaryOrNullInternal(spec, decryptedFile, depth + 1);
+ getRawDictionaryOrNullInternal(spec, decryptedFile, depth + 1);
if (null == newSpec) return null;
return newSpec.addStep(ENCRYPTION);
}
@@ -175,15 +176,16 @@
return XmlDictInputOutput.readDictionaryXml(
new BufferedInputStream(new FileInputStream(file)),
null /* shortcuts */, null /* bigrams */);
- } else if (CombinedInputOutput.isCombinedDictionary(filename)) {
- if (report) System.out.println("Format : Combined format");
- return CombinedInputOutput.readDictionaryCombined(
- new BufferedInputStream(new FileInputStream(file)));
} else {
- final DecoderChainSpec decodedSpec = getRawBinaryDictionaryOrNull(file);
+ final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file);
if (null == decodedSpec) {
crash(filename, new RuntimeException(
filename + " does not seem to be a dictionary file"));
+ } else if (CombinedInputOutput.isCombinedDictionary(
+ decodedSpec.mFile.getAbsolutePath())){
+ if (report) System.out.println("Format : Combined format");
+ return CombinedInputOutput.readDictionaryCombined(
+ new BufferedInputStream(new FileInputStream(decodedSpec.mFile)));
} else {
final DictDecoder dictDecoder = FormatSpec.getDictDecoder(decodedSpec.mFile,
DictDecoder.USE_BYTEARRAY);
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
index 9274dcd..dff3387 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Package.java
@@ -79,7 +79,7 @@
throw new RuntimeException("Too many/too few arguments for command " + COMMAND);
}
final BinaryDictOffdeviceUtils.DecoderChainSpec decodedSpec =
- BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(new File(mArgs[0]));
+ BinaryDictOffdeviceUtils.getRawDictionaryOrNull(new File(mArgs[0]));
if (null == decodedSpec) {
System.out.println(mArgs[0] + " does not seem to be a dictionary");
return;
diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
index 1eff497..1baeb7a 100644
--- a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
+++ b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java
@@ -64,7 +64,7 @@
// Test for an actually compressed dictionary and its contents
final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec =
- BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst);
+ BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst);
for (final String step : decodeSpec.mDecoderSpec) {
assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step);
}
@@ -90,7 +90,7 @@
// Test that a random data file actually fails
assertNull("Wrongly identified data file",
- BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst));
+ BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst));
final File gzDst = File.createTempFile("testGetRawDict", ".tmp");
gzDst.deleteOnExit();
@@ -103,6 +103,6 @@
// Test that a compressed random data file actually fails
assertNull("Wrongly identified data file",
- BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(gzDst));
+ BinaryDictOffdeviceUtils.getRawDictionaryOrNull(gzDst));
}
}