Merge "Using "blacklist" flag as "possibly offensive""
diff --git a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
index 78d79ae..a450032 100644
--- a/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
+++ b/java/src/com/android/inputmethod/latin/makedict/FormatSpec.java
@@ -171,6 +171,7 @@
     // ExpandableDictionary.matchesExpectedBinaryDictFormatVersionForThisType().
     public static final int VERSION2 = 2;
     public static final int VERSION201 = 201;
+    public static final int VERSION202 = 202;
     public static final int MINIMUM_SUPPORTED_VERSION_OF_CODE_POINT_TABLE = VERSION201;
     // Dictionary version used for testing.
     public static final int VERSION4_ONLY_FOR_TESTING = 399;
diff --git a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
index 4e0f5f5..8699f2c 100644
--- a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
+++ b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java
@@ -36,7 +36,8 @@
     public static final String WORD_TAG = "word";
     public static final String BEGINNING_OF_SENTENCE_TAG = "beginning_of_sentence";
     public static final String NOT_A_WORD_TAG = "not_a_word";
-    public static final String BLACKLISTED_TAG = "blacklisted";
+    public static final String POSSIBLY_OFFENSIVE_TAG = "possibly_offensive";
+    public static final String TRUE_VALUE = "true";
 
     public static String formatAttributeMap(final HashMap<String, String> attributeMap) {
         final StringBuilder builder = new StringBuilder();
@@ -61,13 +62,13 @@
         builder.append(",");
         builder.append(formatProbabilityInfo(wordProperty.mProbabilityInfo));
         if (wordProperty.mIsBeginningOfSentence) {
-            builder.append("," + BEGINNING_OF_SENTENCE_TAG + "=true");
+            builder.append("," + BEGINNING_OF_SENTENCE_TAG + "=" + TRUE_VALUE);
         }
         if (wordProperty.mIsNotAWord) {
-            builder.append("," + NOT_A_WORD_TAG + "=true");
+            builder.append("," + NOT_A_WORD_TAG + "=" + TRUE_VALUE);
         }
         if (wordProperty.mIsPossiblyOffensive) {
-            builder.append("," + BLACKLISTED_TAG + "=true");
+            builder.append("," + POSSIBLY_OFFENSIVE_TAG + "=" + TRUE_VALUE);
         }
         builder.append("\n");
         if (wordProperty.mHasShortcuts) {
@@ -111,4 +112,8 @@
         }
         return builder.toString();
     }
+
+    public static boolean isLiteralTrue(final String value) {
+        return TRUE_VALUE.equalsIgnoreCase(value);
+    }
 }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
index 44c2f44..abc7f99 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_policy.h
@@ -134,9 +134,11 @@
         // same so we use them for both here.
         switch (mDictFormatVersion) {
             case FormatUtils::VERSION_2:
-                return FormatUtils::VERSION_2;
             case FormatUtils::VERSION_201:
-                return FormatUtils::VERSION_201;
+                AKLOGE("Dictionary versions 2 and 201 are incompatible with this version");
+                return FormatUtils::UNKNOWN_VERSION;
+            case FormatUtils::VERSION_202:
+                return FormatUtils::VERSION_202;
             case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
                 return FormatUtils::VERSION_4_ONLY_FOR_TESTING;
             case FormatUtils::VERSION_4:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
index 41a8b13..d69a53f 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/header/header_read_write_utils.cpp
@@ -111,7 +111,8 @@
     switch (version) {
         case FormatUtils::VERSION_2:
         case FormatUtils::VERSION_201:
-            // Version 2 or 201 dictionary writing is not supported.
+        case FormatUtils::VERSION_202:
+            // None of the static dictionary formats (v2xx) support writing.
             return false;
         case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
         case FormatUtils::VERSION_4:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
index 08e39ce..9455222 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/backward/v402/ver4_patricia_trie_policy.cpp
@@ -140,7 +140,7 @@
 
 const WordAttributes Ver4PatriciaTriePolicy::getWordAttributes(const int probability,
         const PtNodeParams &ptNodeParams) const {
-    return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(),
+    return WordAttributes(probability, false /* isBlacklisted */, ptNodeParams.isNotAWord(),
             ptNodeParams.getProbability() == 0);
 }
 
@@ -164,7 +164,7 @@
     }
     const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
     const PtNodeParams ptNodeParams(mNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos));
-    if (ptNodeParams.isDeleted() || ptNodeParams.isBlacklisted() || ptNodeParams.isNotAWord()) {
+    if (ptNodeParams.isDeleted() || ptNodeParams.isNotAWord()) {
         return NOT_A_PROBABILITY;
     }
     if (prevWordIds.empty()) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
index 372c9e3..a19a384 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/dictionary_structure_with_buffer_policy_factory.cpp
@@ -115,7 +115,8 @@
     switch (formatVersion) {
         case FormatUtils::VERSION_2:
         case FormatUtils::VERSION_201:
-            AKLOGE("Given path is a directory but the format is version 2 or 201. path: %s", path);
+        case FormatUtils::VERSION_202:
+            AKLOGE("Given path is a directory but the format is version 2xx. path: %s", path);
             break;
         case FormatUtils::VERSION_4: {
             return newPolicyForV4Dict<backward::v402::Ver4DictConstants,
@@ -177,6 +178,9 @@
     switch (FormatUtils::detectFormatVersion(mmappedBuffer->getReadOnlyByteArrayView())) {
         case FormatUtils::VERSION_2:
         case FormatUtils::VERSION_201:
+            AKLOGE("Dictionary versions 2 and 201 are incompatible with this version");
+            break;
+        case FormatUtils::VERSION_202:
             return DictionaryStructureWithBufferPolicy::StructurePolicyPtr(
                     new PatriciaTriePolicy(std::move(mmappedBuffer)));
         case FormatUtils::VERSION_4_ONLY_FOR_TESTING:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
index 585e87a..e52706e 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/pt_common/pt_node_params.h
@@ -144,17 +144,6 @@
         return PatriciaTrieReadingUtils::isTerminal(mFlags);
     }
 
-    AK_FORCE_INLINE bool isBlacklisted() const {
-        // Note: this method will be removed in the next change.
-        // It is used in getProbabilityOfWord and getWordAttributes for both v402 and v403.
-        // * getProbabilityOfWord will be changed to no longer return NOT_A_PROBABILITY
-        //   when isBlacklisted (i.e. to only check if isNotAWord or isDeleted)
-        // * getWordAttributes will be changed to always return blacklisted=false and
-        //   isPossiblyOffensive according to the function below (instead of the current
-        //   behaviour of checking if the probability is zero)
-        return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
-    }
-
     AK_FORCE_INLINE bool isPossiblyOffensive() const {
         return PatriciaTrieReadingUtils::isPossiblyOffensive(mFlags);
     }
diff --git a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
index 66fd18a..5987361 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.cpp
@@ -14,7 +14,6 @@
  * limitations under the License.
  */
 
-
 #include "suggest/policyimpl/dictionary/structure/v2/patricia_trie_policy.h"
 
 #include "defines.h"
@@ -317,8 +316,8 @@
 
 const WordAttributes PatriciaTriePolicy::getWordAttributes(const int probability,
         const PtNodeParams &ptNodeParams) const {
-    return WordAttributes(probability, ptNodeParams.isBlacklisted(), ptNodeParams.isNotAWord(),
-            ptNodeParams.getProbability() == 0);
+    return WordAttributes(probability, false /* isBlacklisted */, ptNodeParams.isNotAWord(),
+            ptNodeParams.isPossiblyOffensive());
 }
 
 int PatriciaTriePolicy::getProbability(const int unigramProbability,
@@ -345,10 +344,9 @@
     const int ptNodePos = getTerminalPtNodePosFromWordId(wordId);
     const PtNodeParams ptNodeParams =
             mPtNodeReader.fetchPtNodeParamsInBufferFromPtNodePos(ptNodePos);
-    if (ptNodeParams.isNotAWord() || ptNodeParams.isBlacklisted()) {
-        // If this is not a word, or if it's a blacklisted entry, it should behave as
-        // having no probability outside of the suggestion process (where it should be used
-        // for shortcuts).
+    if (ptNodeParams.isNotAWord()) {
+        // If this is not a word, it should behave as having no probability outside of the
+        // suggestion process (where it should be used for shortcuts).
         return NOT_A_PROBABILITY;
     }
     if (!prevWordIds.empty()) {
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
index 0cffe56..8b47147 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.cpp
@@ -28,9 +28,11 @@
 /* static */ FormatUtils::FORMAT_VERSION FormatUtils::getFormatVersion(const int formatVersion) {
     switch (formatVersion) {
         case VERSION_2:
-            return VERSION_2;
         case VERSION_201:
-            return VERSION_201;
+            AKLOGE("Dictionary versions 2 and 201 are incompatible with this version");
+            return UNKNOWN_VERSION;
+        case VERSION_202:
+            return VERSION_202;
         case VERSION_4_ONLY_FOR_TESTING:
             return VERSION_4_ONLY_FOR_TESTING;
         case VERSION_4:
diff --git a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
index 9631008..05bd7eb 100644
--- a/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
+++ b/native/jni/src/suggest/policyimpl/dictionary/utils/format_utils.h
@@ -31,8 +31,12 @@
  public:
     enum FORMAT_VERSION {
         // These MUST have the same values as the relevant constants in FormatSpec.java.
+        // TODO: Remove VERSION_2 and VERSION_201 once:
+        // * We have confirmed that old versions of LatinIME download old-format dictionaries
+        // * We no longer need the corresponding constants on the Java side for dicttool
         VERSION_2 = 2,
         VERSION_201 = 201,
+        VERSION_202 = 202,
         VERSION_4_ONLY_FOR_TESTING = 399,
         VERSION_4 = 402,
         VERSION_4_DEV = 403,
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
index 457e7af..5c261a9 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java
@@ -178,7 +178,8 @@
             throw new IOException("Cannot read the dictionary header.");
         }
         if (header.mFormatOptions.mVersion != FormatSpec.VERSION2 &&
-                header.mFormatOptions.mVersion != FormatSpec.VERSION201) {
+                header.mFormatOptions.mVersion != FormatSpec.VERSION201 &&
+                header.mFormatOptions.mVersion != FormatSpec.VERSION202) {
             throw new UnsupportedFormatException("File header has a wrong version : "
                     + header.mFormatOptions.mVersion);
         }
diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
index 2c2152b..b52b8c4 100644
--- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
+++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java
@@ -124,7 +124,8 @@
     @Override
     public void writeDictionary(final FusionDictionary dict, final FormatOptions formatOptions)
             throws IOException, UnsupportedFormatException {
-        if (formatOptions.mVersion > FormatSpec.VERSION201) {
+        // We no longer support anything but the latest version of v2.
+        if (formatOptions.mVersion != FormatSpec.VERSION202) {
             throw new UnsupportedFormatException(
                     "The given format options has wrong version number : "
                     + formatOptions.mVersion);
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
index 48d2e59..955c572 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java
@@ -98,6 +98,7 @@
         String word = null;
         ProbabilityInfo probabilityInfo = new ProbabilityInfo(0);
         boolean isNotAWord = false;
+        boolean isPossiblyOffensive = false;
         ArrayList<WeightedString> bigrams = new ArrayList<>();
         ArrayList<WeightedString> shortcuts = new ArrayList<>();
         while (null != (line = reader.readLine())) {
@@ -106,7 +107,7 @@
             if (args[0].matches(CombinedFormatUtils.WORD_TAG + "=.*")) {
                 if (null != word) {
                     dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts,
-                            isNotAWord, false /* isPossiblyOffensive */);
+                            isNotAWord, isPossiblyOffensive);
                     for (WeightedString s : bigrams) {
                         dict.setBigram(word, s.mWord, s.mProbabilityInfo);
                     }
@@ -114,27 +115,37 @@
                 if (!shortcuts.isEmpty()) shortcuts = new ArrayList<>();
                 if (!bigrams.isEmpty()) bigrams = new ArrayList<>();
                 isNotAWord = false;
+                isPossiblyOffensive = false;
                 for (String param : args) {
                     final String params[] = param.split("=", 2);
                     if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
-                    if (CombinedFormatUtils.WORD_TAG.equals(params[0])) {
-                        word = params[1];
-                    } else if (CombinedFormatUtils.PROBABILITY_TAG.equals(params[0])) {
-                        probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]),
-                                probabilityInfo.mTimestamp, probabilityInfo.mLevel,
-                                probabilityInfo.mCount);
-                    } else if (CombinedFormatUtils.HISTORICAL_INFO_TAG.equals(params[0])) {
-                        final String[] historicalInfoParams =
-                                params[1].split(CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
-                        if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
-                            throw new RuntimeException("Wrong format (historical info) : " + line);
-                        }
-                        probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability,
-                                Integer.parseInt(historicalInfoParams[0]),
-                                Integer.parseInt(historicalInfoParams[1]),
-                                Integer.parseInt(historicalInfoParams[2]));
-                    } else if (CombinedFormatUtils.NOT_A_WORD_TAG.equals(params[0])) {
-                        isNotAWord = "true".equals(params[1]);
+                    switch (params[0]) {
+                        case CombinedFormatUtils.WORD_TAG:
+                            word = params[1];
+                            break;
+                        case CombinedFormatUtils.PROBABILITY_TAG:
+                            probabilityInfo = new ProbabilityInfo(Integer.parseInt(params[1]),
+                                    probabilityInfo.mTimestamp, probabilityInfo.mLevel,
+                                    probabilityInfo.mCount);
+                            break;
+                        case CombinedFormatUtils.HISTORICAL_INFO_TAG:
+                            final String[] historicalInfoParams = params[1].split(
+                                    CombinedFormatUtils.HISTORICAL_INFO_SEPARATOR);
+                            if (historicalInfoParams.length != HISTORICAL_INFO_ELEMENT_COUNT) {
+                                throw new RuntimeException("Wrong format (historical info) : "
+                                        + line);
+                            }
+                            probabilityInfo = new ProbabilityInfo(probabilityInfo.mProbability,
+                                    Integer.parseInt(historicalInfoParams[0]),
+                                    Integer.parseInt(historicalInfoParams[1]),
+                                    Integer.parseInt(historicalInfoParams[2]));
+                            break;
+                        case CombinedFormatUtils.NOT_A_WORD_TAG:
+                            isNotAWord = CombinedFormatUtils.isLiteralTrue(params[1]);
+                            break;
+                        case CombinedFormatUtils.POSSIBLY_OFFENSIVE_TAG:
+                            isPossiblyOffensive = CombinedFormatUtils.isLiteralTrue(params[1]);
+                            break;
                     }
                 }
             } else if (args[0].matches(CombinedFormatUtils.SHORTCUT_TAG + "=.*")) {
@@ -190,7 +201,7 @@
         }
         if (null != word) {
             dict.add(word, probabilityInfo, shortcuts.isEmpty() ? null : shortcuts, isNotAWord,
-                    false /* isPossiblyOffensive */);
+                    isPossiblyOffensive);
             for (WeightedString s : bigrams) {
                 dict.setBigram(word, s.mWord, s.mProbabilityInfo);
             }
diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
index 8f9e4a3..6187853 100644
--- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
+++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java
@@ -120,7 +120,7 @@
             String inputCombined = null;
             String outputBinary = null;
             String outputCombined = null;
-            int outputBinaryFormatVersion = FormatSpec.VERSION201; // the default version is 201.
+            int outputBinaryFormatVersion = FormatSpec.VERSION202; // the default version is 202.
             // Don't use code point table by default.
             int codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_OFF;