Introduce OffdeviceIntermediateDict for dicttolkit.

Bug: 10059681
Change-Id: Ib6e9019502b59dd959c04c8f4996ca932c2b1ba8
diff --git a/native/dicttoolkit/NativeFileList.mk b/native/dicttoolkit/NativeFileList.mk
index 925ec45..b39a248 100644
--- a/native/dicttoolkit/NativeFileList.mk
+++ b/native/dicttoolkit/NativeFileList.mk
@@ -22,8 +22,13 @@
         help_executor.cpp \
         info_executor.cpp \
         makedict_executor.cpp) \
+    $(addprefix offdevice_intermediate_dict/, \
+        offdevice_intermediate_dict.cpp) \
     utils/command_utils.cpp
 
 LATIN_IME_DICT_TOOLKIT_TEST_FILES := \
     dict_toolkit_defines_test.cpp \
-    utils/command_utils_test.cpp
+    $(addprefix offdevice_intermediate_dict/, \
+        offdevice_intermediate_dict_test.cpp) \
+    $(addprefix utils/, \
+        command_utils_test.cpp)
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp
new file mode 100644
index 0000000..af28131
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.cpp
@@ -0,0 +1,126 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h"
+
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+bool OffdeviceIntermediateDict::addWord(const WordProperty &wordProperty) {
+    const CodePointArrayView codePoints = wordProperty.getCodePoints();
+    if (codePoints.empty() || codePoints.size() > MAX_WORD_LENGTH) {
+        return false;
+    }
+    return addWordInner(codePoints, wordProperty, mRootPtNodeArray);
+}
+
+bool OffdeviceIntermediateDict::addWordInner(const CodePointArrayView codePoints,
+        const WordProperty &wordProperty, OffdeviceIntermediateDictPtNodeArray &ptNodeArray) {
+    auto ptNodeList = ptNodeArray.getMutablePtNodeList();
+    auto ptNodeIt = ptNodeList->begin();
+    for (; ptNodeIt != ptNodeList->end(); ++ptNodeIt) {
+        const auto &ptNode = *ptNodeIt;
+        const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
+        if (codePoints[0] < ptNodeCodePoints[0]) {
+            continue;
+        }
+        if (codePoints[0] > ptNodeCodePoints[0]) {
+            break;
+        }
+        size_t i = 1;
+        for (; i < codePoints.size(); ++i) {
+            if (i >= ptNodeCodePoints.size()) {
+                // Add new child.
+                return addWordInner(codePoints.skip(i), wordProperty,
+                        ptNode->getChildrenPtNodeArray());
+            }
+            if (codePoints[i] != ptNodeCodePoints[i]) {
+                break;
+            }
+        }
+        if (codePoints.size() == i && codePoints.size() == ptNodeCodePoints.size()) {
+            // All code points matched.
+            if (ptNode->getWordProperty()) {
+                //  Adding the same word multiple times is not supported.
+                return false;
+            }
+            ptNodeList->insert(ptNodeIt,
+                    std::make_shared<OffdeviceIntermediateDictPtNode>(wordProperty, *ptNode));
+            ptNodeList->erase(ptNodeIt);
+            return true;
+        }
+        // The (i+1)-th elements are different.
+        // Create and Add new parent ptNode for the common part.
+        auto newPtNode = codePoints.size() == i
+                ? std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty)
+                : std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints.limit(i));
+        ptNodeList->insert(ptNodeIt, newPtNode);
+        OffdeviceIntermediateDictPtNodeArray &childrenPtNodeArray =
+                newPtNode->getChildrenPtNodeArray();
+        // Add new child for the existing ptNode.
+        childrenPtNodeArray.getMutablePtNodeList()->push_back(
+                std::make_shared<OffdeviceIntermediateDictPtNode>(
+                        ptNodeCodePoints.skip(i), *ptNode));
+        ptNodeList->erase(ptNodeIt);
+        if (codePoints.size() != i) {
+            // Add a child for the new word.
+            return addWordInner(codePoints.skip(i), wordProperty, childrenPtNodeArray);
+        }
+        return true;
+    }
+    ptNodeList->insert(ptNodeIt,
+            std::make_shared<OffdeviceIntermediateDictPtNode>(codePoints, wordProperty));
+    return true;
+}
+
+const WordProperty *OffdeviceIntermediateDict::getWordProperty(
+        const CodePointArrayView codePoints) const {
+    const OffdeviceIntermediateDictPtNodeArray *ptNodeArray = &mRootPtNodeArray;
+    for (size_t i = 0; i < codePoints.size();) {
+        bool foundNext = false;
+        for (const auto ptNode : ptNodeArray->getPtNodeList()) {
+            const CodePointArrayView ptNodeCodePoints = ptNode->getPtNodeCodePoints();
+            if (codePoints[i] < ptNodeCodePoints[0]) {
+                continue;
+            }
+            if (codePoints[i] > ptNodeCodePoints[0]
+                     || codePoints.size() < ptNodeCodePoints.size()) {
+                return nullptr;
+            }
+            for (size_t j = 1; j < ptNodeCodePoints.size(); ++j) {
+                if (codePoints[i + j] != ptNodeCodePoints[j]) {
+                    return nullptr;
+                }
+            }
+            i += ptNodeCodePoints.size();
+            if (i == codePoints.size()) {
+                return ptNode->getWordProperty();
+            }
+            ptNodeArray = &ptNode->getChildrenPtNodeArray();
+            foundNext = true;
+            break;
+        }
+        if (!foundNext) {
+            break;
+        }
+    }
+    return nullptr;
+}
+
+} // namespace dicttoolkit
+} // namespace latinime
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h
new file mode 100644
index 0000000..dfdb6a8
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
+
+#include "dict_toolkit_defines.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
+#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+/**
+ * On memory patricia trie to represent a dictionary.
+ */
+class OffdeviceIntermediateDict final {
+ public:
+    bool addWord(const WordProperty &wordProperty);
+    // The returned value will be invalid after modifying the dictionary. e.g. calling addWord().
+    const WordProperty *getWordProperty(const CodePointArrayView codePoints) const;
+
+ private:
+    DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDict);
+
+    OffdeviceIntermediateDictPtNodeArray mRootPtNodeArray;
+
+    bool addWordInner(const CodePointArrayView codePoints, const WordProperty &wordProperty,
+            OffdeviceIntermediateDictPtNodeArray &ptNodeArray);
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h
new file mode 100644
index 0000000..721ccd7
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
+
+#include <memory>
+
+#include "dict_toolkit_defines.h"
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h"
+#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+class OffdeviceIntermediateDictPtNode final {
+ public:
+    // Non-terminal
+    OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints)
+            : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
+              mWortProperty(nullptr) {}
+
+    // Terminal
+    OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
+            const WordProperty &wordProperty)
+             : mPtNodeCodePoints(ptNodeCodePoints.toVector()), mChildrenPtNodeArray(),
+               mWortProperty(new WordProperty(wordProperty)) {}
+
+    // Replacing PtNodeCodePoints.
+    OffdeviceIntermediateDictPtNode(const CodePointArrayView ptNodeCodePoints,
+            const OffdeviceIntermediateDictPtNode &ptNode)
+            : mPtNodeCodePoints(ptNodeCodePoints.toVector()),
+              mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
+              mWortProperty(new WordProperty(*ptNode.mWortProperty)) {}
+
+    // Replacing WordProperty.
+    OffdeviceIntermediateDictPtNode(const WordProperty &wordProperty,
+            const OffdeviceIntermediateDictPtNode &ptNode)
+            : mPtNodeCodePoints(ptNode.mPtNodeCodePoints),
+              mChildrenPtNodeArray(ptNode.mChildrenPtNodeArray),
+              mWortProperty(new WordProperty(wordProperty)) {}
+
+    const WordProperty *getWordProperty() const {
+        return mWortProperty.get();
+    }
+
+    const CodePointArrayView getPtNodeCodePoints() const {
+        return CodePointArrayView(mPtNodeCodePoints);
+    }
+
+    OffdeviceIntermediateDictPtNodeArray &getChildrenPtNodeArray() {
+        return mChildrenPtNodeArray;
+    }
+
+ private:
+    DISALLOW_COPY_AND_ASSIGN(OffdeviceIntermediateDictPtNode);
+
+    const std::vector<int> mPtNodeCodePoints;
+    OffdeviceIntermediateDictPtNodeArray mChildrenPtNodeArray;
+    const std::unique_ptr<WordProperty> mWortProperty;
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_H
diff --git a/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h
new file mode 100644
index 0000000..f87456c
--- /dev/null
+++ b/native/dicttoolkit/src/offdevice_intermediate_dict/offdevice_intermediate_dict_pt_node_array.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
+#define LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
+
+#include <list>
+#include <memory>
+
+#include "dict_toolkit_defines.h"
+
+namespace latinime {
+namespace dicttoolkit {
+
+class OffdeviceIntermediateDictPtNode;
+
+class OffdeviceIntermediateDictPtNodeArray final {
+ public:
+    const std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> &getPtNodeList() const {
+        return mPtNodes;
+    }
+
+    std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> *getMutablePtNodeList() {
+        return &mPtNodes;
+    }
+
+ private:
+    DISALLOW_ASSIGNMENT_OPERATOR(OffdeviceIntermediateDictPtNodeArray);
+
+    std::list<std::shared_ptr<OffdeviceIntermediateDictPtNode>> mPtNodes;
+};
+
+} // namespace dicttoolkit
+} // namespace latinime
+#endif // LATINIME_DICT_TOOLKIT_OFFDEVICE_INTERMEDIATE_DICT_PT_NODE_ARRAY_H
diff --git a/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp
new file mode 100644
index 0000000..3bcb89e
--- /dev/null
+++ b/native/dicttoolkit/tests/offdevice_intermediate_dict/offdevice_intermediate_dict_test.cpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (C) 2014 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "offdevice_intermediate_dict/offdevice_intermediate_dict.h"
+
+#include <gtest/gtest.h>
+
+#include <vector>
+
+#include "suggest/core/dictionary/property/word_property.h"
+#include "utils/int_array_view.h"
+
+namespace latinime {
+namespace dicttoolkit {
+namespace {
+
+const std::vector<int> getCodePointVector(const char *str) {
+    std::vector<int> codePoints;
+    while (*str) {
+        codePoints.push_back(*str);
+        ++str;
+    }
+    return codePoints;
+}
+
+const WordProperty getDummpWordProperty(const std::vector<int> &&codePoints) {
+    return WordProperty(std::move(codePoints), UnigramProperty(), std::vector<NgramProperty>());
+}
+
+TEST(OffdeviceIntermediateDictTest, TestAddWordProperties) {
+    OffdeviceIntermediateDict dict;
+    EXPECT_EQ(nullptr, dict.getWordProperty(CodePointArrayView()));
+
+    const WordProperty wordProperty0 = getDummpWordProperty(getCodePointVector("abcd"));
+    EXPECT_TRUE(dict.addWord(wordProperty0));
+    EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints()));
+
+    const WordProperty wordProperty1 = getDummpWordProperty(getCodePointVector("efgh"));
+    EXPECT_TRUE(dict.addWord(wordProperty1));
+    EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints()));
+
+    const WordProperty wordProperty2 = getDummpWordProperty(getCodePointVector("ab"));
+    EXPECT_TRUE(dict.addWord(wordProperty2));
+    EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints()));
+
+    const WordProperty wordProperty3 = getDummpWordProperty(getCodePointVector("abcdefg"));
+    EXPECT_TRUE(dict.addWord(wordProperty3));
+    EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints()));
+
+    const WordProperty wordProperty4 = getDummpWordProperty(getCodePointVector("efef"));
+    EXPECT_TRUE(dict.addWord(wordProperty4));
+    EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints()));
+
+    const WordProperty wordProperty5 = getDummpWordProperty(getCodePointVector("ef"));
+    EXPECT_TRUE(dict.addWord(wordProperty5));
+    EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints()));
+
+    const WordProperty wordProperty6 = getDummpWordProperty(getCodePointVector("abcd"));
+    EXPECT_FALSE(dict.addWord(wordProperty6)) << "Adding the same word multiple times should fail.";
+
+    EXPECT_NE(nullptr, dict.getWordProperty(wordProperty0.getCodePoints()));
+    EXPECT_NE(nullptr, dict.getWordProperty(wordProperty1.getCodePoints()));
+    EXPECT_NE(nullptr, dict.getWordProperty(wordProperty2.getCodePoints()));
+    EXPECT_NE(nullptr, dict.getWordProperty(wordProperty3.getCodePoints()));
+    EXPECT_NE(nullptr, dict.getWordProperty(wordProperty4.getCodePoints()));
+    EXPECT_NE(nullptr, dict.getWordProperty(wordProperty5.getCodePoints()));
+}
+
+} // namespace
+} // namespace dicttoolkit
+} // namespace latinime