Export libtextclassifier and make some native tests working again am: d4b52f3f0d am: f556113acd Change-Id: I02182ef95bccd8ef7e5fe0b578d5716c29eea697

commit: 0b2ab9d9f143091dffa6412fd2b257fe5ad0bc95 [log] [tgz]
author: Tony Mak <tonymak@google.com> Fri May 29 20:39:29 2020 +0000
committer: Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com> Fri May 29 20:39:29 2020 +0000
tree: 1f182d619c80835b42155acf299b3dc3dccf9b9c
parent: c750912d9355f64e91224a0de7d954edefb1434f [diff]
parent: f556113acd1b2ffd9a676c22a02ab3730326e95a [diff]
diff --git a/TEST_MAPPING b/TEST_MAPPING
index 2b02610..3c8e10b 100644
--- a/TEST_MAPPING
+++ b/TEST_MAPPING

@@ -7,6 +7,9 @@
           "exclude-annotation": "androidx.test.filters.FlakyTest"
         }
       ]
+    },
+    {
+      "name": "libtextclassifier_tests"
     }
   ]
 }
\ No newline at end of file

diff --git a/native/Android.bp b/native/Android.bp
index 3c43ec4..ebbd423 100644
--- a/native/Android.bp
+++ b/native/Android.bp

@@ -301,10 +301,11 @@
     exclude_srcs: [
         "**/*_test.cc",
         "**/*-test-lib.cc",
-        "utils/testing/*.cc",
+        "**/testing/*.cc",
         "**/*test-util.*",
         "**/*test-utils.*",
         "**/*_test-include.*",
+        "**/*unittest.cc",
     ],
 
     version_script: "jni.lds",
@@ -330,17 +331,14 @@
     ],
 
     srcs: ["**/*.cc"],
-    // TODO: Do not filter out tflite test once the dependency issue is resolved.
-    exclude_srcs: [
-        "utils/tflite/*_test.cc",
-        "utils/flatbuffers_test.cc",
-        "utils/calendar/*_test-include.*",
-        "utils/utf8/*_test-include.*"
-    ],
 
-    static_libs: ["libgmock_ndk"],
     header_libs: ["jni_headers"],
 
+    static_libs: [
+        "libgmock_ndk",
+        "libgtest_ndk_c++",
+    ],
+
     multilib: {
         lib32: {
             cppflags: ["-DTC3_TEST_DATA_DIR=\"/data/nativetest/libtextclassifier_tests/test_data/\""],

diff --git a/native/actions/feature-processor_test.cc b/native/actions/feature-processor_test.cc
new file mode 100644
index 0000000..969bbf7
--- /dev/null
+++ b/native/actions/feature-processor_test.cc

@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "actions/feature-processor.h"
+
+#include "actions/actions_model_generated.h"
+#include "annotator/model-executor.h"
+#include "utils/tensor-view.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using ::testing::FloatEq;
+using ::testing::SizeIs;
+
+// Test double for EmbeddingExecutor: writes {id, id, -id, -id} into the first
+// four floats of dest, where id is the first entry of sparse_features.
+class FakeEmbeddingExecutor : public EmbeddingExecutor {
+ public:
+  bool AddEmbedding(const TensorView<int>& sparse_features, float* dest,
+                    const int dest_size) const override {
+    TC3_CHECK_GE(dest_size, 4);  // Four floats are written below.
+    EXPECT_THAT(sparse_features, SizeIs(1));  // Exactly one id is expected.
+    dest[0] = sparse_features.data()[0];
+    dest[1] = sparse_features.data()[0];
+    dest[2] = -sparse_features.data()[0];  // The last two slots get the negated id.
+    dest[3] = -sparse_features.data()[0];
+    return true;  // Always reports success.
+  }
+
+ private:
+  std::vector<float> storage_;  // NOTE(review): never referenced in this class.
+};
+
+class FeatureProcessorTest : public ::testing::Test {  // Fixture shared by the tests below.
+ protected:
+  FeatureProcessorTest() : INIT_UNILIB_FOR_TESTING(unilib_) {}  // unilib_ is set up via the testing macro.
+
+  flatbuffers::DetachedBuffer PackFeatureProcessorOptions(  // Serializes |options| into a flatbuffer.
+      ActionsTokenFeatureProcessorOptionsT* options) const {
+    flatbuffers::FlatBufferBuilder builder;
+    builder.Finish(CreateActionsTokenFeatureProcessorOptions(builder, options));
+    return builder.Release();  // Returned as a DetachedBuffer.
+  }
+
+  FakeEmbeddingExecutor embedding_executor_;  // Passed to AppendTokenFeatures by each test.
+  UniLib unilib_;  // Passed to the ActionsFeatureProcessor constructor.
+};
+
+TEST_F(FeatureProcessorTest, TokenEmbeddings) {  // One token yields embedding_size floats.
+  ActionsTokenFeatureProcessorOptionsT options;
+  options.embedding_size = 4;  // Same count the fake executor writes.
+  options.tokenizer_options.reset(new ActionsTokenizerOptionsT);  // Default-constructed tokenizer options.
+
+  flatbuffers::DetachedBuffer options_fb =
+      PackFeatureProcessorOptions(&options);
+  ActionsFeatureProcessor feature_processor(
+      flatbuffers::GetRoot<ActionsTokenFeatureProcessorOptions>(
+          options_fb.data()),
+      &unilib_);
+
+  Token token("aaa", 0, 3);
+  std::vector<float> token_features;  // Output vector for the appended features.
+  EXPECT_TRUE(feature_processor.AppendTokenFeatures(token, &embedding_executor_,
+                                                    &token_features));
+  EXPECT_THAT(token_features, SizeIs(4));  // Embedding only; no case feature requested.
+}
+
+TEST_F(FeatureProcessorTest, TokenEmbeddingsCaseFeature) {  // The case feature adds one float after the embedding.
+  ActionsTokenFeatureProcessorOptionsT options;
+  options.embedding_size = 4;
+  options.extract_case_feature = true;  // Option under test.
+  options.tokenizer_options.reset(new ActionsTokenizerOptionsT);
+
+  flatbuffers::DetachedBuffer options_fb =
+      PackFeatureProcessorOptions(&options);
+  ActionsFeatureProcessor feature_processor(
+      flatbuffers::GetRoot<ActionsTokenFeatureProcessorOptions>(
+          options_fb.data()),
+      &unilib_);
+
+  Token token("Aaa", 0, 3);  // Starts with an upper-case letter.
+  std::vector<float> token_features;
+  EXPECT_TRUE(feature_processor.AppendTokenFeatures(token, &embedding_executor_,
+                                                    &token_features));
+  EXPECT_THAT(token_features, SizeIs(5));  // 4 embedding floats + 1 case feature.
+  EXPECT_THAT(token_features[4], FloatEq(1.0));  // Case feature of the capitalized token.
+}
+
+TEST_F(FeatureProcessorTest, MultipleTokenEmbeddingsCaseFeature) {  // Features of several tokens are appended back to back.
+  ActionsTokenFeatureProcessorOptionsT options;
+  options.embedding_size = 4;
+  options.extract_case_feature = true;
+  options.tokenizer_options.reset(new ActionsTokenizerOptionsT);
+
+  flatbuffers::DetachedBuffer options_fb =
+      PackFeatureProcessorOptions(&options);
+  ActionsFeatureProcessor feature_processor(
+      flatbuffers::GetRoot<ActionsTokenFeatureProcessorOptions>(
+          options_fb.data()),
+      &unilib_);
+
+  const std::vector<Token> tokens = {Token("Aaa", 0, 3), Token("bbb", 4, 7),
+                                     Token("Cccc", 8, 12)};
+  std::vector<float> token_features;
+  EXPECT_TRUE(feature_processor.AppendTokenFeatures(
+      tokens, &embedding_executor_, &token_features));
+  EXPECT_THAT(token_features, SizeIs(15));  // 3 tokens x (4 embedding floats + 1 case feature).
+  EXPECT_THAT(token_features[4], FloatEq(1.0));  // Aaa: capitalized.
+  EXPECT_THAT(token_features[9], FloatEq(-1.0));  // bbb: lower case.
+  EXPECT_THAT(token_features[14], FloatEq(1.0));  // Cccc: capitalized.
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/actions/flatbuffer-utils.cc b/native/actions/flatbuffer-utils.cc
deleted file mode 100644
index 6d60c2f..0000000
--- a/native/actions/flatbuffer-utils.cc
+++ /dev/null

@@ -1,88 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "actions/flatbuffer-utils.h"
-
-#include <memory>
-
-#include "utils/base/logging.h"
-#include "utils/flatbuffers.h"
-#include "flatbuffers/reflection.h"
-
-namespace libtextclassifier3 {
-
-bool SwapFieldNamesForOffsetsInPathInActionsModel(ActionsModelT* model) {
-  if (model->actions_entity_data_schema.empty()) {
-    // Nothing to do.
-    return true;
-  }
-
-  const reflection::Schema* schema =
-      LoadAndVerifyFlatbuffer<reflection::Schema>(
-          model->actions_entity_data_schema.data(),
-          model->actions_entity_data_schema.size());
-
-  // Resolve offsets in regex rules.
-  if (model->rules != nullptr) {
-    for (std::unique_ptr<RulesModel_::RegexRuleT>& rule :
-         model->rules->regex_rule) {
-      for (std::unique_ptr<RulesModel_::RuleActionSpecT>& rule_action :
-           rule->actions) {
-        for (std::unique_ptr<RulesModel_::RuleActionSpec_::RuleCapturingGroupT>&
-                 capturing_group : rule_action->capturing_group) {
-          if (capturing_group->entity_field == nullptr) {
-            continue;
-          }
-          if (!SwapFieldNamesForOffsetsInPath(
-                  schema, capturing_group->entity_field.get())) {
-            return false;
-          }
-        }
-      }
-    }
-  }
-
-  // Resolve offsets in annotation action mapping.
-  if (model->annotation_actions_spec != nullptr) {
-    for (std::unique_ptr<AnnotationActionsSpec_::AnnotationMappingT>& mapping :
-         model->annotation_actions_spec->annotation_mapping) {
-      if (mapping->entity_field == nullptr) {
-        continue;
-      }
-      if (!SwapFieldNamesForOffsetsInPath(schema,
-                                          mapping->entity_field.get())) {
-        return false;
-      }
-    }
-  }
-
-  return true;
-}
-
-std::string SwapFieldNamesForOffsetsInPathInSerializedActionsModel(
-    const std::string& model) {
-  std::unique_ptr<ActionsModelT> unpacked_model =
-      UnPackActionsModel(model.c_str());
-  TC3_CHECK(unpacked_model != nullptr);
-  TC3_CHECK(SwapFieldNamesForOffsetsInPathInActionsModel(unpacked_model.get()));
-  flatbuffers::FlatBufferBuilder builder;
-  FinishActionsModelBuffer(builder,
-                           ActionsModel::Pack(builder, unpacked_model.get()));
-  return std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
-                     builder.GetSize());
-}
-
-}  // namespace libtextclassifier3

diff --git a/native/actions/flatbuffer-utils.h b/native/actions/flatbuffer-utils.h
deleted file mode 100644
index 2479599..0000000
--- a/native/actions/flatbuffer-utils.h
+++ /dev/null

@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Utility functions for working with FlatBuffers in the actions model.
-
-#ifndef LIBTEXTCLASSIFIER_ACTIONS_FLATBUFFER_UTILS_H_
-#define LIBTEXTCLASSIFIER_ACTIONS_FLATBUFFER_UTILS_H_
-
-#include <string>
-
-#include "actions/actions_model_generated.h"
-
-namespace libtextclassifier3 {
-
-// Resolves field lookups by name to the concrete field offsets in the regex
-// rules of the model.
-bool SwapFieldNamesForOffsetsInPathInActionsModel(ActionsModelT* model);
-
-// Same as above but for a serialized model.
-std::string SwapFieldNamesForOffsetsInPathInSerializedActionsModel(
-    const std::string& model);
-
-}  // namespace libtextclassifier3
-
-#endif  // LIBTEXTCLASSIFIER_ACTIONS_FLATBUFFER_UTILS_H_

diff --git a/native/actions/lua-actions_test.cc b/native/actions/lua-actions_test.cc
new file mode 100644
index 0000000..72cae2c
--- /dev/null
+++ b/native/actions/lua-actions_test.cc

@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "actions/lua-actions.h"
+
+#include <map>
+#include <string>
+
+#include "actions/test-utils.h"
+#include "actions/types.h"
+#include "utils/tflite-model-executor.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using testing::ElementsAre;
+
+TEST(LuaActions, SimpleAction) {  // A snippet returning one action table yields one suggestion.
+  Conversation conversation;  // No messages are added for this case.
+  const std::string test_snippet = R"(
+    return {{ type = "test_action" }}
+  )";
+  std::vector<ActionSuggestion> actions;  // Passed to SuggestActions as the output vector.
+  EXPECT_TRUE(LuaActionsSuggestions::CreateLuaActionsSuggestions(
+                  test_snippet, conversation,
+                  /*model_executor=*/nullptr,
+                  /*model_spec=*/nullptr,
+                  /*interpreter=*/nullptr,
+                  /*actions_entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr)
+                  ->SuggestActions(&actions));
+  EXPECT_THAT(actions, ElementsAre(IsActionOfType("test_action")));
+}
+
+TEST(LuaActions, ConversationActions) {  // The snippet can match on pairs of consecutive messages.
+  Conversation conversation;
+  conversation.messages.push_back({/*user_id=*/0, "hello there!"});  // Matches the first rule in the snippet.
+  conversation.messages.push_back({/*user_id=*/1, "general kenobi!"});
+  const std::string test_snippet = R"(
+    local actions = {}
+    for i, message in pairs(messages) do
+      if i < #messages then
+        if message.text == "hello there!" and
+           messages[i+1].text == "general kenobi!" then
+           table.insert(actions, {
+             type = "text_reply",
+             response_text = "you are a bold one!"
+           })
+        end
+        if message.text == "i am the senate!" and
+           messages[i+1].text == "not yet!" then
+           table.insert(actions, {
+             type = "text_reply",
+             response_text = "it's treason then"
+           })
+        end
+      end
+    end
+    return actions;
+  )";
+  std::vector<ActionSuggestion> actions;
+  EXPECT_TRUE(LuaActionsSuggestions::CreateLuaActionsSuggestions(
+                  test_snippet, conversation,
+                  /*model_executor=*/nullptr,
+                  /*model_spec=*/nullptr,
+                  /*interpreter=*/nullptr,
+                  /*actions_entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr)
+                  ->SuggestActions(&actions));
+  EXPECT_THAT(actions, ElementsAre(IsSmartReply("you are a bold one!")));
+}
+
+TEST(LuaActions, SimpleModelAction) {  // Expects model.actions_scores to be empty when no model is passed.
+  Conversation conversation;
+  const std::string test_snippet = R"(
+    if #model.actions_scores == 0 then
+      return {{ type = "test_action" }}
+    end
+    return {}
+  )";
+  std::vector<ActionSuggestion> actions;
+  EXPECT_TRUE(LuaActionsSuggestions::CreateLuaActionsSuggestions(
+                  test_snippet, conversation,
+                  /*model_executor=*/nullptr,
+                  /*model_spec=*/nullptr,
+                  /*interpreter=*/nullptr,
+                  /*actions_entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr)
+                  ->SuggestActions(&actions));
+  EXPECT_THAT(actions, ElementsAre(IsActionOfType("test_action")));
+}
+
+TEST(LuaActions, SimpleModelRepliesAction) {  // Expects model.reply to be empty when no model is passed.
+  Conversation conversation;
+  const std::string test_snippet = R"(
+    if #model.reply == 0 then
+      return {{ type = "test_action" }}
+    end
+    return {}
+  )";
+  std::vector<ActionSuggestion> actions;
+  EXPECT_TRUE(LuaActionsSuggestions::CreateLuaActionsSuggestions(
+                  test_snippet, conversation,
+                  /*model_executor=*/nullptr,
+                  /*model_spec=*/nullptr,
+                  /*interpreter=*/nullptr,
+                  /*actions_entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr)
+                  ->SuggestActions(&actions));
+  EXPECT_THAT(actions, ElementsAre(IsActionOfType("test_action")));
+}
+
+TEST(LuaActions, AnnotationActions) {  // Annotations attached to a message are visible to the snippet.
+  AnnotatedSpan annotation;
+  annotation.span = {11, 15};  // Covers the word home in the message below.
+  annotation.classification = {ClassificationResult("address", 1.0)};
+  Conversation conversation = {{{/*user_id=*/1, "are you at home?",
+                                 /*reference_time_ms_utc=*/0,
+                                 /*reference_timezone=*/"Europe/Zurich",
+                                 /*annotations=*/{annotation},
+                                 /*locales=*/"en"}}};
+  const std::string test_snippet = R"(
+    local actions = {}
+    local last_message = messages[#messages]
+    for i, annotation in pairs(last_message.annotation) do
+      if #annotation.classification > 0 then
+        if annotation.classification[1].collection == "address" then
+           local text = string.sub(last_message.text,
+                            annotation.span["begin"] + 1,
+                            annotation.span["end"])
+           table.insert(actions, {
+             type = "text_reply",
+             response_text = "i am at " .. text,
+             annotation = {{
+               name = "location",
+               span = {
+                 text = text
+               },
+               entity = annotation.classification[1]
+             }},
+           })
+        end
+      end
+    end
+    return actions;
+  )";
+  std::vector<ActionSuggestion> actions;
+  EXPECT_TRUE(LuaActionsSuggestions::CreateLuaActionsSuggestions(
+                  test_snippet, conversation,
+                  /*model_executor=*/nullptr,
+                  /*model_spec=*/nullptr,
+                  /*interpreter=*/nullptr,
+                  /*actions_entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr)
+                  ->SuggestActions(&actions));
+  EXPECT_THAT(actions, ElementsAre(IsSmartReply("i am at home")));
+  EXPECT_EQ("address", actions[0].annotations[0].entity.collection);  // Classification set by the snippet is carried over.
+}
+
+TEST(LuaActions, EntityData) {  // Entity fields set by the snippet end up in serialized_entity_data.
+  std::string test_schema = TestEntityDataSchema();  // Presumably provided by actions/test-utils.h; confirm.
+  Conversation conversation = {{{/*user_id=*/1, "hello there"}}};
+  const std::string test_snippet = R"(
+    return {{
+      type = "test",
+      entity = {
+        greeting = "hello",
+        location = "there",
+        person = "Kenobi",
+      },
+    }};
+  )";
+  std::vector<ActionSuggestion> actions;
+  EXPECT_TRUE(LuaActionsSuggestions::CreateLuaActionsSuggestions(
+                  test_snippet, conversation,
+                  /*model_executor=*/nullptr,
+                  /*model_spec=*/nullptr,
+                  /*interpreter=*/nullptr,
+                  /*actions_entity_data_schema=*/
+                  flatbuffers::GetRoot<reflection::Schema>(test_schema.data()),
+                  /*annotations_entity_data_schema=*/nullptr)
+                  ->SuggestActions(&actions));
+  EXPECT_THAT(actions, testing::SizeIs(1));
+  EXPECT_EQ("test", actions.front().type);
+  const flatbuffers::Table* entity =
+      flatbuffers::GetAnyRoot(reinterpret_cast<const unsigned char*>(
+          actions.front().serialized_entity_data.data()));
+  EXPECT_EQ(entity->GetPointer<const flatbuffers::String*>(/*field=*/4)->str(),
+            "hello");  // greeting
+  EXPECT_EQ(entity->GetPointer<const flatbuffers::String*>(/*field=*/6)->str(),
+            "there");  // location
+  EXPECT_EQ(entity->GetPointer<const flatbuffers::String*>(/*field=*/8)->str(),
+            "Kenobi");  // person
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/actions/lua-ranker_test.cc b/native/actions/lua-ranker_test.cc
new file mode 100644
index 0000000..a790042
--- /dev/null
+++ b/native/actions/lua-ranker_test.cc

@@ -0,0 +1,269 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "actions/lua-ranker.h"
+
+#include <string>
+
+#include "actions/types.h"
+#include "utils/flatbuffers.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+MATCHER_P2(IsAction, type, response_text, "") {  // Matches on both type and response_text.
+  return testing::Value(arg.type, type) &&
+         testing::Value(arg.response_text, response_text);
+}
+
+MATCHER_P(IsActionType, type, "") { return testing::Value(arg.type, type); }  // Matches on type only.
+
+std::string TestEntitySchema() {  // Returns a serialized reflection schema holding one object with one field.
+  // Create fake entity data schema meta data.
+  // Cannot use object oriented API here as that is not available for the
+  // reflection schema.
+  flatbuffers::FlatBufferBuilder schema_builder;
+  std::vector<flatbuffers::Offset<reflection::Field>> fields = {
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("test"),  // The only field of the object.
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::String),
+          /*id=*/0,
+          /*offset=*/4)};
+  std::vector<flatbuffers::Offset<reflection::Enum>> enums;  // Left empty.
+  std::vector<flatbuffers::Offset<reflection::Object>> objects = {
+      reflection::CreateObject(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("EntityData"),
+          /*fields=*/
+          schema_builder.CreateVectorOfSortedTables(&fields))};
+  schema_builder.Finish(reflection::CreateSchema(
+      schema_builder, schema_builder.CreateVectorOfSortedTables(&objects),
+      schema_builder.CreateVectorOfSortedTables(&enums),
+      /*(unused) file_ident=*/0,
+      /*(unused) file_ext=*/0,
+      /*root_table*/ objects[0]));
+  return std::string(  // Copies the finished buffer bytes into the returned string.
+      reinterpret_cast<const char*>(schema_builder.GetBufferPointer()),
+      schema_builder.GetSize());
+}
+
+TEST(LuaRankingTest, PassThrough) {  // Returning every index in order keeps the actions unchanged.
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {  // Three candidates with descending scores.
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    local result = {}
+    for i=1,#actions do
+      table.insert(result, i)
+    end
+    return result
+  )";
+
+  EXPECT_TRUE(ActionsSuggestionsLuaRanker::Create(
+                  conversation, test_snippet, /*entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr, &response)
+                  ->RankActions());
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsActionType("text_reply"),
+                                         IsActionType("share_location"),
+                                         IsActionType("add_to_collection")}));
+}
+
+TEST(LuaRankingTest, Filtering) {  // Returning an empty table drops all actions.
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {  // Three candidates with descending scores.
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    return {}
+  )";
+
+  EXPECT_TRUE(ActionsSuggestionsLuaRanker::Create(
+                  conversation, test_snippet, /*entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr, &response)
+                  ->RankActions());
+  EXPECT_THAT(response.actions, testing::IsEmpty());
+}
+
+TEST(LuaRankingTest, Duplication) {  // Returning index 1 repeatedly duplicates the first action.
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {  // Three candidates with descending scores.
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    local result = {}
+    for i=1,#actions do
+      table.insert(result, 1)
+    end
+    return result
+  )";
+
+  EXPECT_TRUE(ActionsSuggestionsLuaRanker::Create(
+                  conversation, test_snippet, /*entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr, &response)
+                  ->RankActions());
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsActionType("text_reply"),
+                                         IsActionType("text_reply"),
+                                         IsActionType("text_reply")}));
+}
+
+TEST(LuaRankingTest, SortByScore) {  // Indices sorted by ascending score reorder the actions accordingly.
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {  // Three candidates with descending scores.
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    function testScoreSorter(a, b)
+      return actions[a].score < actions[b].score
+    end
+    local result = {}
+    for i=1,#actions do
+      result[i] = i
+    end
+    table.sort(result, testScoreSorter)
+    return result
+  )";
+
+  EXPECT_TRUE(ActionsSuggestionsLuaRanker::Create(
+                  conversation, test_snippet, /*entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr, &response)
+                  ->RankActions());
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsActionType("add_to_collection"),
+                                         IsActionType("share_location"),
+                                         IsActionType("text_reply")}));
+}
+
+TEST(LuaRankingTest, SuppressType) {  // Omitting the indices of text_reply actions removes them.
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {  // Three candidates with descending scores.
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    local result = {}
+    for id, action in pairs(actions) do
+      if action.type ~= "text_reply" then
+        table.insert(result, id)
+      end
+    end
+    return result
+  )";
+
+  EXPECT_TRUE(ActionsSuggestionsLuaRanker::Create(
+                  conversation, test_snippet, /*entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr, &response)
+                  ->RankActions());
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsActionType("share_location"),
+                                         IsActionType("add_to_collection")}));
+}
+
+TEST(LuaRankingTest, HandlesConversation) {  // The snippet can read the conversation through messages.
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};  // Text the snippet compares against.
+  ActionsSuggestionsResponse response;
+  response.actions = {  // Three candidates with descending scores.
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    local result = {}
+    if messages[1].text ~= "hello hello" then
+      return result
+    end
+    for id, action in pairs(actions) do
+      if action.type ~= "text_reply" then
+        table.insert(result, id)
+      end
+    end
+    return result
+  )";
+
+  EXPECT_TRUE(ActionsSuggestionsLuaRanker::Create(
+                  conversation, test_snippet, /*entity_data_schema=*/nullptr,
+                  /*annotations_entity_data_schema=*/nullptr, &response)
+                  ->RankActions());
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsActionType("share_location"),
+                                         IsActionType("add_to_collection")}));
+}
+
+TEST(LuaRankingTest, HandlesEntityData) {  // Entity data fields are readable as properties of an action.
+  std::string serialized_schema = TestEntitySchema();  // Must stay alive while entity_data_schema points into it.
+  const reflection::Schema* entity_data_schema =
+      flatbuffers::GetRoot<reflection::Schema>(serialized_schema.data());
+
+  // Create test entity data.
+  ReflectiveFlatbufferBuilder builder(entity_data_schema);
+  std::unique_ptr<ReflectiveFlatbuffer> buffer = builder.NewRoot();
+  buffer->Set("test", "value_a");
+  const std::string serialized_entity_data_a = buffer->Serialize();
+  buffer->Set("test", "value_b");  // Same buffer object reused for the second payload.
+  const std::string serialized_entity_data_b = buffer->Serialize();
+
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {  // Two test actions that differ only in their entity data, plus two others.
+      {/*response_text=*/"", /*type=*/"test",
+       /*score=*/1.0, /*priority_score=*/1.0, /*annotations=*/{},
+       /*serialized_entity_data=*/serialized_entity_data_a},
+      {/*response_text=*/"", /*type=*/"test",
+       /*score=*/1.0, /*priority_score=*/1.0, /*annotations=*/{},
+       /*serialized_entity_data=*/serialized_entity_data_b},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    local result = {}
+    for id, action in pairs(actions) do
+      if action.type == "test" and action.test == "value_a" then
+        table.insert(result, id)
+      end
+    end
+    return result
+  )";
+
+  EXPECT_TRUE(ActionsSuggestionsLuaRanker::Create(
+                  conversation, test_snippet, entity_data_schema,
+                  /*annotations_entity_data_schema=*/nullptr, &response)
+                  ->RankActions());
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsActionType("test")}));  // Only the value_a action is kept.
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/actions/ranker_test.cc b/native/actions/ranker_test.cc
new file mode 100644
index 0000000..b52cf45
--- /dev/null
+++ b/native/actions/ranker_test.cc

@@ -0,0 +1,382 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "actions/ranker.h"
+
+#include <string>
+
+#include "actions/types.h"
+#include "utils/zlib/zlib.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Matches an action suggestion whose `type`, `response_text` and `score`
+// fields match the given values, checked in that order.
+MATCHER_P3(IsAction, type, response_text, score, "") {
+  if (!testing::Value(arg.type, type)) {
+    return false;
+  }
+  if (!testing::Value(arg.response_text, response_text)) {
+    return false;
+  }
+  return testing::Value(arg.score, score);
+}
+
+// Matches an action suggestion by its `type` field only.
+MATCHER_P(IsActionType, type, "") {
+  return testing::Value(arg.type, type);
+}
+
+// Two identical smart replies with different scores are ranked with
+// `deduplicate_suggestions` enabled; only the reply scored 1.0 is expected to
+// remain.
+TEST(RankingTest, DeduplicationSmartReply) {
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"hello there", /*type=*/"text_reply", /*score=*/0.5}};
+
+  RankingOptionsT options;
+  options.deduplicate_suggestions = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // Guard the dereference below: a failed creation should fail the test
+  // rather than crash it.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+  EXPECT_THAT(
+      response.actions,
+      testing::ElementsAreArray({IsAction("text_reply", "hello there", 1.0)}));
+}
+
+// Three smart replies share the same text; the third one additionally carries
+// serialized entity data. With deduplication enabled, the 1.0-scored reply and
+// the reply with entity data are expected to remain.
+TEST(RankingTest, DeduplicationExtraData) {
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0, /*priority_score=*/0.0},
+      {/*response_text=*/"hello there", /*type=*/"text_reply", /*score=*/0.5,
+       /*priority_score=*/0.0},
+      {/*response_text=*/"hello there", /*type=*/"text_reply", /*score=*/0.6,
+       /*priority_score=*/0.0,
+       /*annotations=*/{}, /*serialized_entity_data=*/"test"},
+  };
+
+  RankingOptionsT options;
+  options.deduplicate_suggestions = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // Guard the dereference below: a failed creation should fail the test
+  // rather than crash it.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+  EXPECT_THAT(
+      response.actions,
+      testing::ElementsAreArray({IsAction("text_reply", "hello there", 1.0),
+                                 // Is kept as it has different entity data.
+                                 IsAction("text_reply", "hello there", 0.6)}));
+}
+
+// Two `view_map` actions annotate the same address span and one `call_phone`
+// action annotates a different span. With deduplication enabled, the
+// `view_map` action scored 1.0 (priority score 2.0) and the `call_phone`
+// action are expected to remain.
+TEST(RankingTest, DeduplicationAnnotations) {
+  const Conversation conversation = {
+      {{/*user_id=*/1, "742 Evergreen Terrace, the number is 1-800-TESTING"}}};
+  ActionsSuggestionsResponse response;
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{0, 21},
+                       /*text=*/"742 Evergreen Terrace"};
+    annotation.entity = ClassificationResult("address", 0.5);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"view_map",
+                                /*score=*/0.5,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+  }
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{0, 21},
+                       /*text=*/"742 Evergreen Terrace"};
+    annotation.entity = ClassificationResult("address", 1.0);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"view_map",
+                                /*score=*/1.0,
+                                /*priority_score=*/2.0,
+                                /*annotations=*/{annotation}});
+  }
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{37, 50},
+                       /*text=*/"1-800-TESTING"};
+    annotation.entity = ClassificationResult("phone", 0.5);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"call_phone",
+                                /*score=*/0.5,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+  }
+
+  RankingOptionsT options;
+  options.deduplicate_suggestions = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // Guard the dereference below: a failed creation should fail the test
+  // rather than crash it.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsAction("view_map", "", 1.0),
+                                         IsAction("call_phone", "", 0.5)}));
+}
+
+// Same setup as the annotation deduplication case, but here the `view_map`
+// action with the lower score (0.6) has the higher priority score (2.0) and is
+// the one expected to survive deduplication.
+TEST(RankingTest, DeduplicationAnnotationsByPriorityScore) {
+  const Conversation conversation = {
+      {{/*user_id=*/1, "742 Evergreen Terrace, the number is 1-800-TESTING"}}};
+  ActionsSuggestionsResponse response;
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{0, 21},
+                       /*text=*/"742 Evergreen Terrace"};
+    annotation.entity = ClassificationResult("address", 0.5);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"view_map",
+                                /*score=*/0.6,
+                                /*priority_score=*/2.0,
+                                /*annotations=*/{annotation}});
+  }
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{0, 21},
+                       /*text=*/"742 Evergreen Terrace"};
+    annotation.entity = ClassificationResult("address", 1.0);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"view_map",
+                                /*score=*/1.0,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+  }
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{37, 50},
+                       /*text=*/"1-800-TESTING"};
+    annotation.entity = ClassificationResult("phone", 0.5);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"call_phone",
+                                /*score=*/0.5,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+  }
+
+  RankingOptionsT options;
+  options.deduplicate_suggestions = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // Guard the dereference below: a failed creation should fail the test
+  // rather than crash it.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+  EXPECT_THAT(
+      response.actions,
+      testing::ElementsAreArray(
+          {IsAction("view_map", "",
+                    0.6),  // lower score wins, as priority score is higher
+           IsAction("call_phone", "", 0.5)}));
+}
+
+// A `call_phone` action on "911" ([7, 10)) and a `copy_code` action on
+// "A-911" ([5, 10)) annotate overlapping spans of the same message. With
+// deduplication enabled, only the `copy_code` action (priority score 2.0) is
+// expected to remain.
+TEST(RankingTest, DeduplicatesConflictingActions) {
+  const Conversation conversation = {{{/*user_id=*/1, "code A-911"}}};
+  ActionsSuggestionsResponse response;
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{7, 10},
+                       /*text=*/"911"};
+    annotation.entity = ClassificationResult("phone", 1.0);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"call_phone",
+                                /*score=*/1.0,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+  }
+  {
+    ActionSuggestionAnnotation annotation;
+    annotation.span = {/*message_index=*/0, /*span=*/{5, 10},
+                       /*text=*/"A-911"};
+    annotation.entity = ClassificationResult("code", 1.0);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"copy_code",
+                                /*score=*/1.0,
+                                /*priority_score=*/2.0,
+                                /*annotations=*/{annotation}});
+  }
+  RankingOptionsT options;
+  options.deduplicate_suggestions = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // Guard the dereference below: a failed creation should fail the test
+  // rather than crash it.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsAction("copy_code", "", 1.0)}));
+}
+
+// Stores a Lua ranking snippet zlib-compressed in the ranking options and
+// checks that the ranker, given a decompressor, applies it: the snippet
+// returns the ids of all actions whose type is not "text_reply", so only
+// `share_location` and `add_to_collection` are expected to remain.
+TEST(RankingTest, HandlesCompressedLuaScript) {
+  const Conversation conversation = {{{/*user_id=*/1, "hello hello"}}};
+  ActionsSuggestionsResponse response;
+  response.actions = {
+      {/*response_text=*/"hello there", /*type=*/"text_reply",
+       /*score=*/1.0},
+      {/*response_text=*/"", /*type=*/"share_location", /*score=*/0.5},
+      {/*response_text=*/"", /*type=*/"add_to_collection", /*score=*/0.1}};
+  const std::string test_snippet = R"(
+    local result = {}
+    for id, action in pairs(actions) do
+      if action.type ~= "text_reply" then
+        table.insert(result, id)
+      end
+    end
+    return result
+  )";
+  RankingOptionsT options;
+  options.compressed_lua_ranking_script.reset(new CompressedBufferT);
+  std::unique_ptr<ZlibCompressor> compressor = ZlibCompressor::Instance();
+  // Fail cleanly instead of crashing if no compressor is available.
+  ASSERT_TRUE(compressor != nullptr);
+  compressor->Compress(test_snippet,
+                       options.compressed_lua_ranking_script.get());
+  options.deduplicate_suggestions = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+
+  std::unique_ptr<ZlibDecompressor> decompressor = ZlibDecompressor::Instance();
+  ASSERT_TRUE(decompressor != nullptr);
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      decompressor.get(), /*smart_reply_action_type=*/"text_reply");
+  // Guard the dereference below: a failed creation (e.g. a script that could
+  // not be loaded) should fail the test rather than crash it.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsActionType("share_location"),
+                                         IsActionType("add_to_collection")}));
+}
+
+// With `suppress_smart_replies_with_actions` enabled, a response containing
+// both a `call_phone` action and a smart reply is expected to keep only the
+// `call_phone` action.
+TEST(RankingTest, SuppressSmartRepliesWithAction) {
+  const Conversation conversation = {{{/*user_id=*/1, "should i call 911"}}};
+  ActionsSuggestionsResponse response;
+  {
+    ActionSuggestionAnnotation annotation;
+    // "911" occupies characters [14, 17) of the message above.
+    annotation.span = {/*message_index=*/0, /*span=*/{14, 17},
+                       /*text=*/"911"};
+    annotation.entity = ClassificationResult("phone", 1.0);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"call_phone",
+                                /*score=*/1.0,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+  }
+  response.actions.push_back({/*response_text=*/"How are you?",
+                              /*type=*/"text_reply"});
+  RankingOptionsT options;
+  options.suppress_smart_replies_with_actions = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // Guard the dereference below: a failed creation should fail the test
+  // rather than crash it.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+
+  EXPECT_THAT(response.actions,
+              testing::ElementsAreArray({IsAction("call_phone", "", 1.0)}));
+}
+
+// With `group_by_annotations` enabled, the two actions sharing the "911"
+// annotation are expected to stay together ahead of the smart reply, even
+// though the smart reply outscores `add_contact`.
+TEST(RankingTest, GroupsActionsByAnnotations) {
+  const Conversation conversation = {{{/*user_id=*/1, "should i call 911"}}};
+  ActionsSuggestionsResponse response;
+  {
+    ActionSuggestionAnnotation annotation;
+    // "911" occupies characters [14, 17) of the message above.
+    annotation.span = {/*message_index=*/0, /*span=*/{14, 17},
+                       /*text=*/"911"};
+    annotation.entity = ClassificationResult("phone", 1.0);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"call_phone",
+                                /*score=*/1.0,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"add_contact",
+                                /*score=*/0.0,
+                                /*priority_score=*/0.0,
+                                /*annotations=*/{annotation}});
+  }
+  response.actions.push_back({/*response_text=*/"How are you?",
+                              /*type=*/"text_reply",
+                              /*score=*/0.5});
+  RankingOptionsT options;
+  options.group_by_annotations = true;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // Guard the dereference below: a failed creation should fail the test
+  // rather than crash it.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+
+  // The text reply should be last, even though it has a higher score than the
+  // `add_contact` action.
+  EXPECT_THAT(
+      response.actions,
+      testing::ElementsAreArray({IsAction("call_phone", "", 1.0),
+                                 IsAction("add_contact", "", 0.0),
+                                 IsAction("text_reply", "How are you?", 0.5)}));
+}
+
+// With `group_by_annotations` disabled, the actions are expected to come out
+// ordered purely by descending score, so the smart reply (0.5) lands between
+// `call_phone` (1.0) and `add_contact` (0.0).
+TEST(RankingTest, SortsActionsByScore) {
+  const Conversation conversation = {{{/*user_id=*/1, "should i call 911"}}};
+  ActionsSuggestionsResponse response;
+  {
+    ActionSuggestionAnnotation annotation;
+    // "911" occupies characters [14, 17) of the message above.
+    annotation.span = {/*message_index=*/0, /*span=*/{14, 17},
+                       /*text=*/"911"};
+    annotation.entity = ClassificationResult("phone", 1.0);
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"call_phone",
+                                /*score=*/1.0,
+                                /*priority_score=*/1.0,
+                                /*annotations=*/{annotation}});
+    response.actions.push_back({/*response_text=*/"",
+                                /*type=*/"add_contact",
+                                /*score=*/0.0,
+                                /*priority_score=*/0.0,
+                                /*annotations=*/{annotation}});
+  }
+  response.actions.push_back({/*response_text=*/"How are you?",
+                              /*type=*/"text_reply",
+                              /*score=*/0.5});
+  RankingOptionsT options;
+  // Don't group by annotation.
+  options.group_by_annotations = false;
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(RankingOptions::Pack(builder, &options));
+  auto ranker = ActionsSuggestionsRanker::CreateActionsSuggestionsRanker(
+      flatbuffers::GetRoot<RankingOptions>(builder.GetBufferPointer()),
+      /*decompressor=*/nullptr, /*smart_reply_action_type=*/"text_reply");
+  // Guard the dereference below: a failed creation should fail the test
+  // rather than crash it.
+  ASSERT_TRUE(ranker != nullptr);
+
+  ranker->RankActions(conversation, &response);
+
+  EXPECT_THAT(
+      response.actions,
+      testing::ElementsAreArray({IsAction("call_phone", "", 1.0),
+                                 IsAction("text_reply", "How are you?", 0.5),
+                                 IsAction("add_contact", "", 0.0)}));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/actions/test-utils.cc b/native/actions/test-utils.cc
new file mode 100644
index 0000000..9b003dd
--- /dev/null
+++ b/native/actions/test-utils.cc

@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "actions/test-utils.h"
+
+namespace libtextclassifier3 {
+
+// Builds a serialized reflection schema describing a single `EntityData`
+// table with three string fields: `greeting`, `location` and `person`.
+//
+// Returns the finished flatbuffer copied into a byte string.
+std::string TestEntityDataSchema() {
+  // Create fake entity data schema meta data.
+  // Cannot use object oriented API here as that is not available for the
+  // reflection schema.
+  flatbuffers::FlatBufferBuilder schema_builder;
+  std::vector<flatbuffers::Offset<reflection::Field>> fields = {
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("greeting"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::String),
+          /*id=*/0,
+          /*offset=*/4),
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("location"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::String),
+          /*id=*/1,
+          /*offset=*/6),
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("person"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::String),
+          /*id=*/2,
+          /*offset=*/8)};
+  std::vector<flatbuffers::Offset<reflection::Enum>> enums;
+  std::vector<flatbuffers::Offset<reflection::Object>> objects = {
+      reflection::CreateObject(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("EntityData"),
+          /*fields=*/
+          schema_builder.CreateVectorOfSortedTables(&fields))};
+  // Capture the root table before `objects` is handed to
+  // CreateVectorOfSortedTables below. Function arguments are evaluated in an
+  // unspecified order, so reading `objects[0]` in the same call that sorts the
+  // vector could pick up a reordered element once more objects are added.
+  const flatbuffers::Offset<reflection::Object> root_table = objects[0];
+  schema_builder.Finish(reflection::CreateSchema(
+      schema_builder, schema_builder.CreateVectorOfSortedTables(&objects),
+      schema_builder.CreateVectorOfSortedTables(&enums),
+      /*(unused) file_ident=*/0,
+      /*(unused) file_ext=*/0,
+      /*root_table=*/root_table));
+
+  return std::string(
+      reinterpret_cast<const char*>(schema_builder.GetBufferPointer()),
+      schema_builder.GetSize());
+}
+
+// Copies the bytes of the serialized test entity data schema into
+// `test_model->actions_entity_data_schema`, replacing its previous contents.
+void SetTestEntityDataSchema(ActionsModelT* test_model) {
+  const std::string schema_bytes = TestEntityDataSchema();
+  test_model->actions_entity_data_schema.assign(schema_bytes.begin(),
+                                                schema_bytes.end());
+}
+
+}  // namespace libtextclassifier3

diff --git a/native/actions/test-utils.h b/native/actions/test-utils.h
new file mode 100644
index 0000000..c05d6a9
--- /dev/null
+++ b/native/actions/test-utils.h

@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_ACTIONS_TEST_UTILS_H_
+#define LIBTEXTCLASSIFIER_ACTIONS_TEST_UTILS_H_
+
+#include <string>
+
+#include "actions/actions_model_generated.h"
+#include "utils/flatbuffers.h"
+#include "gmock/gmock.h"
+
+namespace libtextclassifier3 {
+
+using testing::ExplainMatchResult;
+using testing::Value;
+
+// Create test entity data schema.
+std::string TestEntityDataSchema();
+void SetTestEntityDataSchema(ActionsModelT* test_model);
+
+// Matches an action suggestion whose `type` field matches `type`.
+MATCHER_P(IsActionOfType, type, "") {
+  return Value(arg.type, type);
+}
+// Matches an action of type "text_reply" whose `response_text` matches the
+// given text. The type check runs first; the text is compared only if it
+// passes.
+MATCHER_P(IsSmartReply, response_text, "") {
+  const bool is_text_reply =
+      ExplainMatchResult(IsActionOfType("text_reply"), arg, result_listener);
+  return is_text_reply && Value(arg.response_text, response_text);
+}
+// Matches a pair-like span whose `first` and `second` members match those of
+// `span`.
+MATCHER_P(IsSpan, span, "") {
+  if (!Value(arg.first, span.first)) {
+    return false;
+  }
+  return Value(arg.second, span.second);
+}
+// Matches an annotation by its `name`, the text of its span and the span
+// boundaries, checked in that order.
+MATCHER_P3(IsActionSuggestionAnnotation, name, text, span, "") {
+  if (!Value(arg.name, name) || !Value(arg.span.text, text)) {
+    return false;
+  }
+  return ExplainMatchResult(IsSpan(span), arg.span.span, result_listener);
+}
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_ACTIONS_TEST_UTILS_H_

diff --git a/native/actions/zlib-utils_test.cc b/native/actions/zlib-utils_test.cc
new file mode 100644
index 0000000..75e4c78
--- /dev/null
+++ b/native/actions/zlib-utils_test.cc

@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "actions/zlib-utils.h"
+
+#include <memory>
+
+#include "actions/actions_model_generated.h"
+#include "utils/zlib/zlib.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using testing::ElementsAre;
+using testing::Field;
+using testing::Pointee;
+
+// Round-trips an actions model through CompressActionsModel and
+// DecompressActionsModel and checks that the regex rule patterns survive.
+TEST(ActionsZlibUtilsTest, CompressModel) {
+  constexpr char kTestPattern1[] = "this is a test pattern";
+  constexpr char kTestPattern2[] = "this is a second test pattern";
+  constexpr char kTestOutputPattern[] = "this is an output pattern";
+
+  // Build a model with two regex rules; only the second has an output pattern.
+  ActionsModelT model;
+  model.rules.reset(new RulesModelT);
+  {
+    auto& rules = model.rules->regex_rule;
+    rules.emplace_back(new RulesModel_::RegexRuleT);
+    rules.back()->pattern = kTestPattern1;
+    rules.emplace_back(new RulesModel_::RegexRuleT);
+    rules.back()->pattern = kTestPattern2;
+    rules.back()->output_pattern = kTestOutputPattern;
+  }
+
+  // Compress the model.
+  EXPECT_TRUE(CompressActionsModel(&model));
+
+  // After compression the plain `pattern` field of both rules must be empty.
+  const auto has_empty_pattern =
+      Pointee(Field(&libtextclassifier3::RulesModel_::RegexRuleT::pattern,
+                    testing::IsEmpty()));
+  EXPECT_THAT(model.rules->regex_rule,
+              ElementsAre(has_empty_pattern, has_empty_pattern));
+
+  // Pack and load the model.
+  flatbuffers::FlatBufferBuilder fbb;
+  FinishActionsModelBuffer(fbb, ActionsModel::Pack(fbb, &model));
+  const ActionsModel* packed_model =
+      GetActionsModel(reinterpret_cast<const char*>(fbb.GetBufferPointer()));
+  ASSERT_TRUE(packed_model != nullptr);
+
+  // Decompress the packed fields and compare them with the originals.
+  std::unique_ptr<ZlibDecompressor> decompressor = ZlibDecompressor::Instance();
+  ASSERT_TRUE(decompressor != nullptr);
+  const auto* packed_rules = packed_model->rules()->regex_rule();
+  std::string pattern_text;
+  EXPECT_TRUE(decompressor->MaybeDecompress(
+      packed_rules->Get(0)->compressed_pattern(), &pattern_text));
+  EXPECT_EQ(pattern_text, kTestPattern1);
+  EXPECT_TRUE(decompressor->MaybeDecompress(
+      packed_rules->Get(1)->compressed_pattern(), &pattern_text));
+  EXPECT_EQ(pattern_text, kTestPattern2);
+
+  // Decompressing the object model in place must restore every field.
+  EXPECT_TRUE(DecompressActionsModel(&model));
+  EXPECT_EQ(model.rules->regex_rule[0]->pattern, kTestPattern1);
+  EXPECT_EQ(model.rules->regex_rule[1]->pattern, kTestPattern2);
+  EXPECT_EQ(model.rules->regex_rule[1]->output_pattern, kTestOutputPattern);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/annotator/annotator_jni_test.cc b/native/annotator/annotator_jni_test.cc
new file mode 100644
index 0000000..929fb59
--- /dev/null
+++ b/native/annotator/annotator_jni_test.cc

@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/annotator_jni.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Checks ConvertIndicesBMPToUTF8 and ConvertIndicesUTF8ToBMP against each
+// other. In these expectations the emoji 😁 (0x1f601) takes two positions in
+// the BMP indexing and one position in the UTF8 indexing.
+TEST(Annotator, ConvertIndicesBMPUTF8) {
+  constexpr char kHello[] = "hello";
+  constexpr char kHelloWorld[] = "hello world";
+  constexpr char kEmojiFirst[] = "😁ello world";
+  constexpr char kEmojiLast[] = "hello worl😁";
+  constexpr char kEmojiBefore[] = "😁 Hello World.";
+  constexpr char kEmojiBeforeAndInside[] = "😁 Hell😁 World.";
+  constexpr char kEmojiBeforeAndAfter[] = "😁 Hello😁World.";
+  constexpr char kEmojiBeforeInsideAndAfter[] = "😁 Hell😁😁World.";
+
+  // Boundary cases: selection covering the whole text.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8(kHello, {0, 5}), std::make_pair(0, 5));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP(kHello, {0, 5}), std::make_pair(0, 5));
+
+  // Boundary cases: selection at the very start of the text.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8(kHelloWorld, {0, 5}), std::make_pair(0, 5));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP(kHelloWorld, {0, 5}), std::make_pair(0, 5));
+  EXPECT_EQ(ConvertIndicesBMPToUTF8(kEmojiFirst, {0, 6}), std::make_pair(0, 5));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP(kEmojiFirst, {0, 5}), std::make_pair(0, 6));
+
+  // Boundary cases: selection at the very end of the text.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8(kHelloWorld, {6, 11}),
+            std::make_pair(6, 11));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP(kHelloWorld, {6, 11}),
+            std::make_pair(6, 11));
+  EXPECT_EQ(ConvertIndicesBMPToUTF8(kEmojiLast, {6, 12}),
+            std::make_pair(6, 11));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP(kEmojiLast, {6, 11}),
+            std::make_pair(6, 12));
+
+  // The longer character is before the selection.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8(kEmojiBefore, {3, 8}),
+            std::make_pair(2, 7));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP(kEmojiBefore, {2, 7}),
+            std::make_pair(3, 8));
+
+  // Longer characters are before and inside the selection.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8(kEmojiBeforeAndInside, {3, 9}),
+            std::make_pair(2, 7));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP(kEmojiBeforeAndInside, {2, 7}),
+            std::make_pair(3, 9));
+
+  // Longer characters are before and after the selection.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8(kEmojiBeforeAndAfter, {3, 8}),
+            std::make_pair(2, 7));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP(kEmojiBeforeAndAfter, {2, 7}),
+            std::make_pair(3, 8));
+
+  // Longer characters are before, inside and after the selection.
+  EXPECT_EQ(ConvertIndicesBMPToUTF8(kEmojiBeforeInsideAndAfter, {3, 9}),
+            std::make_pair(2, 7));
+  EXPECT_EQ(ConvertIndicesUTF8ToBMP(kEmojiBeforeInsideAndAfter, {2, 7}),
+            std::make_pair(3, 9));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/annotator/cached-features_test.cc b/native/annotator/cached-features_test.cc
new file mode 100644
index 0000000..702f3ca
--- /dev/null
+++ b/native/annotator/cached-features_test.cc

@@ -0,0 +1,157 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/cached-features.h"
+
+#include "annotator/model-executor.h"
+#include "utils/tensor-view.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::ElementsAreArray;
+using testing::FloatEq;
+using testing::Matcher;
+
+namespace libtextclassifier3 {
+namespace {
+
+// Builds a matcher for a float vector that compares element by element with
+// FloatEq against `values`, in order.
+Matcher<std::vector<float>> ElementsAreFloat(const std::vector<float>& values) {
+  std::vector<Matcher<float>> float_matchers;
+  for (auto it = values.begin(); it != values.end(); ++it) {
+    float_matchers.push_back(FloatEq(*it));
+  }
+  return ElementsAreArray(float_matchers);
+}
+
+// Produces three feature values for each token i in [1, num_tokens], in
+// order: i * 11, -i * 11 and i * 0.1. Returns an empty vector when
+// `num_tokens` is not positive.
+std::unique_ptr<std::vector<float>> MakeFeatures(int num_tokens) {
+  std::unique_ptr<std::vector<float>> result(new std::vector<float>());
+  int token = 0;
+  while (token < num_tokens) {
+    ++token;
+    result->push_back(token * 11.0f);
+    result->push_back(-token * 11.0f);
+    result->push_back(token * 0.1f);
+  }
+  return result;
+}
+
+// Returns, as a fresh vector, whatever `cached_features` appends via
+// AppendClickContextFeaturesForClick for `click_pos`.
+std::vector<float> GetCachedClickContextFeatures(
+    const CachedFeatures& cached_features, int click_pos) {
+  std::vector<float> collected;
+  cached_features.AppendClickContextFeaturesForClick(click_pos, &collected);
+  return collected;
+}
+
+// Returns, as a fresh vector, whatever `cached_features` appends via
+// AppendBoundsSensitiveFeaturesForSpan for `selected_span`.
+std::vector<float> GetCachedBoundsSensitiveFeatures(
+    const CachedFeatures& cached_features, TokenSpan selected_span) {
+  std::vector<float> collected;
+  cached_features.AppendBoundsSensitiveFeaturesForSpan(selected_span,
+                                                       &collected);
+  return collected;
+}
+
+// Creates cached features for the token span {3, 10} from nine synthetic
+// tokens (three floats each) and checks the click-context features returned
+// for clicks at positions 5, 6 and 7 with a context size of 2.
+TEST(CachedFeaturesTest, ClickContext) {
+  FeatureProcessorOptionsT processor_options;
+  processor_options.context_size = 2;
+  processor_options.feature_version = 1;
+  flatbuffers::FlatBufferBuilder fbb;
+  fbb.Finish(CreateFeatureProcessorOptions(fbb, &processor_options));
+  flatbuffers::DetachedBuffer serialized_options = fbb.Release();
+  const FeatureProcessorOptions* options_view =
+      flatbuffers::GetRoot<FeatureProcessorOptions>(serialized_options.data());
+
+  std::unique_ptr<std::vector<float>> token_features = MakeFeatures(9);
+  std::unique_ptr<std::vector<float>> padding(
+      new std::vector<float>{112233.0, -112233.0, 321.0});
+
+  const std::unique_ptr<CachedFeatures> cache = CachedFeatures::Create(
+      {3, 10}, std::move(token_features), std::move(padding), options_view,
+      /*feature_vector_size=*/3);
+  ASSERT_TRUE(cache != nullptr);
+
+  // Each row below holds the three feature values of one token.
+  EXPECT_THAT(GetCachedClickContextFeatures(*cache, 5),
+              ElementsAreFloat({11.0, -11.0, 0.1,  //
+                                22.0, -22.0, 0.2,  //
+                                33.0, -33.0, 0.3,  //
+                                44.0, -44.0, 0.4,  //
+                                55.0, -55.0, 0.5}));
+
+  EXPECT_THAT(GetCachedClickContextFeatures(*cache, 6),
+              ElementsAreFloat({22.0, -22.0, 0.2,  //
+                                33.0, -33.0, 0.3,  //
+                                44.0, -44.0, 0.4,  //
+                                55.0, -55.0, 0.5,  //
+                                66.0, -66.0, 0.6}));
+
+  EXPECT_THAT(GetCachedClickContextFeatures(*cache, 7),
+              ElementsAreFloat({33.0, -33.0, 0.3,  //
+                                44.0, -44.0, 0.4,  //
+                                55.0, -55.0, 0.5,  //
+                                66.0, -66.0, 0.6,  //
+                                77.0, -77.0, 0.7}));
+}
+
+// Creates cached features for the token span {3, 9} with bounds-sensitive
+// features enabled (two tokens before, inside-left, inside-right and after,
+// plus the inside bag and inside length) and checks the feature vectors
+// returned for several selected spans.
+TEST(CachedFeaturesTest, BoundsSensitive) {
+  FeatureProcessorOptionsT processor_options;
+  processor_options.bounds_sensitive_features.reset(
+      new FeatureProcessorOptions_::BoundsSensitiveFeaturesT());
+  {
+    auto& bounds = *processor_options.bounds_sensitive_features;
+    bounds.enabled = true;
+    bounds.num_tokens_before = 2;
+    bounds.num_tokens_inside_left = 2;
+    bounds.num_tokens_inside_right = 2;
+    bounds.num_tokens_after = 2;
+    bounds.include_inside_bag = true;
+    bounds.include_inside_length = true;
+  }
+  processor_options.feature_version = 2;
+  flatbuffers::FlatBufferBuilder fbb;
+  fbb.Finish(CreateFeatureProcessorOptions(fbb, &processor_options));
+  flatbuffers::DetachedBuffer serialized_options = fbb.Release();
+  const FeatureProcessorOptions* options_view =
+      flatbuffers::GetRoot<FeatureProcessorOptions>(serialized_options.data());
+
+  std::unique_ptr<std::vector<float>> token_features = MakeFeatures(9);
+  std::unique_ptr<std::vector<float>> padding(
+      new std::vector<float>{112233.0, -112233.0, 321.0});
+
+  const std::unique_ptr<CachedFeatures> cache = CachedFeatures::Create(
+      {3, 9}, std::move(token_features), std::move(padding), options_view,
+      /*feature_vector_size=*/3);
+  ASSERT_TRUE(cache != nullptr);
+
+  // Each row below holds one three-value feature group; the final single
+  // value closes the vector.
+  EXPECT_THAT(GetCachedBoundsSensitiveFeatures(*cache, {5, 8}),
+              ElementsAreFloat({11.0, -11.0, 0.1,            //
+                                22.0, -22.0, 0.2,            //
+                                33.0, -33.0, 0.3,            //
+                                44.0, -44.0, 0.4,            //
+                                44.0, -44.0, 0.4,            //
+                                55.0, -55.0, 0.5,            //
+                                66.0, -66.0, 0.6,            //
+                                112233.0, -112233.0, 321.0,  //
+                                44.0, -44.0, 0.4,            //
+                                3.0}));
+
+  EXPECT_THAT(GetCachedBoundsSensitiveFeatures(*cache, {5, 7}),
+              ElementsAreFloat({11.0, -11.0, 0.1,   //
+                                22.0, -22.0, 0.2,   //
+                                33.0, -33.0, 0.3,   //
+                                44.0, -44.0, 0.4,   //
+                                33.0, -33.0, 0.3,   //
+                                44.0, -44.0, 0.4,   //
+                                55.0, -55.0, 0.5,   //
+                                66.0, -66.0, 0.6,   //
+                                38.5, -38.5, 0.35,  //
+                                2.0}));
+
+  EXPECT_THAT(GetCachedBoundsSensitiveFeatures(*cache, {6, 8}),
+              ElementsAreFloat({22.0, -22.0, 0.2,            //
+                                33.0, -33.0, 0.3,            //
+                                44.0, -44.0, 0.4,            //
+                                55.0, -55.0, 0.5,            //
+                                44.0, -44.0, 0.4,            //
+                                55.0, -55.0, 0.5,            //
+                                66.0, -66.0, 0.6,            //
+                                112233.0, -112233.0, 321.0,  //
+                                49.5, -49.5, 0.45,           //
+                                2.0}));
+
+  EXPECT_THAT(GetCachedBoundsSensitiveFeatures(*cache, {6, 7}),
+              ElementsAreFloat({22.0, -22.0, 0.2,            //
+                                33.0, -33.0, 0.3,            //
+                                44.0, -44.0, 0.4,            //
+                                112233.0, -112233.0, 321.0,  //
+                                112233.0, -112233.0, 321.0,  //
+                                44.0, -44.0, 0.4,            //
+                                55.0, -55.0, 0.5,            //
+                                66.0, -66.0, 0.6,            //
+                                44.0, -44.0, 0.4,            //
+                                1.0}));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/annotator/duration/duration_test.cc b/native/annotator/duration/duration_test.cc
new file mode 100644
index 0000000..a0985a2
--- /dev/null
+++ b/native/annotator/duration/duration_test.cc

@@ -0,0 +1,567 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/duration/duration.h"
+
+#include <string>
+#include <vector>
+
+#include "annotator/collections.h"
+#include "annotator/model_generated.h"
+#include "annotator/types-test-util.h"
+#include "annotator/types.h"
+#include "utils/test-utils.h"
+#include "utils/utf8/unicodetext.h"
+#include "utils/utf8/unilib.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using testing::AllOf;
+using testing::ElementsAre;
+using testing::Field;
+using testing::IsEmpty;
+
+// Returns the English DurationAnnotatorOptions used by DurationAnnotatorTest.
+// The serialized buffer is built on the first call and reused afterwards.
+const DurationAnnotatorOptions* TestingDurationAnnotatorOptions() {
+  static const flatbuffers::DetachedBuffer* serialized_options = []() {
+    DurationAnnotatorOptionsT unpacked;
+    unpacked.enabled = true;
+
+    // Unit names, singular and plural.
+    unpacked.week_expressions = {"week", "weeks"};
+    unpacked.day_expressions = {"day", "days"};
+    unpacked.hour_expressions = {"hour", "hours"};
+    unpacked.minute_expressions = {"minute", "minutes"};
+    unpacked.second_expressions = {"second", "seconds"};
+
+    unpacked.filler_expressions = {"and", "a", "an", "one"};
+    unpacked.half_expressions = {"half"};
+
+    // Used by tests such as "5-minute", where quantity and unit share a token.
+    unpacked.sub_token_separator_codepoints = {'-'};
+
+    flatbuffers::FlatBufferBuilder fbb;
+    fbb.Finish(DurationAnnotatorOptions::Pack(fbb, &unpacked));
+    return new flatbuffers::DetachedBuffer(fbb.Release());
+  }();
+
+  return flatbuffers::GetRoot<DurationAnnotatorOptions>(
+      serialized_options->data());
+}
+
+// Creates a FeatureProcessor over `unilib` whose only tokenization rule is a
+// whitespace separator for the codepoint range start=32, end=33. The
+// serialized options are built on the first call and shared by later calls.
+std::unique_ptr<FeatureProcessor> BuildFeatureProcessor(const UniLib* unilib) {
+  static const flatbuffers::DetachedBuffer* serialized_options = []() {
+    // 32 is the space character; presumably `end` is exclusive -- confirm
+    // against the TokenizationCodepointRange schema.
+    auto* space_range = new TokenizationCodepointRangeT();
+    space_range->start = 32;
+    space_range->end = 33;
+    space_range->role = TokenizationCodepointRange_::Role_WHITESPACE_SEPARATOR;
+
+    FeatureProcessorOptionsT unpacked;
+    unpacked.context_size = 1;
+    unpacked.max_selection_span = 1;
+    unpacked.snap_label_span_boundaries_to_containing_tokens = false;
+    unpacked.ignored_span_boundary_codepoints.push_back(',');
+    // The vector element takes ownership of the raw pointer.
+    unpacked.tokenization_codepoint_config.emplace_back(space_range);
+
+    flatbuffers::FlatBufferBuilder fbb;
+    fbb.Finish(FeatureProcessorOptions::Pack(fbb, &unpacked));
+    return new flatbuffers::DetachedBuffer(fbb.Release());
+  }();
+
+  return std::unique_ptr<FeatureProcessor>(new FeatureProcessor(
+      flatbuffers::GetRoot<FeatureProcessorOptions>(serialized_options->data()),
+      unilib));
+}
+
+// Fixture that wires a DurationAnnotator to the options returned by
+// TestingDurationAnnotatorOptions() and to a FeatureProcessor created by
+// BuildFeatureProcessor().
+class DurationAnnotatorTest : public ::testing::Test {
+ protected:
+  DurationAnnotatorTest()
+      : INIT_UNILIB_FOR_TESTING(unilib_),
+        feature_processor_(BuildFeatureProcessor(&unilib_)),
+        duration_annotator_(TestingDurationAnnotatorOptions(),
+                            feature_processor_.get(), &unilib_) {}
+
+  // Tokenizes `text` with the fixture's feature processor.
+  std::vector<Token> Tokenize(const UnicodeText& text) {
+    return feature_processor_->Tokenize(text);
+  }
+
+  // Declaration order matters: the constructor passes &unilib_ to the feature
+  // processor, and then both of them to the annotator.
+  UniLib unilib_;
+  std::unique_ptr<FeatureProcessor> feature_processor_;
+  DurationAnnotator duration_annotator_;
+};
+
+TEST_F(DurationAnnotatorTest, ClassifiesSimpleDuration) {
+  // The selection (14, 24) must classify as a 15-minute duration.
+  const auto is_15_minutes =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms, 15 * 60 * 1000));
+
+  ClassificationResult outcome;
+  EXPECT_TRUE(duration_annotator_.ClassifyText(
+      UTF8ToUnicodeText("Wake me up in 15 minutes ok?"), {14, 24},
+      AnnotationUsecase_ANNOTATION_USECASE_RAW, &outcome));
+  EXPECT_THAT(outcome, is_15_minutes);
+}
+
+TEST_F(DurationAnnotatorTest, ClassifiesWhenTokensDontAlignWithSelection) {
+  // The selection (13, 23) starts and ends inside whitespace-delimited tokens
+  // ("in15", "minutesok?") and must still classify as 15 minutes.
+  const auto is_15_minutes =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms, 15 * 60 * 1000));
+
+  ClassificationResult outcome;
+  EXPECT_TRUE(duration_annotator_.ClassifyText(
+      UTF8ToUnicodeText("Wake me up in15 minutesok?"), {13, 23},
+      AnnotationUsecase_ANNOTATION_USECASE_RAW, &outcome));
+  EXPECT_THAT(outcome, is_15_minutes);
+}
+
+TEST_F(DurationAnnotatorTest, DoNotClassifyWhenInputIsInvalid) {
+  // The selection (5, 6) covers only the space between the two words, so
+  // classification must fail.
+  ClassificationResult unused_outcome;
+  EXPECT_FALSE(duration_annotator_.ClassifyText(
+      UTF8ToUnicodeText("Weird space"), {5, 6},
+      AnnotationUsecase_ANNOTATION_USECASE_RAW, &unused_outcome));
+}
+
+TEST_F(DurationAnnotatorTest, FindsSimpleDuration) {
+  // Expects exactly one annotation: span (14, 24), 15 minutes.
+  const UnicodeText text = UTF8ToUnicodeText("Wake me up in 15 minutes ok?");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms, 15 * 60 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(14, 24)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(DurationAnnotatorTest, FindsDurationWithHalfExpression) {
+  // Expects exactly one annotation: span (16, 34), 3.5 minutes.
+  const UnicodeText text =
+      UTF8ToUnicodeText("Set a timer for 3 and half minutes ok?");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms, 3.5 * 60 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(16, 34)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(DurationAnnotatorTest, FindsComposedDuration) {
+  // Expects exactly one annotation: span (14, 35), 3 hours plus 5 seconds.
+  const UnicodeText text =
+      UTF8ToUnicodeText("Wake me up in 3 hours and 5 seconds ok?");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms,
+                  3 * 60 * 60 * 1000 + 5 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(14, 35)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(DurationAnnotatorTest, AllUnitsAreCovered) {
+  // One of each unit (week, day, hour, minute, second) must be summed into a
+  // single annotation over span (13, 65).
+  const UnicodeText text = UTF8ToUnicodeText(
+      "See you in a week and a day and an hour and a minute and a second");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms,
+                  7 * 24 * 60 * 60 * 1000 + 24 * 60 * 60 * 1000 +
+                      60 * 60 * 1000 + 60 * 1000 + 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(13, 65)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(DurationAnnotatorTest, FindsHalfAnHour) {
+  // Expects exactly one annotation: span (16, 28), half an hour.
+  const UnicodeText text = UTF8ToUnicodeText("Set a timer for half an hour");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms, 0.5 * 60 * 60 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(16, 28)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(DurationAnnotatorTest, FindsWhenHalfIsAfterGranularitySpecification) {
+  // "half" follows the unit here; expects span (16, 33) worth 1.5 hours.
+  const UnicodeText text =
+      UTF8ToUnicodeText("Set a timer for 1 hour and a half");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms, 1.5 * 60 * 60 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(16, 33)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(DurationAnnotatorTest, FindsAnHourAndAHalf) {
+  // Expects exactly one annotation: span (19, 34), 1.5 hours. The expected
+  // span starts at "hour", after the leading "an".
+  const UnicodeText text =
+      UTF8ToUnicodeText("Set a timer for an hour and a half");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms, 1.5 * 60 * 60 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(19, 34)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(DurationAnnotatorTest,
+       FindsCorrectlyWhenSecondsComeSecondAndDontHaveNumber) {
+  // "a second" carries no number and must count as one second: span (16, 39),
+  // 10 minutes plus 1 second.
+  const UnicodeText text =
+      UTF8ToUnicodeText("Set a timer for 10 minutes and a second ok?");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms,
+                  10 * 60 * 1000 + 1 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(16, 39)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(DurationAnnotatorTest, DoesNotGreedilyTakeFillerWords) {
+  // Filler words before and after the duration must stay outside the span:
+  // expects (22, 46), 10 minutes plus 2 seconds.
+  const UnicodeText text = UTF8ToUnicodeText(
+      "Set a timer for a a a 10 minutes and 2 seconds an and an ok?");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms,
+                  10 * 60 * 1000 + 2 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(22, 46)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(DurationAnnotatorTest, DoesNotCrashWhenJustHalfIsSaid) {
+  // A lone "half" with no unit must be handled without crashing and must not
+  // produce any annotation.
+  const UnicodeText text = UTF8ToUnicodeText("Set a timer for half ok?");
+  std::vector<Token> tokens = Tokenize(text);
+  std::vector<AnnotatedSpan> result;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokens, AnnotationUsecase_ANNOTATION_USECASE_RAW, &result));
+
+  // IsEmpty() avoids comparing the unsigned size() against a signed literal
+  // and matches how the other empty-result check in this file is written.
+  EXPECT_THAT(result, IsEmpty());
+}
+
+TEST_F(DurationAnnotatorTest, StripsPunctuationFromTokens) {
+  // Commas attached to the tokens must not prevent matching: expects span
+  // (16, 46), 10 minutes plus 2 seconds.
+  const UnicodeText text =
+      UTF8ToUnicodeText("Set a timer for 10 ,minutes, ,and, ,2, seconds, ok?");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms,
+                  10 * 60 * 1000 + 2 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(16, 46)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(DurationAnnotatorTest, FindsCorrectlyWithCombinedQuantityUnitToken) {
+  // "5-minute" is a single whitespace-delimited token holding both quantity
+  // and unit: expects span (5, 13), 5 minutes.
+  const UnicodeText text = UTF8ToUnicodeText("Show 5-minute timer.");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms, 5 * 60 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(5, 13)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(DurationAnnotatorTest,
+       DoesNotIntOverflowWithDurationThatHasMoreThanInt32Millis) {
+  // 1400 hours in milliseconds does not fit in 32 bits, so the expectation is
+  // computed with long long literals.
+  const auto is_1400_hours =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms,
+                  1400LL * 60LL * 60LL * 1000LL));
+
+  ClassificationResult outcome;
+  EXPECT_TRUE(duration_annotator_.ClassifyText(
+      UTF8ToUnicodeText("1400 hours"), {0, 10},
+      AnnotationUsecase_ANNOTATION_USECASE_RAW, &outcome));
+  EXPECT_THAT(outcome, is_1400_hours);
+}
+
+TEST_F(DurationAnnotatorTest, FindsSimpleDurationIgnoringCase) {
+  // The mixed-case unit "MiNuTeS" must match: span (14, 24), 15 minutes.
+  const UnicodeText text = UTF8ToUnicodeText("Wake me up in 15 MiNuTeS ok?");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms, 15 * 60 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(14, 24)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(DurationAnnotatorTest, FindsDurationWithHalfExpressionIgnoringCase) {
+  // The mixed-case "HaLf" must match: span (16, 34), 3.5 minutes.
+  const UnicodeText text =
+      UTF8ToUnicodeText("Set a timer for 3 and HaLf minutes ok?");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms, 3.5 * 60 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(16, 34)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(DurationAnnotatorTest,
+       FindsDurationWithHalfExpressionIgnoringFillerWordCase) {
+  // The mixed-case filler "AnD" must match: span (16, 34), 3.5 minutes.
+  const UnicodeText text =
+      UTF8ToUnicodeText("Set a timer for 3 AnD half minutes ok?");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms, 3.5 * 60 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(16, 34)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(DurationAnnotatorTest, CorrectlyAnnotatesSpanWithDanglingQuantity) {
+  // The trailing "10" has no unit; the expected span (0, 13) still covers it.
+  const UnicodeText text = UTF8ToUnicodeText("20 minutes 10");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  // TODO(b/144752747) Include test for duration_ms.
+  const auto is_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(0, 13)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_duration)))));
+}
+
+// Returns the Japanese DurationAnnotatorOptions used by
+// JapaneseDurationAnnotatorTest. The serialized buffer is built on the first
+// call and reused afterwards.
+const DurationAnnotatorOptions* TestingJapaneseDurationAnnotatorOptions() {
+  static const flatbuffers::DetachedBuffer* serialized_options = []() {
+    DurationAnnotatorOptionsT unpacked;
+    unpacked.enabled = true;
+
+    unpacked.week_expressions = {"週間"};
+    unpacked.day_expressions = {"日間"};
+    unpacked.hour_expressions = {"時間"};
+    unpacked.minute_expressions = {"分", "分間"};
+    unpacked.second_expressions = {"秒", "秒間"};
+    unpacked.half_expressions = {"半"};
+
+    // Unlike the English options: a unit without a number is not a duration,
+    // and a trailing number without a unit is not interpreted.
+    unpacked.require_quantity = true;
+    unpacked.enable_dangling_quantity_interpretation = false;
+
+    flatbuffers::FlatBufferBuilder fbb;
+    fbb.Finish(DurationAnnotatorOptions::Pack(fbb, &unpacked));
+    return new flatbuffers::DetachedBuffer(fbb.Release());
+  }();
+
+  return flatbuffers::GetRoot<DurationAnnotatorOptions>(
+      serialized_options->data());
+}
+
+// Fixture that wires a DurationAnnotator to the options returned by
+// TestingJapaneseDurationAnnotatorOptions() and to a FeatureProcessor created
+// by BuildFeatureProcessor().
+class JapaneseDurationAnnotatorTest : public ::testing::Test {
+ protected:
+  JapaneseDurationAnnotatorTest()
+      : INIT_UNILIB_FOR_TESTING(unilib_),
+        feature_processor_(BuildFeatureProcessor(&unilib_)),
+        duration_annotator_(TestingJapaneseDurationAnnotatorOptions(),
+                            feature_processor_.get(), &unilib_) {}
+
+  // Tokenizes `text` with the fixture's feature processor.
+  std::vector<Token> Tokenize(const UnicodeText& text) {
+    return feature_processor_->Tokenize(text);
+  }
+
+  // Declaration order matters: the constructor passes &unilib_ to the feature
+  // processor, and then both of them to the annotator.
+  UniLib unilib_;
+  std::unique_ptr<FeatureProcessor> feature_processor_;
+  DurationAnnotator duration_annotator_;
+};
+
+TEST_F(JapaneseDurationAnnotatorTest, FindsDuration) {
+  // Expects exactly one annotation: span (0, 4), 10 minutes.
+  const UnicodeText text = UTF8ToUnicodeText("10 分 の アラーム");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms, 10 * 60 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(0, 4)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(JapaneseDurationAnnotatorTest, FindsDurationWithHalfExpression) {
+  // Expects exactly one annotation: span (0, 5), 2.5 minutes.
+  const UnicodeText text = UTF8ToUnicodeText("2 分 半 の アラーム");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms, 2.5 * 60 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(0, 5)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+TEST_F(JapaneseDurationAnnotatorTest, IgnoresDurationWithoutQuantity) {
+  // The Japanese options set require_quantity, so a unit with no number must
+  // yield no annotation.
+  const UnicodeText text = UTF8ToUnicodeText("分 の アラーム");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  EXPECT_THAT(spans, IsEmpty());
+}
+
+TEST_F(JapaneseDurationAnnotatorTest, IgnoresDanglingQuantity) {
+  // Dangling-quantity interpretation is disabled in the Japanese options, so
+  // the trailing "10" must stay outside the span: expects (0, 3), 2 minutes.
+  const UnicodeText text = UTF8ToUnicodeText("2 分 10 の アラーム");
+  std::vector<Token> tokenized = Tokenize(text);
+  std::vector<AnnotatedSpan> spans;
+  EXPECT_TRUE(duration_annotator_.FindAll(
+      text, tokenized, AnnotationUsecase_ANNOTATION_USECASE_RAW, &spans));
+
+  const auto is_expected_duration =
+      AllOf(Field(&ClassificationResult::collection, "duration"),
+            Field(&ClassificationResult::duration_ms, 2 * 60 * 1000));
+  EXPECT_THAT(spans,
+              ElementsAre(AllOf(
+                  Field(&AnnotatedSpan::span, CodepointSpan(0, 3)),
+                  Field(&AnnotatedSpan::classification,
+                        ElementsAre(is_expected_duration)))));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/annotator/flatbuffer-utils.cc b/native/annotator/flatbuffer-utils.cc
deleted file mode 100644
index d83d2bb..0000000
--- a/native/annotator/flatbuffer-utils.cc
+++ /dev/null

@@ -1,64 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "annotator/flatbuffer-utils.h"
-
-#include <memory>
-
-#include "utils/base/logging.h"
-#include "utils/flatbuffers.h"
-#include "flatbuffers/reflection.h"
-
-namespace libtextclassifier3 {
-
-bool SwapFieldNamesForOffsetsInPath(ModelT* model) {
-  if (model->regex_model == nullptr || model->entity_data_schema.empty()) {
-    // Nothing to do.
-    return true;
-  }
-  const reflection::Schema* schema =
-      LoadAndVerifyFlatbuffer<reflection::Schema>(
-          model->entity_data_schema.data(), model->entity_data_schema.size());
-
-  for (std::unique_ptr<RegexModel_::PatternT>& pattern :
-       model->regex_model->patterns) {
-    for (std::unique_ptr<CapturingGroupT>& group : pattern->capturing_group) {
-      if (group->entity_field_path == nullptr) {
-        continue;
-      }
-
-      if (!SwapFieldNamesForOffsetsInPath(schema,
-                                          group->entity_field_path.get())) {
-        return false;
-      }
-    }
-  }
-
-  return true;
-}
-
-std::string SwapFieldNamesForOffsetsInPathInSerializedModel(
-    const std::string& model) {
-  std::unique_ptr<ModelT> unpacked_model = UnPackModel(model.c_str());
-  TC3_CHECK(unpacked_model != nullptr);
-  TC3_CHECK(SwapFieldNamesForOffsetsInPath(unpacked_model.get()));
-  flatbuffers::FlatBufferBuilder builder;
-  FinishModelBuffer(builder, Model::Pack(builder, unpacked_model.get()));
-  return std::string(reinterpret_cast<const char*>(builder.GetBufferPointer()),
-                     builder.GetSize());
-}
-
-}  // namespace libtextclassifier3

diff --git a/native/annotator/flatbuffer-utils.h b/native/annotator/flatbuffer-utils.h
deleted file mode 100644
index a7e5d64..0000000
--- a/native/annotator/flatbuffer-utils.h
+++ /dev/null

@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// Utility functions for working with FlatBuffers in the annotator model.
-
-#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_FLATBUFFER_UTILS_H_
-#define LIBTEXTCLASSIFIER_ANNOTATOR_FLATBUFFER_UTILS_H_
-
-#include <string>
-
-#include "annotator/model_generated.h"
-
-namespace libtextclassifier3 {
-
-// Resolves field lookups by name to the concrete field offsets in the regex
-// rules of the model.
-bool SwapFieldNamesForOffsetsInPath(ModelT* model);
-
-// Same as above but for a serialized model.
-std::string SwapFieldNamesForOffsetsInPathInSerializedModel(
-    const std::string& model);
-
-}  // namespace libtextclassifier3
-
-#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_FLATBUFFER_UTILS_H_

diff --git a/native/annotator/grammar/dates/annotations/annotation-util_test.cc b/native/annotator/grammar/dates/annotations/annotation-util_test.cc
new file mode 100644
index 0000000..6d25d64
--- /dev/null
+++ b/native/annotator/grammar/dates/annotations/annotation-util_test.cc

@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/grammar/dates/annotations/annotation-util.h"
+
+#include "annotator/grammar/dates/annotations/annotation.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(AnnotationUtilTest, VerifyIntFunctions) {
+  // Add two scalar int properties and one repeated int property, keeping the
+  // index each Add* call returns.
+  Annotation annotation;
+  const int key1_index = AddIntProperty("key1", 1, &annotation);
+  const int key2_index = AddIntProperty("key2", 2, &annotation);
+  static const int kValuesKey3[] = {3, 4, 5};
+  const int key3_index =
+      AddRepeatedIntProperty("key3", kValuesKey3, /*size=*/3, &annotation);
+
+  // Scalar values can be read back by name, in any order.
+  EXPECT_EQ(2, GetIntProperty("key2", annotation));
+  EXPECT_EQ(1, GetIntProperty("key1", annotation));
+
+  // Name lookup yields the index returned at insertion, or -1 when absent.
+  EXPECT_EQ(key1_index, GetPropertyIndex("key1", annotation));
+  EXPECT_EQ(key2_index, GetPropertyIndex("key2", annotation));
+  EXPECT_EQ(key3_index, GetPropertyIndex("key3", annotation));
+  EXPECT_EQ(-1, GetPropertyIndex("invalid_key", annotation));
+}
+
+TEST(AnnotationUtilTest, VerifyAnnotationDataFunctions) {
+  // Adds two AnnotationData properties, one holding `true` and one holding
+  // `false`, and checks that each can be found again by name.
+  Annotation annotation;
+
+  AnnotationData true_annotation_data;
+  Property true_property;
+  true_property.bool_values.push_back(true);
+  true_annotation_data.properties.push_back(true_property);
+  int index_key1 =
+      AddAnnotationDataProperty("key1", true_annotation_data, &annotation);
+
+  AnnotationData false_annotation_data;
+  Property false_property;
+  false_property.bool_values.push_back(false);
+  // Fixed copy-paste slip: this used to append to true_annotation_data, which
+  // left false_annotation_data empty when it was added under "key2".
+  false_annotation_data.properties.push_back(false_property);
+  int index_key2 =
+      AddAnnotationDataProperty("key2", false_annotation_data, &annotation);
+
+  // Name lookup yields the index returned at insertion, or -1 when absent.
+  EXPECT_EQ(index_key1, GetPropertyIndex("key1", annotation));
+  EXPECT_EQ(index_key2, GetPropertyIndex("key2", annotation));
+  EXPECT_EQ(-1, GetPropertyIndex("invalid_key", annotation));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/annotator/grammar/dates/timezone-code.fbs b/native/annotator/grammar/dates/timezone-code.fbs
index ae74982..ff615ee 100755
--- a/native/annotator/grammar/dates/timezone-code.fbs
+++ b/native/annotator/grammar/dates/timezone-code.fbs

@@ -17,9 +17,7 @@
 namespace libtextclassifier3.dates;
 enum TimezoneCode : int {
   TIMEZONE_CODE_NONE = -1,
-
   ETC_UNKNOWN = 0,
-
   PST8PDT = 1,
   // Delegate.
 

diff --git a/native/annotator/grammar/dates/utils/date-match_test.cc b/native/annotator/grammar/dates/utils/date-match_test.cc
new file mode 100644
index 0000000..f10f32a
--- /dev/null
+++ b/native/annotator/grammar/dates/utils/date-match_test.cc

@@ -0,0 +1,397 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/grammar/dates/utils/date-match.h"
+
+#include <stdint.h>
+
+#include <string>
+
+#include "annotator/grammar/dates/dates_generated.h"
+#include "annotator/grammar/dates/timezone-code_generated.h"
+#include "annotator/grammar/dates/utils/date-utils.h"
+#include "utils/strings/append.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace dates {
+namespace {
+
+// Fixture with terse helpers for filling in individual DateMatch fields.
+// Every setter returns the match it modified so calls can be chained.
+class DateMatchTest : public ::testing::Test {
+ protected:
+  // Alias for NO_VAL; the tests pass it for fields they leave unspecified.
+  enum {
+    X = NO_VAL,
+  };
+
+  // Shorthands for the day-of-week values used by the tests.
+  static DayOfWeek DOW_X() { return DayOfWeek_DOW_NONE; }
+  static DayOfWeek SUN() { return DayOfWeek_SUNDAY; }
+
+  // Shorthands for the BC/AD values used by the tests.
+  static BCAD BCAD_X() { return BCAD_BCAD_NONE; }
+  static BCAD BC() { return BCAD_BC; }
+
+  // Writes all calendar-date fields of `date`.
+  DateMatch& SetDate(DateMatch* date, int year, int8 month, int8 day,
+                     DayOfWeek day_of_week = DOW_X(), BCAD bc_ad = BCAD_X()) {
+    DateMatch& match = *date;
+    match.year = year;
+    match.month = month;
+    match.day = day;
+    match.day_of_week = day_of_week;
+    match.bc_ad = bc_ad;
+    return match;
+  }
+
+  // Writes the time-of-day fields of `date`; omitted components become X.
+  DateMatch& SetTimeValue(DateMatch* date, int8 hour, int8 minute = X,
+                          int8 second = X, double fraction_second = X) {
+    DateMatch& match = *date;
+    match.hour = hour;
+    match.minute = minute;
+    match.second = second;
+    match.fraction_second = fraction_second;
+    return match;
+  }
+
+  // Writes the time span code of `date`.
+  DateMatch& SetTimeSpan(DateMatch* date, TimespanCode time_span_code) {
+    DateMatch& match = *date;
+    match.time_span_code = time_span_code;
+    return match;
+  }
+
+  // Writes the time zone code and offset of `date`.
+  DateMatch& SetTimeZone(DateMatch* date, TimezoneCode time_zone_code,
+                         int16 time_zone_offset = INT16_MIN) {
+    DateMatch& match = *date;
+    match.time_zone_code = time_zone_code;
+    match.time_zone_offset = time_zone_offset;
+    return match;
+  }
+
+  // True iff both matches agree on day, month, year and day of week.
+  bool SameDate(const DateMatch& a, const DateMatch& b) {
+    if (a.day != b.day || a.month != b.month) {
+      return false;
+    }
+    if (a.year != b.year) {
+      return false;
+    }
+    return a.day_of_week == b.day_of_week;
+  }
+
+  // Writes only the day-of-week field of `date`.
+  DateMatch& SetDayOfWeek(DateMatch* date, DayOfWeek dow) {
+    DateMatch& match = *date;
+    match.day_of_week = dow;
+    return match;
+  }
+};
+
+// Guards the enum ranges against outgrowing the DateMatch fields that store
+// them.
+TEST_F(DateMatchTest, BitFieldWidth) {
+  // DateMatch::day_of_week is 8 bits wide.
+  EXPECT_LE(INT8_MIN, DayOfWeek_MIN);
+  EXPECT_GE(INT8_MAX, DayOfWeek_MAX);
+
+  // DateMatch::bc_ad is 8 bits wide.
+  EXPECT_LE(INT8_MIN, BCAD_MIN);
+  EXPECT_GE(INT8_MAX, BCAD_MAX);
+
+  // DateMatch::time_span_code is 16 bits wide.
+  EXPECT_LE(INT16_MIN, TimespanCode_MIN);
+  EXPECT_GE(INT16_MAX, TimespanCode_MAX);
+}
+
+// Exercises DateMatch::IsValid() over combinations of set and unset (X) date
+// and time fields. Cases are grouped by expected outcome; each one starts from
+// a default-constructed DateMatch and prints its DebugString() on failure.
+TEST_F(DateMatchTest, IsValid) {
+  // Valid: dates.
+  {
+    DateMatch d;
+    SetDate(&d, 2014, 1, 26);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, 2014, 1, X);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, 2014, X, X);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, X, 1, 26);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, X, 1, X);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, X, X, 26);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, 2014, 1, 26, SUN());
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, X, 1, 26, SUN());
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, X, X, 26, SUN());
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, 2014, 1, 26, DOW_X(), BC());
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  // Valid: times.
+  {
+    DateMatch d;
+    SetTimeValue(&d, 12, 30, 59, 0.99);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetTimeValue(&d, 12, 30, 59);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetTimeValue(&d, 12, 30);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetTimeValue(&d, 12);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  // Valid: mixed.
+  {
+    DateMatch d;
+    SetDate(&d, 2014, 1, 26);
+    SetTimeValue(&d, 12, 30, 59, 0.99);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, X, 1, 26);
+    SetTimeValue(&d, 12, 30, 59);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    SetDate(&d, X, X, X, SUN());
+    SetTimeValue(&d, 12, 30);
+    EXPECT_TRUE(d.IsValid()) << d.DebugString();
+  }
+  // Invalid: dates.
+  {
+    DateMatch d;
+    // BC marker but no year.
+    SetDate(&d, X, 1, 26, DOW_X(), BC());
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    // Year and day but no month.
+    SetDate(&d, 2014, X, 26);
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    // Year and day of week only.
+    SetDate(&d, 2014, X, X, SUN());
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    // Month and day of week only.
+    SetDate(&d, X, 1, X, SUN());
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  // Invalid: times.
+  {
+    DateMatch d;
+    // Second without minute.
+    SetTimeValue(&d, 12, X, 59);
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    // Fraction of a second without minute and second.
+    SetTimeValue(&d, 12, X, X, 0.99);
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    // Fraction of a second without second.
+    SetTimeValue(&d, 12, 30, X, 0.99);
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  {
+    DateMatch d;
+    // Minute without hour.
+    SetTimeValue(&d, X, 30);
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  // Invalid: mixed.
+  {
+    DateMatch d;
+    // Hour given while the date has no day.
+    SetDate(&d, 2014, 1, X);
+    SetTimeValue(&d, 12);
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+  // Invalid: empty.
+  {
+    // Nothing is set on the match.
+    DateMatch d;
+    EXPECT_FALSE(d.IsValid()) << d.DebugString();
+  }
+}
+
+// Concatenates one "[index] == <DebugString>\n" line per instance. The tests
+// stream the result into assertion messages to show the matches involved.
+std::string DebugStrings(const std::vector<DateMatch>& instances) {
+  std::string res;
+  // The index stays an int so that it matches the "%d" conversion below; the
+  // count is converted once to avoid a signed/unsigned comparison in the loop.
+  const int num_instances = static_cast<int>(instances.size());
+  for (int i = 0; i < num_instances; ++i) {
+    ::libtextclassifier3::strings::SStringAppendF(
+        &res, 0, "[%d] == %s\n", i, instances[i].DebugString().c_str());
+  }
+  return res;
+}
+
+// Checks IsRefinement(a, b) on pairs of matches. The cases below expect `a` to
+// count as a refinement of `b` when `a` sets everything `b` sets (possibly
+// more, or exactly the same), and not when `b` sets a field that `a` leaves
+// unset.
+TEST_F(DateMatchTest, IsRefinement) {
+  {
+    DateMatch a;
+    SetDate(&a, 2014, 2, X);
+    DateMatch b;
+    SetDate(&b, 2014, X, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;
+    SetDate(&a, 2014, 2, 24);
+    DateMatch b;
+    SetDate(&b, 2014, 2, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;
+    SetDate(&a, 2014, 2, 24);
+    DateMatch b;
+    SetDate(&b, X, 2, 24);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, X, X);
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, 0, X);
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    SetTimeValue(&b, 9, X, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    DateMatch a;
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, 0, 0);
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    SetTimeValue(&b, 9, 0, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    // `a` additionally carries a time span code.
+    DateMatch a;
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, X, X);
+    SetTimeSpan(&a, TimespanCode_AM);
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    SetTimeValue(&b, 9, X, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    // `a` additionally carries a time zone code.
+    DateMatch a;
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, X, X);
+    SetTimeZone(&a, TimezoneCode_PST8PDT);
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    SetTimeValue(&b, 9, X, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    // The matches differ only in priority.
+    DateMatch a;
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, X, X);
+    a.priority += 10;
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    SetTimeValue(&b, 9, X, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    // Identical matches.
+    DateMatch a;
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, X, X);
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    SetTimeValue(&b, 9, X, X);
+    EXPECT_TRUE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    // `b` sets a minute that `a` does not have.
+    DateMatch a;
+    SetDate(&a, 2014, 2, 24);
+    SetTimeValue(&a, 9, X, X);
+    DateMatch b;
+    SetDate(&b, X, 2, 24);
+    SetTimeValue(&b, 9, 0, X);
+    EXPECT_FALSE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    // `b` sets a year that `a` does not have.
+    DateMatch a;
+    SetDate(&a, X, 2, 24);
+    SetTimeValue(&a, 9, X, X);
+    DateMatch b;
+    SetDate(&b, 2014, 2, 24);
+    EXPECT_FALSE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+  {
+    // `b` sets a time span code that `a` does not have.
+    DateMatch a;
+    SetTimeValue(&a, 9, 0, 0);
+    DateMatch b;
+    SetTimeValue(&b, 9, X, X);
+    SetTimeSpan(&b, TimespanCode_AM);
+    EXPECT_FALSE(IsRefinement(a, b)) << DebugStrings({a, b});
+  }
+}
+
+// FillDateInstance() must carry the match's annotator priority score over to
+// the priority_score of the produced span.
+TEST_F(DateMatchTest, FillDateInstance_AnnotatorPriorityScore) {
+  // Year/month match with an explicit annotator priority score.
+  DateMatch match;
+  SetDate(&match, 2014, 2, X).annotator_priority_score = 0.5;
+
+  DatetimeParseResultSpan span;
+  FillDateInstance(match, &span);
+
+  EXPECT_FLOAT_EQ(span.priority_score, 0.5) << DebugStrings({match});
+}
+
+// Merging a time-only match (score 1.0) into a date-only match (score 0.5)
+// must leave the merged-into match with a score of 1.0.
+TEST_F(DateMatchTest, MergeDateMatch_AnnotatorPriorityScore) {
+  // Date-only match; passed by pointer to MergeDateMatch and checked below.
+  DateMatch date_part;
+  SetDate(&date_part, 2014, 2, 4).annotator_priority_score = 0.5;
+
+  // Time-only match that gets merged into `date_part`.
+  DateMatch time_part;
+  SetTimeValue(&time_part, 10, 45, 23).annotator_priority_score = 1.0;
+
+  MergeDateMatch(time_part, &date_part, false);
+  EXPECT_FLOAT_EQ(date_part.annotator_priority_score, 1.0);
+}
+
+}  // namespace
+}  // namespace dates
+}  // namespace libtextclassifier3

diff --git a/native/annotator/quantization_test.cc b/native/annotator/quantization_test.cc
new file mode 100644
index 0000000..b995096
--- /dev/null
+++ b/native/annotator/quantization_test.cc

@@ -0,0 +1,163 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/quantization.h"
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using testing::ElementsAreArray;
+using testing::FloatEq;
+using testing::Matcher;
+
+namespace libtextclassifier3 {
+namespace {
+
+// Builds a matcher that accepts a float vector whose elements are, position by
+// position, FloatEq() to the given values.
+Matcher<std::vector<float>> ElementsAreFloat(const std::vector<float>& values) {
+  std::vector<Matcher<float>> per_element;
+  for (auto it = values.begin(); it != values.end(); ++it) {
+    per_element.push_back(FloatEq(*it));
+  }
+  return ElementsAreArray(per_element);
+}
+
+// With 8-bit quantization every embedding byte is one value. The expectations
+// below spell out the result per byte as
+// 1 / num_sparse_features * scale[bucket] * (byte - 128).
+TEST(QuantizationTest, DequantizeAdd8bit) {
+  std::vector<float> scales{{0.1, 9.0, -7.0}};
+  std::vector<uint8> embeddings{{/*0: */ 0x00, 0xFF, 0x09, 0x00,
+                                 /*1: */ 0xFF, 0x09, 0x00, 0xFF,
+                                 /*2: */ 0x09, 0x00, 0xFF, 0x09}};
+
+  const int quantization_bits = 8;
+  const int bytes_per_embedding = 4;
+  const int num_sparse_features = 7;
+
+  // Dequantizes a single bucket into a zero-initialized 4-element vector.
+  const auto dequantize_bucket = [&](const int bucket_id) {
+    std::vector<float> dest(4, 0.0);
+    DequantizeAdd(scales.data(), embeddings.data(), bytes_per_embedding,
+                  num_sparse_features, quantization_bits, bucket_id,
+                  dest.data(), dest.size());
+    return dest;
+  };
+
+  // Bucket 0: scale 0.1, bytes 0x00 0xFF 0x09 0x00.
+  EXPECT_THAT(dequantize_bucket(0),
+              ElementsAreFloat(std::vector<float>{
+                  // clang-format off
+                  {1.0 / 7 * 0.1 * (0x00 - 128),
+                   1.0 / 7 * 0.1 * (0xFF - 128),
+                   1.0 / 7 * 0.1 * (0x09 - 128),
+                   1.0 / 7 * 0.1 * (0x00 - 128)}
+                  // clang-format on
+              }));
+
+  // Bucket 1: scale 9.0, bytes 0xFF 0x09 0x00 0xFF.
+  EXPECT_THAT(dequantize_bucket(1),
+              ElementsAreFloat(std::vector<float>{
+                  // clang-format off
+                  {1.0 / 7 * 9.0 * (0xFF - 128),
+                   1.0 / 7 * 9.0 * (0x09 - 128),
+                   1.0 / 7 * 9.0 * (0x00 - 128),
+                   1.0 / 7 * 9.0 * (0xFF - 128)}
+                  // clang-format on
+              }));
+}
+
+// 1-bit quantization with every embedding bit cleared: all 32 outputs are
+// expected to be 1 / num_sparse_features * (0 - 1).
+TEST(QuantizationTest, DequantizeAdd1bitZeros) {
+  const int bytes_per_embedding = 4;
+  const int num_buckets = 3;
+  const int num_sparse_features = 7;
+  const int quantization_bits = 1;
+  const int bucket_id = 1;
+
+  // Unit scales and all-zero embedding bytes for every bucket.
+  std::vector<float> scales(num_buckets, 1.0);
+  std::vector<uint8> embeddings(bytes_per_embedding * num_buckets, 0);
+
+  std::vector<float> dest(32);
+  DequantizeAdd(scales.data(), embeddings.data(), bytes_per_embedding,
+                num_sparse_features, quantization_bits, bucket_id, dest.data(),
+                dest.size());
+
+  const std::vector<float> expected(32, 1.0 / num_sparse_features * (0 - 1));
+  EXPECT_THAT(dest, ElementsAreFloat(expected));
+}
+
+// 1-bit quantization with every embedding bit set: all 32 outputs are
+// expected to be 1 / num_sparse_features * (1 - 1), i.e. zero.
+TEST(QuantizationTest, DequantizeAdd1bitOnes) {
+  const int bytes_per_embedding = 4;
+  const int num_buckets = 3;
+  const int num_sparse_features = 7;
+  const int quantization_bits = 1;
+  const int bucket_id = 1;
+
+  // Unit scales and all-ones embedding bytes for every bucket.
+  std::vector<float> scales(num_buckets, 1.0);
+  std::vector<uint8> embeddings(bytes_per_embedding * num_buckets, 0xFF);
+
+  std::vector<float> dest(32);
+  DequantizeAdd(scales.data(), embeddings.data(), bytes_per_embedding,
+                num_sparse_features, quantization_bits, bucket_id, dest.data(),
+                dest.size());
+
+  const std::vector<float> expected(32, 1.0 / num_sparse_features * (1 - 1));
+  EXPECT_THAT(dest, ElementsAreFloat(expected));
+}
+
+// 3-bit quantization: values are packed back to back, least significant bit
+// first, and may straddle byte boundaries. Each output is expected to be
+// 1 / num_sparse_features * (value - 4) * scale[bucket].
+TEST(QuantizationTest, DequantizeAdd3bit) {
+  const int bytes_per_embedding = 4;
+  const int num_buckets = 3;
+  const int num_sparse_features = 7;
+  const int quantization_bits = 3;
+  const int bucket_id = 1;
+
+  std::vector<float> scales(num_buckets, 1.0);
+  scales[1] = 9.0;
+  std::vector<uint8> embeddings(bytes_per_embedding * num_buckets, 0);
+  // For bucket_id=1 (bytes 4..7), the three bytes below encode the 3-bit
+  // values 1, 2, 3, 4, 5, 6, 7 for indices 0..6; the remaining bits stay
+  // cleared, so indices 7..9 hold the value 0.
+  embeddings[4] = (1 << 7) | (1 << 6) | (1 << 4) | 1;
+  embeddings[5] = (1 << 6) | (1 << 4) | (1 << 3);
+  embeddings[6] = (1 << 4) | (1 << 3) | (1 << 2) | (1 << 1) | 1;
+
+  std::vector<float> dest(10);
+  DequantizeAdd(scales.data(), embeddings.data(), bytes_per_embedding,
+                num_sparse_features, quantization_bits, bucket_id, dest.data(),
+                dest.size());
+
+  // Quantized value stored at each of the 10 output indices.
+  const int quantized_values[] = {1, 2, 3, 4, 5, 6, 7, 0, 0, 0};
+  std::vector<float> expected;
+  for (const int quantized : quantized_values) {
+    expected.push_back(1.0 / num_sparse_features * (quantized - 4) *
+                       scales[bucket_id]);
+  }
+  EXPECT_THAT(dest, ElementsAreFloat(expected));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/annotator/strip-unpaired-brackets_test.cc b/native/annotator/strip-unpaired-brackets_test.cc
new file mode 100644
index 0000000..32585ce
--- /dev/null
+++ b/native/annotator/strip-unpaired-brackets_test.cc

@@ -0,0 +1,66 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/strip-unpaired-brackets.h"
+
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Fixture owning the UniLib instance that the tests pass to
+// StripUnpairedBrackets().
+class StripUnpairedBracketsTest : public ::testing::Test {
+ protected:
+  // unilib_ is initialized through the INIT_UNILIB_FOR_TESTING macro.
+  StripUnpairedBracketsTest() : INIT_UNILIB_FOR_TESTING(unilib_) {}
+  UniLib unilib_;
+};
+
+// Checks the span returned by StripUnpairedBrackets() for a given text and
+// input span. Spans are {begin, end} pairs with an exclusive end; e.g. {8, 17}
+// in the first case covers "(123) 456".
+TEST_F(StripUnpairedBracketsTest, StripUnpairedBrackets) {
+  // If the brackets match, nothing gets stripped.
+  EXPECT_EQ(StripUnpairedBrackets("call me (123) 456 today", {8, 17}, unilib_),
+            std::make_pair(8, 17));
+  EXPECT_EQ(StripUnpairedBrackets("call me (123 456) today", {8, 17}, unilib_),
+            std::make_pair(8, 17));
+
+  // If the brackets don't match, they get stripped.
+  EXPECT_EQ(StripUnpairedBrackets("call me (123 456 today", {8, 16}, unilib_),
+            std::make_pair(9, 16));
+  EXPECT_EQ(StripUnpairedBrackets("call me )123 456 today", {8, 16}, unilib_),
+            std::make_pair(9, 16));
+  EXPECT_EQ(StripUnpairedBrackets("call me 123 456) today", {8, 16}, unilib_),
+            std::make_pair(8, 15));
+  EXPECT_EQ(StripUnpairedBrackets("call me 123 456( today", {8, 16}, unilib_),
+            std::make_pair(8, 15));
+
+  // Strips brackets correctly from length-1 selections that consist of
+  // a bracket only. The expected result is the empty span {12, 12}.
+  EXPECT_EQ(StripUnpairedBrackets("call me at ) today", {11, 12}, unilib_),
+            std::make_pair(12, 12));
+  EXPECT_EQ(StripUnpairedBrackets("call me at ( today", {11, 12}, unilib_),
+            std::make_pair(12, 12));
+
+  // Handles invalid spans gracefully: empty spans and a negative span are
+  // expected to come back unchanged.
+  EXPECT_EQ(StripUnpairedBrackets("call me at  today", {11, 11}, unilib_),
+            std::make_pair(11, 11));
+  EXPECT_EQ(StripUnpairedBrackets("hello world", {0, 0}, unilib_),
+            std::make_pair(0, 0));
+  EXPECT_EQ(StripUnpairedBrackets("hello world", {11, 11}, unilib_),
+            std::make_pair(11, 11));
+  EXPECT_EQ(StripUnpairedBrackets("hello world", {-1, -1}, unilib_),
+            std::make_pair(-1, -1));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/annotator/test_data/test_grammar_model.fb b/native/annotator/test_data/test_grammar_model.fb
deleted file mode 100644
index 30f133e..0000000
--- a/native/annotator/test_data/test_grammar_model.fb
+++ /dev/null
Binary files differ

diff --git a/native/annotator/test_data/test_model.fb b/native/annotator/test_data/test_model.fb
deleted file mode 100644
index 8a49bcc..0000000
--- a/native/annotator/test_data/test_model.fb
+++ /dev/null
Binary files differ

diff --git a/native/annotator/test_data/test_person_name_model.fb b/native/annotator/test_data/test_person_name_model.fb
deleted file mode 100644
index 4752a23..0000000
--- a/native/annotator/test_data/test_person_name_model.fb
+++ /dev/null
Binary files differ

diff --git a/native/annotator/test_data/wrong_embeddings.fb b/native/annotator/test_data/wrong_embeddings.fb
deleted file mode 100644
index cee4004..0000000
--- a/native/annotator/test_data/wrong_embeddings.fb
+++ /dev/null
Binary files differ

diff --git a/native/annotator/types-test-util.h b/native/annotator/types-test-util.h
new file mode 100644
index 0000000..1d018a1
--- /dev/null
+++ b/native/annotator/types-test-util.h

@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_ANNOTATOR_TYPES_TEST_UTIL_H_
+#define LIBTEXTCLASSIFIER_ANNOTATOR_TYPES_TEST_UTIL_H_
+
+#include <ostream>
+
+#include "annotator/types.h"
+#include "utils/base/logging.h"
+
+namespace libtextclassifier3 {
+
+// Defines an inline operator<< on std::ostream for TYPE_NAME. The value is
+// first streamed into a logging::LoggingStringStream, and the resulting
+// `message` is then forwarded to the std::ostream.
+// NOTE(review): presumably this exists so that gtest can print these types in
+// failure messages -- confirm.
+#define TC3_DECLARE_PRINT_OPERATOR(TYPE_NAME)               \
+  inline std::ostream& operator<<(std::ostream& stream,     \
+                                  const TYPE_NAME& value) { \
+    logging::LoggingStringStream tmp_stream;                \
+    tmp_stream << value;                                    \
+    return stream << tmp_stream.message;                    \
+  }
+
+// Types that get a std::ostream print operator.
+TC3_DECLARE_PRINT_OPERATOR(AnnotatedSpan)
+TC3_DECLARE_PRINT_OPERATOR(ClassificationResult)
+TC3_DECLARE_PRINT_OPERATOR(DatetimeParsedData)
+TC3_DECLARE_PRINT_OPERATOR(DatetimeParseResultSpan)
+TC3_DECLARE_PRINT_OPERATOR(Token)
+
+// The helper macro is not needed past this point.
+#undef TC3_DECLARE_PRINT_OPERATOR
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_ANNOTATOR_TYPES_TEST_UTIL_H_

diff --git a/native/annotator/zlib-utils_test.cc b/native/annotator/zlib-utils_test.cc
new file mode 100644
index 0000000..df33ea1
--- /dev/null
+++ b/native/annotator/zlib-utils_test.cc

@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "annotator/zlib-utils.h"
+
+#include <memory>
+
+#include "annotator/model_generated.h"
+#include "utils/zlib/zlib.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+
+// Round-trip test for CompressModel()/DecompressModel(): builds a ModelT with
+// regex patterns, datetime patterns/extractors, intent generators and
+// resources, compresses it, checks that the uncompressed fields were cleared,
+// decompresses selected fields from the packed flatbuffer, and finally checks
+// that DecompressModel() restores every original value.
+TEST(AnnotatorZlibUtilsTest, CompressModel) {
+  ModelT model;
+  // Two regex patterns.
+  model.regex_model.reset(new RegexModelT);
+  model.regex_model->patterns.emplace_back(new RegexModel_::PatternT);
+  model.regex_model->patterns.back()->pattern = "this is a test pattern";
+  model.regex_model->patterns.emplace_back(new RegexModel_::PatternT);
+  model.regex_model->patterns.back()->pattern = "this is a second test pattern";
+
+  // One datetime pattern (with a single regex) and one datetime extractor.
+  model.datetime_model.reset(new DatetimeModelT);
+  model.datetime_model->patterns.emplace_back(new DatetimeModelPatternT);
+  model.datetime_model->patterns.back()->regexes.emplace_back(
+      new DatetimeModelPattern_::RegexT);
+  model.datetime_model->patterns.back()->regexes.back()->pattern =
+      "an example datetime pattern";
+  model.datetime_model->extractors.emplace_back(new DatetimeModelExtractorT);
+  model.datetime_model->extractors.back()->pattern =
+      "an example datetime extractor";
+
+  // Two intent generators whose templates are stored as raw bytes.
+  model.intent_options.reset(new IntentFactoryModelT);
+  model.intent_options->generator.emplace_back(
+      new IntentFactoryModel_::IntentGeneratorT);
+  const std::string intent_generator1 = "lua generator 1";
+  model.intent_options->generator.back()->lua_template_generator =
+      std::vector<uint8_t>(intent_generator1.begin(), intent_generator1.end());
+  model.intent_options->generator.emplace_back(
+      new IntentFactoryModel_::IntentGeneratorT);
+  const std::string intent_generator2 = "lua generator 2";
+  model.intent_options->generator.back()->lua_template_generator =
+      std::vector<uint8_t>(intent_generator2.begin(), intent_generator2.end());
+
+  // NOTE: The resource strings contain some repetition, so that the compressed
+  // version is smaller than the uncompressed one. Because the compression code
+  // looks at that as well.
+  model.resources.reset(new ResourcePoolT);
+  model.resources->resource_entry.emplace_back(new ResourceEntryT);
+  model.resources->resource_entry.back()->resource.emplace_back(new ResourceT);
+  model.resources->resource_entry.back()->resource.back()->content =
+      "rrrrrrrrrrrrr1.1";
+  model.resources->resource_entry.back()->resource.emplace_back(new ResourceT);
+  model.resources->resource_entry.back()->resource.back()->content =
+      "rrrrrrrrrrrrr1.2";
+  model.resources->resource_entry.emplace_back(new ResourceEntryT);
+  model.resources->resource_entry.back()->resource.emplace_back(new ResourceT);
+  model.resources->resource_entry.back()->resource.back()->content =
+      "rrrrrrrrrrrrr2.1";
+  model.resources->resource_entry.back()->resource.emplace_back(new ResourceT);
+  model.resources->resource_entry.back()->resource.back()->content =
+      "rrrrrrrrrrrrr2.2";
+
+  // Compress the model.
+  EXPECT_TRUE(CompressModel(&model));
+
+  // Sanity check that uncompressed field is removed.
+  EXPECT_TRUE(model.regex_model->patterns[0]->pattern.empty());
+  EXPECT_TRUE(model.regex_model->patterns[1]->pattern.empty());
+  EXPECT_TRUE(model.datetime_model->patterns[0]->regexes[0]->pattern.empty());
+  EXPECT_TRUE(model.datetime_model->extractors[0]->pattern.empty());
+  EXPECT_TRUE(
+      model.intent_options->generator[0]->lua_template_generator.empty());
+  EXPECT_TRUE(
+      model.intent_options->generator[1]->lua_template_generator.empty());
+  EXPECT_TRUE(model.resources->resource_entry[0]->resource[0]->content.empty());
+  EXPECT_TRUE(model.resources->resource_entry[0]->resource[1]->content.empty());
+  EXPECT_TRUE(model.resources->resource_entry[1]->resource[0]->content.empty());
+  EXPECT_TRUE(model.resources->resource_entry[1]->resource[1]->content.empty());
+
+  // Pack and load the model.
+  flatbuffers::FlatBufferBuilder builder;
+  builder.Finish(Model::Pack(builder, &model));
+  const Model* compressed_model =
+      GetModel(reinterpret_cast<const char*>(builder.GetBufferPointer()));
+  ASSERT_TRUE(compressed_model != nullptr);
+
+  // Decompress the fields again and check that they match the original.
+  // Only the regex and datetime patterns are checked through the decompressor;
+  // intent generators and resources are covered by DecompressModel() below.
+  std::unique_ptr<ZlibDecompressor> decompressor = ZlibDecompressor::Instance();
+  ASSERT_TRUE(decompressor != nullptr);
+  std::string uncompressed_pattern;
+  EXPECT_TRUE(decompressor->MaybeDecompress(
+      compressed_model->regex_model()->patterns()->Get(0)->compressed_pattern(),
+      &uncompressed_pattern));
+  EXPECT_EQ(uncompressed_pattern, "this is a test pattern");
+  EXPECT_TRUE(decompressor->MaybeDecompress(
+      compressed_model->regex_model()->patterns()->Get(1)->compressed_pattern(),
+      &uncompressed_pattern));
+  EXPECT_EQ(uncompressed_pattern, "this is a second test pattern");
+  EXPECT_TRUE(decompressor->MaybeDecompress(compressed_model->datetime_model()
+                                                ->patterns()
+                                                ->Get(0)
+                                                ->regexes()
+                                                ->Get(0)
+                                                ->compressed_pattern(),
+                                            &uncompressed_pattern));
+  EXPECT_EQ(uncompressed_pattern, "an example datetime pattern");
+  EXPECT_TRUE(decompressor->MaybeDecompress(compressed_model->datetime_model()
+                                                ->extractors()
+                                                ->Get(0)
+                                                ->compressed_pattern(),
+                                            &uncompressed_pattern));
+  EXPECT_EQ(uncompressed_pattern, "an example datetime extractor");
+
+  // Decompress the object model in place and compare against the originals.
+  EXPECT_TRUE(DecompressModel(&model));
+  EXPECT_EQ(model.regex_model->patterns[0]->pattern, "this is a test pattern");
+  EXPECT_EQ(model.regex_model->patterns[1]->pattern,
+            "this is a second test pattern");
+  EXPECT_EQ(model.datetime_model->patterns[0]->regexes[0]->pattern,
+            "an example datetime pattern");
+  EXPECT_EQ(model.datetime_model->extractors[0]->pattern,
+            "an example datetime extractor");
+  EXPECT_EQ(
+      model.intent_options->generator[0]->lua_template_generator,
+      std::vector<uint8_t>(intent_generator1.begin(), intent_generator1.end()));
+  EXPECT_EQ(
+      model.intent_options->generator[1]->lua_template_generator,
+      std::vector<uint8_t>(intent_generator2.begin(), intent_generator2.end()));
+  EXPECT_EQ(model.resources->resource_entry[0]->resource[0]->content,
+            "rrrrrrrrrrrrr1.1");
+  EXPECT_EQ(model.resources->resource_entry[0]->resource[1]->content,
+            "rrrrrrrrrrrrr1.2");
+  EXPECT_EQ(model.resources->resource_entry[1]->resource[0]->content,
+            "rrrrrrrrrrrrr2.1");
+  EXPECT_EQ(model.resources->resource_entry[1]->resource[1]->content,
+            "rrrrrrrrrrrrr2.2");
+}
+
+}  // namespace libtextclassifier3

diff --git a/native/models/textclassifier.ar.model b/native/models/textclassifier.ar.model
index 2224598..dbd685b 100755
--- a/native/models/textclassifier.ar.model
+++ b/native/models/textclassifier.ar.model
Binary files differ

diff --git a/native/models/textclassifier.en.model b/native/models/textclassifier.en.model
index fbb5a6c..c930fe6 100755
--- a/native/models/textclassifier.en.model
+++ b/native/models/textclassifier.en.model
Binary files differ

diff --git a/native/models/textclassifier.es.model b/native/models/textclassifier.es.model
index 2ef143c..26e3908 100755
--- a/native/models/textclassifier.es.model
+++ b/native/models/textclassifier.es.model
Binary files differ

diff --git a/native/models/textclassifier.fr.model b/native/models/textclassifier.fr.model
index 76babd6..9746ec9 100755
--- a/native/models/textclassifier.fr.model
+++ b/native/models/textclassifier.fr.model
Binary files differ

diff --git a/native/models/textclassifier.it.model b/native/models/textclassifier.it.model
index c5cd3b6..1ce898c 100755
--- a/native/models/textclassifier.it.model
+++ b/native/models/textclassifier.it.model
Binary files differ

diff --git a/native/models/textclassifier.ja.model b/native/models/textclassifier.ja.model
index a1c3bed..bc61400 100755
--- a/native/models/textclassifier.ja.model
+++ b/native/models/textclassifier.ja.model
Binary files differ

diff --git a/native/models/textclassifier.ko.model b/native/models/textclassifier.ko.model
index 8fe96cd..59a9cde 100755
--- a/native/models/textclassifier.ko.model
+++ b/native/models/textclassifier.ko.model
Binary files differ

diff --git a/native/models/textclassifier.nl.model b/native/models/textclassifier.nl.model
index e97afd9..aa95ca4 100755
--- a/native/models/textclassifier.nl.model
+++ b/native/models/textclassifier.nl.model
Binary files differ

diff --git a/native/models/textclassifier.pl.model b/native/models/textclassifier.pl.model
index 5b25d5e..10e36e1 100755
--- a/native/models/textclassifier.pl.model
+++ b/native/models/textclassifier.pl.model
Binary files differ

diff --git a/native/models/textclassifier.pt.model b/native/models/textclassifier.pt.model
index 1d8fd1a..c76e430 100755
--- a/native/models/textclassifier.pt.model
+++ b/native/models/textclassifier.pt.model
Binary files differ

diff --git a/native/models/textclassifier.ru.model b/native/models/textclassifier.ru.model
index b579beb..b9a3ffd 100755
--- a/native/models/textclassifier.ru.model
+++ b/native/models/textclassifier.ru.model
Binary files differ

diff --git a/native/models/textclassifier.th.model b/native/models/textclassifier.th.model
index 78c2bc9..a67237a 100755
--- a/native/models/textclassifier.th.model
+++ b/native/models/textclassifier.th.model
Binary files differ

diff --git a/native/models/textclassifier.tr.model b/native/models/textclassifier.tr.model
index d56e5ce..e3cfd68 100755
--- a/native/models/textclassifier.tr.model
+++ b/native/models/textclassifier.tr.model
Binary files differ

diff --git a/native/models/textclassifier.universal.model b/native/models/textclassifier.universal.model
index 27f023d..7f7476c 100755
--- a/native/models/textclassifier.universal.model
+++ b/native/models/textclassifier.universal.model
Binary files differ

diff --git a/native/models/textclassifier.zh.model b/native/models/textclassifier.zh.model
index d700417..fe11975 100755
--- a/native/models/textclassifier.zh.model
+++ b/native/models/textclassifier.zh.model
Binary files differ

diff --git a/native/utils/base/arena_leakage_unittest.cc b/native/utils/base/arena_leakage_unittest.cc
new file mode 100644
index 0000000..642dacd
--- /dev/null
+++ b/native/utils/base/arena_leakage_unittest.cc

@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/base/arena.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+
+TEST(Arena, Leakage) {
+  UnsafeArena arena(32);
+  // Grab just 10 bytes.
+  EXPECT_EQ(arena.bytes_until_next_allocation(), 32);
+  const char* block = arena.Alloc(10);
+  EXPECT_NE(block, nullptr);
+  EXPECT_EQ(arena.bytes_until_next_allocation(), 22);
+  // Grab the rest.
+  const char* expected_next_block = block + 10;
+  const char* next_block = arena.Alloc(22);
+  // If the below test fails, a new block has been allocated for "next_block".
+  // This means that the last 22 bytes of the previous block have been lost.
+  EXPECT_EQ(next_block, expected_next_block);
+  EXPECT_EQ(arena.bytes_until_next_allocation(), 0);
+  // Try allocating a 0-byte block. The arena should remain unchanged.
+  const char* null_block = arena.Alloc(0);
+  EXPECT_EQ(null_block, nullptr);
+  EXPECT_EQ(arena.bytes_until_next_allocation(), 0);
+}
+
+}  // namespace libtextclassifier3

diff --git a/native/utils/base/prefixvarint.cc b/native/utils/base/prefixvarint.cc
deleted file mode 100644
index 5febbc5..0000000
--- a/native/utils/base/prefixvarint.cc
+++ /dev/null

@@ -1,68 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "utils/base/prefixvarint.h"
-
-#include "utils/base/integral_types.h"
-
-namespace libtextclassifier3 {
-
-const int PrefixVarint::kMax32;
-const int PrefixVarint::kMax64;
-const int PrefixVarint::kSlopBytes;
-const int PrefixVarint::kEncode32SlopBytes;
-const int PrefixVarint::kEncode64SlopBytes;
-
-char* PrefixVarint::SafeEncode32(char* ptr, uint32 val) {
-  return SafeEncode32Inline(ptr, val);
-}
-
-char* PrefixVarint::SafeEncode64(char* ptr, uint64 val) {
-  return SafeEncode64Inline(ptr, val);
-}
-
-void PrefixVarint::Append32Slow(std::string* s, uint32 value) {
-  size_t start = s->size();
-  s->resize(start + PrefixVarint::Length32(value));
-  PrefixVarint::SafeEncode32(&((*s)[start]), value);
-}
-
-void PrefixVarint::Append64Slow(std::string* s, uint64 value) {
-  size_t start = s->size();
-  s->resize(start + PrefixVarint::Length64(value));
-  PrefixVarint::SafeEncode64(&((*s)[start]), value);
-}
-
-const char* PrefixVarint::Parse32Fallback(uint32 code, const char* ptr,
-                                          uint32* val) {
-  return Parse32FallbackInline(code, ptr, val);
-}
-
-const char* PrefixVarint::Parse64Fallback(uint64 code, const char* ptr,
-                                          uint64* val) {
-  return Parse64FallbackInline(code, ptr, val);
-}
-
-#if 0
-const PrefixVarint::CodeInfo PrefixVarint::code_info_[8] = {
-  {2, 0xff00}, {2, 0xff00},
-  {2, 0xff00}, {2, 0xff00},
-  {3, 0xffff00}, {3, 0xffff00},
-  {4, 0xffffff00}, {5, 0xffffff00}
-};
-#endif
-
-}  // namespace libtextclassifier3

diff --git a/native/utils/base/prefixvarint.h b/native/utils/base/prefixvarint.h
deleted file mode 100644
index 8e4f308..0000000
--- a/native/utils/base/prefixvarint.h
+++ /dev/null

@@ -1,609 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-// PrefixVarint is an integer encoding method that has the exact same
-// compression size as Varint, but is faster to decode because all of the
-// length information is encoded in the first byte.
-// On a Warp 19 it can parse up to 42% faster than Varint, for the distributions
-// tested below.
-// On an Ilium it can parse up to 37% faster than Varint.
-//
-// But there are a few caveats:
-// - This is fastest if both the encoder and decoder are little endian.
-//   Somewhat slower versions are provided for encoding and decoding on big
-//   endian machines.
-// - This doesn't support backwards decoding.
-//
-// The PrefixVarint encoding uses a unary code in the high bits of the first
-// byte to encode the total number of bytes, as follows:
-// - 32bit encoding:
-//     1 byte:  "0" + 7 value bits
-//     2 bytes: "10" + 6 value bits
-//     3 bytes: "110" + 5 value bits
-//     4 bytes: "1110" + 4 value bits
-//     5 bytes: "1111" + no value bits (value is in the next 4 bytes)
-//
-// - 64bit encoding:
-//     1 byte:  "0" + 7 value bits
-//     2 bytes: "10" + 6 value bits
-//     3 bytes: "110" + 5 value bits
-//     4 bytes: "1110" + 4 value bits
-//     5 bytes: "11110" + 3 value bits
-//     6 bytes: "111110" + 2 value bits
-//     7 bytes: "1111110" + 1 value bits
-//     8 bytes: "11111110" + no value bits (value is in the next 7 bytes)
-//     9 bytes: "11111111" + no value bits (value is in the next 8 bytes)
-//
-// Note that 32bit and 64bit PrefixVarint encoding are same for values between
-// 0 and (1<<28)-1 (i.e., upto 4 byte-encodable value).
-//
-// The following are benchmark results (in cycles per operation, so lower is
-// better) on randomly generated sequences of values whose encodings have the
-// given distribution of byte lengths.  The cycle counts include some overhead
-// (1-2 cycles) for the testing loop operation.
-//
-// UNIFORM 2^14 means the values are randomly generated in the range [0-2^14),
-// so the majority will require 2 bytes to encode.  MIXED 60:20:10:6:4, on the
-// other hand, means 60% of the values encode to 1 byte, 20% to 2 bytes, and
-// so on.  The MIXED 15:71:13:1.2:0.1 distribution simulates a power law with
-// median value of 1024.
-//
-// VI is Varint, PVI is PrefixVarint.  In both cases, Parse32Inline was used.
-//
-// Warp 19 (Opteron):
-//                            Encode     Parse       Skip
-// Byte Len Dist              VI  PVI    VI  PVI    VI  PVI
-// UNIFORM 2^7              12.2  9.9   3.4  3.3   3.2  3.2
-// UNIFORM 2^14             18.2 14.0   8.8  6.0   5.4  6.4
-// UNIFORM 2^21             18.1 15.1  13.0  9.7   6.7  9.5
-// UNIFORM 2^28             18.9 14.9  15.4 12.1   9.8 10.7
-// UNIFORM 2^31             23.6 19.3  20.1 14.9  12.7 10.7
-// MIXED 50:50:0:0:0        19.4 19.8  15.0 12.7  11.8 12.6
-// MIXED 20:20:20:20:20     28.2 27.3  24.9 21.8  20.7 18.8
-// MIXED 60:20:10:6:4       23.5 23.3  29.7 17.3  16.7 16.3
-// MIXED 80:12:5:2:1        16.5 16.3  11.6  9.9   9.7  9.6
-// MIXED 90:7:2:1:0         12.9 12.9   8.2  6.2   6.1  6.1
-// MIXED 15:71:13:1.2:0.1   18.9 19.2  13.8 11.2  11.0 11.8
-//
-// Ilium:
-//                            Encode     Parse       Skip
-// Byte Len Dist              VI  PVI    VI  PVI    VI  PVI
-// UNIFORM 2^7              10.2  8.7   3.1  3.1   2.9  2.1
-// UNIFORM 2^14             15.8 13.2   7.1  4.5   4.2  3.4
-// UNIFORM 2^21             15.6 14.1  10.1  6.6   5.4  5.7
-// UNIFORM 2^28             18.1 15.2  12.7  8.8   7.3  8.3
-// UNIFORM 2^31             21.8 16.5  17.9 13.3  13.9  8.1
-// MIXED 50:50:0:0:0        19.8 20.7  14.2 13.0  12.4 12.2
-// MIXED 20:20:20:20:20     29.8 30.1  27.7 24.3  22.7 20.2
-// MIXED 60:20:10:6:4       24.2 24.9  20.1 18.9  18.7 17.2
-// MIXED 80:12:5:2:1        16.3 16.6  12.0 11.6  11.3 10.7
-// MIXED 90:7:2:1:0         12.1 12.3   7.2  7.0   6.8  6.5
-// MIXED 15:71:13:1.2:0.1   19.2 20.1  14.2 13.1  12.5 12.0
-//
-
-#ifndef LIBTEXTCLASSIFIER_UTILS_BASE_PREFIXVARINT_H_
-#define LIBTEXTCLASSIFIER_UTILS_BASE_PREFIXVARINT_H_
-
-#include <string>
-
-#include "utils/base/casts.h"
-#include "utils/base/endian.h"
-#include "utils/base/integral_types.h"
-#include "utils/base/unaligned_access.h"
-
-namespace libtextclassifier3 {
-
-class PrefixVarint {
- public:
-  // The max bytes used to encode a uint32:
-  static constexpr int kMax32 = 5;
-  static constexpr int kMax64 = 9;
-
-  // This decoder does not read past the encoded buffer.
-  static constexpr int kSlopBytes = 0;
-
-  // Returns the number of bytes used to encode the given value:
-  static int Length32(uint32 val);
-  static int Length64(uint64 val);
-
-  // The Encode functions could reset up to the following bytes past the last
-  // encoded byte. Use the slower SafeEncode equivalent if you want the encode
-  // to not use any slop bytes.
-  static constexpr int kEncode32SlopBytes = 1;
-  static constexpr int kEncode64SlopBytes = 3;
-
-  // The safer version of the Encode functions, which don't need any slop bytes.
-  static char* SafeEncode32(char* ptr, uint32 val);
-  static char* SafeEncode64(char* ptr, uint64 val);
-  // Inlined version:
-  static char* SafeEncode32Inline(char* ptr, uint32 val);
-  static char* SafeEncode64Inline(char* ptr, uint64 val);
-
-  // Appends the encoded value to *s.
-  static void Append32(std::string* s, uint32 value);
-  static void Append64(std::string* s, uint64 value);
-
-  // Parses the next value in the ptr buffer and returns the pointer advanced
-  // past the end of the encoded value.
-  static const char* Parse32(const char* ptr, uint32* val);
-  static const char* Parse64(const char* ptr, uint64* val);
-  // Use this in time-critical code:
-  static const char* Parse32Inline(const char* ptr, uint32* val);
-  static const char* Parse64Inline(const char* ptr, uint64* val);
-
- private:
-  static constexpr int kMin2Bytes = (1 << 7);
-  static constexpr int kMin3Bytes = (1 << 14);
-  static constexpr int kMin4Bytes = (1 << 21);
-  static constexpr int kMin5Bytes = (1 << 28);
-  static constexpr int64 kMin6Bytes = (1LL << 35);
-  static constexpr int64 kMin7Bytes = (1LL << 42);
-  static constexpr int64 kMin8Bytes = (1LL << 49);
-  static constexpr int64 kMin9Bytes = (1LL << 56);
-
-  static void Append32Slow(std::string* s, uint32 value);
-  static void Append64Slow(std::string* s, uint64 value);
-  static const char* Parse32Fallback(uint32 code, const char* ptr, uint32* val);
-  static const char* Parse64Fallback(uint64 code, const char* ptr, uint64* val);
-  static const char* Parse32FallbackInline(uint32 code, const char* ptr,
-                                           uint32* val);
-  static const char* Parse64FallbackInline(uint64 code, const char* ptr,
-                                           uint64* val);
-
-  // Casting helpers to aid in making this code signed-char-clean.
-  static uint8* MakeUnsigned(char* p) { return bit_cast<uint8*>(p); }
-  static const uint8* MakeUnsigned(const char* p) {
-    return bit_cast<const uint8*>(p);
-  }
-};
-
-inline int PrefixVarint::Length32(uint32 val) {
-  if (val < kMin2Bytes) return 1;
-  if (val < kMin3Bytes) return 2;
-  if (val < kMin4Bytes) return 3;
-  if (val < kMin5Bytes) return 4;
-  return 5;
-}
-
-inline int PrefixVarint::Length64(uint64 val) {
-  if (val < kMin2Bytes) return 1;
-  if (val < kMin3Bytes) return 2;
-  if (val < kMin4Bytes) return 3;
-  if (val < kMin5Bytes) return 4;
-  if (val < kMin6Bytes) return 5;
-  if (val < kMin7Bytes) return 6;
-  if (val < kMin8Bytes) return 7;
-  if (val < kMin9Bytes) return 8;
-  return 9;
-}
-
-inline char* PrefixVarint::SafeEncode32Inline(char* p, uint32 val) {
-  uint8* const ptr = MakeUnsigned(p);
-  if (val < kMin2Bytes) {
-    ptr[0] = val;
-    return p + 1;
-  } else if (val < kMin3Bytes) {
-    val <<= 2;
-    uint8 low = val;
-    ptr[0] = (low >> 2) | 128;
-    ptr[1] = val >> 8;
-    return p + 2;
-  } else if (val < kMin4Bytes) {
-    val <<= 3;
-    uint8 low = val;
-    ptr[0] = (low >> 3) | 192;
-    ptr[1] = val >> 8;
-    ptr[2] = val >> 16;
-    return p + 3;
-  } else if (val < kMin5Bytes) {
-    val <<= 4;
-    uint8 low = val;
-    ptr[0] = (low >> 4) | 224;
-    ptr[1] = val >> 8;
-    ptr[2] = val >> 16;
-    ptr[3] = val >> 24;
-    return p + 4;
-  } else {
-    ptr[0] = 0xff;
-    ptr[1] = val;
-    ptr[2] = val >> 8;
-    ptr[3] = val >> 16;
-    ptr[4] = val >> 24;
-    return p + 5;
-  }
-}
-
-inline char* PrefixVarint::SafeEncode64Inline(char* p, uint64 val) {
-  uint8* const ptr = MakeUnsigned(p);
-  if (val < kMin2Bytes) {
-    ptr[0] = val;
-    return p + 1;
-  } else if (val < kMin3Bytes) {
-    val <<= 2;
-    uint8 low = val;
-    ptr[0] = (low >> 2) | 128;
-    ptr[1] = val >> 8;
-    return p + 2;
-  } else if (val < kMin4Bytes) {
-    val <<= 3;
-    uint8 low = val;
-    ptr[0] = (low >> 3) | 192;
-    ptr[1] = val >> 8;
-    ptr[2] = val >> 16;
-    return p + 3;
-  } else if (val < kMin5Bytes) {
-    val <<= 4;
-    uint8 low = val;
-    ptr[0] = (low >> 4) | 224;
-    ptr[1] = val >> 8;
-    ptr[2] = val >> 16;
-    ptr[3] = val >> 24;
-    return p + 4;
-  } else if (val < kMin6Bytes) {
-    val <<= 5;
-    uint8 low = val;
-    ptr[0] = (low >> 5) | 240;
-    ptr[1] = val >> 8;
-    ptr[2] = val >> 16;
-    ptr[3] = val >> 24;
-    ptr[4] = val >> 32;
-    return p + 5;
-  } else if (val < kMin7Bytes) {
-    val <<= 6;
-    uint8 low = val;
-    ptr[0] = (low >> 6) | 248;
-    ptr[1] = val >> 8;
-    ptr[2] = val >> 16;
-    ptr[3] = val >> 24;
-    ptr[4] = val >> 32;
-    ptr[5] = val >> 40;
-    return p + 6;
-  } else if (val < kMin8Bytes) {
-    val <<= 7;
-    uint8 low = val;
-    ptr[0] = (low >> 7) | 252;
-    ptr[1] = val >> 8;
-    ptr[2] = val >> 16;
-    ptr[3] = val >> 24;
-    ptr[4] = val >> 32;
-    ptr[5] = val >> 40;
-    ptr[6] = val >> 48;
-    return p + 7;
-  } else if (val < kMin9Bytes) {
-    ptr[0] = 254;
-    ptr[1] = val;
-    ptr[2] = val >> 8;
-    ptr[3] = val >> 16;
-    ptr[4] = val >> 24;
-    ptr[5] = val >> 32;
-    ptr[6] = val >> 40;
-    ptr[7] = val >> 48;
-    return p + 8;
-  } else {
-    ptr[0] = 255;
-    ptr[1] = val;
-    ptr[2] = val >> 8;
-    ptr[3] = val >> 16;
-    ptr[4] = val >> 24;
-    ptr[5] = val >> 32;
-    ptr[6] = val >> 40;
-    ptr[7] = val >> 48;
-    ptr[8] = val >> 56;
-    return p + 9;
-  }
-}
-
-inline void PrefixVarint::Append32(std::string* s, uint32 value) {
-  // Inline the fast-path for single-character output, but fall back to the .cc
-  // file for the full version. The size<capacity check is so the compiler can
-  // optimize out the string resize code.
-  if (value < kMin2Bytes && s->size() < s->capacity()) {
-    s->push_back(static_cast<unsigned char>(value));
-  } else {
-    Append32Slow(s, value);
-  }
-}
-
-inline void PrefixVarint::Append64(std::string* s, uint64 value) {
-  // Inline the fast-path for single-character output, but fall back to the .cc
-  // file for the full version. The size<capacity check is so the compiler can
-  // optimize out the string resize code.
-  if (value < kMin2Bytes && s->size() < s->capacity()) {
-    s->push_back(static_cast<unsigned char>(value));
-  } else {
-    Append64Slow(s, value);
-  }
-}
-
-#ifdef IS_LITTLE_ENDIAN
-
-inline const char* PrefixVarint::Parse32(const char* p, uint32* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint32 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint32 v = ptr[1];
-    *val = (code & 0x3f) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse32Fallback(code, p, val);
-  }
-}
-
-inline const char* PrefixVarint::Parse64(const char* p, uint64* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint64 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint64 v = ptr[1];
-    *val = (code & 0x3fLLU) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse64Fallback(code, p, val);
-  }
-}
-
-inline const char* PrefixVarint::Parse32Inline(const char* p, uint32* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint32 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint32 v = ptr[1];
-    *val = (code & 0x3f) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse32FallbackInline(code, p, val);
-  }
-}
-
-inline const char* PrefixVarint::Parse64Inline(const char* p, uint64* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint64 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint64 v = ptr[1];
-    *val = (code & 0x3f) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse64FallbackInline(code, p, val);
-  }
-}
-
-// Only handles cases with 3-5 bytes
-inline const char* PrefixVarint::Parse32FallbackInline(uint32 code,
-                                                       const char* p,
-                                                       uint32* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  if (code < 224) {
-    uint32 v = TC3_UNALIGNED_LOAD16(ptr + 1);
-    *val = (code & 0x1f) | (v << 5);
-    return p + 3;
-  } else if (code < 240) {
-    uint32 v = ptr[3];
-    v = (v << 16) | TC3_UNALIGNED_LOAD16(ptr + 1);
-    *val = (code & 0xf) | (v << 4);
-    return p + 4;
-  } else {
-    *val = TC3_UNALIGNED_LOAD32(ptr + 1);
-    return p + 5;
-  }
-}
-
-// Only handles cases with 3-9 bytes
-inline const char* PrefixVarint::Parse64FallbackInline(uint64 code,
-                                                       const char* p,
-                                                       uint64* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  if (code < 224) {
-    uint64 v = TC3_UNALIGNED_LOAD16(ptr + 1);
-    *val = (code & 0x1fLLU) | (v << 5);
-    return p + 3;
-  } else if (code < 240) {
-    uint64 v = ptr[3];
-    v = (v << 16) | TC3_UNALIGNED_LOAD16(ptr + 1);
-    *val = (code & 0xfLLU) | (v << 4);
-    return p + 4;
-  } else if (code < 248) {
-    uint64 v = TC3_UNALIGNED_LOAD32(ptr + 1);
-    *val = (code & 0x7LLU) | (v << 3);
-    return p + 5;
-  } else if (code < 252) {
-    uint64 v = ptr[5];
-    v = (v << 32) | TC3_UNALIGNED_LOAD32(ptr + 1);
-    *val = (code & 0x3LLU) | (v << 2);
-    return p + 6;
-  } else if (code < 254) {
-    uint64 v = TC3_UNALIGNED_LOAD16(ptr + 5);
-    v = (v << 32) | TC3_UNALIGNED_LOAD32(ptr + 1);
-    *val = (code & 0x1LLU) | (v << 1);
-    return p + 7;
-  } else if (code < 255) {
-    uint64 v = TC3_UNALIGNED_LOAD64(ptr);
-    *val = v >> 8;
-    return p + 8;
-  } else {
-    *val = TC3_UNALIGNED_LOAD64(ptr + 1);
-    return p + 9;
-  }
-}
-
-#else  // IS_BIG_ENDIAN
-
-// This works on big-endian machines.  Performance is 1-16% slower, depending
-// on the data.
-inline const char* PrefixVarint::Parse32(const char* p, uint32* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint32 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint32 v = ptr[1];
-    *val = (code & 0x3f) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse32Fallback(code, p, val);
-  }
-}
-
-inline const char* PrefixVarint::Parse64(const char* p, uint64* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint64 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint64 v = ptr[1];
-    *val = (code & 0x3fLLU) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse64Fallback(code, p, val);
-  }
-}
-
-inline const char* PrefixVarint::Parse32Inline(const char* p, uint32* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint32 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint32 v = ptr[1];
-    *val = (code & 0x3f) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse32FallbackInline(code, p, val);
-  }
-}
-
-inline const char* PrefixVarint::Parse64Inline(const char* p, uint64* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  uint64 code = *ptr;
-  if (code < 128) {
-    *val = code;
-    return p + 1;
-  } else if (code < 192) {
-    uint64 v = ptr[1];
-    *val = (code & 0x3fLLU) | (v << 6);
-    return p + 2;
-  } else {
-    return Parse64FallbackInline(code, p, val);
-  }
-}
-
-// Only handles cases with 3-5 bytes
-inline const char* PrefixVarint::Parse32FallbackInline(uint32 code,
-                                                       const char* p,
-                                                       uint32* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  if (code < 224) {
-    uint32 v = ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = (code & 0x1f) | (v << 5);
-    return p + 3;
-  } else if (code < 240) {
-    uint32 v = ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = (code & 0xf) | (v << 4);
-    return p + 4;
-  } else {
-    uint32 v = ptr[4];
-    v = (v << 8) | ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = v;
-    return p + 5;
-  }
-}
-
-// Only handles cases with 3-9 bytes
-inline const char* PrefixVarint::Parse64FallbackInline(uint64 code,
-                                                       const char* p,
-                                                       uint64* val) {
-  const uint8* const ptr = MakeUnsigned(p);
-  if (code < 224) {
-    uint64 v = ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = (code & 0x1f) | (v << 5);
-    return p + 3;
-  } else if (code < 240) {
-    uint64 v = ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = (code & 0xf) | (v << 4);
-    return p + 4;
-  } else if (code < 248) {
-    uint64 v = ptr[4];
-    v = (v << 8) | ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = (code & 0x7) | (v << 3);
-    return p + 5;
-  } else if (code < 252) {
-    uint64 v = ptr[5];
-    v = (v << 8) | ptr[4];
-    v = (v << 8) | ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = (code & 0x3) | (v << 2);
-    return p + 6;
-  } else if (code < 254) {
-    uint64 v = ptr[6];
-    v = (v << 8) | ptr[5];
-    v = (v << 8) | ptr[4];
-    v = (v << 8) | ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = (code & 0x1) | (v << 1);
-    return p + 7;
-  } else if (code < 255) {
-    uint64 v = ptr[7];
-    v = (v << 8) | ptr[6];
-    v = (v << 8) | ptr[5];
-    v = (v << 8) | ptr[4];
-    v = (v << 8) | ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = v;
-    return p + 8;
-  } else {
-    uint64 v = ptr[8];
-    v = (v << 8) | ptr[7];
-    v = (v << 8) | ptr[6];
-    v = (v << 8) | ptr[5];
-    v = (v << 8) | ptr[4];
-    v = (v << 8) | ptr[3];
-    v = (v << 8) | ptr[2];
-    v = (v << 8) | ptr[1];
-    *val = v;
-    return p + 9;
-  }
-}
-
-#endif  // IS_LITTLE_ENDIAN
-
-}  // namespace libtextclassifier3
-
-#endif  // LIBTEXTCLASSIFIER_UTILS_BASE_PREFIXVARINT_H_

diff --git a/native/utils/base/status_test.cc b/native/utils/base/status_test.cc
new file mode 100644
index 0000000..82d5aad
--- /dev/null
+++ b/native/utils/base/status_test.cc

@@ -0,0 +1,156 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/base/status.h"
+
+#include "utils/base/logging.h"
+#include "utils/base/status_macros.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(StatusTest, PrintsAbortedStatus) {
+  logging::LoggingStringStream stream;
+  stream << Status::UNKNOWN;
+  EXPECT_EQ(Status::UNKNOWN.error_code(), 2);
+  EXPECT_EQ(Status::UNKNOWN.CanonicalCode(), StatusCode::UNKNOWN);
+  EXPECT_EQ(Status::UNKNOWN.error_message(), "");
+  EXPECT_EQ(stream.message, "2");
+}
+
+TEST(StatusTest, PrintsOKStatus) {
+  logging::LoggingStringStream stream;
+  stream << Status::OK;
+  EXPECT_EQ(Status::OK.error_code(), 0);
+  EXPECT_EQ(Status::OK.CanonicalCode(), StatusCode::OK);
+  EXPECT_EQ(Status::OK.error_message(), "");
+  EXPECT_EQ(stream.message, "0");
+}
+
+TEST(StatusTest, UnknownStatusHasRightAttributes) {
+  EXPECT_EQ(Status::UNKNOWN.error_code(), 2);
+  EXPECT_EQ(Status::UNKNOWN.CanonicalCode(), StatusCode::UNKNOWN);
+  EXPECT_EQ(Status::UNKNOWN.error_message(), "");
+}
+
+TEST(StatusTest, OkStatusHasRightAttributes) {
+  EXPECT_EQ(Status::OK.error_code(), 0);
+  EXPECT_EQ(Status::OK.CanonicalCode(), StatusCode::OK);
+  EXPECT_EQ(Status::OK.error_message(), "");
+}
+
+TEST(StatusTest, CustomStatusHasRightAttributes) {
+  Status status(StatusCode::INVALID_ARGUMENT, "You can't put this here!");
+  EXPECT_EQ(status.error_code(), 3);
+  EXPECT_EQ(status.CanonicalCode(), StatusCode::INVALID_ARGUMENT);
+  EXPECT_EQ(status.error_message(), "You can't put this here!");
+}
+
+TEST(StatusTest, AssignmentPreservesMembers) {
+  Status status(StatusCode::INVALID_ARGUMENT, "You can't put this here!");
+
+  Status status2 = status;
+
+  EXPECT_EQ(status2.error_code(), 3);
+  EXPECT_EQ(status2.CanonicalCode(), StatusCode::INVALID_ARGUMENT);
+  EXPECT_EQ(status2.error_message(), "You can't put this here!");
+}
+
+TEST(StatusTest, ReturnIfErrorOkStatus) {
+  bool returned_due_to_error = true;
+  auto lambda = [&returned_due_to_error](const Status& s) {
+    TC3_RETURN_IF_ERROR(s);
+    returned_due_to_error = false;
+    return Status::OK;
+  };
+
+  // OK should allow execution to continue and the returned status should also
+  // be OK.
+  Status status = lambda(Status());
+  EXPECT_EQ(status.error_code(), 0);
+  EXPECT_EQ(status.CanonicalCode(), StatusCode::OK);
+  EXPECT_EQ(status.error_message(), "");
+  EXPECT_FALSE(returned_due_to_error);
+}
+
+TEST(StatusTest, ReturnIfErrorInvalidArgumentStatus) {
+  bool returned_due_to_error = true;
+  auto lambda = [&returned_due_to_error](const Status& s) {
+    TC3_RETURN_IF_ERROR(s);
+    returned_due_to_error = false;
+    return Status::OK;
+  };
+
+  // INVALID_ARGUMENT should cause an early return.
+  Status invalid_arg_status(StatusCode::INVALID_ARGUMENT, "You can't do that!");
+  Status status = lambda(invalid_arg_status);
+  EXPECT_EQ(status.error_code(), 3);
+  EXPECT_EQ(status.CanonicalCode(), StatusCode::INVALID_ARGUMENT);
+  EXPECT_EQ(status.error_message(), "You can't do that!");
+  EXPECT_TRUE(returned_due_to_error);
+}
+
+TEST(StatusTest, ReturnIfErrorUnknownStatus) {
+  bool returned_due_to_error = true;
+  auto lambda = [&returned_due_to_error](const Status& s) {
+    TC3_RETURN_IF_ERROR(s);
+    returned_due_to_error = false;
+    return Status::OK;
+  };
+
+  // UNKNOWN should cause an early return.
+  Status unknown_status(StatusCode::UNKNOWN,
+                        "We also know there are known unknowns.");
+  libtextclassifier3::Status status = lambda(unknown_status);
+  EXPECT_EQ(status.error_code(), 2);
+  EXPECT_EQ(status.CanonicalCode(), StatusCode::UNKNOWN);
+  EXPECT_EQ(status.error_message(), "We also know there are known unknowns.");
+  EXPECT_TRUE(returned_due_to_error);
+}
+
+TEST(StatusTest, ReturnIfErrorOnlyInvokesExpressionOnce) {
+  int num_invocations = 0;
+  auto ok_internal_expr = [&num_invocations]() {
+    ++num_invocations;
+    return Status::OK;
+  };
+  auto ok_lambda = [&ok_internal_expr]() {
+    TC3_RETURN_IF_ERROR(ok_internal_expr());
+    return Status::OK;
+  };
+
+  libtextclassifier3::Status status = ok_lambda();
+  EXPECT_EQ(status.CanonicalCode(), StatusCode::OK);
+  EXPECT_EQ(num_invocations, 1);
+
+  num_invocations = 0;
+  auto error_internal_expr = [&num_invocations]() {
+    ++num_invocations;
+    return Status::UNKNOWN;
+  };
+  auto error_lambda = [&error_internal_expr]() {
+    TC3_RETURN_IF_ERROR(error_internal_expr());
+    return Status::OK;
+  };
+
+  status = error_lambda();
+  EXPECT_EQ(status.CanonicalCode(), StatusCode::UNKNOWN);
+  EXPECT_EQ(num_invocations, 1);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/base/statusor_test.cc b/native/utils/base/statusor_test.cc
new file mode 100644
index 0000000..23165b0
--- /dev/null
+++ b/native/utils/base/statusor_test.cc

@@ -0,0 +1,133 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/base/statusor.h"
+
+#include "utils/base/logging.h"
+#include "utils/base/status.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(StatusOrTest, DoesntDieWhenOK) {
+  StatusOr<std::string> status_or_string = std::string("Hello World");
+  EXPECT_TRUE(status_or_string.ok());
+  EXPECT_EQ(status_or_string.ValueOrDie(), "Hello World");
+}
+
+TEST(StatusOrTest, DiesWhenNotOK) {
+  StatusOr<std::string> status_or_string = {Status::UNKNOWN};
+  EXPECT_FALSE(status_or_string.ok());
+  // Android does not print the error message to stderr, so we are not checking
+  // the error message here.
+  EXPECT_DEATH(status_or_string.ValueOrDie(), "");
+}
+
+// Foo is NOT default constructible and can be implicitly converted to from int.
+class Foo {
+ public:
+  // Copy value conversion
+  Foo(int i) : i_(i) {}  // NOLINT
+  int i() const { return i_; }
+
+ private:
+  int i_;
+};
+
+TEST(StatusOrTest, HandlesNonDefaultConstructibleValues) {
+  StatusOr<Foo> foo_or(Foo(7));
+  EXPECT_TRUE(foo_or.ok());
+  EXPECT_EQ(foo_or.ValueOrDie().i(), 7);
+
+  StatusOr<Foo> error_or(Status::UNKNOWN);
+  EXPECT_FALSE(error_or.ok());
+  EXPECT_EQ(error_or.status().CanonicalCode(), StatusCode::UNKNOWN);
+}
+
+class Bar {
+ public:
+  // Move value conversion
+  Bar(Foo&& f) : i_(2 * f.i()) {}  // NOLINT
+
+  // Movable, but not copyable.
+  Bar(const Bar& other) = delete;
+  Bar& operator=(const Bar& rhs) = delete;
+  Bar(Bar&& other) = default;
+  Bar& operator=(Bar&& rhs) = default;
+
+  int i() const { return i_; }
+
+ private:
+  int i_;
+};
+
+TEST(StatusOrTest, HandlesValueConversion) {
+  // Copy value conversion constructor : StatusOr<Foo>(const int&)
+  StatusOr<Foo> foo_status(19);
+  EXPECT_TRUE(foo_status.ok());
+  EXPECT_EQ(foo_status.ValueOrDie().i(), 19);
+
+  // Move value conversion constructor : StatusOr<Bar>(Foo&&)
+  StatusOr<Bar> bar_status(std::move(foo_status));
+  EXPECT_TRUE(bar_status.ok());
+  EXPECT_EQ(bar_status.ValueOrDie().i(), 38);
+
+  StatusOr<int> int_status(19);
+  // Copy conversion constructor : StatusOr<Foo>(const StatusOr<int>&)
+  StatusOr<Foo> copied_status(int_status);
+  EXPECT_TRUE(copied_status.ok());
+  EXPECT_EQ(copied_status.ValueOrDie().i(), 19);
+
+  // Move conversion constructor : StatusOr<Bar>(StatusOr<Foo>&&)
+  StatusOr<Bar> moved_status(std::move(copied_status));
+  EXPECT_TRUE(moved_status.ok());
+  EXPECT_EQ(moved_status.ValueOrDie().i(), 38);
+
+  // Move conversion constructor with error : StatusOr<Bar>(StatusOr<Foo>&&)
+  StatusOr<Foo> error_status(Status::UNKNOWN);
+  StatusOr<Bar> moved_error_status(std::move(error_status));
+  EXPECT_FALSE(moved_error_status.ok());
+}
+
+struct OkFn {
+  StatusOr<int> operator()() { return 42; }
+};
+TEST(StatusOrTest, AssignOrReturnValOk) {
+  auto lambda = []() {
+    TC3_ASSIGN_OR_RETURN(int i, OkFn()(), -1);
+    return i;
+  };
+
+  // OkFn() should return a valid integer, so lambda should return that integer.
+  EXPECT_EQ(lambda(), 42);
+}
+
+struct FailFn {
+  StatusOr<int> operator()() { return Status::UNKNOWN; }
+};
+TEST(StatusOrTest, AssignOrReturnValError) {
+  auto lambda = []() {
+    TC3_ASSIGN_OR_RETURN(int i, FailFn()(), -1);
+    return i;
+  };
+
+  // FailFn() should return an error, so lambda should return -1.
+  EXPECT_EQ(lambda(), -1);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/base/unaligned_access.h b/native/utils/base/unaligned_access.h
deleted file mode 100644
index 68fe207..0000000
--- a/native/utils/base/unaligned_access.h
+++ /dev/null

@@ -1,300 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIBTEXTCLASSIFIER_UTILS_BASE_UNALIGNED_ACCESS_H_
-#define LIBTEXTCLASSIFIER_UTILS_BASE_UNALIGNED_ACCESS_H_
-
-#include <string.h>
-
-#include <cstdint>
-
-#include "utils/base/integral_types.h"
-#include "utils/base/macros.h"
-
-// unaligned APIs
-
-// Portable handling of unaligned loads, stores, and copies.
-// On some platforms, like ARM, the copy functions can be more efficient
-// then a load and a store.
-//
-// It is possible to implement all of these these using constant-length memcpy
-// calls, which is portable and will usually be inlined into simple loads and
-// stores if the architecture supports it. However, such inlining usually
-// happens in a pass that's quite late in compilation, which means the resulting
-// loads and stores cannot participate in many other optimizations, leading to
-// overall worse code.
-
-// The unaligned API is C++ only.  The declarations use C++ features
-// (namespaces, inline) which are absent or incompatible in C.
-#if defined(__cplusplus)
-
-#if defined(ADDRESS_SANITIZER) || defined(THREAD_SANITIZER) ||\
-    defined(MEMORY_SANITIZER)
-// Consider we have an unaligned load/store of 4 bytes from address 0x...05.
-// AddressSanitizer will treat it as a 3-byte access to the range 05:07 and
-// will miss a bug if 08 is the first unaddressable byte.
-// ThreadSanitizer will also treat this as a 3-byte access to 05:07 and will
-// miss a race between this access and some other accesses to 08.
-// MemorySanitizer will correctly propagate the shadow on unaligned stores
-// and correctly report bugs on unaligned loads, but it may not properly
-// update and report the origin of the uninitialized memory.
-// For all three tools, replacing an unaligned access with a tool-specific
-// callback solves the problem.
-
-// Make sure uint16_t/uint32_t/uint64_t are defined.
-#include <stdint.h>
-
-extern "C" {
-uint16_t __sanitizer_unaligned_load16(const void *p);
-uint32_t __sanitizer_unaligned_load32(const void *p);
-uint64_t __sanitizer_unaligned_load64(const void *p);
-void __sanitizer_unaligned_store16(void *p, uint16_t v);
-void __sanitizer_unaligned_store32(void *p, uint32_t v);
-void __sanitizer_unaligned_store64(void *p, uint64_t v);
-}  // extern "C"
-
-namespace libtextclassifier3 {
-
-inline uint16_t UnalignedLoad16(const void *p) {
-  return __sanitizer_unaligned_load16(p);
-}
-
-inline uint32_t UnalignedLoad32(const void *p) {
-  return __sanitizer_unaligned_load32(p);
-}
-
-inline uint64 UnalignedLoad64(const void *p) {
-  return __sanitizer_unaligned_load64(p);
-}
-
-inline void UnalignedStore16(void *p, uint16_t v) {
-  __sanitizer_unaligned_store16(p, v);
-}
-
-inline void UnalignedStore32(void *p, uint32_t v) {
-  __sanitizer_unaligned_store32(p, v);
-}
-
-inline void UnalignedStore64(void *p, uint64 v) {
-  __sanitizer_unaligned_store64(p, v);
-}
-
-}  // namespace libtextclassifier3
-
-#define TC3_UNALIGNED_LOAD16(_p) (::libtextclassifier3::UnalignedLoad16(_p))
-#define TC3_UNALIGNED_LOAD32(_p) (::libtextclassifier3::UnalignedLoad32(_p))
-#define TC3_UNALIGNED_LOAD64(_p) \
-  (::libtextclassifier3::UnalignedLoad64(_p))
-
-#define TC3_UNALIGNED_STORE16(_p, _val) \
-  (::libtextclassifier3::UnalignedStore16(_p, _val))
-#define TC3_UNALIGNED_STORE32(_p, _val) \
-  (::libtextclassifier3::UnalignedStore32(_p, _val))
-#define TC3_UNALIGNED_STORE64(_p, _val) \
-  (::libtextclassifier3::UnalignedStore64(_p, _val))
-
-#elif defined(UNDEFINED_BEHAVIOR_SANITIZER)
-
-namespace libtextclassifier3 {
-
-inline uint16_t UnalignedLoad16(const void *p) {
-  uint16_t t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline uint32_t UnalignedLoad32(const void *p) {
-  uint32_t t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline uint64 UnalignedLoad64(const void *p) {
-  uint64 t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline void UnalignedStore16(void *p, uint16_t v) { memcpy(p, &v, sizeof v); }
-
-inline void UnalignedStore32(void *p, uint32_t v) { memcpy(p, &v, sizeof v); }
-
-inline void UnalignedStore64(void *p, uint64 v) { memcpy(p, &v, sizeof v); }
-
-}  // namespace libtextclassifier3
-
-#define TC3_UNALIGNED_LOAD16(_p) (::libtextclassifier3::UnalignedLoad16(_p))
-#define TC3_UNALIGNED_LOAD32(_p) (::libtextclassifier3::UnalignedLoad32(_p))
-#define TC3_UNALIGNED_LOAD64(_p) (::libtextclassifier3::UnalignedLoad64(_p))
-
-#define TC3_UNALIGNED_STORE16(_p, _val) \
-  (::libtextclassifier3::UnalignedStore16(_p, _val))
-#define TC3_UNALIGNED_STORE32(_p, _val) \
-  (::libtextclassifier3::UnalignedStore32(_p, _val))
-#define TC3_UNALIGNED_STORE64(_p, _val) \
-  (::libtextclassifier3::UnalignedStore64(_p, _val))
-
-#elif defined(__x86_64__) || defined(_M_X64) || defined(__i386) || \
-    defined(_M_IX86) || defined(__ppc__) || defined(__PPC__) ||    \
-    defined(__ppc64__) || defined(__PPC64__)
-
-// x86 and x86-64 can perform unaligned loads/stores directly;
-// modern PowerPC hardware can also do unaligned integer loads and stores;
-// but note: the FPU still sends unaligned loads and stores to a trap handler!
-
-#define TC3_UNALIGNED_LOAD16(_p) \
-  (*reinterpret_cast<const uint16_t *>(_p))
-#define TC3_UNALIGNED_LOAD32(_p) \
-  (*reinterpret_cast<const uint32_t *>(_p))
-#define TC3_UNALIGNED_LOAD64(_p) \
-  (*reinterpret_cast<const uint64 *>(_p))
-
-#define TC3_UNALIGNED_STORE16(_p, _val) \
-  (*reinterpret_cast<uint16_t *>(_p) = (_val))
-#define TC3_UNALIGNED_STORE32(_p, _val) \
-  (*reinterpret_cast<uint32_t *>(_p) = (_val))
-#define TC3_UNALIGNED_STORE64(_p, _val) \
-  (*reinterpret_cast<uint64 *>(_p) = (_val))
-
-#elif defined(__arm__) && \
-      !defined(__ARM_ARCH_5__) && \
-      !defined(__ARM_ARCH_5T__) && \
-      !defined(__ARM_ARCH_5TE__) && \
-      !defined(__ARM_ARCH_5TEJ__) && \
-      !defined(__ARM_ARCH_6__) && \
-      !defined(__ARM_ARCH_6J__) && \
-      !defined(__ARM_ARCH_6K__) && \
-      !defined(__ARM_ARCH_6Z__) && \
-      !defined(__ARM_ARCH_6ZK__) && \
-      !defined(__ARM_ARCH_6T2__)
-
-
-// ARMv7 and newer support native unaligned accesses, but only of 16-bit
-// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
-// do an unaligned read and rotate the words around a bit, or do the reads very
-// slowly (trip through kernel mode). There's no simple #define that says just
-// "ARMv7 or higher", so we have to filter away all ARMv5 and ARMv6
-// sub-architectures. Newer gcc (>= 4.6) set an __ARM_FEATURE_ALIGNED #define,
-// so in time, maybe we can move on to that.
-//
-// This is a mess, but there's not much we can do about it.
-//
-// To further complicate matters, only LDR instructions (single reads) are
-// allowed to be unaligned, not LDRD (two reads) or LDM (many reads). Unless we
-// explicitly tell the compiler that these accesses can be unaligned, it can and
-// will combine accesses. On armcc, the way to signal this is done by accessing
-// through the type (uint32_t __packed *), but GCC has no such attribute
-// (it ignores __attribute__((packed)) on individual variables). However,
-// we can tell it that a _struct_ is unaligned, which has the same effect,
-// so we do that.
-
-namespace libtextclassifier3 {
-
-struct Unaligned16Struct {
-  uint16_t value;
-  uint8_t dummy;  // To make the size non-power-of-two.
-} TC3_ATTRIBUTE_PACKED;
-
-struct Unaligned32Struct {
-  uint32_t value;
-  uint8_t dummy;  // To make the size non-power-of-two.
-} TC3_ATTRIBUTE_PACKED;
-
-}  // namespace libtextclassifier3
-
-#define TC3_UNALIGNED_LOAD16(_p)                                  \
-  ((reinterpret_cast<const ::libtextclassifier3::Unaligned16Struct *>(_p)) \
-       ->value)
-#define TC3_UNALIGNED_LOAD32(_p)                                  \
-  ((reinterpret_cast<const ::libtextclassifier3::Unaligned32Struct *>(_p)) \
-       ->value)
-
-#define TC3_UNALIGNED_STORE16(_p, _val)                      \
-  ((reinterpret_cast< ::libtextclassifier3::Unaligned16Struct *>(_p)) \
-       ->value = (_val))
-#define TC3_UNALIGNED_STORE32(_p, _val)                      \
-  ((reinterpret_cast< ::libtextclassifier3::Unaligned32Struct *>(_p)) \
-       ->value = (_val))
-
-namespace libtextclassifier3 {
-
-inline uint64 UnalignedLoad64(const void *p) {
-  uint64 t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline void UnalignedStore64(void *p, uint64 v) { memcpy(p, &v, sizeof v); }
-
-}  // namespace libtextclassifier3
-
-#define TC3_UNALIGNED_LOAD64(_p) (::libtextclassifier3::UnalignedLoad64(_p))
-#define TC3_UNALIGNED_STORE64(_p, _val) \
-  (::libtextclassifier3::UnalignedStore64(_p, _val))
-
-#else
-
-// TC3_NEED_ALIGNED_LOADS is defined when the underlying platform
-// doesn't support unaligned access.
-#define TC3_NEED_ALIGNED_LOADS
-
-// These functions are provided for architectures that don't support
-// unaligned loads and stores.
-
-namespace libtextclassifier3 {
-
-inline uint16_t UnalignedLoad16(const void *p) {
-  uint16_t t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline uint32_t UnalignedLoad32(const void *p) {
-  uint32_t t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline uint64 UnalignedLoad64(const void *p) {
-  uint64 t;
-  memcpy(&t, p, sizeof t);
-  return t;
-}
-
-inline void UnalignedStore16(void *p, uint16_t v) { memcpy(p, &v, sizeof v); }
-
-inline void UnalignedStore32(void *p, uint32_t v) { memcpy(p, &v, sizeof v); }
-
-inline void UnalignedStore64(void *p, uint64 v) { memcpy(p, &v, sizeof v); }
-
-}  // namespace libtextclassifier3
-
-#define TC3_UNALIGNED_LOAD16(_p) (::libtextclassifier3::UnalignedLoad16(_p))
-#define TC3_UNALIGNED_LOAD32(_p) (::libtextclassifier3::UnalignedLoad32(_p))
-#define TC3_UNALIGNED_LOAD64(_p) (::libtextclassifier3::UnalignedLoad64(_p))
-
-#define TC3_UNALIGNED_STORE16(_p, _val) \
-  (::libtextclassifier3::UnalignedStore16(_p, _val))
-#define TC3_UNALIGNED_STORE32(_p, _val) \
-  (::libtextclassifier3::UnalignedStore32(_p, _val))
-#define TC3_UNALIGNED_STORE64(_p, _val) \
-  (::libtextclassifier3::UnalignedStore64(_p, _val))
-
-#endif
-
-#endif  // defined(__cplusplus), end of unaligned API
-
-#endif  // LIBTEXTCLASSIFIER_UTILS_BASE_UNALIGNED_ACCESS_H_

diff --git a/native/utils/checksum_test.cc b/native/utils/checksum_test.cc
new file mode 100644
index 0000000..dd04956
--- /dev/null
+++ b/native/utils/checksum_test.cc

@@ -0,0 +1,57 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/checksum.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(LuhnTest, CorrectlyHandlesSimpleCases) {  // one accept, three rejects
+  EXPECT_TRUE(VerifyLuhnChecksum("3782 8224 6310 005"));
+  for (const char* rejected : {"0", "1", "0A"}) {
+    EXPECT_FALSE(VerifyLuhnChecksum(rejected)) << rejected;
+  }
+}
+
+TEST(LuhnTest, CorrectlyVerifiesPaymentCardNumbers) {
+  // Fake test numbers; every one of them is expected to verify.
+  static constexpr const char* kFakeCardNumbers[] = {
+      "3782 8224 6310 005", "371449635398431",  "5610591081018250",
+      "38520000023237",     "6011000990139424", "3566002020360505",
+      "5105105105105100",   "4012 8888 8888 1881",
+  };
+  for (const char* number : kFakeCardNumbers) {
+    EXPECT_TRUE(VerifyLuhnChecksum(number)) << number;
+  }
+}
+
+TEST(LuhnTest, HandlesWhitespace) {
+  // Same input for both calls (note the trailing space); only the flag differs.
+  const char* const kSpacedNumber = "3782 8224 6310 005 ";
+  EXPECT_TRUE(VerifyLuhnChecksum(kSpacedNumber, /*ignore_whitespace=*/true));
+  EXPECT_FALSE(VerifyLuhnChecksum(kSpacedNumber, /*ignore_whitespace=*/false));
+}
+
+TEST(LuhnTest, HandlesEdgeCases) {  // blank and empty inputs never verify
+  for (const char* input : {"    ", ""}) {
+    EXPECT_FALSE(VerifyLuhnChecksum(input, /*ignore_whitespace=*/true));
+    EXPECT_FALSE(VerifyLuhnChecksum(input, /*ignore_whitespace=*/false));
+  }
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/container/sorted-strings-table_test.cc b/native/utils/container/sorted-strings-table_test.cc
new file mode 100644
index 0000000..a93b197
--- /dev/null
+++ b/native/utils/container/sorted-strings-table_test.cc

@@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/container/sorted-strings-table.h"
+
+#include <vector>
+
+#include "utils/base/integral_types.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(SortedStringsTest, Lookup) {
+  const char pieces[] = "hell\0hello\0o\0there\0";  // 19 bytes; 18 passed below
+  const uint32 offsets[] = {0, 5, 11, 13};  // start index of each piece
+
+  SortedStringsTable table(/*num_pieces=*/4, offsets, StringPiece(pieces, 18),
+                           /*use_linear_scan_threshold=*/1);
+
+  {  // Both "hell" and "hello" are prefixes of the query.
+    std::vector<StringSet::Match> matches;
+    EXPECT_TRUE(table.FindAllPrefixMatches("hello there", &matches));
+    EXPECT_EQ(matches.size(), 2);
+    EXPECT_EQ(matches[0].id, 0 /*hell*/);
+    EXPECT_EQ(matches[0].match_length, 4 /*hell*/);
+    EXPECT_EQ(matches[1].id, 1 /*hello*/);
+    EXPECT_EQ(matches[1].match_length, 5 /*hello*/);
+  }
+
+  {  // "he" starts two stored pieces, but no stored piece is a prefix of it.
+    std::vector<StringSet::Match> matches;
+    EXPECT_TRUE(table.FindAllPrefixMatches("he", &matches));
+    EXPECT_THAT(matches, testing::IsEmpty());
+  }
+
+  {  // NOTE(review): identical to the previous case -- likely a duplicate.
+    std::vector<StringSet::Match> matches;
+    EXPECT_TRUE(table.FindAllPrefixMatches("he", &matches));
+    EXPECT_THAT(matches, testing::IsEmpty());
+  }
+
+  {  // No stored piece is a prefix of "abcd".
+    std::vector<StringSet::Match> matches;
+    EXPECT_TRUE(table.FindAllPrefixMatches("abcd", &matches));
+    EXPECT_THAT(matches, testing::IsEmpty());
+  }
+
+  {  // Empty query.
+    std::vector<StringSet::Match> matches;
+    EXPECT_TRUE(table.FindAllPrefixMatches("", &matches));
+    EXPECT_THAT(matches, testing::IsEmpty());
+  }
+
+  {  // Shares only its first character with "hell" / "hello".
+    std::vector<StringSet::Match> matches;
+    EXPECT_TRUE(table.FindAllPrefixMatches("hi there", &matches));
+    EXPECT_THAT(matches, testing::IsEmpty());
+  }
+
+  {  // Query consisting of a single NUL byte.
+    std::vector<StringSet::Match> matches;
+    EXPECT_TRUE(table.FindAllPrefixMatches(StringPiece("\0", 1), &matches));
+    EXPECT_THAT(matches, testing::IsEmpty());
+  }
+
+  {  // NOTE(review): the first 2 bytes are 0xff and ','; "\xff\xfe" meant?
+    std::vector<StringSet::Match> matches;
+    EXPECT_TRUE(
+        table.FindAllPrefixMatches(StringPiece("\xff, \xfe", 2), &matches));
+    EXPECT_THAT(matches, testing::IsEmpty());
+  }
+
+  {  // "hell" is the longest stored piece that prefixes "hella there".
+    StringSet::Match match;
+    EXPECT_TRUE(table.LongestPrefixMatch("hella there", &match));
+    EXPECT_EQ(match.id, 0 /*hell*/);
+  }
+
+  {  // "hello" is preferred over the shorter "hell".
+    StringSet::Match match;
+    EXPECT_TRUE(table.LongestPrefixMatch("hello there", &match));
+    EXPECT_EQ(match.id, 1 /*hello*/);
+  }
+
+  {  // No prefix found: the call still returns true, with id -1.
+    StringSet::Match match;
+    EXPECT_TRUE(table.LongestPrefixMatch("abcd", &match));
+    EXPECT_EQ(match.id, -1);
+  }
+
+  {  // Empty query: likewise true with id -1.
+    StringSet::Match match;
+    EXPECT_TRUE(table.LongestPrefixMatch("", &match));
+    EXPECT_EQ(match.id, -1);
+  }
+
+  {  // Exact lookup of a stored piece.
+    int value;
+    EXPECT_TRUE(table.Find("hell", &value));
+    EXPECT_EQ(value, 0);
+  }
+
+  {  // "hella" is not stored, even though its prefix "hell" is.
+    int value;
+    EXPECT_FALSE(table.Find("hella", &value));
+  }
+
+  {  // Exact lookup of the second piece.
+    int value;
+    EXPECT_TRUE(table.Find("hello", &value));
+    EXPECT_EQ(value, 1 /*hello*/);
+  }
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/flatbuffers_test_extended.fbs b/native/utils/flatbuffers_test_extended.fbs
deleted file mode 100644
index ca679dc..0000000
--- a/native/utils/flatbuffers_test_extended.fbs
+++ /dev/null

@@ -1,50 +0,0 @@
-//
-// Copyright (C) 2018 The Android Open Source Project
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//      http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-namespace libtextclassifier3.test;
-
-table FlightNumberInfo {
-  carrier_code: string;
-  flight_code: int;
-}
-
-table ContactInfo {
-  first_name: string;
-  last_name: string;
-  phone_number: string;
-  score: float;
-}
-
-table Reminder {
-  title: string;
-  notes: [string];
-}
-
-table EntityData {
-  an_int_field: int;
-  a_long_field: int64;
-  a_bool_field: bool;
-  a_float_field: float;
-  a_double_field: double;
-  flight_number: FlightNumberInfo;
-  contact_info: ContactInfo;
-  reminders: [Reminder];
-  numbers: [int];
-  strings: [string];
-  mystic: string;  // Extra field.
-}
-
-root_type libtextclassifier3.test.EntityData;

diff --git a/native/utils/grammar/rules-utils_test.cc b/native/utils/grammar/rules-utils_test.cc
new file mode 100644
index 0000000..6391be1
--- /dev/null
+++ b/native/utils/grammar/rules-utils_test.cc

@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/grammar/rules-utils.h"
+
+#include <vector>
+
+#include "utils/grammar/match.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3::grammar {
+namespace {
+
+using testing::ElementsAre;
+using testing::Value;
+
+// Returns a Match initialised with 0, the span {begin, end} and offset `begin`.
+Match CreateMatch(const CodepointIndex begin, const CodepointIndex end) {
+  const CodepointSpan span{begin, end};
+  Match result;
+  result.Init(0, span, /*arg_match_offset=*/begin);
+  return result;
+}
+
+MATCHER_P(IsDerivation, candidate, "") {  // same rule_id and same match
+  return Value(arg.rule_id, candidate.rule_id) &&
+         Value(arg.match, candidate.match);
+}
+
+TEST(UtilsTest, DeduplicatesMatches) {
+  // Three rule-0 spans; (0, 2) overlaps both of the shorter ones.
+  Match spans[] = {CreateMatch(0, 1), CreateMatch(1, 2), CreateMatch(0, 2)};
+  const std::vector<Derivation> input = {{&spans[0], /*rule_id=*/0},
+                                         {&spans[1], /*rule_id=*/0},
+                                         {&spans[2], /*rule_id=*/0}};
+
+  // Only the longest derivation is expected to remain.
+  const auto deduplicated = DeduplicateDerivations(input);
+  EXPECT_THAT(deduplicated, ElementsAre(IsDerivation(input[2])));
+}
+
+TEST(UtilsTest, DeduplicatesMatchesPerRule) {
+  // Rule 0 claims all three spans; rule 1 additionally claims the first one.
+  Match spans[] = {CreateMatch(0, 1), CreateMatch(1, 2), CreateMatch(0, 2)};
+  const std::vector<Derivation> input = {{&spans[0], /*rule_id=*/0},
+                                         {&spans[1], /*rule_id=*/0},
+                                         {&spans[2], /*rule_id=*/0},
+                                         {&spans[0], /*rule_id=*/1}};
+
+  // Rule 0 keeps only its longest span; rule 1 keeps its single match.
+  const auto deduplicated = DeduplicateDerivations(input);
+  EXPECT_THAT(deduplicated,
+              ElementsAre(IsDerivation(input[2]), IsDerivation(input[3])));
+}
+
+TEST(UtilsTest, KeepNonoverlapping) {
+  // Three rule-0 spans that do not overlap one another.
+  Match spans[] = {CreateMatch(0, 1), CreateMatch(1, 2), CreateMatch(2, 3)};
+  const std::vector<Derivation> input = {{&spans[0], /*rule_id=*/0},
+                                         {&spans[1], /*rule_id=*/0},
+                                         {&spans[2], /*rule_id=*/0}};
+
+  // Every derivation is expected back, in input order.
+  const auto deduplicated = DeduplicateDerivations(input);
+  EXPECT_THAT(deduplicated,
+              ElementsAre(IsDerivation(input[0]), IsDerivation(input[1]),
+                          IsDerivation(input[2])));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3::grammar

diff --git a/native/utils/grammar/utils/ir_test.cc b/native/utils/grammar/utils/ir_test.cc
new file mode 100644
index 0000000..d2438dd
--- /dev/null
+++ b/native/utils/grammar/utils/ir_test.cc

@@ -0,0 +1,238 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/grammar/utils/ir.h"
+
+#include "utils/grammar/rules_generated.h"
+#include "utils/grammar/types.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3::grammar {
+namespace {
+
+using ::testing::Eq;
+using ::testing::IsEmpty;
+using ::testing::Ne;
+using ::testing::SizeIs;
+
+TEST(IrTest, HandlesSharingWithTerminalRules) {
+  Ir ir;
+
+  // <t1> ::= the
+  const Nonterm t1 = ir.Add(kUnassignedNonterm, "the");
+
+  // <t2> ::= quick
+  const Nonterm t2 = ir.Add(kUnassignedNonterm, "quick");
+
+  // <t3> ::= quick    -- should share with <t2>
+  const Nonterm t3 = ir.Add(kUnassignedNonterm, "quick");
+
+  // <t4> ::= quick    -- specify unshareable <t4>
+  // <t4> ::= brown
+  const Nonterm t4_unshareable = ir.AddUnshareableNonterminal();
+  ir.Add(t4_unshareable, "quick");
+  ir.Add(t4_unshareable, "brown");
+
+  // <t5> ::= brown    -- should not be shared with <t4>
+  const Nonterm t5 = ir.Add(kUnassignedNonterm, "brown");
+
+  // <t6> ::= brown    -- specify unshareable <t6>
+  const Nonterm t6_unshareable = ir.AddUnshareableNonterminal();
+  ir.Add(t6_unshareable, "brown");
+
+  // <t7> ::= brown    -- should share with <t5>
+  const Nonterm t7 = ir.Add(kUnassignedNonterm, "brown");
+
+  EXPECT_THAT(t1, Ne(kUnassignedNonterm));
+  EXPECT_THAT(t2, Ne(kUnassignedNonterm));
+  EXPECT_THAT(t1, Ne(t2));  // different terminals get different nonterminals
+  EXPECT_THAT(t2, Eq(t3));  // both added as shareable "quick" rules
+  EXPECT_THAT(t4_unshareable, Ne(kUnassignedNonterm));
+  EXPECT_THAT(t4_unshareable, Ne(t3));
+  EXPECT_THAT(t4_unshareable, Ne(t5));
+  EXPECT_THAT(t6_unshareable, Ne(kUnassignedNonterm));
+  EXPECT_THAT(t6_unshareable, Ne(t4_unshareable));
+  EXPECT_THAT(t6_unshareable, Ne(t5));
+  EXPECT_THAT(t7, Eq(t5));  // both added as shareable "brown" rules
+}
+
+TEST(IrTest, HandlesSharingWithNonterminalRules) {
+  Ir ir;
+
+  // Setup a few terminal rules.
+  const std::vector<Nonterm> rhs = {
+      ir.Add(kUnassignedNonterm, "the"), ir.Add(kUnassignedNonterm, "quick"),
+      ir.Add(kUnassignedNonterm, "brown"), ir.Add(kUnassignedNonterm, "fox")};
+
+  // Check for proper sharing on every non-empty prefix of the terminal rules.
+  const int max_length = static_cast<int>(rhs.size());  // no sign-compare
+  for (int rhs_length = 1; rhs_length <= max_length; ++rhs_length) {
+    const std::vector<Nonterm> prefix(rhs.begin(), rhs.begin() + rhs_length);
+    const Nonterm nt_u = ir.AddUnshareableNonterminal();
+    ir.Add(nt_u, prefix);
+    const Nonterm nt_1 = ir.Add(kUnassignedNonterm, prefix);
+    const Nonterm nt_2 = ir.Add(kUnassignedNonterm, prefix);
+
+    EXPECT_THAT(nt_1, Eq(nt_2));  // two shareable additions coincide
+    EXPECT_THAT(nt_1, Ne(nt_u));  // the unshareable nonterminal stays distinct
+  }
+}
+
+TEST(IrTest, HandlesSharingWithCallbacksWithSameParameters) {
+  // Test sharing in the presence of callbacks.
+  constexpr CallbackId kOutput1 = 1;
+  constexpr CallbackId kOutput2 = 2;
+  constexpr CallbackId kFilter1 = 3;
+  constexpr CallbackId kFilter2 = 4;
+  Ir ir(/*filters=*/{kFilter1, kFilter2});
+
+  const Nonterm x1 = ir.Add(kUnassignedNonterm, "hello");
+  const Nonterm x2 =
+      ir.Add(Ir::Lhs{kUnassignedNonterm, {kOutput1, 0}}, "hello");
+  const Nonterm x3 =
+      ir.Add(Ir::Lhs{kUnassignedNonterm, {kFilter1, 0}}, "hello");
+  const Nonterm x4 =
+      ir.Add(Ir::Lhs{kUnassignedNonterm, {kOutput2, 0}}, "hello");
+  const Nonterm x5 =
+      ir.Add(Ir::Lhs{kUnassignedNonterm, {kFilter2, 0}}, "hello");
+
+  // Duplicate of x4: same callback id (kOutput2) and same parameter (0).
+  const Nonterm x6 =
+      ir.Add(Ir::Lhs{kUnassignedNonterm, {kOutput2, 0}}, "hello");
+
+  EXPECT_THAT(x2, Eq(x1));  // non-filter callback: shared with the plain rule
+  EXPECT_THAT(x3, Ne(x1));  // callback registered as a filter: kept apart
+  EXPECT_THAT(x4, Eq(x1));
+  EXPECT_THAT(x5, Ne(x1));
+  EXPECT_THAT(x5, Ne(x3));  // distinct filters are distinct from each other
+  EXPECT_THAT(x6, Ne(x3));  // NOTE(review): was Eq(x4) intended?
+}
+
+TEST(IrTest, HandlesSharingWithCallbacksWithDifferentParameters) {
+  // Test sharing when the same callback id is used with parameters 0 and 1.
+  constexpr CallbackId kOutput = 1;
+  constexpr CallbackId kFilter = 2;
+  Ir ir(/*filters=*/{kFilter});
+
+  const Nonterm x1 = ir.Add(Ir::Lhs{kUnassignedNonterm, {kOutput, 0}}, "world");
+  const Nonterm x2 = ir.Add(Ir::Lhs{kUnassignedNonterm, {kOutput, 1}}, "world");
+  const Nonterm x3 = ir.Add(Ir::Lhs{kUnassignedNonterm, {kFilter, 0}}, "world");
+  const Nonterm x4 = ir.Add(Ir::Lhs{kUnassignedNonterm, {kFilter, 1}}, "world");
+
+  EXPECT_THAT(x2, Eq(x1));  // kOutput with param 0 vs 1: still shared
+  EXPECT_THAT(x3, Ne(x1));
+  EXPECT_THAT(x4, Ne(x1));
+  EXPECT_THAT(x4, Ne(x3));  // kFilter with param 0 vs 1: kept apart
+}
+
+TEST(IrTest, SerializesRulesToFlatbufferFormat) {
+  constexpr CallbackId kOutput = 1;
+  Ir ir;
+  const Nonterm verb = ir.AddUnshareableNonterminal();
+  ir.Add(verb, "buy");
+  ir.Add(Ir::Lhs{verb, {kOutput}}, "bring");
+  ir.Add(verb, "upbring");
+  ir.Add(verb, "remind");
+  const Nonterm set_reminder = ir.AddUnshareableNonterminal();
+  ir.Add(set_reminder,
+         std::vector<Nonterm>{ir.Add(kUnassignedNonterm, "remind"),
+                              ir.Add(kUnassignedNonterm, "me"),
+                              ir.Add(kUnassignedNonterm, "to"), verb});
+  const Nonterm action = ir.AddUnshareableNonterminal();
+  ir.Add(action, set_reminder);
+  RulesSetT rules;
+  ir.Serialize(/*include_debug_information=*/false, &rules);
+
+  EXPECT_THAT(rules.rules, SizeIs(1));
+
+  // Only one rule uses a callback, the rest will be encoded directly.
+  EXPECT_THAT(rules.lhs, SizeIs(1));
+  EXPECT_THAT(rules.lhs.front().callback_id(), kOutput);
+
+  // 6 distinct terminals: "buy", "upbring", "bring", "remind", "me" and "to".
+  EXPECT_THAT(rules.rules.front()->lowercase_terminal_rules->terminal_offsets,
+              SizeIs(6));
+  EXPECT_THAT(rules.rules.front()->terminal_rules->terminal_offsets, IsEmpty());
+
+  // As "bring" is a suffix of "upbring" it is expected to be suffix merged in
+  // the string pool, so the pool below has no separate "bring" entry.
+  EXPECT_THAT(rules.terminals,
+              Eq(std::string("buy\0me\0remind\0to\0upbring\0", 25)));
+
+  EXPECT_THAT(rules.rules.front()->binary_rules, SizeIs(3));
+
+  // One unary rule: <action> ::= <set_reminder>
+  EXPECT_THAT(rules.rules.front()->unary_rules, SizeIs(1));
+}
+
+TEST(IrTest, HandlesRulesSharding) {
+  Ir ir(/*filters=*/{}, /*num_shards=*/2);
+  const Nonterm verb = ir.AddUnshareableNonterminal();
+  const Nonterm set_reminder = ir.AddUnshareableNonterminal();
+
+  // Shard 0: en (no shard argument is passed to these calls).
+  ir.Add(verb, "buy");
+  ir.Add(verb, "bring");
+  ir.Add(verb, "remind");
+  ir.Add(set_reminder,
+         std::vector<Nonterm>{ir.Add(kUnassignedNonterm, "remind"),
+                              ir.Add(kUnassignedNonterm, "me"),
+                              ir.Add(kUnassignedNonterm, "to"), verb});
+
+  // Shard 1: de (every call passes /*shard=*/1 explicitly).
+  ir.Add(verb, "kaufen", /*case_sensitive=*/false, /*shard=*/1);
+  ir.Add(verb, "bringen", /*case_sensitive=*/false, /*shard=*/1);
+  ir.Add(verb, "erinnern", /*case_sensitive=*/false, /*shard=*/1);
+  ir.Add(set_reminder,
+         std::vector<Nonterm>{ir.Add(kUnassignedNonterm, "erinnere",
+                                     /*case_sensitive=*/false, /*shard=*/1),
+                              ir.Add(kUnassignedNonterm, "mich",
+                                     /*case_sensitive=*/false, /*shard=*/1),
+                              ir.Add(kUnassignedNonterm, "zu",
+                                     /*case_sensitive=*/false, /*shard=*/1),
+                              verb},
+         /*shard=*/1);
+
+  // Test that terminal strings are correctly merged into the shared
+  // string pool.
+  RulesSetT rules;
+  ir.Serialize(/*include_debug_information=*/false, &rules);
+
+  EXPECT_THAT(rules.rules, SizeIs(2));  // one entry per shard
+
+  // 5 distinct terminals: "buy", "bring", "remind", "me" and "to".
+  EXPECT_THAT(rules.rules[0]->lowercase_terminal_rules->terminal_offsets,
+              SizeIs(5));
+  EXPECT_THAT(rules.rules[0]->terminal_rules->terminal_offsets, IsEmpty());
+
+  // 6 distinct terminals: "kaufen", "bringen", "erinnern", "erinnere", "mich"
+  // and "zu".
+  EXPECT_THAT(rules.rules[1]->lowercase_terminal_rules->terminal_offsets,
+              SizeIs(6));
+  EXPECT_THAT(rules.rules[1]->terminal_rules->terminal_offsets, IsEmpty());
+
+  EXPECT_THAT(rules.terminals,
+              Eq(std::string("bring\0bringen\0buy\0erinnere\0erinnern\0kaufen\0"
+                             "me\0mich\0remind\0to\0zu\0",
+                             64)));  // 64 bytes including the NUL separators
+
+  EXPECT_THAT(rules.rules[0]->binary_rules, SizeIs(3));
+  EXPECT_THAT(rules.rules[1]->binary_rules, SizeIs(3));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3::grammar

diff --git a/native/utils/grammar/utils/rules_test.cc b/native/utils/grammar/utils/rules_test.cc
new file mode 100644
index 0000000..6761118
--- /dev/null
+++ b/native/utils/grammar/utils/rules_test.cc

@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/grammar/utils/rules.h"
+
+#include "utils/grammar/rules_generated.h"
+#include "utils/grammar/utils/ir.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3::grammar {
+namespace {
+
+using ::testing::IsEmpty;
+using ::testing::SizeIs;
+
+// Builds a small grammar (three <verb> alternatives, a <reminder> sequence and
+// an <action> wrapper), serializes it without debug information and checks
+// the number of rule sets, the terminal string pool and the binary/unary rule
+// counts.
+TEST(SerializeRulesTest, HandlesSimpleRuleSet) {
+  Rules rules;
+
+  rules.Add("<verb>", {"buy"});
+  rules.Add("<verb>", {"bring"});
+  rules.Add("<verb>", {"remind"});
+  rules.Add("<reminder>", {"remind", "me", "to", "<verb>"});
+  rules.Add("<action>", {"<reminder>"});
+
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(1));
+  // No callbacks were registered above, so no lhs entries are expected.
+  EXPECT_THAT(frozen_rules.lhs, IsEmpty());
+  // The expected pool contains embedded '\0' separators, hence the explicit
+  // length (23 bytes) passed to the std::string constructor.
+  EXPECT_EQ(frozen_rules.terminals,
+            std::string("bring\0buy\0me\0remind\0to\0", 23));
+  EXPECT_THAT(frozen_rules.rules.front()->binary_rules, SizeIs(3));
+  EXPECT_THAT(frozen_rules.rules.front()->unary_rules, SizeIs(1));
+}
+
+// Same grammar as the simple rule-set test, but two <verb> rules carry an
+// output callback and <action> carries a callback that was registered as a
+// filter. Checks that the callbacks surface as lhs entries while the terminal
+// pool and rule counts stay the same.
+TEST(SerializeRulesTest, HandlesRulesSetWithCallbacks) {
+  Rules rules;
+  const CallbackId output = 1;
+  const CallbackId filter = 2;
+  rules.DefineFilter(filter);
+
+  rules.Add("<verb>", {"buy"});
+  rules.Add("<verb>", {"bring"}, output, 0);
+  rules.Add("<verb>", {"remind"}, output, 0);
+  rules.Add("<reminder>", {"remind", "me", "to", "<verb>"});
+  rules.Add("<action>", {"<reminder>"}, filter, 0);
+
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(1));
+  EXPECT_EQ(frozen_rules.terminals,
+            std::string("bring\0buy\0me\0remind\0to\0", 23));
+
+  // We have two identical output calls and one filter call in the rule set
+  // definition above, so only two distinct lhs entries are expected.
+  EXPECT_THAT(frozen_rules.lhs, SizeIs(2));
+
+  EXPECT_THAT(frozen_rules.rules.front()->binary_rules, SizeIs(3));
+  EXPECT_THAT(frozen_rules.rules.front()->unary_rules, SizeIs(1));
+}
+
+// Adds a <flight> rule with max_whitespace_gap=0 and no callback, then checks
+// the serialized terminal pool and that exactly one lhs entry is produced.
+// NOTE(review): presumably the single lhs entry is the one carrying the
+// whitespace-gap limit -- confirm against Ir::Serialize.
+TEST(SerializeRulesTest, HandlesRulesWithWhitespaceGapLimits) {
+  Rules rules;
+  rules.Add("<iata>", {"lx"});
+  rules.Add("<iata>", {"aa"});
+  rules.Add("<flight>", {"<iata>", "<4_digits>"}, kNoCallback, 0,
+            /*max_whitespace_gap=*/0);
+
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(1));
+  // Two NUL-terminated terminals, 6 bytes in total.
+  EXPECT_EQ(frozen_rules.terminals, std::string("aa\0lx\0", 6));
+  EXPECT_THAT(frozen_rules.lhs, SizeIs(1));
+}
+
+// Mixes case-sensitive ("LX", "AA") and case-insensitive ("dl") terminals in
+// one grammar and checks that all three appear in the serialized terminal
+// pool with their original casing.
+TEST(SerializeRulesTest, HandlesCaseSensitiveTerminals) {
+  Rules rules;
+  rules.Add("<iata>", {"LX"}, kNoCallback, 0, /*max_whitespace_gap=*/-1,
+            /*case_sensitive=*/true);
+  rules.Add("<iata>", {"AA"}, kNoCallback, 0, /*max_whitespace_gap=*/-1,
+            /*case_sensitive=*/true);
+  rules.Add("<iata>", {"dl"}, kNoCallback, 0, /*max_whitespace_gap=*/-1,
+            /*case_sensitive=*/false);
+  rules.Add("<flight>", {"<iata>", "<4_digits>"}, kNoCallback, 0,
+            /*max_whitespace_gap=*/0);
+
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(1));
+  // Three NUL-terminated terminals, 9 bytes in total.
+  EXPECT_EQ(frozen_rules.terminals, std::string("AA\0LX\0dl\0", 9));
+  EXPECT_THAT(frozen_rules.lhs, SizeIs(1));
+}
+
+// Distributes two <iata> rules over two shards and checks that serialization
+// yields one rule set per shard while both terminals land in the single
+// `terminals` pool of the RulesSetT.
+TEST(SerializeRulesTest, HandlesMultipleShards) {
+  Rules rules(/*num_shards=*/2);
+  rules.Add("<iata>", {"LX"}, kNoCallback, 0, /*max_whitespace_gap=*/-1,
+            /*case_sensitive=*/true, /*shard=*/0);
+  rules.Add("<iata>", {"aa"}, kNoCallback, 0, /*max_whitespace_gap=*/-1,
+            /*case_sensitive=*/false, /*shard=*/1);
+
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(2));
+  EXPECT_EQ(frozen_rules.terminals, std::string("LX\0aa\0", 6));
+}
+
+// Two regex rules must surface as two entries in the serialized
+// regex_annotator list.
+TEST(SerializeRulesTest, HandlesRegexRules) {
+  Rules regex_rules;
+  regex_rules.AddRegex("<code>", "[A-Z]+");
+  regex_rules.AddRegex("<numbers>", "\\d+");
+
+  const Ir ir = regex_rules.Finalize();
+  RulesSetT serialized;
+  ir.Serialize(/*include_debug_information=*/false, &serialized);
+
+  EXPECT_THAT(serialized.regex_annotator, SizeIs(2));
+}
+
+// Declares <flight_number> as an alias of <flight> and checks that the alias
+// adds neither a unary rule nor an lhs entry to the serialized rule set.
+TEST(SerializeRulesTest, HandlesAlias) {
+  Rules rules;
+  rules.Add("<iata>", {"lx"});
+  rules.Add("<iata>", {"aa"});
+  rules.Add("<flight>", {"<iata>", "<4_digits>"});
+  rules.AddAlias("<flight_number>", "<flight>");
+
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(1));
+  EXPECT_EQ(frozen_rules.terminals, std::string("aa\0lx\0", 6));
+  // The only binary rule is <flight> ::= <iata> <4_digits>.
+  EXPECT_THAT(frozen_rules.rules.front()->binary_rules, SizeIs(1));
+
+  // Only alias, no rule.
+  EXPECT_THAT(frozen_rules.rules.front()->unary_rules, IsEmpty());
+
+  EXPECT_THAT(frozen_rules.lhs, IsEmpty());
+}
+
+// Adds a single rule wrapped in start/end anchors ("<^>", "<$>") with a
+// "<filler>" next to each anchor, and checks that the serialized grammar only
+// contains the binarized terminal sequence.
+TEST(SerializeRulesTest, ResolvesAnchorsAndFillers) {
+  Rules rules;
+  rules.Add("<code>",
+            {"<^>", "<filler>", "this", "is", "a", "test", "<filler>", "<$>"});
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(1));
+  // NOTE(review): "is" has no entry of its own in the expected pool;
+  // presumably it shares the tail of "this\0" -- confirm against the string
+  // pool merging in Ir::Serialize.
+  EXPECT_EQ(frozen_rules.terminals, std::string("a\0test\0this\0", 12));
+
+  // Expect removal of anchors and fillers in this case.
+  // The rule above is equivalent to: <code> ::= this is a test, binarized into
+  // <tmp_0> ::= this is
+  // <tmp_1> ::= <tmp_0> a
+  // <code>  ::= <tmp_1> test
+  EXPECT_THAT(frozen_rules.rules.front()->binary_rules, SizeIs(3));
+
+  EXPECT_THAT(frozen_rules.rules.front()->unary_rules, IsEmpty());
+  EXPECT_THAT(frozen_rules.lhs, IsEmpty());
+}
+
+// Registers three annotation nonterminals and checks that all of them are
+// serialized into nonterminals->annotation_nt.
+TEST(SerializeRulesTest, HandlesAnnotations) {
+  Rules rules;
+  rules.AddAnnotation("phone");
+  rules.AddAnnotation("url");
+  rules.AddAnnotation("tracking_number");
+  const Ir ir = rules.Finalize();
+  RulesSetT frozen_rules;
+  ir.Serialize(/*include_debug_information=*/false, &frozen_rules);
+
+  EXPECT_THAT(frozen_rules.rules, SizeIs(1));
+  EXPECT_THAT(frozen_rules.nonterminals->annotation_nt, SizeIs(3));
+  // The expected order is alphabetical by key, not the insertion order above.
+  EXPECT_EQ(frozen_rules.nonterminals->annotation_nt[0]->key, "phone");
+  EXPECT_EQ(frozen_rules.nonterminals->annotation_nt[1]->key,
+            "tracking_number");
+  EXPECT_EQ(frozen_rules.nonterminals->annotation_nt[2]->key, "url");
+}
+
+}  // namespace
+}  // namespace libtextclassifier3::grammar

diff --git a/native/utils/i18n/locale_test.cc b/native/utils/i18n/locale_test.cc
new file mode 100644
index 0000000..faea4f6
--- /dev/null
+++ b/native/utils/i18n/locale_test.cc

@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/i18n/locale.h"
+
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// The locale returned by Locale::Invalid() must report itself as not valid.
+// Note that no parsing is involved here despite the test name.
+TEST(LocaleTest, ParseUnknown) {
+  Locale locale = Locale::Invalid();
+  EXPECT_FALSE(locale.IsValid());
+}
+
+// "en-CH" parses into language "en", region "CH" and an empty script.
+TEST(LocaleTest, ParseSwissEnglish) {
+  Locale locale = Locale::FromBCP47("en-CH");
+  EXPECT_TRUE(locale.IsValid());
+  EXPECT_EQ(locale.Language(), "en");
+  EXPECT_EQ(locale.Script(), "");
+  EXPECT_EQ(locale.Region(), "CH");
+}
+
+// "zh-CN" parses into language "zh", region "CN" and an empty script.
+TEST(LocaleTest, ParseChineseChina) {
+  Locale locale = Locale::FromBCP47("zh-CN");
+  EXPECT_TRUE(locale.IsValid());
+  EXPECT_EQ(locale.Language(), "zh");
+  EXPECT_EQ(locale.Script(), "");
+  EXPECT_EQ(locale.Region(), "CN");
+}
+
+// A full language-script-region tag ("zh-Hant-TW") populates all three parts.
+TEST(LocaleTest, ParseChineseTaiwan) {
+  Locale locale = Locale::FromBCP47("zh-Hant-TW");
+  EXPECT_TRUE(locale.IsValid());
+  EXPECT_EQ(locale.Language(), "zh");
+  EXPECT_EQ(locale.Script(), "Hant");
+  EXPECT_EQ(locale.Region(), "TW");
+}
+
+// A bare language tag ("en") is valid and leaves script and region empty.
+TEST(LocaleTest, ParseEnglish) {
+  Locale locale = Locale::FromBCP47("en");
+  EXPECT_TRUE(locale.IsValid());
+  EXPECT_EQ(locale.Language(), "en");
+  EXPECT_EQ(locale.Script(), "");
+  EXPECT_EQ(locale.Region(), "");
+}
+
+// A language-script tag without a region ("zh-Hant") is valid and leaves the
+// region empty.
+TEST(LocaleTest, ParseChineseTraditional) {
+  Locale locale = Locale::FromBCP47("zh-Hant");
+  EXPECT_TRUE(locale.IsValid());
+  EXPECT_EQ(locale.Language(), "zh");
+  EXPECT_EQ(locale.Script(), "Hant");
+  EXPECT_EQ(locale.Region(), "");
+}
+
+// With "en" as the only supported locale, the requested list {zh-HK, en-UK}
+// must be reported as supported.
+TEST(LocaleTest, IsAnyLocaleSupportedMatch) {
+  const std::vector<Locale> requested{Locale::FromBCP47("zh-HK"),
+                                      Locale::FromBCP47("en-UK")};
+  const std::vector<Locale> supported{Locale::FromBCP47("en")};
+
+  const bool any_supported = Locale::IsAnyLocaleSupported(
+      requested, supported, /*default_value=*/false);
+  EXPECT_TRUE(any_supported);
+}
+
+// With {en, fr} supported, the requested list {zh-tw} must be reported as
+// unsupported.
+TEST(LocaleTest, IsAnyLocaleSupportedNotMatch) {
+  const std::vector<Locale> requested{Locale::FromBCP47("zh-tw")};
+  const std::vector<Locale> supported{Locale::FromBCP47("en"),
+                                      Locale::FromBCP47("fr")};
+
+  const bool any_supported = Locale::IsAnyLocaleSupported(
+      requested, supported, /*default_value=*/false);
+  EXPECT_FALSE(any_supported);
+}
+
+// A supported list consisting of the wildcard "*" accepts the requested
+// locale zh-tw.
+TEST(LocaleTest, IsAnyLocaleSupportedAnyLocale) {
+  const std::vector<Locale> requested{Locale::FromBCP47("zh-tw")};
+  const std::vector<Locale> supported{Locale::FromBCP47("*")};
+
+  const bool any_supported = Locale::IsAnyLocaleSupported(
+      requested, supported, /*default_value=*/false);
+  EXPECT_TRUE(any_supported);
+}
+
+// With an empty requested list and default_value=true, the call is expected
+// to return true.
+TEST(LocaleTest, IsAnyLocaleSupportedEmptyLocales) {
+  const std::vector<Locale> supported{Locale::FromBCP47("en")};
+
+  const bool any_supported = Locale::IsAnyLocaleSupported(
+      {}, supported, /*default_value=*/true);
+  EXPECT_TRUE(any_supported);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/intents/intent-config.fbs b/native/utils/intents/intent-config.fbs
index 76a0ddc..672eb9d 100755
--- a/native/utils/intents/intent-config.fbs
+++ b/native/utils/intents/intent-config.fbs

@@ -132,7 +132,6 @@
   type:AndroidSimpleIntentGeneratorExtraType;
 
   string_:string (shared);
-
   bool_:bool;
   int32_:int;
 }
@@ -141,9 +140,7 @@
 namespace libtextclassifier3;
 table AndroidSimpleIntentGeneratorCondition {
   type:AndroidSimpleIntentGeneratorConditionType;
-
   string_:string (shared);
-
   int32_:int;
   int64_:long;
 }

diff --git a/native/utils/lua-utils_test.cc b/native/utils/lua-utils_test.cc
new file mode 100644
index 0000000..8c9f8de
--- /dev/null
+++ b/native/utils/lua-utils_test.cc

@@ -0,0 +1,333 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/lua-utils.h"
+
+#include <string>
+
+#include "utils/flatbuffers.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using testing::ElementsAre;
+using testing::Eq;
+using testing::FloatEq;
+
+// Returns a serialized flatbuffers reflection schema describing a single
+// table "TestData" with five fields:
+//   float_field            (id 0, vtable offset 4)   float
+//   nested_field           (id 1, vtable offset 6)   TestData (self reference)
+//   repeated_nested_field  (id 2, vtable offset 8)   vector of TestData
+//   repeated_string_field  (id 3, vtable offset 10)  vector of string
+//   string_field           (id 4, vtable offset 12)  string
+// The tests below read serialized buffers by these vtable offsets.
+std::string TestFlatbufferSchema() {
+  // Creates a test schema for flatbuffer passing tests.
+  // Cannot use the object oriented API here as that is not available for the
+  // reflection schema.
+  flatbuffers::FlatBufferBuilder schema_builder;
+  std::vector<flatbuffers::Offset<reflection::Field>> fields = {
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("float_field"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::Float),
+          /*id=*/0,
+          /*offset=*/4),
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("nested_field"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::Obj,
+                                 /*element=*/reflection::None,
+                                 /*index=*/0 /* self */),
+          /*id=*/1,
+          /*offset=*/6),
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("repeated_nested_field"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::Vector,
+                                 /*element=*/reflection::Obj,
+                                 /*index=*/0 /* self */),
+          /*id=*/2,
+          /*offset=*/8),
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("repeated_string_field"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::Vector,
+                                 /*element=*/reflection::String),
+          /*id=*/3,
+          /*offset=*/10),
+      reflection::CreateField(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("string_field"),
+          /*type=*/
+          reflection::CreateType(schema_builder,
+                                 /*base_type=*/reflection::String),
+          /*id=*/4,
+          /*offset=*/12)};
+
+  // No enums are needed; the schema consists of the single TestData object.
+  std::vector<flatbuffers::Offset<reflection::Enum>> enums;
+  std::vector<flatbuffers::Offset<reflection::Object>> objects = {
+      reflection::CreateObject(
+          schema_builder,
+          /*name=*/schema_builder.CreateString("TestData"),
+          /*fields=*/
+          schema_builder.CreateVectorOfSortedTables(&fields))};
+  schema_builder.Finish(reflection::CreateSchema(
+      schema_builder, schema_builder.CreateVectorOfSortedTables(&objects),
+      schema_builder.CreateVectorOfSortedTables(&enums),
+      /*(unused) file_ident=*/0,
+      /*(unused) file_ext=*/0,
+      /*root_table=*/objects[0]));
+  // Copy the finished buffer out of the builder so it outlives this function.
+  return std::string(
+      reinterpret_cast<const char*>(schema_builder.GetBufferPointer()),
+      schema_builder.GetSize());
+}
+
+// Test fixture that is itself a LuaEnvironment (protected inheritance), so
+// tests can call the environment's push/read helpers and access `state_`
+// directly. It also owns the serialized test schema and a reflective
+// flatbuffer builder created from it.
+class LuaUtilsTest : public testing::Test, protected LuaEnvironment {
+ protected:
+  // Members are initialized in declaration order: the serialized schema
+  // first, then the schema view into that string, then the builder that uses
+  // the view. Afterwards the default Lua libraries are loaded inside
+  // RunProtected.
+  LuaUtilsTest()
+      : serialized_flatbuffer_schema_(TestFlatbufferSchema()),
+        schema_(flatbuffers::GetRoot<reflection::Schema>(
+            serialized_flatbuffer_schema_.data())),
+        flatbuffer_builder_(schema_) {
+    EXPECT_THAT(RunProtected([this] {
+                  LoadDefaultLibraries();
+                  return LUA_OK;
+                }),
+                Eq(LUA_OK));
+  }
+
+  // Loads `script` as a Lua chunk and calls it with no arguments, requesting
+  // one result that is left on the Lua stack for the caller to read. Load and
+  // call failures are reported as non-fatal test failures.
+  void RunScript(StringPiece script) {
+    EXPECT_THAT(luaL_loadbuffer(state_, script.data(), script.size(),
+                                /*name=*/nullptr),
+                Eq(LUA_OK));
+    EXPECT_THAT(
+        lua_pcall(state_, /*nargs=*/0, /*num_results=*/1, /*errfunc=*/0),
+        Eq(LUA_OK));
+  }
+
+  // Backing storage for `schema_`; must outlive it.
+  const std::string serialized_flatbuffer_schema_;
+  // Non-owning view into `serialized_flatbuffer_schema_`.
+  const reflection::Schema* schema_;
+  ReflectiveFlatbufferBuilder flatbuffer_builder_;
+};
+
+// Round-trips vectors of int64, std::string and bool through the Lua stack:
+// each vector is pushed with PushVector and read back with ReadVector.
+TEST_F(LuaUtilsTest, HandlesVectors) {
+  {
+    PushVector(std::vector<int64>{1, 2, 3, 4, 5});
+    EXPECT_THAT(ReadVector<int64>(), ElementsAre(1, 2, 3, 4, 5));
+  }
+  {
+    PushVector(std::vector<std::string>{"hello", "there"});
+    EXPECT_THAT(ReadVector<std::string>(), ElementsAre("hello", "there"));
+  }
+  {
+    PushVector(std::vector<bool>{true, true, false});
+    EXPECT_THAT(ReadVector<bool>(), ElementsAre(true, true, false));
+  }
+}
+
+// Same round trip as HandlesVectors, but the vectors are exposed to Lua via
+// PushVectorIterator, which receives a pointer to the vector. Each vector
+// stays in scope until after it has been read back.
+TEST_F(LuaUtilsTest, HandlesVectorIterators) {
+  {
+    const std::vector<int64> elements = {1, 2, 3, 4, 5};
+    PushVectorIterator(&elements);
+    EXPECT_THAT(ReadVector<int64>(), ElementsAre(1, 2, 3, 4, 5));
+  }
+  {
+    const std::vector<std::string> elements = {"hello", "there"};
+    PushVectorIterator(&elements);
+    EXPECT_THAT(ReadVector<std::string>(), ElementsAre("hello", "there"));
+  }
+  {
+    const std::vector<bool> elements = {true, true, false};
+    PushVectorIterator(&elements);
+    EXPECT_THAT(ReadVector<bool>(), ElementsAre(true, true, false));
+  }
+}
+
+// Runs a Lua script that returns a table shaped like the TestData schema,
+// reads that table into a reflective flatbuffer and verifies the serialized
+// buffer field by field.
+//
+// Pointers obtained from the buffer are guarded with fatal ASSERT_* checks
+// before they are dereferenced, and vector sizes are asserted before
+// indexing, so that a regression fails this test instead of crashing the
+// whole test binary.
+TEST_F(LuaUtilsTest, ReadsFlatbufferResults) {
+  // Setup.
+  RunScript(R"lua(
+    return {
+        float_field = 42.1,
+        string_field = "hello there",
+
+        -- Nested field.
+        nested_field = {
+          float_field = 64,
+          string_field = "hello nested",
+        },
+
+        -- Repeated fields.
+        repeated_string_field = { "a", "bold", "one" },
+        repeated_nested_field = {
+          { string_field = "a" },
+          { string_field = "b" },
+          { repeated_string_field = { "nested", "nested2" } },
+        },
+    }
+  )lua");
+
+  // Read the flatbuffer from the script result on top of the Lua stack.
+  std::unique_ptr<ReflectiveFlatbuffer> buffer = flatbuffer_builder_.NewRoot();
+  ASSERT_TRUE(buffer != nullptr);
+  ReadFlatbuffer(/*index=*/-1, buffer.get());
+  const std::string serialized_buffer = buffer->Serialize();
+
+  // Check fields. As we do not have flatbuffer compiled generated code for the
+  // ad hoc generated test schema, we have to read by manually using field
+  // offsets (see TestFlatbufferSchema: 4 = float_field, 6 = nested_field,
+  // 8 = repeated_nested_field, 10 = repeated_string_field, 12 = string_field).
+  const flatbuffers::Table* flatbuffer_data =
+      flatbuffers::GetRoot<flatbuffers::Table>(serialized_buffer.data());
+  EXPECT_THAT(flatbuffer_data->GetField<float>(/*field=*/4, /*defaultval=*/0),
+              FloatEq(42.1));
+  const flatbuffers::String* string_field =
+      flatbuffer_data->GetPointer<const flatbuffers::String*>(/*field=*/12);
+  ASSERT_TRUE(string_field != nullptr);
+  EXPECT_THAT(string_field->str(), "hello there");
+
+  // Read the nested field.
+  const flatbuffers::Table* nested_field =
+      flatbuffer_data->GetPointer<const flatbuffers::Table*>(/*field=*/6);
+  ASSERT_TRUE(nested_field != nullptr);
+  EXPECT_THAT(nested_field->GetField<float>(/*field=*/4, /*defaultval=*/0),
+              FloatEq(64));
+  const flatbuffers::String* nested_string_field =
+      nested_field->GetPointer<const flatbuffers::String*>(/*field=*/12);
+  ASSERT_TRUE(nested_string_field != nullptr);
+  EXPECT_THAT(nested_string_field->str(), "hello nested");
+
+  // Read the repeated string field.
+  auto repeated_strings = flatbuffer_data->GetPointer<
+      flatbuffers::Vector<flatbuffers::Offset<flatbuffers::String>>*>(
+      /*field=*/10);
+  ASSERT_TRUE(repeated_strings != nullptr);
+  ASSERT_THAT(repeated_strings->size(), Eq(3));
+  EXPECT_THAT(repeated_strings->GetAsString(0)->str(), Eq("a"));
+  EXPECT_THAT(repeated_strings->GetAsString(1)->str(), Eq("bold"));
+  EXPECT_THAT(repeated_strings->GetAsString(2)->str(), Eq("one"));
+
+  // Read the repeated nested field. Only the first two elements are
+  // inspected; the third one (which only sets repeated_string_field) is
+  // covered by the size check alone.
+  auto repeated_nested_fields = flatbuffer_data->GetPointer<
+      flatbuffers::Vector<flatbuffers::Offset<flatbuffers::Table>>*>(
+      /*field=*/8);
+  ASSERT_TRUE(repeated_nested_fields != nullptr);
+  ASSERT_THAT(repeated_nested_fields->size(), Eq(3));
+  const flatbuffers::String* first_nested_string =
+      repeated_nested_fields->Get(0)->GetPointer<const flatbuffers::String*>(
+          /*field=*/12);
+  ASSERT_TRUE(first_nested_string != nullptr);
+  EXPECT_THAT(first_nested_string->str(), "a");
+  const flatbuffers::String* second_nested_string =
+      repeated_nested_fields->Get(1)->GetPointer<const flatbuffers::String*>(
+          /*field=*/12);
+  ASSERT_TRUE(second_nested_string != nullptr);
+  EXPECT_THAT(second_nested_string->str(), "b");
+}
+
+// Pushes a flatbuffer with float_field = 42 to Lua as the global `arg` and
+// checks that a script can read the scalar field back.
+TEST_F(LuaUtilsTest, HandlesSimpleFlatbufferFields) {
+  // Create test flatbuffer.
+  std::unique_ptr<ReflectiveFlatbuffer> buffer = flatbuffer_builder_.NewRoot();
+  buffer->Set("float_field", 42.f);
+  const std::string serialized_buffer = buffer->Serialize();
+  PushFlatbuffer(schema_, flatbuffers::GetRoot<flatbuffers::Table>(
+                              serialized_buffer.data()));
+  // Bind the pushed value to the Lua global `arg` used by the script below.
+  lua_setglobal(state_, "arg");
+
+  // Setup.
+  RunScript(R"lua(
+    return arg.float_field
+  )lua");
+
+  EXPECT_THAT(Read<float>(), FloatEq(42));
+}
+
+// Pushes a flatbuffer whose repeated_string_field holds four strings and
+// checks that a script returning that field yields the same strings in order.
+TEST_F(LuaUtilsTest, HandlesRepeatedFlatbufferFields) {
+  // Create test flatbuffer.
+  std::unique_ptr<ReflectiveFlatbuffer> buffer = flatbuffer_builder_.NewRoot();
+  RepeatedField* repeated_field = buffer->Repeated("repeated_string_field");
+  repeated_field->Add("this");
+  repeated_field->Add("is");
+  repeated_field->Add("a");
+  repeated_field->Add("test");
+  const std::string serialized_buffer = buffer->Serialize();
+  PushFlatbuffer(schema_, flatbuffers::GetRoot<flatbuffers::Table>(
+                              serialized_buffer.data()));
+  // Bind the pushed value to the Lua global `arg` used by the script below.
+  lua_setglobal(state_, "arg");
+
+  // Return flatbuffer repeated field as vector.
+  RunScript(R"lua(
+    return arg.repeated_string_field
+  )lua");
+
+  EXPECT_THAT(ReadVector<std::string>(),
+              ElementsAre("this", "is", "a", "test"));
+}
+
+// Pushes a flatbuffer with four nested tables, the third of which also has a
+// repeated string field, and checks that a script can walk both levels with
+// `pairs` and collect all strings in order.
+TEST_F(LuaUtilsTest, HandlesRepeatedNestedFlatbufferFields) {
+  // Create test flatbuffer.
+  std::unique_ptr<ReflectiveFlatbuffer> buffer = flatbuffer_builder_.NewRoot();
+  RepeatedField* repeated_field = buffer->Repeated("repeated_nested_field");
+  repeated_field->Add()->Set("string_field", "hello");
+  repeated_field->Add()->Set("string_field", "my");
+  ReflectiveFlatbuffer* nested = repeated_field->Add();
+  nested->Set("string_field", "old");
+  RepeatedField* nested_repeated = nested->Repeated("repeated_string_field");
+  nested_repeated->Add("friend");
+  nested_repeated->Add("how");
+  nested_repeated->Add("are");
+  repeated_field->Add()->Set("string_field", "you?");
+  const std::string serialized_buffer = buffer->Serialize();
+  PushFlatbuffer(schema_, flatbuffers::GetRoot<flatbuffers::Table>(
+                              serialized_buffer.data()));
+  // Bind the pushed value to the Lua global `arg` used by the script below.
+  lua_setglobal(state_, "arg");
+
+  // The script flattens each nested table into its string_field followed by
+  // the entries of its repeated_string_field.
+  RunScript(R"lua(
+    result = {}
+    for _, nested in pairs(arg.repeated_nested_field) do
+      result[#result + 1] = nested.string_field
+      for _, nested_string in pairs(nested.repeated_string_field) do
+        result[#result + 1] = nested_string
+      end
+    end
+    return result
+  )lua");
+
+  EXPECT_THAT(
+      ReadVector<std::string>(),
+      ElementsAre("hello", "my", "old", "friend", "how", "are", "you?"));
+}
+
+// Exposes two independent flatbuffers as the Lua globals `arg` and `arg2` and
+// checks that a script reading both gets each buffer's own string_field.
+TEST_F(LuaUtilsTest, CorrectlyReadsTwoFlatbuffersSimultaneously) {
+  // The first flatbuffer.
+  std::unique_ptr<ReflectiveFlatbuffer> buffer = flatbuffer_builder_.NewRoot();
+  buffer->Set("string_field", "first");
+  const std::string serialized_buffer = buffer->Serialize();
+  PushFlatbuffer(schema_, flatbuffers::GetRoot<flatbuffers::Table>(
+                              serialized_buffer.data()));
+  lua_setglobal(state_, "arg");
+  // The second flatbuffer.
+  std::unique_ptr<ReflectiveFlatbuffer> buffer2 = flatbuffer_builder_.NewRoot();
+  buffer2->Set("string_field", "second");
+  const std::string serialized_buffer2 = buffer2->Serialize();
+  PushFlatbuffer(schema_, flatbuffers::GetRoot<flatbuffers::Table>(
+                              serialized_buffer2.data()));
+  lua_setglobal(state_, "arg2");
+
+  RunScript(R"lua(
+    return {arg.string_field, arg2.string_field}
+  )lua");
+
+  EXPECT_THAT(ReadVector<std::string>(), ElementsAre("first", "second"));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/normalization_test.cc b/native/utils/normalization_test.cc
new file mode 100644
index 0000000..1f731c7
--- /dev/null
+++ b/native/utils/normalization_test.cc

@@ -0,0 +1,121 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/normalization.h"
+
+#include <string>
+
+#include "utils/base/integral_types.h"
+#include "utils/utf8/unicodetext.h"
+#include "utils/utf8/unilib.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using testing::Eq;
+
+// Test fixture that owns a UniLib instance (set up through the
+// INIT_UNILIB_FOR_TESTING macro) and offers a std::string based wrapper around
+// libtextclassifier3::NormalizeTextCodepointWise.
+class NormalizationTest : public testing::Test {
+ protected:
+  NormalizationTest() : INIT_UNILIB_FOR_TESTING(unilib_) {}
+
+  // Applies the normalization ops in `codepointwise_ops` (a bitwise OR of
+  // NormalizationOptions_::CodepointwiseNormalizationOp_* values) to the
+  // UTF-8 string `text` and returns the result as a UTF-8 string.
+  std::string NormalizeTextCodepointWise(const std::string& text,
+                                         const int32 codepointwise_ops) {
+    // do_copy=false: no copy of `text` is requested; `text` outlives the
+    // call, and the result is converted to an owning std::string on return.
+    return libtextclassifier3::NormalizeTextCodepointWise(
+               unilib_, codepointwise_ops,
+               UTF8ToUnicodeText(text, /*do_copy=*/false))
+        .ToUTF8String();
+  }
+
+  UniLib unilib_;
+};
+
+// With the NONE op the input must come back unchanged. This test sits outside
+// the TC3_UNILIB_DUMMY guard, so it is compiled for every UniLib variant.
+TEST_F(NormalizationTest, ReturnsIdenticalStringWhenNoNormalization) {
+  EXPECT_THAT(NormalizeTextCodepointWise(
+                  "Never gonna let you down.",
+                  NormalizationOptions_::CodepointwiseNormalizationOp_NONE),
+              Eq("Never gonna let you down."));
+}
+
+#if !defined(TC3_UNILIB_DUMMY)
+// DROP_WHITESPACE must remove ASCII spaces, tabs (including consecutive ones)
+// and the non-ASCII whitespace character U+2003.
+TEST_F(NormalizationTest, DropsWhitespace) {
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "Never gonna let you down.",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_WHITESPACE),
+      Eq("Nevergonnaletyoudown."));
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "Never\tgonna\t\tlet\tyou\tdown.",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_WHITESPACE),
+      Eq("Nevergonnaletyoudown."));
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "Never\u2003gonna\u2003let\u2003you\u2003down.",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_WHITESPACE),
+      Eq("Nevergonnaletyoudown."));
+}
+
+// DROP_PUNCTUATION must remove punctuation while keeping letters, digits and
+// whitespace; covered with ASCII text, Greek text and dash-separated digits.
+TEST_F(NormalizationTest, DropsPunctuation) {
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "Never gonna let you down.",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_PUNCTUATION),
+      Eq("Never gonna let you down"));
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "αʹ. Σημεῖόν ἐστιν, οὗ μέρος οὐθέν.",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_PUNCTUATION),
+      Eq("αʹ Σημεῖόν ἐστιν οὗ μέρος οὐθέν"));
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "978—3—16—148410—0",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_PUNCTUATION),
+      Eq("9783161484100"));
+}
+
+// LOWERCASE must lowercase non-ASCII (Greek) letters, both on its own and
+// when OR-ed together with DROP_WHITESPACE.
+TEST_F(NormalizationTest, LowercasesUnicodeText) {
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "αʹ. Σημεῖόν ἐστιν, οὗ μέρος οὐθέν.",
+          NormalizationOptions_::CodepointwiseNormalizationOp_LOWERCASE),
+      Eq("αʹ. σημεῖόν ἐστιν, οὗ μέρος οὐθέν."));
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "αʹ. Σημεῖόν ἐστιν, οὗ μέρος οὐθέν.",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_WHITESPACE |
+              NormalizationOptions_::CodepointwiseNormalizationOp_LOWERCASE),
+      Eq("αʹ.σημεῖόνἐστιν,οὗμέροςοὐθέν."));
+}
+
+// UPPERCASE must uppercase non-ASCII (Greek) letters, both on its own and
+// when OR-ed together with DROP_WHITESPACE.
+TEST_F(NormalizationTest, UppercasesUnicodeText) {
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "Κανένας άνθρωπος δεν ξέρει",
+          NormalizationOptions_::CodepointwiseNormalizationOp_UPPERCASE),
+      Eq("ΚΑΝΈΝΑΣ ΆΝΘΡΩΠΟΣ ΔΕΝ ΞΈΡΕΙ"));
+  EXPECT_THAT(
+      NormalizeTextCodepointWise(
+          "Κανένας άνθρωπος δεν ξέρει",
+          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_WHITESPACE |
+              NormalizationOptions_::CodepointwiseNormalizationOp_UPPERCASE),
+      Eq("ΚΑΝΈΝΑΣΆΝΘΡΩΠΟΣΔΕΝΞΈΡΕΙ"));
+}
+#endif  // !defined(TC3_UNILIB_DUMMY)
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/regex-match_test.cc b/native/utils/regex-match_test.cc
new file mode 100644
index 0000000..c45fb29
--- /dev/null
+++ b/native/utils/regex-match_test.cc

@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/regex-match.h"
+
+#include <memory>
+
+#include "utils/utf8/unicodetext.h"
+#include "utils/utf8/unilib.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Test fixture that owns the UniLib instance (set up through the
+// INIT_UNILIB_FOR_TESTING macro) used to create regex patterns in the tests.
+class RegexMatchTest : public testing::Test {
+ protected:
+  RegexMatchTest() : INIT_UNILIB_FOR_TESTING(unilib_) {}
+  UniLib unilib_;
+};
+
+#ifdef TC3_UNILIB_ICU
+#ifndef TC3_DISABLE_LUA
+// A verifier script that unconditionally returns true must be accepted, even
+// with an empty context and no matcher.
+TEST_F(RegexMatchTest, HandlesSimpleVerification) {
+  EXPECT_TRUE(VerifyMatch(/*context=*/"", /*matcher=*/nullptr, "return true;"));
+}
+#endif  // TC3_DISABLE_LUA
+
+#ifndef TC3_DISABLE_LUA
+// Matches a 16-digit number in a message and verifies the match with a Lua
+// script that runs a Luhn checksum over the text of the first capturing group
+// (`match[1].text`).
+TEST_F(RegexMatchTest, HandlesCustomVerification) {
+  UnicodeText pattern = UTF8ToUnicodeText("(\\d{16})",
+                                          /*do_copy=*/true);
+  UnicodeText message = UTF8ToUnicodeText("cc: 4012888888881881",
+                                          /*do_copy=*/true);
+  const std::string verifier = R"(
+function luhn(candidate)
+    local sum = 0
+    local num_digits = string.len(candidate)
+    local parity = num_digits % 2
+    for pos = 1,num_digits do
+      d = tonumber(string.sub(candidate, pos, pos))
+      if pos % 2 ~= parity then
+        d = d * 2
+      end
+      if d > 9 then
+        d = d - 9
+      end
+      sum = sum + d
+    end
+    return (sum % 10) == 0
+end
+return luhn(match[1].text);
+  )";
+  const std::unique_ptr<UniLib::RegexPattern> regex_pattern =
+      unilib_.CreateRegexPattern(pattern);
+  ASSERT_TRUE(regex_pattern != nullptr);
+  const std::unique_ptr<UniLib::RegexMatcher> matcher =
+      regex_pattern->Matcher(message);
+  ASSERT_TRUE(matcher != nullptr);
+  int status = UniLib::RegexMatcher::kNoError;
+  // Advance the matcher to the first match; the verifier reads its groups.
+  ASSERT_TRUE(matcher->Find(&status) &&
+              status == UniLib::RegexMatcher::kNoError);
+
+  EXPECT_TRUE(VerifyMatch(message.ToUTF8String(), matcher.get(), verifier));
+}
+#endif  // TC3_DISABLE_LUA
+
+// Uses a pattern with two alternative capturing groups and two consecutive
+// matches to check GetCapturingGroupText: group 0 is the whole match, the
+// group of the alternative that matched has a value, and the group of the
+// other alternative has none.
+TEST_F(RegexMatchTest, RetrievesMatchGroupTest) {
+  UnicodeText pattern =
+      UTF8ToUnicodeText("never gonna (?:give (you) up|let (you) down)",
+                        /*do_copy=*/true);
+  const std::unique_ptr<UniLib::RegexPattern> regex_pattern =
+      unilib_.CreateRegexPattern(pattern);
+  ASSERT_TRUE(regex_pattern != nullptr);
+  UnicodeText message =
+      UTF8ToUnicodeText("never gonna give you up - never gonna let you down");
+  const std::unique_ptr<UniLib::RegexMatcher> matcher =
+      regex_pattern->Matcher(message);
+  ASSERT_TRUE(matcher != nullptr);
+  int status = UniLib::RegexMatcher::kNoError;
+
+  // First match: the "give (you) up" alternative, so only group 1 is set.
+  ASSERT_TRUE(matcher->Find(&status) &&
+              status == UniLib::RegexMatcher::kNoError);
+  EXPECT_THAT(GetCapturingGroupText(matcher.get(), 0).value(),
+              testing::Eq("never gonna give you up"));
+  EXPECT_THAT(GetCapturingGroupText(matcher.get(), 1).value(),
+              testing::Eq("you"));
+  EXPECT_FALSE(GetCapturingGroupText(matcher.get(), 2).has_value());
+
+  // Second match: the "let (you) down" alternative, so only group 2 is set.
+  ASSERT_TRUE(matcher->Find(&status) &&
+              status == UniLib::RegexMatcher::kNoError);
+  EXPECT_THAT(GetCapturingGroupText(matcher.get(), 0).value(),
+              testing::Eq("never gonna let you down"));
+  EXPECT_FALSE(GetCapturingGroupText(matcher.get(), 1).has_value());
+  EXPECT_THAT(GetCapturingGroupText(matcher.get(), 2).value(),
+              testing::Eq("you"));
+}
+#endif  // TC3_UNILIB_ICU
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/resources_test.cc b/native/utils/resources_test.cc
new file mode 100644
index 0000000..c385f39
--- /dev/null
+++ b/native/utils/resources_test.cc

@@ -0,0 +1,287 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/resources.h"
+#include "utils/i18n/locale.h"
+#include "utils/resources_generated.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Parameterized fixture for Resources lookups. The parameter tuple is
+// (compress, build_dictionary): whether the pool is passed through
+// CompressResources before serialization, and the value forwarded as its
+// build_compression_dictionary argument.
+class ResourcesTest
+    : public testing::TestWithParam<testing::tuple<bool, bool>> {
+ protected:
+  ResourcesTest() {}
+
+  // Builds a serialized ResourcePool holding a single entry named "A" with
+  // per-locale contents, and returns the flatbuffer bytes as a std::string.
+  //
+  // The integers pushed into each resource's `locale` list are indices into
+  // `test_resources.locale`:
+  //   0: en-US   1: en-GB   2: de-DE   3: fr-FR   4: pt-PT   5: pt
+  //   6: zh-Hans-CN   7: zh   8: fr-CA
+  //   9: default (empty tag; present only if `add_default_language` is true)
+  std::string BuildTestResources(bool add_default_language = true) const {
+    ResourcePoolT test_resources;
+
+    // Test locales.
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "en";
+    test_resources.locale.back()->region = "US";
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "en";
+    test_resources.locale.back()->region = "GB";
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "de";
+    test_resources.locale.back()->region = "DE";
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "fr";
+    test_resources.locale.back()->region = "FR";
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "pt";
+    test_resources.locale.back()->region = "PT";
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "pt";
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "zh";
+    test_resources.locale.back()->script = "Hans";
+    test_resources.locale.back()->region = "CN";
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "zh";
+    // fr-CA: language and region are separate fields, as for the tags above.
+    // Writing "fr-CA" into `language` would overwrite "fr" and leave the
+    // region unset.
+    test_resources.locale.emplace_back(new LanguageTagT);
+    test_resources.locale.back()->language = "fr";
+    test_resources.locale.back()->region = "CA";
+    if (add_default_language) {
+      test_resources.locale.emplace_back(new LanguageTagT);  // default
+    }
+
+    // Test entries.
+    test_resources.resource_entry.emplace_back(new ResourceEntryT);
+    test_resources.resource_entry.back()->name = /*resource_name=*/"A";
+
+    // en-US, default
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content = "localize";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(0);
+    if (add_default_language) {
+      // Index 9 exists only when the default tag was appended above.
+      test_resources.resource_entry.back()->resource.back()->locale.push_back(
+          9);
+    }
+
+    // en-GB
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content = "localise";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(1);
+
+    // de-DE
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content =
+        "lokalisieren";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(2);
+
+    // fr-FR, fr-CA
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content =
+        "localiser";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(3);
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(8);
+
+    // pt-PT
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content =
+        "localizar";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(4);
+
+    // pt
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content =
+        "concentrar";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(5);
+
+    // zh-Hans-CN
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content = "龙";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(6);
+
+    // zh
+    test_resources.resource_entry.back()->resource.emplace_back(new ResourceT);
+    test_resources.resource_entry.back()->resource.back()->content = "龍";
+    test_resources.resource_entry.back()->resource.back()->locale.push_back(7);
+
+    // Optionally compress the pool in place before packing it.
+    if (compress()) {
+      EXPECT_TRUE(CompressResources(
+          &test_resources,
+          /*build_compression_dictionary=*/build_dictionary()));
+    }
+
+    flatbuffers::FlatBufferBuilder builder;
+    builder.Finish(ResourcePool::Pack(builder, &test_resources));
+
+    // Copy the bytes out; `builder` is destroyed when this function returns.
+    return std::string(
+        reinterpret_cast<const char*>(builder.GetBufferPointer()),
+        builder.GetSize());
+  }
+
+  // First test parameter: whether BuildTestResources compresses the pool.
+  bool compress() const { return testing::get<0>(GetParam()); }
+
+  // Second test parameter: forwarded as build_compression_dictionary.
+  bool build_dictionary() const { return testing::get<1>(GetParam()); }
+};
+
+INSTANTIATE_TEST_SUITE_P(Compression, ResourcesTest,  // (compress, dictionary)
+                         testing::Combine(testing::Bool(), testing::Bool()));
+
+TEST_P(ResourcesTest, CorrectlyHandlesExactMatch) {  // Uses the fixture's tags.
+  std::string test_resources = BuildTestResources();
+  Resources resources(
+      flatbuffers::GetRoot<ResourcePool>(test_resources.data()));
+  std::string content;
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("en-US")},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("localize", content);
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("en-GB")},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("localise", content);
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("pt-PT")},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("localizar", content);
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("zh-Hans-CN")},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("龙", content);
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("zh")},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("龍", content);
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("fr-CA")},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("localiser", content);
+}
+
+TEST_P(ResourcesTest, CorrectlyHandlesTie) {  // en-CA vs. en-US and en-GB.
+  std::string test_resources = BuildTestResources();
+  Resources resources(
+      flatbuffers::GetRoot<ResourcePool>(test_resources.data()));
+  // Uses first best match in case of a tie.
+  std::string content;
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("en-CA")},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("localize", content);  // The en-US content.
+}
+
+TEST_P(ResourcesTest, RequiresLanguageMatch) {
+  {  // No default locale: a language absent from the pool yields no content.
+    std::string test_resources =
+        BuildTestResources(/*add_default_language=*/false);
+    Resources resources(
+        flatbuffers::GetRoot<ResourcePool>(test_resources.data()));
+    EXPECT_FALSE(resources.GetResourceContent({Locale::FromBCP47("es-US")},
+                                              /*resource_name=*/"A",
+                                              /*result=*/nullptr));
+  }
+  {  // With a default locale: the same request gets the default content.
+    std::string test_resources =
+        BuildTestResources(/*add_default_language=*/true);
+    Resources resources(
+        flatbuffers::GetRoot<ResourcePool>(test_resources.data()));
+    std::string content;
+    EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("es-US")},
+                                             /*resource_name=*/"A",
+                                             /*result=*/&content));
+    EXPECT_EQ("localize", content);
+  }
+}
+
+TEST_P(ResourcesTest, HandlesFallback) {  // Requests for unlisted variants.
+  std::string test_resources = BuildTestResources();
+  Resources resources(
+      flatbuffers::GetRoot<ResourcePool>(test_resources.data()));
+  std::string content;
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("fr-CH")},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("localiser", content);
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("zh-Hans")},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("龙", content);
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("zh-Hans-ZZ")},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("龙", content);
+
+  // Fallback to default, en-US.
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("ru")},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("localize", content);
+}
+
+TEST_P(ResourcesTest, HandlesFallbackMultipleLocales) {  // Ordered requests.
+  std::string test_resources = BuildTestResources();
+  Resources resources(
+      flatbuffers::GetRoot<ResourcePool>(test_resources.data()));
+  std::string content;
+
+  // Still use inexact match with primary locale if language matches,
+  // even though secondary locale would match exactly.
+  EXPECT_TRUE(resources.GetResourceContent(
+      {Locale::FromBCP47("fr-CH"), Locale::FromBCP47("en-US")},
+      /*resource_name=*/"A", &content));
+  EXPECT_EQ("localiser", content);
+
+  // Use secondary language instead of default fallback if that is an exact
+  // language match.
+  EXPECT_TRUE(resources.GetResourceContent(
+      {Locale::FromBCP47("ru"), Locale::FromBCP47("de")},
+      /*resource_name=*/"A", &content));
+  EXPECT_EQ("lokalisieren", content);
+
+  // Use tertiary language.
+  EXPECT_TRUE(resources.GetResourceContent(
+      {Locale::FromBCP47("ru"), Locale::FromBCP47("it-IT"),
+       Locale::FromBCP47("de")},
+      /*resource_name=*/"A", &content));
+  EXPECT_EQ("lokalisieren", content);
+
+  // Default fallback if no locale matches.
+  EXPECT_TRUE(resources.GetResourceContent(
+      {Locale::FromBCP47("ru"), Locale::FromBCP47("it-IT"),
+       Locale::FromBCP47("es")},
+      /*resource_name=*/"A", &content));
+  EXPECT_EQ("localize", content);
+}
+
+TEST_P(ResourcesTest, PreferGenericCallback) {  // NOTE(review): "Fallback"?
+  std::string test_resources = BuildTestResources();
+  Resources resources(
+      flatbuffers::GetRoot<ResourcePool>(test_resources.data()));
+  std::string content;
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("pt-BR")},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("concentrar", content);  // Falls back to pt, not pt-PT.
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("zh-Hant")},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("龍", content);  // Falls back to zh, not zh-Hans-CN.
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("zh-Hant-CN")},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("龍", content);  // Falls back to zh, not zh-Hans-CN.
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("zh-CN")},
+                                           /*resource_name=*/"A", &content));
+  EXPECT_EQ("龍", content);  // Falls back to zh, not zh-Hans-CN.
+}
+
+TEST_P(ResourcesTest, PreferGenericWhenGeneric) {  // Region-less request.
+  std::string test_resources = BuildTestResources();
+  Resources resources(
+      flatbuffers::GetRoot<ResourcePool>(test_resources.data()));
+  std::string content;
+  EXPECT_TRUE(resources.GetResourceContent({Locale::FromBCP47("pt")},
+                                           /*resource_name=*/"A", &content));
+
+  // Uses pt, not pt-PT.
+  EXPECT_EQ("concentrar", content);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/sentencepiece/encoder_test.cc b/native/utils/sentencepiece/encoder_test.cc
new file mode 100644
index 0000000..740db35
--- /dev/null
+++ b/native/utils/sentencepiece/encoder_test.cc

@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/sentencepiece/encoder.h"
+
+#include <memory>
+#include <vector>
+
+#include "utils/base/integral_types.h"
+#include "utils/container/sorted-strings-table.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using testing::ElementsAre;
+
+TEST(EncoderTest, SimpleTokenization) {  // Scores decide the segmentation.
+  const char pieces_table[] = "hell\0hello\0o\0there\0";
+  const uint32 offsets[] = {0, 5, 11, 13};  // Start of each piece in the table.
+  float scores[] = {-0.5, -1.0, -10.0, -1.0};  // One score per piece.
+  std::unique_ptr<StringSet> pieces(new SortedStringsTable(
+      /*num_pieces=*/4, offsets, StringPiece(pieces_table, 18)));
+  const Encoder encoder(pieces.get(),
+                        /*num_pieces=*/4, scores);
+
+  {
+    std::vector<int> encoded_text;
+    EXPECT_TRUE(encoder.Encode("hellothere", &encoded_text));
+    EXPECT_THAT(encoded_text, ElementsAre(0, 3, 5, 1));
+  }
+
+  // Make probability of hello very low:
+  // hello gets now tokenized as hell + o.
+  scores[1] = -100.0;  // Written after `encoder` was constructed.
+  {
+    std::vector<int> encoded_text;
+    EXPECT_TRUE(encoder.Encode("hellothere", &encoded_text));
+    EXPECT_THAT(encoded_text, ElementsAre(0, 2, 4, 5, 1));
+  }
+}
+
+TEST(EncoderTest, HandlesEdgeCases) {  // No unknown code is configured here.
+  const char pieces_table[] = "hell\0hello\0o\0there\0";
+  const uint32 offsets[] = {0, 5, 11, 13};
+  float scores[] = {-0.5, -1.0, -10.0, -1.0};
+  std::unique_ptr<StringSet> pieces(new SortedStringsTable(
+      /*num_pieces=*/4, offsets, StringPiece(pieces_table, 18)));
+  const Encoder encoder(pieces.get(),
+                        /*num_pieces=*/4, scores);
+  {  // A piece followed by a longer piece that shares its prefix.
+    std::vector<int> encoded_text;
+    EXPECT_TRUE(encoder.Encode("hellhello", &encoded_text));
+    EXPECT_THAT(encoded_text, ElementsAre(0, 2, 3, 1));
+  }
+  {  // The same two pieces in the opposite order.
+    std::vector<int> encoded_text;
+    EXPECT_TRUE(encoder.Encode("hellohell", &encoded_text));
+    EXPECT_THAT(encoded_text, ElementsAre(0, 3, 2, 1));
+  }
+  {  // Empty input: only codes 0 and 1 are expected.
+    std::vector<int> encoded_text;
+    EXPECT_TRUE(encoder.Encode("", &encoded_text));
+    EXPECT_THAT(encoded_text, ElementsAre(0, 1));
+  }
+  {  // "a" is not a piece: Encode succeeds but yields only codes 0 and 1.
+    std::vector<int> encoded_text;
+    EXPECT_TRUE(encoder.Encode("hellathere", &encoded_text));
+    EXPECT_THAT(encoded_text, ElementsAre(0, 1));
+  }
+}
+
+TEST(EncoderTest, HandlesOutOfDictionary) {  // With an explicit unknown code.
+  const char pieces_table[] = "hell\0hello\0o\0there\0";
+  const uint32 offsets[] = {0, 5, 11, 13};
+  float scores[] = {-0.5, -1.0, -10.0, -1.0};
+  std::unique_ptr<StringSet> pieces(new SortedStringsTable(
+      /*num_pieces=*/4, offsets, StringPiece(pieces_table, 18)));
+  const Encoder encoder(pieces.get(),
+                        /*num_pieces=*/4, scores,
+                        /*start_code=*/0, /*end_code=*/1,
+                        /*encoding_offset=*/3, /*unknown_code=*/2,
+                        /*unknown_score=*/-100.0);
+  {
+    std::vector<int> encoded_text;
+    EXPECT_TRUE(encoder.Encode("hellhello", &encoded_text));
+    EXPECT_THAT(encoded_text, ElementsAre(0, 3, 4, 1));
+  }
+  {
+    std::vector<int> encoded_text;
+    EXPECT_TRUE(encoder.Encode("hellohell", &encoded_text));
+    EXPECT_THAT(encoded_text, ElementsAre(0, 4, 3, 1));
+  }
+  {  // Empty input: only the start and end codes are expected.
+    std::vector<int> encoded_text;
+    EXPECT_TRUE(encoder.Encode("", &encoded_text));
+    EXPECT_THAT(encoded_text, ElementsAre(0, 1));
+  }
+  {  // "a" is not a piece and is expected to map to unknown_code.
+    std::vector<int> encoded_text;
+    EXPECT_TRUE(encoder.Encode("hellathere", &encoded_text));
+    EXPECT_THAT(encoded_text,
+                ElementsAre(0, /*hell*/ 3, /*unknown*/ 2, /*there*/ 6, 1));
+  }
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/sentencepiece/test_utils.cc b/native/utils/sentencepiece/test_utils.cc
deleted file mode 100644
index f277a14..0000000
--- a/native/utils/sentencepiece/test_utils.cc
+++ /dev/null

@@ -1,42 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "utils/sentencepiece/test_utils.h"
-
-#include <memory>
-
-#include "utils/base/integral_types.h"
-#include "utils/container/double-array-trie.h"
-#include "utils/strings/stringpiece.h"
-
-namespace libtextclassifier3 {
-
-SentencePieceNormalizer NormalizerFromSpec(StringPiece spec,
-                                           bool add_dummy_prefix,
-                                           bool remove_extra_whitespaces,
-                                           bool escape_whitespaces) {
-  const uint32 trie_blob_size = reinterpret_cast<const uint32*>(spec.data())[0];
-  spec.RemovePrefix(sizeof(trie_blob_size));
-  const TrieNode* trie_blob = reinterpret_cast<const TrieNode*>(spec.data());
-  spec.RemovePrefix(trie_blob_size);
-  const int num_nodes = trie_blob_size / sizeof(TrieNode);
-  return SentencePieceNormalizer(
-      DoubleArrayTrie(trie_blob, num_nodes),
-      /*charsmap_normalized=*/StringPiece(spec.data(), spec.size()),
-      add_dummy_prefix, remove_extra_whitespaces, escape_whitespaces);
-}
-
-}  // namespace libtextclassifier3

diff --git a/native/utils/sentencepiece/test_utils.h b/native/utils/sentencepiece/test_utils.h
deleted file mode 100644
index 0c833da..0000000
--- a/native/utils/sentencepiece/test_utils.h
+++ /dev/null

@@ -1,35 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_TEST_UTILS_H_
-#define LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_TEST_UTILS_H_
-
-#include <string>
-#include <vector>
-
-#include "utils/sentencepiece/normalizer.h"
-#include "utils/strings/stringpiece.h"
-
-namespace libtextclassifier3 {
-
-SentencePieceNormalizer NormalizerFromSpec(StringPiece spec,
-                                           bool add_dummy_prefix,
-                                           bool remove_extra_whitespaces,
-                                           bool escape_whitespaces);
-
-}  // namespace libtextclassifier3
-
-#endif  // LIBTEXTCLASSIFIER_UTILS_SENTENCEPIECE_TEST_UTILS_H_

diff --git a/native/utils/strings/append_test.cc b/native/utils/strings/append_test.cc
new file mode 100644
index 0000000..8950761
--- /dev/null
+++ b/native/utils/strings/append_test.cc

@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/strings/append.h"
+
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace strings {
+
+TEST(StringUtilTest, SStringAppendF) {  // Appends accumulate in `str`.
+  std::string str;
+  SStringAppendF(&str, 5, "%d %d", 0, 1);
+  EXPECT_EQ(str, "0 1");
+
+  SStringAppendF(&str, 1, "%d", 9);
+  EXPECT_EQ(str, "0 19");
+
+  SStringAppendF(&str, 1, "%d", 10);  // NOTE(review): 2nd arg looks like a cap.
+  EXPECT_EQ(str, "0 191");  // Only the first digit of 10 is appended.
+
+  str.clear();
+
+  SStringAppendF(&str, 5, "%d", 100);
+  EXPECT_EQ(str, "100");
+}
+
+TEST(StringUtilTest, SStringAppendFBufCalc) {
+  std::string str;
+  SStringAppendF(&str, 0, "%d %s %d", 1, "hello", 2);  // 0: presumably "auto".
+  EXPECT_EQ(str, "1 hello 2");  // The whole formatted text is appended.
+}
+
+// JoinStrings places the delimiter between elements only, accepts an empty
+// delimiter, and returns an empty string for an empty vector.
+TEST(StringUtilTest, JoinStrings) {
+  std::vector<std::string> vec = {"1", "2", "3"};
+
+  EXPECT_EQ("1,2,3", JoinStrings(",", vec));
+  EXPECT_EQ("123", JoinStrings("", vec));
+  EXPECT_EQ("1, 2, 3", JoinStrings(", ", vec));
+  EXPECT_EQ("", JoinStrings(",", std::vector<std::string>()));
+}
+
+}  // namespace strings
+}  // namespace libtextclassifier3

diff --git a/native/utils/strings/numbers_test.cc b/native/utils/strings/numbers_test.cc
new file mode 100644
index 0000000..bf2f84a
--- /dev/null
+++ b/native/utils/strings/numbers_test.cc

@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/strings/numbers.h"
+
+#include "utils/base/integral_types.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Runs ParseInt32 on `c_str` and checks its result against
+// `expected_parsing_success`. The parsed value is compared with
+// `expected_parsed_value` only when success is expected.
+void TestParseInt32(const char *c_str, bool expected_parsing_success,
+                    int32 expected_parsed_value = 0) {
+  int32 actual = 0;
+  const auto parse_result = ParseInt32(c_str, &actual);
+  EXPECT_EQ(expected_parsing_success, parse_result);
+  if (!expected_parsing_success) {
+    return;  // The output value is not checked for expected failures.
+  }
+  EXPECT_EQ(expected_parsed_value, actual);
+}
+
+TEST(ParseInt32Test, Normal) {  // Signs, leading spaces, leading zeros.
+  TestParseInt32("2", true, 2);
+  TestParseInt32("-357", true, -357);
+  TestParseInt32("7", true, 7);
+  TestParseInt32("+7", true, 7);
+  TestParseInt32("  +7", true, 7);
+  TestParseInt32("-23", true, -23);
+  TestParseInt32("  -23", true, -23);
+  TestParseInt32("04", true, 4);
+  TestParseInt32("07", true, 7);
+  TestParseInt32("08", true, 8);  // A leading zero must not select octal.
+  TestParseInt32("09", true, 9);
+}
+
+TEST(ParseInt32Test, ErrorCases) {  // Inputs expected to fail parsing.
+  TestParseInt32("", false);
+  TestParseInt32("  ", false);
+  TestParseInt32("not-a-number", false);
+  TestParseInt32("123a", false);  // Trailing non-digit characters.
+}
+
+// Runs ParseInt64 on `c_str` and checks its result against
+// `expected_parsing_success`. The parsed value is compared with
+// `expected_parsed_value` only when success is expected.
+void TestParseInt64(const char *c_str, bool expected_parsing_success,
+                    int64 expected_parsed_value = 0) {
+  int64 actual = 0;
+  const auto parse_result = ParseInt64(c_str, &actual);
+  EXPECT_EQ(expected_parsing_success, parse_result);
+  if (!expected_parsing_success) {
+    return;  // The output value is not checked for expected failures.
+  }
+  EXPECT_EQ(expected_parsed_value, actual);
+}
+
+TEST(ParseInt64Test, Normal) {  // Signs, leading spaces, leading zeros.
+  TestParseInt64("2", true, 2);
+  TestParseInt64("-357", true, -357);
+  TestParseInt64("7", true, 7);
+  TestParseInt64("+7", true, 7);
+  TestParseInt64("  +7", true, 7);
+  TestParseInt64("-23", true, -23);
+  TestParseInt64("  -23", true, -23);
+  TestParseInt64("07", true, 7);
+  TestParseInt64("08", true, 8);  // A leading zero must not select octal.
+}
+
+TEST(ParseInt64Test, ErrorCases) {  // Inputs expected to fail parsing.
+  TestParseInt64("", false);
+  TestParseInt64("  ", false);
+  TestParseInt64("not-a-number", false);
+  TestParseInt64("23z", false);  // Trailing non-digit characters.
+}
+
+// Runs ParseDouble on `c_str` and checks its result against
+// `expected_parsing_success`. When success is expected, the parsed value must
+// lie within 0.00001 of `expected_parsed_value`.
+void TestParseDouble(const char *c_str, bool expected_parsing_success,
+                     double expected_parsed_value = 0.0) {
+  double actual = 0.0;
+  const auto parse_result = ParseDouble(c_str, &actual);
+  EXPECT_EQ(expected_parsing_success, parse_result);
+  if (!expected_parsing_success) {
+    return;  // The output value is not checked for expected failures.
+  }
+  EXPECT_NEAR(expected_parsed_value, actual, 0.00001);
+}
+
+TEST(ParseDoubleTest, Normal) {  // Signs, leading spaces, fractions.
+  TestParseDouble("2", true, 2.0);  // No decimal point is also accepted.
+  TestParseDouble("-357.023", true, -357.023);
+  TestParseDouble("7.04", true, 7.04);
+  TestParseDouble("+7.2", true, 7.2);
+  TestParseDouble("  +7.236", true, 7.236);
+  TestParseDouble("-23.4", true, -23.4);
+  TestParseDouble("  -23.4", true, -23.4);
+}
+
+TEST(ParseDoubleTest, ErrorCases) {  // Inputs expected to fail parsing.
+  TestParseDouble("", false);
+  TestParseDouble("  ", false);
+  TestParseDouble("not-a-number", false);
+  TestParseDouble("23.5a", false);  // Trailing non-numeric characters.
+}
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/strings/stringpiece_test.cc b/native/utils/strings/stringpiece_test.cc
new file mode 100644
index 0000000..64808d3
--- /dev/null
+++ b/native/utils/strings/stringpiece_test.cc

@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "utils/strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(StringPieceTest, EndsWith) {  // Suffix checks, incl. empty operands.
+  EXPECT_TRUE(EndsWith("hello there!", "there!"));
+  EXPECT_TRUE(EndsWith("hello there!", "!"));
+  EXPECT_FALSE(EndsWith("hello there!", "there"));
+  EXPECT_FALSE(EndsWith("hello there!", " hello there!"));  // Longer than text.
+  EXPECT_TRUE(EndsWith("hello there!", ""));  // Empty suffix always matches.
+  EXPECT_FALSE(EndsWith("", "hello there!"));
+}
+
+TEST(StringPieceTest, StartsWith) {  // Prefix checks, incl. empty operands.
+  EXPECT_TRUE(StartsWith("hello there!", "hello"));
+  EXPECT_TRUE(StartsWith("hello there!", "hello "));
+  EXPECT_FALSE(StartsWith("hello there!", "there!"));
+  EXPECT_FALSE(StartsWith("hello there!", " hello there! "));  // Longer.
+  EXPECT_TRUE(StartsWith("hello there!", ""));  // Empty prefix always matches.
+  EXPECT_FALSE(StartsWith("", "hello there!"));
+}
+
+TEST(StringPieceTest, ConsumePrefix) {  // Stateful: successes shrink `str`.
+  StringPiece str("hello there!");
+  EXPECT_TRUE(ConsumePrefix(&str, "hello "));
+  EXPECT_EQ(str.ToString(), "there!");
+  EXPECT_TRUE(ConsumePrefix(&str, "there"));
+  EXPECT_EQ(str.ToString(), "!");
+  EXPECT_FALSE(ConsumePrefix(&str, "!!"));  // Prefix longer than the rest.
+  EXPECT_TRUE(ConsumePrefix(&str, ""));
+  EXPECT_TRUE(ConsumePrefix(&str, "!"));
+  EXPECT_EQ(str.ToString(), "");
+  EXPECT_TRUE(ConsumePrefix(&str, ""));  // Empty prefix on empty text.
+  EXPECT_FALSE(ConsumePrefix(&str, "!"));
+}
+
+TEST(StringPieceTest, ConsumeSuffix) {  // Stateful: successes shrink `str`.
+  StringPiece str("hello there!");
+  EXPECT_TRUE(ConsumeSuffix(&str, "!"));
+  EXPECT_EQ(str.ToString(), "hello there");
+  EXPECT_TRUE(ConsumeSuffix(&str, " there"));
+  EXPECT_EQ(str.ToString(), "hello");
+  EXPECT_FALSE(ConsumeSuffix(&str, "!!"));  // Not a suffix of "hello".
+  EXPECT_TRUE(ConsumeSuffix(&str, ""));
+  EXPECT_TRUE(ConsumeSuffix(&str, "hello"));
+  EXPECT_EQ(str.ToString(), "");
+  EXPECT_TRUE(ConsumeSuffix(&str, ""));  // Empty suffix on empty text.
+  EXPECT_FALSE(ConsumeSuffix(&str, "!"));
+}
+
+TEST(StringPieceTest, Find) {  // Single-character needle, optional offset.
+  StringPiece str("<hello there!>");
+  EXPECT_EQ(str.find('<'), 0);
+  EXPECT_EQ(str.find('>'), str.length() - 1);
+  EXPECT_EQ(str.find('?'), StringPiece::npos);
+  EXPECT_EQ(str.find('<', str.length() - 1), StringPiece::npos);  // Past it.
+  EXPECT_EQ(str.find('<', 0), 0);
+  EXPECT_EQ(str.find('>', str.length() - 1), str.length() - 1);
+}
+
+TEST(StringPieceTest, FindStringPiece) {  // String needle, optional offset.
+  StringPiece str("<foo bar baz!>");
+  EXPECT_EQ(str.find("foo"), 1);
+  EXPECT_EQ(str.find("bar"), 5);
+  EXPECT_EQ(str.find("baz"), 9);
+  EXPECT_EQ(str.find("qux"), StringPiece::npos);
+  EXPECT_EQ(str.find("?"), StringPiece::npos);
+  EXPECT_EQ(str.find(">"), str.length() - 1);
+  EXPECT_EQ(str.find("<", str.length() - 1), StringPiece::npos);  // Past it.
+  EXPECT_EQ(str.find("<", 0), 0);
+  EXPECT_EQ(str.find(">", str.length() - 1), str.length() - 1);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/strings/substitute_test.cc b/native/utils/strings/substitute_test.cc
new file mode 100644
index 0000000..94b37ab
--- /dev/null
+++ b/native/utils/strings/substitute_test.cc

@@ -0,0 +1,48 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/strings/substitute.h"
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "utils/strings/stringpiece.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(SubstituteTest, Substitute) {  // $N selects the N-th argument (0-based).
+  EXPECT_EQ("Hello, world!",
+            strings::Substitute("$0, $1!", {"Hello", "world"}));
+
+  // Out of order.
+  EXPECT_EQ("world, Hello!",
+            strings::Substitute("$1, $0!", {"Hello", "world"}));
+  EXPECT_EQ("b, a, c, b",
+            strings::Substitute("$1, $0, $2, $1", {"a", "b", "c"}));
+
+  // Literal $
+  EXPECT_EQ("$", strings::Substitute("$$", {}));
+  EXPECT_EQ("$1", strings::Substitute("$$1", {}));
+
+  const char* null_cstring = nullptr;  // A null C string substitutes as empty.
+  EXPECT_EQ("Text: ''", strings::Substitute("Text: '$0'", {null_cstring}));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/strings/utf8_test.cc b/native/utils/strings/utf8_test.cc
new file mode 100644
index 0000000..28d971b
--- /dev/null
+++ b/native/utils/strings/utf8_test.cc

@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/strings/utf8.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// GetNumBytesForUTF8Char returns the encoded byte length of a character.
+TEST(Utf8Test, ComputesUtf8LengthOfUnicodeCharacters) {
+  // Single-byte characters.
+  EXPECT_EQ(GetNumBytesForUTF8Char("\x00"), 1);
+  EXPECT_EQ(GetNumBytesForUTF8Char("h"), 1);
+  // Three- and four-byte characters.
+  EXPECT_EQ(GetNumBytesForUTF8Char("㍿"), 3);
+  EXPECT_EQ(GetNumBytesForUTF8Char("😋"), 4);
+}
+
+// IsValidUTF8 accepts well-formed input and rejects truncated or malformed
+// byte sequences.
+TEST(Utf8Test, IsValidUTF8) {
+  struct Sample {
+    const char* bytes;
+    int num_bytes;
+    bool well_formed;
+  };
+  const Sample kSamples[] = {
+      {"1234😋hello", 13, true},
+      {"\u304A\u00B0\u106B", 8, true},
+      {"this is a test😋😋😋", 26, true},
+      {"\xf0\x9f\x98\x8b", 4, true},
+      // Too short (string is too short).
+      {"\xf0\x9f", 2, false},
+      // Too long (too many trailing bytes).
+      {"\xf0\x9f\x98\x8b\x8b", 5, false},
+      // Too short (too few trailing bytes).
+      {"\xf0\x9f\x98\x61\x61", 5, false},
+  };
+  for (const Sample& sample : kSamples) {
+    EXPECT_EQ(IsValidUTF8(sample.bytes, sample.num_bytes), sample.well_formed)
+        << "sample #" << (&sample - kSamples);
+  }
+}
+
+// ValidUTF8CharLength returns the byte length of the leading character, or -1
+// when that character is truncated or malformed.
+TEST(Utf8Test, ValidUTF8CharLength) {
+  struct Sample {
+    const char* bytes;
+    int num_bytes;
+    int leading_char_length;
+  };
+  const Sample kSamples[] = {
+      {"1234😋hello", 13, 1},
+      {"\u304A\u00B0\u106B", 8, 3},
+      {"this is a test😋😋😋", 26, 1},
+      {"\xf0\x9f\x98\x8b", 4, 4},
+      // Too short (string is too short).
+      {"\xf0\x9f", 2, -1},
+      // Too long (too many trailing bytes). First character is valid.
+      {"\xf0\x9f\x98\x8b\x8b", 5, 4},
+      // Too short (too few trailing bytes).
+      {"\xf0\x9f\x98\x61\x61", 5, -1},
+  };
+  for (const Sample& sample : kSamples) {
+    EXPECT_EQ(ValidUTF8CharLength(sample.bytes, sample.num_bytes),
+              sample.leading_char_length)
+        << "sample #" << (&sample - kSamples);
+  }
+}
+
+// SafeTruncateLength never cuts a multi-byte character in half.
+TEST(Utf8Test, CorrectlyTruncatesStrings) {
+  // ASCII only: the requested length is already a character boundary.
+  EXPECT_EQ(SafeTruncateLength("FooBar", 3), 3);
+  // Three bytes would split the two-byte "ü", so the result backs off to 2.
+  EXPECT_EQ(SafeTruncateLength("früh", 3), 2);
+  // Two-byte characters throughout: 5 is rounded down to 4.
+  EXPECT_EQ(SafeTruncateLength("مَمِمّمَّمِّ", 5), 4);
+}
+
+// ValidCharToRune decodes a UTF-8 character into its code point.
+TEST(Utf8Test, CorrectlyConvertsFromUtf8) {
+  EXPECT_EQ(ValidCharToRune("a"), 97);
+  EXPECT_EQ(ValidCharToRune("\0"), 0);
+  // U+304A spelled as a universal character name and as raw UTF-8 bytes.
+  EXPECT_EQ(ValidCharToRune("\u304A"), 0x304a);
+  EXPECT_EQ(ValidCharToRune("\xe3\x81\x8a"), 0x304a);
+}
+
+// ValidRuneToChar: only the returned length is checked against the expected
+// encoded size; the bytes written to the buffer are not inspected.
+TEST(Utf8Test, CorrectlyConvertsToUtf8) {
+  char encoded[4];
+  EXPECT_EQ(ValidRuneToChar(97, encoded), 1);
+  EXPECT_EQ(ValidRuneToChar(0, encoded), 1);
+  EXPECT_EQ(ValidRuneToChar(0x304a, encoded), 3);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/tensor-view_test.cc b/native/utils/tensor-view_test.cc
new file mode 100644
index 0000000..9467264
--- /dev/null
+++ b/native/utils/tensor-view_test.cc

@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/tensor-view.h"
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Covers the accessors of a valid TensorView, copy_to() with a sufficiently
+// large and a too small destination, and the Invalid() factory.
+TEST(TensorViewTest, TestSize) {
+  // Six values viewed with shape {3, 1, 2}.
+  std::vector<float> data{0.1, 0.2, 0.3, 0.4, 0.5, 0.6};
+  const TensorView<float> tensor(data.data(), {3, 1, 2});
+  EXPECT_TRUE(tensor.is_valid());
+  EXPECT_EQ(tensor.shape(), (std::vector<int>{3, 1, 2}));
+  // The view refers to the caller's buffer.
+  EXPECT_EQ(tensor.data(), data.data());
+  EXPECT_EQ(tensor.size(), 6);
+  EXPECT_EQ(tensor.dims(), 3);
+  EXPECT_EQ(tensor.dim(0), 3);
+  EXPECT_EQ(tensor.dim(1), 1);
+  EXPECT_EQ(tensor.dim(2), 2);
+
+  // A destination with room for all elements receives a full copy.
+  std::vector<float> output_data(6);
+  EXPECT_TRUE(tensor.copy_to(output_data.data(), output_data.size()));
+  EXPECT_EQ(data, output_data);
+
+  // Should not copy when the output is small.
+  std::vector<float> small_output_data{-1, -1, -1};
+  EXPECT_FALSE(
+      tensor.copy_to(small_output_data.data(), small_output_data.size()));
+  // The output buffer should not be changed.
+  EXPECT_EQ(small_output_data, (std::vector<float>{-1, -1, -1}));
+
+  const TensorView<float> invalid_tensor = TensorView<float>::Invalid();
+  EXPECT_FALSE(invalid_tensor.is_valid());
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/test-utils.cc b/native/utils/test-utils.cc
new file mode 100644
index 0000000..8996a4a
--- /dev/null
+++ b/native/utils/test-utils.cc

@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/test-utils.h"
+
+#include <iterator>
+
+#include "utils/codepoint-range.h"
+#include "utils/strings/utf8.h"
+#include "utils/utf8/unicodetext.h"
+
+namespace libtextclassifier3 {
+
+// Splits `text` into tokens at every ' ' character.
+std::vector<Token> TokenizeOnSpace(const std::string& text) {
+  const std::unordered_set<char32> space_only = {' '};
+  return TokenizeOnDelimiters(text, space_only);
+}
+
+// Splits `text` at every codepoint contained in `delimiters`. Delimiters are
+// dropped and empty pieces (e.g. between adjacent delimiters) are skipped.
+// Each Token carries its UTF-8 text plus its [start, end) codepoint offsets.
+std::vector<Token> TokenizeOnDelimiters(
+    const std::string& text, const std::unordered_set<char32>& delimiters) {
+  const UnicodeText unicode_text = UTF8ToUnicodeText(text, /*do_copy=*/false);
+
+  std::vector<Token> tokens;
+  auto pending_begin = unicode_text.begin();
+  int pending_begin_index = 0;
+  int cursor_index = 0;
+
+  // Appends the pending piece [pending_begin, pending_end) unless it is empty.
+  const auto flush_pending = [&](UnicodeText::const_iterator pending_end) {
+    if (pending_begin != pending_end) {
+      tokens.push_back(
+          Token{UnicodeText::UTF8Substring(pending_begin, pending_end),
+                pending_begin_index, cursor_index});
+    }
+  };
+
+  UnicodeText::const_iterator cursor = unicode_text.begin();
+  while (cursor < unicode_text.end()) {
+    if (delimiters.count(*cursor) > 0) {
+      flush_pending(cursor);
+      // The next piece starts right after this delimiter.
+      pending_begin = cursor;
+      pending_begin++;
+      pending_begin_index = cursor_index + 1;
+    }
+    cursor++;
+    cursor_index++;
+  }
+  // Whatever follows the last delimiter forms the final token.
+  flush_pending(cursor);
+
+  return tokens;
+}
+
+}  // namespace libtextclassifier3

diff --git a/native/utils/test-utils.h b/native/utils/test-utils.h
new file mode 100644
index 0000000..0e75190
--- /dev/null
+++ b/native/utils/test-utils.h

@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Utilities for tests.
+
+#ifndef LIBTEXTCLASSIFIER_UTILS_TEST_UTILS_H_
+#define LIBTEXTCLASSIFIER_UTILS_TEST_UTILS_H_
+
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+#include "annotator/types.h"
+
+namespace libtextclassifier3 {
+
+// Returns a list of Tokens for a given input string, by tokenizing on space.
+// Equivalent to TokenizeOnDelimiters(text, {' '}).
+std::vector<Token> TokenizeOnSpace(const std::string& text);
+
+// Returns a list of Tokens for a given input string, by tokenizing on the
+// given set of delimiter codepoints. Delimiters are not part of any token and
+// empty tokens are never produced. Token start/end are codepoint offsets into
+// `text`, with `end` exclusive.
+std::vector<Token> TokenizeOnDelimiters(
+    const std::string& text, const std::unordered_set<char32>& delimiters);
+
+}  // namespace libtextclassifier3
+
+#endif  // LIBTEXTCLASSIFIER_UTILS_TEST_UTILS_H_

diff --git a/native/utils/test-utils_test.cc b/native/utils/test-utils_test.cc
new file mode 100644
index 0000000..bdaa285
--- /dev/null
+++ b/native/utils/test-utils_test.cc

@@ -0,0 +1,108 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/test-utils.h"
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// TokenizeOnSpace splits on ' ' only; start/end are codepoint offsets, so the
+// multi-byte characters in "Jörg" and "Zürich" each count as one position.
+TEST(TestUtilTest, TokenizeOnSpace) {
+  std::vector<Token> tokens =
+      TokenizeOnSpace("Where is Jörg Borg located? Maybe in Zürich ...");
+
+  // Fatal on mismatch: the checks below index tokens[0..8] unconditionally.
+  ASSERT_EQ(tokens.size(), 9);
+
+  EXPECT_EQ(tokens[0].value, "Where");
+  EXPECT_EQ(tokens[0].start, 0);
+  EXPECT_EQ(tokens[0].end, 5);
+
+  EXPECT_EQ(tokens[1].value, "is");
+  EXPECT_EQ(tokens[1].start, 6);
+  EXPECT_EQ(tokens[1].end, 8);
+
+  EXPECT_EQ(tokens[2].value, "Jörg");
+  EXPECT_EQ(tokens[2].start, 9);
+  EXPECT_EQ(tokens[2].end, 13);
+
+  EXPECT_EQ(tokens[3].value, "Borg");
+  EXPECT_EQ(tokens[3].start, 14);
+  EXPECT_EQ(tokens[3].end, 18);
+
+  EXPECT_EQ(tokens[4].value, "located?");
+  EXPECT_EQ(tokens[4].start, 19);
+  EXPECT_EQ(tokens[4].end, 27);
+
+  EXPECT_EQ(tokens[5].value, "Maybe");
+  EXPECT_EQ(tokens[5].start, 28);
+  EXPECT_EQ(tokens[5].end, 33);
+
+  EXPECT_EQ(tokens[6].value, "in");
+  EXPECT_EQ(tokens[6].start, 34);
+  EXPECT_EQ(tokens[6].end, 36);
+
+  EXPECT_EQ(tokens[7].value, "Zürich");
+  EXPECT_EQ(tokens[7].start, 37);
+  EXPECT_EQ(tokens[7].end, 43);
+
+  EXPECT_EQ(tokens[8].value, "...");
+  EXPECT_EQ(tokens[8].start, 44);
+  EXPECT_EQ(tokens[8].end, 47);
+}
+
+// TokenizeOnDelimiters drops every delimiter codepoint and never emits empty
+// tokens, even for runs of delimiters such as "   " or "?!".
+TEST(TestUtilTest, TokenizeOnDelimiters) {
+  std::vector<Token> tokens = TokenizeOnDelimiters(
+      "This   might be čomplíčateď?!: Oder?", {' ', '?', '!'});
+
+  // Fatal on mismatch: the checks below index tokens[0..5] unconditionally.
+  ASSERT_EQ(tokens.size(), 6);
+
+  EXPECT_EQ(tokens[0].value, "This");
+  EXPECT_EQ(tokens[0].start, 0);
+  EXPECT_EQ(tokens[0].end, 4);
+
+  EXPECT_EQ(tokens[1].value, "might");
+  EXPECT_EQ(tokens[1].start, 7);
+  EXPECT_EQ(tokens[1].end, 12);
+
+  EXPECT_EQ(tokens[2].value, "be");
+  EXPECT_EQ(tokens[2].start, 13);
+  EXPECT_EQ(tokens[2].end, 15);
+
+  EXPECT_EQ(tokens[3].value, "čomplíčateď");
+  EXPECT_EQ(tokens[3].start, 16);
+  EXPECT_EQ(tokens[3].end, 27);
+
+  EXPECT_EQ(tokens[4].value, ":");
+  EXPECT_EQ(tokens[4].start, 29);
+  EXPECT_EQ(tokens[4].end, 30);
+
+  EXPECT_EQ(tokens[5].value, "Oder");
+  EXPECT_EQ(tokens[5].start, 31);
+  EXPECT_EQ(tokens[5].end, 35);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/token-feature-extractor_test.cc b/native/utils/token-feature-extractor_test.cc
new file mode 100644
index 0000000..15a434c
--- /dev/null
+++ b/native/utils/token-feature-extractor_test.cc

@@ -0,0 +1,615 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/token-feature-extractor.h"
+
+#include <string>
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+// Test fixture that owns the UniLib instance passed to each extractor.
+class TokenFeatureExtractorTest : public ::testing::Test {
+ protected:
+  explicit TokenFeatureExtractorTest() : INIT_UNILIB_FOR_TESTING(unilib_) {}
+  UniLib unilib_;
+};
+
+// Makes TokenFeatureExtractor::HashToken and the base-class constructors
+// available to the tests, which use HashToken to compute expected features.
+class TestingTokenFeatureExtractor : public TokenFeatureExtractor {
+ public:
+  using TokenFeatureExtractor::HashToken;
+  using TokenFeatureExtractor::TokenFeatureExtractor;
+};
+
+// Chargram orders {1, 2, 3}, unicode_aware_features off: sparse features are
+// the hashed 1-, 2- and 3-grams; 2- and 3-grams also cover the '^'/'$' markers.
+TEST_F(TokenFeatureExtractorTest, ExtractAscii) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2, 3};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = false;
+  options.extract_selection_mask_feature = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+
+  extractor.Extract(Token{"Hello", 0, 5}, true, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({
+                  // clang-format off
+                  extractor.HashToken("H"),
+                  extractor.HashToken("e"),
+                  extractor.HashToken("l"),
+                  extractor.HashToken("l"),
+                  extractor.HashToken("o"),
+                  extractor.HashToken("^H"),
+                  extractor.HashToken("He"),
+                  extractor.HashToken("el"),
+                  extractor.HashToken("ll"),
+                  extractor.HashToken("lo"),
+                  extractor.HashToken("o$"),
+                  extractor.HashToken("^He"),
+                  extractor.HashToken("Hel"),
+                  extractor.HashToken("ell"),
+                  extractor.HashToken("llo"),
+                  extractor.HashToken("lo$")
+                  // clang-format on
+              }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0, 1.0}));
+
+  sparse_features.clear();
+  dense_features.clear();
+  extractor.Extract(Token{"world!", 23, 29}, false, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({
+                  // clang-format off
+                  extractor.HashToken("w"),
+                  extractor.HashToken("o"),
+                  extractor.HashToken("r"),
+                  extractor.HashToken("l"),
+                  extractor.HashToken("d"),
+                  extractor.HashToken("!"),
+                  extractor.HashToken("^w"),
+                  extractor.HashToken("wo"),
+                  extractor.HashToken("or"),
+                  extractor.HashToken("rl"),
+                  extractor.HashToken("ld"),
+                  extractor.HashToken("d!"),
+                  extractor.HashToken("!$"),
+                  extractor.HashToken("^wo"),
+                  extractor.HashToken("wor"),
+                  extractor.HashToken("orl"),
+                  extractor.HashToken("rld"),
+                  extractor.HashToken("ld!"),
+                  extractor.HashToken("d!$"),
+                  // clang-format on
+              }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, 0.0}));
+}
+
+// With no chargram orders configured, the only sparse feature is the hash of
+// the whole token wrapped in '^' and '$'.
+TEST_F(TokenFeatureExtractorTest, ExtractAsciiNoChargrams) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = false;
+  options.extract_selection_mask_feature = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+
+  extractor.Extract(Token{"Hello", 0, 5}, true, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({extractor.HashToken("^Hello$")}));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0, 1.0}));
+
+  sparse_features.clear();
+  dense_features.clear();
+  extractor.Extract(Token{"world!", 23, 29}, false, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({extractor.HashToken("^world!$")}));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, 0.0}));
+}
+
+// Same as ExtractAscii but with unicode_aware_features on: chargrams are built
+// from whole codepoints, so "ě" and "ó" each count as a single character.
+TEST_F(TokenFeatureExtractorTest, ExtractUnicode) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2, 3};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = true;
+  options.extract_selection_mask_feature = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+
+  extractor.Extract(Token{"Hělló", 0, 5}, true, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({
+                  // clang-format off
+                  extractor.HashToken("H"),
+                  extractor.HashToken("ě"),
+                  extractor.HashToken("l"),
+                  extractor.HashToken("l"),
+                  extractor.HashToken("ó"),
+                  extractor.HashToken("^H"),
+                  extractor.HashToken("Hě"),
+                  extractor.HashToken("ěl"),
+                  extractor.HashToken("ll"),
+                  extractor.HashToken("ló"),
+                  extractor.HashToken("ó$"),
+                  extractor.HashToken("^Hě"),
+                  extractor.HashToken("Hěl"),
+                  extractor.HashToken("ěll"),
+                  extractor.HashToken("lló"),
+                  extractor.HashToken("ló$")
+                  // clang-format on
+              }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0, 1.0}));
+
+  sparse_features.clear();
+  dense_features.clear();
+  extractor.Extract(Token{"world!", 23, 29}, false, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({
+                  // clang-format off
+                  extractor.HashToken("w"),
+                  extractor.HashToken("o"),
+                  extractor.HashToken("r"),
+                  extractor.HashToken("l"),
+                  extractor.HashToken("d"),
+                  extractor.HashToken("!"),
+                  extractor.HashToken("^w"),
+                  extractor.HashToken("wo"),
+                  extractor.HashToken("or"),
+                  extractor.HashToken("rl"),
+                  extractor.HashToken("ld"),
+                  extractor.HashToken("d!"),
+                  extractor.HashToken("!$"),
+                  extractor.HashToken("^wo"),
+                  extractor.HashToken("wor"),
+                  extractor.HashToken("orl"),
+                  extractor.HashToken("rld"),
+                  extractor.HashToken("ld!"),
+                  extractor.HashToken("d!$"),
+                  // clang-format on
+              }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, -1.0}));
+}
+
+// Unicode-aware counterpart of ExtractAsciiNoChargrams: one sparse feature for
+// the whole '^'/'$'-wrapped token.
+TEST_F(TokenFeatureExtractorTest, ExtractUnicodeNoChargrams) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = true;
+  options.extract_selection_mask_feature = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+
+  extractor.Extract(Token{"Hělló", 0, 5}, true, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({extractor.HashToken("^Hělló$")}));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0, 1.0}));
+
+  sparse_features.clear();
+  dense_features.clear();
+  extractor.Extract(Token{"world!", 23, 29}, false, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray({
+                                   extractor.HashToken("^world!$"),
+                               }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, -1.0}));
+}
+
+#ifdef TC3_TEST_ICU
+// With only the case feature enabled, the capitalized tokens ("Hělló", "Ř")
+// yield 1.0 and the lower-case ones ("world!", "ř") yield -1.0.
+TEST_F(TokenFeatureExtractorTest, ICUCaseFeature) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = true;
+  options.extract_selection_mask_feature = false;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+  extractor.Extract(Token{"Hělló", 0, 5}, true, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0}));
+
+  sparse_features.clear();
+  dense_features.clear();
+  extractor.Extract(Token{"world!", 23, 29}, false, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0}));
+
+  sparse_features.clear();
+  dense_features.clear();
+  extractor.Extract(Token{"Ř", 23, 29}, false, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0}));
+
+  sparse_features.clear();
+  dense_features.clear();
+  extractor.Extract(Token{"ř", 23, 29}, false, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0}));
+}
+#endif
+
+// With remap_digits on, tokens that differ only in their digits hash to the
+// same sparse features; a token with a different digit count does not.
+TEST_F(TokenFeatureExtractorTest, DigitRemapping) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2};
+  options.remap_digits = true;
+  options.unicode_aware_features = false;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+  extractor.Extract(Token{"9:30am", 0, 6}, true, &sparse_features,
+                    &dense_features);
+
+  std::vector<int> sparse_features2;
+  extractor.Extract(Token{"5:32am", 0, 6}, true, &sparse_features2,
+                    &dense_features);
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray(sparse_features2));
+
+  extractor.Extract(Token{"10:32am", 0, 6}, true, &sparse_features2,
+                    &dense_features);
+  EXPECT_THAT(sparse_features,
+              testing::Not(testing::ElementsAreArray(sparse_features2)));
+}
+
+// Same expectations as DigitRemapping, exercised with unicode_aware_features
+// switched on.
+TEST_F(TokenFeatureExtractorTest, DigitRemappingUnicode) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2};
+  options.remap_digits = true;
+  options.unicode_aware_features = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+  extractor.Extract(Token{"9:30am", 0, 6}, true, &sparse_features,
+                    &dense_features);
+
+  std::vector<int> sparse_features2;
+  extractor.Extract(Token{"5:32am", 0, 6}, true, &sparse_features2,
+                    &dense_features);
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray(sparse_features2));
+
+  extractor.Extract(Token{"10:32am", 0, 6}, true, &sparse_features2,
+                    &dense_features);
+  EXPECT_THAT(sparse_features,
+              testing::Not(testing::ElementsAreArray(sparse_features2)));
+}
+
+// With lowercase_tokens on, tokens that differ only in ASCII letter case
+// produce identical sparse features.
+TEST_F(TokenFeatureExtractorTest, LowercaseAscii) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2};
+  options.lowercase_tokens = true;
+  options.unicode_aware_features = false;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+  extractor.Extract(Token{"AABB", 0, 6}, true, &sparse_features,
+                    &dense_features);
+
+  std::vector<int> sparse_features2;
+  extractor.Extract(Token{"aaBB", 0, 6}, true, &sparse_features2,
+                    &dense_features);
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray(sparse_features2));
+
+  extractor.Extract(Token{"aAbB", 0, 6}, true, &sparse_features2,
+                    &dense_features);
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray(sparse_features2));
+}
+
+#ifdef TC3_TEST_ICU
+// With lowercase_tokens and unicode_aware_features on, "ŘŘ" and "řř" produce
+// identical sparse features.
+TEST_F(TokenFeatureExtractorTest, LowercaseUnicode) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2};
+  options.lowercase_tokens = true;
+  options.unicode_aware_features = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+  extractor.Extract(Token{"ŘŘ", 0, 6}, true, &sparse_features, &dense_features);
+
+  std::vector<int> sparse_features2;
+  extractor.Extract(Token{"řř", 0, 6}, true, &sparse_features2,
+                    &dense_features);
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray(sparse_features2));
+}
+#endif
+
+#ifdef TC3_TEST_ICU
+// Each entry in regexp_features adds one dense feature: 1.0 when the token
+// matches the pattern and -1.0 when it does not.
+TEST_F(TokenFeatureExtractorTest, RegexFeatures) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2};
+  options.remap_digits = false;
+  options.unicode_aware_features = false;
+  options.regexp_features.push_back("^[a-z]+$");  // all lower case.
+  options.regexp_features.push_back("^[0-9]+$");  // all digits.
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+  extractor.Extract(Token{"abCde", 0, 6}, true, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, -1.0}));
+
+  dense_features.clear();
+  extractor.Extract(Token{"abcde", 0, 6}, true, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0, -1.0}));
+
+  dense_features.clear();
+  extractor.Extract(Token{"12c45", 0, 6}, true, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, -1.0}));
+
+  dense_features.clear();
+  extractor.Extract(Token{"12345", 0, 6}, true, &sparse_features,
+                    &dense_features);
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, 1.0}));
+}
+#endif
+
+// A 26-codepoint token with chargram order 22: the expected chargrams keep the
+// first and last ten characters and replace the middle with '\1'.
+TEST_F(TokenFeatureExtractorTest, ExtractTooLongWord) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{22};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = true;
+  options.extract_selection_mask_feature = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  // Test that this runs. ASAN should catch problems.
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+  extractor.Extract(Token{"abcdefghijklmnopqřstuvwxyz", 0, 0}, true,
+                    &sparse_features, &dense_features);
+
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({
+                  // clang-format off
+                  extractor.HashToken("^abcdefghij\1qřstuvwxyz"),
+                  extractor.HashToken("abcdefghij\1qřstuvwxyz$"),
+                  // clang-format on
+              }));
+}
+
+// For ASCII-only tokens (including the empty string) the unicode-aware and
+// the non-unicode-aware extractor must produce identical features.
+TEST_F(TokenFeatureExtractorTest, ExtractAsciiUnicodeMatches) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2, 3, 4, 5};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = true;
+  options.extract_selection_mask_feature = true;
+
+  TestingTokenFeatureExtractor extractor_unicode(options, &unilib_);
+
+  options.unicode_aware_features = false;
+  TestingTokenFeatureExtractor extractor_ascii(options, &unilib_);
+
+  for (const std::string& input :
+       {"https://www.abcdefgh.com/in/xxxkkkvayio",
+        "https://www.fjsidofj.om/xx/abadfy/xxxx/?xfjiis=ffffiijiihil",
+        "asdfhasdofjiasdofj#%()*%#*(aisdojfaosdifjiaofjdsiofjdi_fdis3w", "abcd",
+        "x", "Hello", "Hey,", "Hi", ""}) {
+    std::vector<int> sparse_features_unicode;
+    std::vector<float> dense_features_unicode;
+    extractor_unicode.Extract(Token{input, 0, 0}, true,
+                              &sparse_features_unicode,
+                              &dense_features_unicode);
+
+    std::vector<int> sparse_features_ascii;
+    std::vector<float> dense_features_ascii;
+    extractor_ascii.Extract(Token{input, 0, 0}, true, &sparse_features_ascii,
+                            &dense_features_ascii);
+
+    EXPECT_THAT(sparse_features_unicode, sparse_features_ascii) << input;
+    EXPECT_THAT(dense_features_unicode, dense_features_ascii) << input;
+  }
+}
+
+// A default-constructed Token outside the selection yields the single sparse
+// feature HashToken("<PAD>") and dense features {-1.0, 0.0}.
+TEST_F(TokenFeatureExtractorTest, ExtractForPadToken) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = false;
+  options.extract_selection_mask_feature = true;
+
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+
+  extractor.Extract(Token(), false, &sparse_features, &dense_features);
+
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({extractor.HashToken("<PAD>")}));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, 0.0}));
+}
+
+// With allowed_chargrams set, chargrams outside the allow-list are reported as
+// 0 while allowed ones keep their hash; HashToken("<PAD>") is expected to be 1.
+TEST_F(TokenFeatureExtractorTest, ExtractFiltered) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2, 3};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = false;
+  options.extract_selection_mask_feature = true;
+  options.allowed_chargrams.insert("^H");
+  options.allowed_chargrams.insert("ll");
+  options.allowed_chargrams.insert("llo");
+  options.allowed_chargrams.insert("w");
+  options.allowed_chargrams.insert("!");
+  options.allowed_chargrams.insert("\xc4");  // First UTF-8 byte of "ě".
+
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+
+  extractor.Extract(Token{"Hěllo", 0, 5}, true, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features,
+              testing::ElementsAreArray({
+                  // clang-format off
+                  0,
+                  extractor.HashToken("\xc4"),
+                  0,
+                  0,
+                  0,
+                  0,
+                  extractor.HashToken("^H"),
+                  0,
+                  0,
+                  0,
+                  extractor.HashToken("ll"),
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  extractor.HashToken("llo"),
+                  0
+                  // clang-format on
+              }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({1.0, 1.0}));
+
+  sparse_features.clear();
+  dense_features.clear();
+  extractor.Extract(Token{"world!", 23, 29}, false, &sparse_features,
+                    &dense_features);
+
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray({
+                                   // clang-format off
+                  extractor.HashToken("w"),
+                  0,
+                  0,
+                  0,
+                  0,
+                  extractor.HashToken("!"),
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                  0,
+                                   // clang-format on
+                               }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, 0.0}));
+  EXPECT_EQ(extractor.HashToken("<PAD>"), 1);
+}
+
+// A default-constructed Token inside the selection must not crash; it yields
+// HashToken("<PAD>") and dense features {-1.0, 1.0}.
+TEST_F(TokenFeatureExtractorTest, ExtractEmptyToken) {
+  TokenFeatureExtractorOptions options;
+  options.num_buckets = 1000;
+  options.chargram_orders = std::vector<int>{1, 2, 3};
+  options.extract_case_feature = true;
+  options.unicode_aware_features = false;
+  options.extract_selection_mask_feature = true;
+  TestingTokenFeatureExtractor extractor(options, &unilib_);
+
+  std::vector<int> sparse_features;
+  std::vector<float> dense_features;
+
+  // Should not crash.
+  extractor.Extract(Token(), true, &sparse_features, &dense_features);
+
+  EXPECT_THAT(sparse_features, testing::ElementsAreArray({
+                                   // clang-format off
+                  extractor.HashToken("<PAD>"),
+                                   // clang-format on
+                               }));
+  EXPECT_THAT(dense_features, testing::ElementsAreArray({-1.0, 1.0}));
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/tokenizer_test.cc b/native/utils/tokenizer_test.cc
new file mode 100644
index 0000000..f73f8f8
--- /dev/null
+++ b/native/utils/tokenizer_test.cc

@@ -0,0 +1,626 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/tokenizer.h"
+
+#include <vector>
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+using testing::ElementsAreArray;
+
+// Test-only Tokenizer subclass: forwards every constructor argument to the
+// base class and declares FindTokenizationRange in its public section.
+class TestingTokenizer : public Tokenizer {
+ public:
+  using Tokenizer::FindTokenizationRange;
+
+  TestingTokenizer(
+      TokenizationType type, const UniLib* unilib,
+      const std::vector<const TokenizationCodepointRange*>& codepoint_ranges,
+      const std::vector<const CodepointRange*>&
+          internal_tokenizer_codepoint_ranges,
+      bool split_on_script_change, bool icu_preserve_whitespace_tokens,
+      bool preserve_floating_numbers)
+      : Tokenizer(type, unilib, codepoint_ranges,
+                  internal_tokenizer_codepoint_ranges, split_on_script_change,
+                  icu_preserve_whitespace_tokens, preserve_floating_numbers) {}
+};
+
+// Bundles a UniLib, the serialized range configs and a TestingTokenizer so
+// that tests can build a tokenizer directly from object-API config structs.
+class TestingTokenizerProxy {
+ public:
+  // Serializes every config into its own flatbuffer, stores the buffers in
+  // `buffers_`, and constructs the tokenizer from the resulting root pointers.
+  TestingTokenizerProxy(
+      TokenizationType type,
+      const std::vector<TokenizationCodepointRangeT>& codepoint_range_configs,
+      const std::vector<CodepointRangeT>& internal_codepoint_range_configs,
+      bool split_on_script_change, bool icu_preserve_whitespace_tokens,
+      bool preserve_floating_numbers)
+      : INIT_UNILIB_FOR_TESTING(unilib_) {
+    std::vector<const TokenizationCodepointRange*> range_roots;
+    range_roots.reserve(codepoint_range_configs.size());
+    std::vector<const CodepointRange*> internal_range_roots;
+    internal_range_roots.reserve(internal_codepoint_range_configs.size());
+    buffers_.reserve(codepoint_range_configs.size() +
+                     internal_codepoint_range_configs.size());
+
+    for (const TokenizationCodepointRangeT& range_config :
+         codepoint_range_configs) {
+      flatbuffers::FlatBufferBuilder builder;
+      builder.Finish(CreateTokenizationCodepointRange(builder, &range_config));
+      buffers_.push_back(builder.Release());
+      // The root pointer refers to the data owned by the buffer just stored.
+      range_roots.push_back(flatbuffers::GetRoot<TokenizationCodepointRange>(
+          buffers_.back().data()));
+    }
+
+    for (const CodepointRangeT& internal_config :
+         internal_codepoint_range_configs) {
+      flatbuffers::FlatBufferBuilder builder;
+      builder.Finish(CreateCodepointRange(builder, &internal_config));
+      buffers_.push_back(builder.Release());
+      internal_range_roots.push_back(
+          flatbuffers::GetRoot<CodepointRange>(buffers_.back().data()));
+    }
+
+    tokenizer_.reset(new TestingTokenizer(
+        type, &unilib_, range_roots, internal_range_roots,
+        split_on_script_change, icu_preserve_whitespace_tokens,
+        preserve_floating_numbers));
+  }
+
+  // Returns the role of the range found for codepoint `c`, or DEFAULT_ROLE
+  // when FindTokenizationRange returns nullptr.
+  TokenizationCodepointRange_::Role TestFindTokenizationRole(int c) const {
+    const TokenizationCodepointRangeT* const found =
+        tokenizer_->FindTokenizationRange(c);
+    if (found == nullptr) {
+      return TokenizationCodepointRange_::Role_DEFAULT_ROLE;
+    }
+    return found->role;
+  }
+
+  // Forwards to the wrapped tokenizer.
+  std::vector<Token> Tokenize(const std::string& utf8_text) const {
+    return tokenizer_->Tokenize(utf8_text);
+  }
+
+ private:
+  UniLib unilib_;
+  // Serialized configs backing the root pointers passed to the tokenizer.
+  std::vector<flatbuffers::DetachedBuffer> buffers_;
+  std::unique_ptr<TestingTokenizer> tokenizer_;
+};
+
+// Checks that a codepoint maps to the role of the configured range containing
+// it and to DEFAULT_ROLE otherwise. The expectations at 10, 33 and 12345 show
+// that the `end` of a range is exclusive.
+TEST(TokenizerTest, FindTokenizationRange) {
+  const auto kDefault = TokenizationCodepointRange_::Role_DEFAULT_ROLE;
+  const auto kTokenSeparator =
+      TokenizationCodepointRange_::Role_TOKEN_SEPARATOR;
+  const auto kWhitespaceSeparator =
+      TokenizationCodepointRange_::Role_WHITESPACE_SEPARATOR;
+
+  std::vector<TokenizationCodepointRangeT> configs;
+  const auto add_range = [&configs](int start, int end,
+                                    TokenizationCodepointRange_::Role role) {
+    configs.emplace_back();
+    TokenizationCodepointRangeT& range = configs.back();
+    range.start = start;
+    range.end = end;
+    range.role = role;
+  };
+  add_range(0, 10, kTokenSeparator);
+  add_range(32, 33, kWhitespaceSeparator);
+  add_range(1234, 12345, kTokenSeparator);
+
+  TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER, configs,
+                                  {}, /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/false);
+
+  // First range: 0 and 5 are inside, 10 is just past the end.
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(0), kTokenSeparator);
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(5), kTokenSeparator);
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(10), kDefault);
+
+  // Second range: only 32 is inside.
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(31), kDefault);
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(32), kWhitespaceSeparator);
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(33), kDefault);
+
+  // Third range: both edges and their outside neighbours.
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(1233), kDefault);
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(1234), kTokenSeparator);
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(12344), kTokenSeparator);
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(12345), kDefault);
+
+  // A codepoint that lies between the configured ranges.
+  EXPECT_EQ(tokenizer.TestFindTokenizationRole(99), kDefault);
+}
+
+// With the space character as the only configured separator, "Hello world!"
+// is expected to split into two tokens, punctuation staying attached.
+TEST(TokenizerTest, TokenizeOnSpace) {
+  std::vector<TokenizationCodepointRangeT> configs;
+  configs.emplace_back();
+  TokenizationCodepointRangeT& space = configs.back();
+  // Codepoint 32 is the space character.
+  space.start = 32;
+  space.end = 33;
+  space.role = TokenizationCodepointRange_::Role_WHITESPACE_SEPARATOR;
+
+  TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER, configs,
+                                  {},
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/false);
+
+  const std::vector<Token> tokens = tokenizer.Tokenize("Hello world!");
+  EXPECT_THAT(tokens,
+              ElementsAreArray({Token("Hello", 0, 5), Token("world!", 6, 12)}));
+}
+
+// Tokenizes mixed Korean/ASCII text with split_on_script_change enabled. All
+// configured ranges carry script_id 1 and cover codepoints [0, 0x77F]; the
+// Korean characters fall outside every configured range.
+TEST(TokenizerTest, TokenizeOnSpaceAndScriptChange) {
+  std::vector<TokenizationCodepointRangeT> configs;
+  // Appends a range tagged with script_id 1.
+  const auto add_range = [&configs](int start, int end,
+                                    TokenizationCodepointRange_::Role role) {
+    configs.emplace_back();
+    TokenizationCodepointRangeT& range = configs.back();
+    range.start = start;
+    range.end = end;
+    range.role = role;
+    range.script_id = 1;
+  };
+
+  // Latin.
+  add_range(0, 32, TokenizationCodepointRange_::Role_DEFAULT_ROLE);
+  add_range(32, 33, TokenizationCodepointRange_::Role_WHITESPACE_SEPARATOR);
+  add_range(33, 0x77F + 1, TokenizationCodepointRange_::Role_DEFAULT_ROLE);
+
+  TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER, configs,
+                                  {},
+                                  /*split_on_script_change=*/true,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/false);
+
+  const std::vector<Token> expected = {
+      Token("앨라배마", 0, 4), Token("주", 5, 6),
+      Token("전화", 7, 10),    Token("(123)", 10, 15),
+      Token("456-789", 16, 23), Token("웹사이트", 23, 28)};
+  EXPECT_THAT(tokenizer.Tokenize("앨라배마 주 전화(123) 456-789웹사이트"),
+              expected);
+}
+
+// Tokenizes mixed Latin/CJK/Thai/Kana text with a config in which the listed
+// CJK and Thai blocks are TOKEN_SEPARATOR ranges. The expectations below show
+// each codepoint from those ranges coming back as its own token.
+TEST(TokenizerTest, TokenizeComplex) {
+  std::vector<TokenizationCodepointRangeT> configs;
+  // Appends one range; callers pass `end` as <last codepoint of block> + 1.
+  const auto add_range = [&configs](int start, int end,
+                                    TokenizationCodepointRange_::Role role) {
+    configs.emplace_back();
+    TokenizationCodepointRangeT& range = configs.back();
+    range.start = start;
+    range.end = end;
+    range.role = role;
+  };
+  const auto kDefault = TokenizationCodepointRange_::Role_DEFAULT_ROLE;
+  const auto kWhitespace =
+      TokenizationCodepointRange_::Role_WHITESPACE_SEPARATOR;
+  const auto kSeparator = TokenizationCodepointRange_::Role_TOKEN_SEPARATOR;
+
+  // Source: http://www.unicode.org/Public/10.0.0/ucd/Blocks-10.0.0d1.txt
+  // Latin - Cyrillic.
+  //   0000..007F; Basic Latin
+  //   0080..00FF; Latin-1 Supplement
+  //   0100..017F; Latin Extended-A
+  //   0180..024F; Latin Extended-B
+  //   0250..02AF; IPA Extensions
+  //   02B0..02FF; Spacing Modifier Letters
+  //   0300..036F; Combining Diacritical Marks
+  //   0370..03FF; Greek and Coptic
+  //   0400..04FF; Cyrillic
+  //   0500..052F; Cyrillic Supplement
+  //   0530..058F; Armenian
+  //   0590..05FF; Hebrew
+  //   0600..06FF; Arabic
+  //   0700..074F; Syriac
+  //   0750..077F; Arabic Supplement
+  add_range(0, 32, kDefault);
+  add_range(32, 33, kWhitespace);
+  add_range(33, 0x77F + 1, kDefault);
+
+  // CJK
+  // 2E80..2EFF; CJK Radicals Supplement
+  // 3000..303F; CJK Symbols and Punctuation
+  // 3040..309F; Hiragana
+  // 30A0..30FF; Katakana
+  // 3100..312F; Bopomofo
+  // 3130..318F; Hangul Compatibility Jamo
+  // 3190..319F; Kanbun
+  // 31A0..31BF; Bopomofo Extended
+  // 31C0..31EF; CJK Strokes
+  // 31F0..31FF; Katakana Phonetic Extensions
+  // 3200..32FF; Enclosed CJK Letters and Months
+  // 3300..33FF; CJK Compatibility
+  // 3400..4DBF; CJK Unified Ideographs Extension A
+  // 4DC0..4DFF; Yijing Hexagram Symbols
+  // 4E00..9FFF; CJK Unified Ideographs
+  // A000..A48F; Yi Syllables
+  // A490..A4CF; Yi Radicals
+  // A4D0..A4FF; Lisu
+  // A500..A63F; Vai
+  // F900..FAFF; CJK Compatibility Ideographs
+  // FE30..FE4F; CJK Compatibility Forms
+  // 20000..2A6DF; CJK Unified Ideographs Extension B
+  // 2A700..2B73F; CJK Unified Ideographs Extension C
+  // 2B740..2B81F; CJK Unified Ideographs Extension D
+  // 2B820..2CEAF; CJK Unified Ideographs Extension E
+  // 2CEB0..2EBEF; CJK Unified Ideographs Extension F
+  // 2F800..2FA1F; CJK Compatibility Ideographs Supplement
+  add_range(0x2E80, 0x2EFF + 1, kSeparator);
+  add_range(0x3000, 0xA63F + 1, kSeparator);
+  add_range(0xF900, 0xFAFF + 1, kSeparator);
+  add_range(0xFE30, 0xFE4F + 1, kSeparator);
+  add_range(0x20000, 0x2A6DF + 1, kSeparator);
+  add_range(0x2A700, 0x2B73F + 1, kSeparator);
+  add_range(0x2B740, 0x2B81F + 1, kSeparator);
+  add_range(0x2B820, 0x2CEAF + 1, kSeparator);
+  add_range(0x2CEB0, 0x2EBEF + 1, kSeparator);
+  add_range(0x2F800, 0x2FA1F + 1, kSeparator);
+
+  // Thai.
+  // 0E00..0E7F; Thai
+  add_range(0x0E00, 0x0E7F + 1, kSeparator);
+
+  TestingTokenizerProxy tokenizer(TokenizationType_INTERNAL_TOKENIZER, configs,
+                                  {},
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/false);
+
+  // A 30-codepoint CJK string is expected to produce 30 tokens.
+  const std::vector<Token> cjk_tokens = tokenizer.Tokenize(
+      "問少目木輸走猶術権自京門録球変。細開括省用掲情結傍走愛明氷。");
+  EXPECT_EQ(cjk_tokens.size(), 30);
+
+  const std::vector<Token> mixed_tokens =
+      tokenizer.Tokenize("問少目 hello 木輸ยามきゃ");
+  // clang-format off
+  EXPECT_THAT(
+      mixed_tokens,
+      ElementsAreArray({Token("問", 0, 1),
+                        Token("少", 1, 2),
+                        Token("目", 2, 3),
+                        Token("hello", 4, 9),
+                        Token("木", 10, 11),
+                        Token("輸", 11, 12),
+                        Token("ย", 12, 13),
+                        Token("า", 13, 14),
+                        Token("ม", 14, 15),
+                        Token("き", 15, 16),
+                        Token("ゃ", 16, 17)}));
+  // clang-format on
+}
+
+#if defined(TC3_TEST_ICU) || defined(__APPLE__)
+// ICU tokenization with icu_preserve_whitespace_tokens enabled: every space
+// between the Thai words is expected back as its own " " token.
+TEST(TokenizerTest, ICUTokenizeWithWhitespaces) {
+  TestingTokenizerProxy icu_tokenizer(TokenizationType_ICU, {}, {},
+                                      /*split_on_script_change=*/false,
+                                      /*icu_preserve_whitespace_tokens=*/true,
+                                      /*preserve_floating_numbers=*/false);
+  // clang-format off
+  const std::vector<Token> expected = {Token("พระบาท", 0, 6),
+                                       Token(" ", 6, 7),
+                                       Token("สมเด็จ", 7, 13),
+                                       Token(" ", 13, 14),
+                                       Token("พระ", 14, 17),
+                                       Token(" ", 17, 18),
+                                       Token("ปร", 18, 20),
+                                       Token(" ", 20, 21),
+                                       Token("มิ", 21, 23)};
+  // clang-format on
+  const std::vector<Token> tokens =
+      icu_tokenizer.Tokenize("พระบาท สมเด็จ พระ ปร มิ");
+  ASSERT_EQ(tokens, expected);
+}
+
+// ICU tokenization of ASCII text with punctuation: each punctuation mark and
+// each space is expected to come back as a separate token.
+TEST(TokenizerTest, ICUTokenizePunctuation) {
+  TestingTokenizerProxy icu_tokenizer(TokenizationType_ICU, {}, {},
+                                      /*split_on_script_change=*/false,
+                                      /*icu_preserve_whitespace_tokens=*/true,
+                                      /*preserve_floating_numbers=*/false);
+  // clang-format off
+  const std::vector<Token> expected = {Token("The", 0, 3),
+                                       Token(" ", 3, 4),
+                                       Token("interval", 4, 12),
+                                       Token(" ", 12, 13),
+                                       Token("is", 13, 15),
+                                       Token(":", 15, 16),
+                                       Token(" ", 16, 17),
+                                       Token("-", 17, 18),
+                                       Token("(", 18, 19),
+                                       Token("12", 19, 21),
+                                       Token(",", 21, 22),
+                                       Token(" ", 22, 23),
+                                       Token("138", 23, 26),
+                                       Token("*", 26, 27),
+                                       Token(")", 27, 28)};
+  // clang-format on
+  const std::vector<Token> tokens =
+      icu_tokenizer.Tokenize("The interval is: -(12, 138*)");
+  ASSERT_EQ(tokens, expected);
+}
+
+// ICU tokenization of decimal numbers written with three different dot
+// characters: each number is expected to stay in a single token.
+TEST(TokenizerTest, ICUTokenizeWithNumbers) {
+  TestingTokenizerProxy icu_tokenizer(TokenizationType_ICU, {}, {},
+                                      /*split_on_script_change=*/false,
+                                      /*icu_preserve_whitespace_tokens=*/true,
+                                      /*preserve_floating_numbers=*/false);
+  // clang-format off
+  const std::vector<Token> expected = {Token("3.1", 0, 3),
+                                       Token(" ", 3, 4),
+                                       Token("3﹒2", 4, 7),
+                                       Token(" ", 7, 8),
+                                       Token("3．3", 8, 11)};
+  // clang-format on
+  const std::vector<Token> tokens = icu_tokenizer.Tokenize("3.1 3﹒2 3．3");
+  ASSERT_EQ(tokens, expected);
+}
+#endif
+
+#if defined(TC3_TEST_ICU)
+// ICU tokenization of unspaced Thai text: the string is expected to be
+// segmented into five words.
+TEST(TokenizerTest, ICUTokenize) {
+  TestingTokenizerProxy icu_tokenizer(TokenizationType_ICU, {}, {},
+                                      /*split_on_script_change=*/false,
+                                      /*icu_preserve_whitespace_tokens=*/false,
+                                      /*preserve_floating_numbers=*/false);
+  // clang-format off
+  const std::vector<Token> expected = {Token("พระบาท", 0, 6),
+                                       Token("สมเด็จ", 6, 12),
+                                       Token("พระ", 12, 15),
+                                       Token("ปร", 15, 17),
+                                       Token("มิ", 17, 19)};
+  // clang-format on
+  const std::vector<Token> tokens = icu_tokenizer.Tokenize("พระบาทสมเด็จพระปรมิ");
+  ASSERT_EQ(tokens, expected);
+}
+
+// TokenizationType_MIXED with internal-tokenizer ranges covering codepoints
+// [0, 592). Per the expected tokens, text inside those ranges is split on
+// the space only (the URL stays whole), while the Japanese and Chinese runs
+// outside them come back segmented (presumably by ICU -- confirm).
+TEST(TokenizerTest, MixedTokenize) {
+  std::vector<TokenizationCodepointRangeT> configs;
+  configs.emplace_back();
+  TokenizationCodepointRangeT& space = configs.back();
+  space.start = 32;
+  space.end = 33;
+  space.role = TokenizationCodepointRange_::Role_WHITESPACE_SEPARATOR;
+
+  std::vector<CodepointRangeT> internal_configs;
+  const auto add_internal_range = [&internal_configs](int start, int end) {
+    internal_configs.emplace_back();
+    CodepointRangeT& internal_range = internal_configs.back();
+    internal_range.start = start;
+    internal_range.end = end;
+  };
+  add_internal_range(0, 128);
+  add_internal_range(128, 256);
+  add_internal_range(256, 384);
+  add_internal_range(384, 592);
+
+  TestingTokenizerProxy tokenizer(TokenizationType_MIXED, configs,
+                                  internal_configs,
+                                  /*split_on_script_change=*/false,
+                                  /*icu_preserve_whitespace_tokens=*/false,
+                                  /*preserve_floating_numbers=*/false);
+
+  // clang-format off
+  const std::vector<Token> expected = {
+      Token("こんにちは", 0, 5),
+      Token("Japanese-ląnguagę", 5, 22),
+      Token("text", 23, 27),
+      Token("你好", 28, 30),
+      Token("世界", 30, 32),
+      Token("http://www.google.com/", 33, 55)};
+  // clang-format on
+  const std::vector<Token> tokens = tokenizer.Tokenize(
+      "こんにちはJapanese-ląnguagę text 你好世界 http://www.google.com/");
+  ASSERT_EQ(tokens, expected);
+}
+
+// Runs the same input through two internal tokenizers that differ only in
+// split_on_script_change: without it the text stays one token, with it the
+// digits are separated from the surrounding Korean text.
+TEST(TokenizerTest, InternalTokenizeOnScriptChange) {
+  std::vector<TokenizationCodepointRangeT> configs;
+  configs.emplace_back();
+  TokenizationCodepointRangeT& low_range = configs.back();
+  low_range.start = 0;
+  low_range.end = 256;
+  low_range.role = TokenizationCodepointRange_::Role_DEFAULT_ROLE;
+
+  // Case 1: script changes are ignored.
+  {
+    TestingTokenizerProxy no_split(TokenizationType_INTERNAL_TOKENIZER,
+                                   configs, {},
+                                   /*split_on_script_change=*/false,
+                                   /*icu_preserve_whitespace_tokens=*/false,
+                                   /*preserve_floating_numbers=*/false);
+    const std::vector<Token> expected = {Token("앨라배마123웹사이트", 0, 11)};
+    EXPECT_EQ(no_split.Tokenize("앨라배마123웹사이트"), expected);
+  }
+
+  // Case 2: script changes start a new token.
+  {
+    TestingTokenizerProxy with_split(TokenizationType_INTERNAL_TOKENIZER,
+                                     configs, {},
+                                     /*split_on_script_change=*/true,
+                                     /*icu_preserve_whitespace_tokens=*/false,
+                                     /*preserve_floating_numbers=*/false);
+    const std::vector<Token> expected = {
+        Token("앨라배마", 0, 4), Token("123", 4, 7), Token("웹사이트", 7, 11)};
+    EXPECT_EQ(with_split.Tokenize("앨라배마123웹사이트"), expected);
+  }
+}
+#endif
+
+// LETTER_DIGIT tokenization with preserve_floating_numbers enabled: "3.14"
+// and "68.9" are expected as single tokens, while the dots around "18" in
+// ".18." are separate tokens.
+TEST(TokenizerTest, LetterDigitTokenize) {
+  TestingTokenizerProxy letter_digit(TokenizationType_LETTER_DIGIT, {}, {},
+                                     /*split_on_script_change=*/false,
+                                     /*icu_preserve_whitespace_tokens=*/false,
+                                     /*preserve_floating_numbers=*/true);
+  const std::vector<Token> expected = {
+      Token("7", 0, 1),     Token("%", 1, 2),   Token(" ", 2, 3),
+      Token("-", 3, 4),     Token("3.14", 4, 8), Token(" ", 8, 9),
+      Token("68.9", 9, 13), Token("#", 13, 14), Token("?", 14, 15),
+      Token(" ", 15, 16),   Token("7", 16, 17), Token("%", 17, 18),
+      Token(" ", 18, 19),   Token("$", 19, 20), Token("99", 20, 22),
+      Token(" ", 22, 23),   Token(".", 23, 24), Token("18", 24, 26),
+      Token(".", 26, 27)};
+  const std::vector<Token> tokens =
+      letter_digit.Tokenize("7% -3.14 68.9#? 7% $99 .18.");
+  ASSERT_EQ(tokens, expected);
+}
+
+// LETTER_DIGIT tokenization of non-ASCII input: full-width digits, accented
+// Latin letters and Katakana are expected to group into letter/digit tokens.
+TEST(TokenizerTest, LetterDigitTokenizeUnicode) {
+  TestingTokenizerProxy letter_digit(TokenizationType_LETTER_DIGIT, {}, {},
+                                     /*split_on_script_change=*/false,
+                                     /*icu_preserve_whitespace_tokens=*/false,
+                                     /*preserve_floating_numbers=*/true);
+  const std::vector<Token> expected = {
+      Token("２", 0, 1),  Token(" ", 1, 2),   Token("pércént", 2, 9),
+      Token(" ", 9, 10), Token("３", 10, 11), Token("パーセント", 11, 16)};
+  const std::vector<Token> tokens =
+      letter_digit.Tokenize("２ pércént ３パーセント");
+  ASSERT_EQ(tokens, expected);
+}
+
+// With preserve_floating_numbers enabled, numbers written with the non-ASCII
+// dot characters used below are also expected to stay in one token.
+TEST(TokenizerTest, LetterDigitTokenizeWithDots) {
+  TestingTokenizerProxy letter_digit(TokenizationType_LETTER_DIGIT, {}, {},
+                                     /*split_on_script_change=*/false,
+                                     /*icu_preserve_whitespace_tokens=*/false,
+                                     /*preserve_floating_numbers=*/true);
+  const std::vector<Token> expected = {
+      Token("3", 0, 1), Token(" ", 1, 2),   Token("3﹒2", 2, 5),
+      Token(" ", 5, 6), Token("3．3", 6, 9), Token("%", 9, 10)};
+  const std::vector<Token> tokens = letter_digit.Tokenize("3 3﹒2 3．3%");
+  ASSERT_EQ(tokens, expected);
+}
+
+// With preserve_floating_numbers disabled, every dot (and the apostrophe) is
+// expected as its own token, so "3.2" becomes three tokens.
+TEST(TokenizerTest, LetterDigitTokenizeDoNotPreserveFloatingNumbers) {
+  TestingTokenizerProxy letter_digit(TokenizationType_LETTER_DIGIT, {}, {},
+                                     /*split_on_script_change=*/false,
+                                     /*icu_preserve_whitespace_tokens=*/false,
+                                     /*preserve_floating_numbers=*/false);
+  const std::vector<Token> expected = {
+      Token("15", 0, 2),        Token(".", 2, 3),     Token("12", 3, 5),
+      Token(".", 5, 6),         Token("2019", 6, 10), Token(" ", 10, 11),
+      Token("january", 11, 18), Token("'", 18, 19),   Token("s", 19, 20),
+      Token(" ", 20, 21),       Token("3", 21, 22),   Token(".", 22, 23),
+      Token("2", 23, 24)};
+  const std::vector<Token> tokens =
+      letter_digit.Tokenize("15.12.2019 january's 3.2");
+  ASSERT_EQ(tokens, expected);
+}
+
+// Input with '+' signs next to letters and digits: each '+' is expected as a
+// separate token, including the two consecutive ones.
+TEST(TokenizerTest, LetterDigitTokenizeStrangeStringFloatingNumbers) {
+  TestingTokenizerProxy letter_digit(TokenizationType_LETTER_DIGIT, {}, {},
+                                     /*split_on_script_change=*/false,
+                                     /*icu_preserve_whitespace_tokens=*/false,
+                                     /*preserve_floating_numbers=*/false);
+  const std::vector<Token> expected = {
+      Token("The", 0, 3),  Token("+", 3, 4),     Token("2345", 4, 8),
+      Token("+", 8, 9),    Token("+", 9, 10),    Token("the", 10, 13),
+      Token(" ", 13, 14),  Token("+", 14, 15),   Token("íí", 15, 17),
+      Token("+", 17, 18)};
+  const std::vector<Token> tokens = letter_digit.Tokenize("The+2345++the +íí+");
+  ASSERT_EQ(tokens, expected);
+}
+
+// Runs of consecutive spaces are expected to be merged into a single token
+// whose length matches the run (" ", "  ", "   ").
+TEST(TokenizerTest, LetterDigitTokenizeWhitespcesInSameToken) {
+  TestingTokenizerProxy letter_digit(TokenizationType_LETTER_DIGIT, {}, {},
+                                     /*split_on_script_change=*/false,
+                                     /*icu_preserve_whitespace_tokens=*/false,
+                                     /*preserve_floating_numbers=*/false);
+  const std::vector<Token> expected = {
+      Token("2", 0, 1),  Token(" ", 1, 2), Token("3", 2, 3),
+      Token("  ", 3, 5), Token("4", 5, 6), Token("   ", 6, 9),
+      Token("5", 9, 10)};
+  const std::vector<Token> tokens = letter_digit.Tokenize("2 3  4   5");
+  ASSERT_EQ(tokens, expected);
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/utf8/NSString+Unicode.h b/native/utils/utf8/NSString+Unicode.h
deleted file mode 100644
index 734d58f..0000000
--- a/native/utils/utf8/NSString+Unicode.h
+++ /dev/null

@@ -1,38 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#import <Foundation/Foundation.h>
-
-/// Defines utility methods for operating with Unicode in @c NSString.
-/// @discussion Unicode has 1,114,112 code points ( http://en.wikipedia.org/wiki/Code_point ),
-///             and multiple encodings that map these code points into code units.
-///             @c NSString API exposes the string as if it were encoded in UTF-16, which makes use
-///             of surrogate pairs ( http://en.wikipedia.org/wiki/UTF-16 ).
-///             The methods in this category translate indices between Unicode codepoints and
-///             UTF-16 unichars.
-@interface NSString (Unicode)
-
-/// Returns the number of Unicode codepoints for a string slice.
-/// @param start The NSString start index.
-/// @param length The number of unichar units.
-/// @return The number of Unicode code points in the specified unichar range.
-- (NSUInteger)tc_countChar32:(NSUInteger)start withLength:(NSUInteger)length;
-
-/// Returns the length of the string in terms of Unicode codepoints.
-/// @return The number of Unicode codepoints in this string.
-- (NSUInteger)tc_codepointLength;
-
-@end

diff --git a/native/utils/utf8/unicodetext_test.cc b/native/utils/utf8/unicodetext_test.cc
new file mode 100644
index 0000000..4e8883b
--- /dev/null
+++ b/native/utils/utf8/unicodetext_test.cc

@@ -0,0 +1,228 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/utf8/unicodetext.h"
+
+#include "utils/strings/stringpiece.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+class UnicodeTextTest : public testing::Test {  // Shared fixture.
+ protected:
+  UnicodeTextTest() : empty_text_() {
+    text_.push_back(0x1C0);    // 2 bytes when encoded as UTF-8.
+    text_.push_back(0x4E8C);   // 3 bytes when encoded as UTF-8.
+    text_.push_back(0xD7DB);   // 3 bytes; just below the surrogate range.
+    text_.push_back(0x34);     // ASCII '4', 1 byte.
+    text_.push_back(0x1D11E);  // Above U+FFFF, 4 bytes in UTF-8.
+  }
+
+  UnicodeText empty_text_;  // Left untouched by the constructor body.
+  UnicodeText text_;        // The five codepoints pushed above, in order.
+};
+
+TEST(UnicodeTextTest, ConstructionFromUnicodeText) {  // Copy construction.
+  UnicodeText text = UTF8ToUnicodeText("1234😋hello", /*do_copy=*/false);
+  EXPECT_EQ(UnicodeText(text).ToUTF8String(), "1234😋hello");  // do_copy omitted.
+  EXPECT_EQ(UnicodeText(text, /*do_copy=*/false).ToUTF8String(), "1234😋hello");
+}
+
+// Tests for our modifications of UnicodeText (size accessors, UTF8Substring).
+TEST(UnicodeTextTest, Custom) {
+  UnicodeText text = UTF8ToUnicodeText("1234😋hello", /*do_copy=*/false);
+  EXPECT_EQ(text.ToUTF8String(), "1234😋hello");
+  EXPECT_EQ(text.size_codepoints(), 10);  // 4 digits + 1 emoji + 5 letters.
+  EXPECT_EQ(text.size_bytes(), 13);  // 9 ASCII bytes + 4 bytes for the emoji.
+
+  auto it_begin = text.begin();
+  std::advance(it_begin, 4);  // Skip "1234"; now at the emoji.
+  auto it_end = text.begin();
+  std::advance(it_end, 6);  // Now at 'e', i.e. one past 'h'.
+  EXPECT_EQ(text.UTF8Substring(it_begin, it_end), "😋h");
+}
+
+TEST(UnicodeTextTest, StringPieceView) {  // Same checks, StringPiece input.
+  std::string raw_text = "1234😋hello";  // Declared first, so it outlives text.
+  UnicodeText text =
+      UTF8ToUnicodeText(StringPiece(raw_text), /*do_copy=*/false);
+  EXPECT_EQ(text.ToUTF8String(), "1234😋hello");
+  EXPECT_EQ(text.size_codepoints(), 10);  // 4 digits + 1 emoji + 5 letters.
+  EXPECT_EQ(text.size_bytes(), 13);  // 9 ASCII bytes + 4 bytes for the emoji.
+
+  auto it_begin = text.begin();
+  std::advance(it_begin, 4);  // Skip "1234"; now at the emoji.
+  auto it_end = text.begin();
+  std::advance(it_end, 6);  // Now at 'e', i.e. one past 'h'.
+  EXPECT_EQ(text.UTF8Substring(it_begin, it_end), "😋h");
+}
+
+TEST(UnicodeTextTest, Substring) {  // All four calls must yield "😋h".
+  UnicodeText text = UTF8ToUnicodeText("1234😋hello", /*do_copy=*/false);
+
+  EXPECT_EQ(  // Iterator-pair form with do_copy=true.
+      UnicodeText::Substring(std::next(text.begin(), 4),
+                             std::next(text.begin(), 6), /*do_copy=*/true),
+      UTF8ToUnicodeText("😋h"));
+  EXPECT_EQ(  // Iterator-pair form with do_copy=false.
+      UnicodeText::Substring(std::next(text.begin(), 4),
+                             std::next(text.begin(), 6), /*do_copy=*/false),
+      UTF8ToUnicodeText("😋h"));
+  EXPECT_EQ(UnicodeText::Substring(text, 4, 6, /*do_copy=*/true),  // Index form.
+            UTF8ToUnicodeText("😋h"));
+  EXPECT_EQ(UnicodeText::Substring(text, 4, 6, /*do_copy=*/false),  // Index form.
+            UTF8ToUnicodeText("😋h"));
+}
+
+TEST(UnicodeTextTest, Ownership) {  // Aliasing vs. copying of the buffer.
+  const std::string src = "\u304A\u00B0\u106B";  // Three non-ASCII codepoints.
+
+  UnicodeText alias;
+  alias.PointToUTF8(src.data(), src.size());
+  EXPECT_EQ(alias.data(), src.data());  // alias exposes src's own buffer.
+  UnicodeText::const_iterator it = alias.begin();
+  EXPECT_EQ(*it++, 0x304A);
+  EXPECT_EQ(*it++, 0x00B0);
+  EXPECT_EQ(*it++, 0x106B);
+  EXPECT_EQ(it, alias.end());  // Exactly three codepoints were decoded.
+
+  UnicodeText t = alias;  // Copy initialization copies the data.
+  EXPECT_NE(t.data(), alias.data());  // t must have a different buffer.
+}
+
+TEST(UnicodeTextTest, Validation) {  // is_valid() on good and bad UTF-8.
+  EXPECT_TRUE(UTF8ToUnicodeText("1234😋hello", /*do_copy=*/false).is_valid());
+  EXPECT_TRUE(
+      UTF8ToUnicodeText("\u304A\u00B0\u106B", /*do_copy=*/false).is_valid());
+  EXPECT_TRUE(
+      UTF8ToUnicodeText("this is a test😋😋😋", /*do_copy=*/false).is_valid());
+  EXPECT_TRUE(  // The raw UTF-8 bytes of U+1F60B, a complete 4-byte sequence.
+      UTF8ToUnicodeText("\xf0\x9f\x98\x8b", /*do_copy=*/false).is_valid());
+  // Too short: the string ends after 2 bytes of a 4-byte sequence.
+  EXPECT_FALSE(UTF8ToUnicodeText("\xf0\x9f", /*do_copy=*/false).is_valid());
+  // Too long: an extra continuation byte follows a complete sequence.
+  EXPECT_FALSE(
+      UTF8ToUnicodeText("\xf0\x9f\x98\x8b\x8b", /*do_copy=*/false).is_valid());
+  // Too short: 'a' (0x61) appears where a continuation byte is required.
+  EXPECT_FALSE(
+      UTF8ToUnicodeText("\xf0\x9f\x98\x61\x61", /*do_copy=*/false).is_valid());
+  // The same malformed sequence embedded in otherwise valid ASCII.
+  EXPECT_FALSE(
+      UTF8ToUnicodeText("hello \xf0\x9f\x98\x61\x61 world1", /*do_copy=*/false)
+          .is_valid());
+}
+
+class IteratorTest : public UnicodeTextTest {};  // Iterator tests; same data.
+
+TEST_F(IteratorTest, Iterates) {  // Forward walk over all five codepoints.
+  UnicodeText::const_iterator iter = text_.begin();
+  EXPECT_EQ(0x1C0, *iter);
+  EXPECT_EQ(&iter, &++iter);  // operator++ returns *this.
+  EXPECT_EQ(0x4E8C, *iter++);  // Post-increment yields the previous position.
+  EXPECT_EQ(0xD7DB, *iter);
+  // Make sure you can dereference more than once.
+  EXPECT_EQ(0xD7DB, *iter);
+  EXPECT_EQ(0x34, *++iter);
+  EXPECT_EQ(0x1D11E, *++iter);
+  ASSERT_TRUE(iter != text_.end());  // Still on the last codepoint.
+  iter++;
+  EXPECT_TRUE(iter == text_.end());
+}
+
+TEST_F(IteratorTest, MultiPass) {  // Copies of an iterator advance separately.
+  // Also tests Default Constructible and Assignable.
+  UnicodeText::const_iterator i1, i2;
+  i1 = text_.begin();
+  i2 = i1;
+  EXPECT_EQ(0x4E8C, *++i1);  // Only i1 moves to the second codepoint.
+  EXPECT_TRUE(i1 != i2);
+  EXPECT_EQ(0x1C0, *i2);  // i2 still reads the first codepoint.
+  ++i2;
+  EXPECT_TRUE(i1 == i2);  // Both reached the same position independently.
+  EXPECT_EQ(0x4E8C, *i2);
+}
+
+TEST_F(IteratorTest, ReverseIterates) {  // Backward walk from end() to begin().
+  UnicodeText::const_iterator iter = text_.end();
+  EXPECT_TRUE(iter == text_.end());
+  iter--;  // Step back onto the last codepoint.
+  ASSERT_TRUE(iter != text_.end());
+  EXPECT_EQ(0x1D11E, *iter--);  // Post-decrement yields the previous position.
+  EXPECT_EQ(0x34, *iter);
+  EXPECT_EQ(0xD7DB, *--iter);
+  // Make sure you can dereference more than once.
+  EXPECT_EQ(0xD7DB, *iter);
+  --iter;
+  EXPECT_EQ(0x4E8C, *iter--);
+  EXPECT_EQ(0x1C0, *iter);
+  EXPECT_TRUE(iter == text_.begin());  // All five codepoints were visited.
+}
+
+TEST_F(IteratorTest, Comparable) {  // Relational operators on iterators.
+  UnicodeText::const_iterator i1, i2;
+  i1 = text_.begin();
+  i2 = i1;
+  ++i2;  // i2 is now one codepoint after i1.
+
+  EXPECT_TRUE(i1 < i2);
+  EXPECT_TRUE(text_.begin() <= i1);  // Equal positions satisfy <=.
+  EXPECT_FALSE(i1 >= i2);
+  EXPECT_FALSE(i1 > text_.end());
+}
+
+TEST_F(IteratorTest, Advance) {  // std::advance steps in whole codepoints.
+  UnicodeText::const_iterator iter = text_.begin();
+  EXPECT_EQ(0x1C0, *iter);
+  std::advance(iter, 4);  // From the first codepoint to the fifth (last).
+  EXPECT_EQ(0x1D11E, *iter);
+  ++iter;
+  EXPECT_TRUE(iter == text_.end());
+}
+
+TEST_F(IteratorTest, Distance) {  // std::distance between fixture positions.
+  UnicodeText::const_iterator iter = text_.begin();
+  EXPECT_EQ(0, std::distance(text_.begin(), iter));
+  EXPECT_EQ(5, std::distance(iter, text_.end()));  // One per pushed codepoint.
+  ++iter;
+  ++iter;
+  EXPECT_EQ(2, std::distance(text_.begin(), iter));
+  EXPECT_EQ(3, std::distance(iter, text_.end()));
+  ++iter;
+  ++iter;
+  EXPECT_EQ(4, std::distance(text_.begin(), iter));
+  ++iter;  // Fifth increment: iter should now equal end().
+  EXPECT_EQ(0, std::distance(iter, text_.end()));
+}
+
+class OperatorTest : public UnicodeTextTest {};  // clear()/empty()/== tests.
+
+TEST_F(OperatorTest, Clear) {  // clear() makes text_ compare equal to "".
+  UnicodeText empty_text(UTF8ToUnicodeText("", /*do_copy=*/false));  // Local.
+  EXPECT_FALSE(text_ == empty_text);  // text_ starts with five codepoints.
+  text_.clear();
+  EXPECT_TRUE(text_ == empty_text);
+}
+
+TEST_F(OperatorTest, Empty) {  // empty() before and after clear().
+  EXPECT_TRUE(empty_text_.empty());  // Default-constructed fixture member.
+  EXPECT_FALSE(text_.empty());
+  text_.clear();
+  EXPECT_TRUE(text_.empty());
+}
+
+}  // namespace
+}  // namespace libtextclassifier3

diff --git a/native/utils/variant_test.cc b/native/utils/variant_test.cc
new file mode 100644
index 0000000..cf0acfb
--- /dev/null
+++ b/native/utils/variant_test.cc

@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "utils/variant.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier3 {
+namespace {
+
+TEST(VariantTest, GetType) {  // Each constructed Variant reports its type tag.
+  EXPECT_EQ(Variant().GetType(), Variant::TYPE_EMPTY);  // Default-constructed.
+  EXPECT_EQ(Variant(static_cast<int8_t>(9)).GetType(),
+            Variant::TYPE_INT8_VALUE);
+  EXPECT_EQ(Variant(static_cast<uint8_t>(9)).GetType(),
+            Variant::TYPE_UINT8_VALUE);
+  EXPECT_EQ(Variant(static_cast<int>(9)).GetType(), Variant::TYPE_INT_VALUE);
+  EXPECT_EQ(Variant(static_cast<uint>(9)).GetType(), Variant::TYPE_UINT_VALUE);
+  EXPECT_EQ(Variant(static_cast<int64>(9)).GetType(),
+            Variant::TYPE_INT64_VALUE);
+  EXPECT_EQ(Variant(static_cast<uint64>(9)).GetType(),
+            Variant::TYPE_UINT64_VALUE);
+  EXPECT_EQ(Variant(static_cast<float>(9)).GetType(),
+            Variant::TYPE_FLOAT_VALUE);
+  EXPECT_EQ(Variant(static_cast<double>(9)).GetType(),
+            Variant::TYPE_DOUBLE_VALUE);
+  EXPECT_EQ(Variant(true).GetType(), Variant::TYPE_BOOL_VALUE);
+  EXPECT_EQ(Variant("hello").GetType(), Variant::TYPE_STRING_VALUE);  // Literal.
+}
+
+TEST(VariantTest, HasValue) {  // Only the default-constructed Variant is empty.
+  EXPECT_FALSE(Variant().HasValue());  // The single negative case.
+  EXPECT_TRUE(Variant(static_cast<int8_t>(9)).HasValue());
+  EXPECT_TRUE(Variant(static_cast<uint8_t>(9)).HasValue());
+  EXPECT_TRUE(Variant(static_cast<int>(9)).HasValue());
+  EXPECT_TRUE(Variant(static_cast<uint>(9)).HasValue());
+  EXPECT_TRUE(Variant(static_cast<int64>(9)).HasValue());
+  EXPECT_TRUE(Variant(static_cast<uint64>(9)).HasValue());
+  EXPECT_TRUE(Variant(static_cast<float>(9)).HasValue());
+  EXPECT_TRUE(Variant(static_cast<double>(9)).HasValue());
+  EXPECT_TRUE(Variant(true).HasValue());
+  EXPECT_TRUE(Variant("hello").HasValue());
+}
+
+TEST(VariantTest, Value) {  // Stored values read back unchanged.
+  EXPECT_EQ(Variant(static_cast<int8_t>(9)).Value<int8>(), 9);
+  EXPECT_EQ(Variant(static_cast<uint8_t>(9)).Value<uint8>(), 9);
+  EXPECT_EQ(Variant(static_cast<int>(9)).Value<int>(), 9);
+  EXPECT_EQ(Variant(static_cast<uint>(9)).Value<uint>(), 9);
+  EXPECT_EQ(Variant(static_cast<int64>(9)).Value<int64>(), 9);
+  EXPECT_EQ(Variant(static_cast<uint64>(9)).Value<uint64>(), 9);
+  EXPECT_EQ(Variant(static_cast<float>(9)).Value<float>(), 9);
+  EXPECT_EQ(Variant(static_cast<double>(9)).Value<double>(), 9);
+  EXPECT_EQ(Variant(true).Value<bool>(), true);
+  EXPECT_EQ(Variant("hello").ConstRefValue<std::string>(), "hello");  // By ref.
+}
+
+}  // namespace
+}  // namespace libtextclassifier3
commit	0b2ab9d9f143091dffa6412fd2b257fe5ad0bc95	[log] [tgz]
author	Tony Mak <tonymak@google.com>	Fri May 29 20:39:29 2020 +0000
committer	Automerger Merge Worker <android-build-automerger-merge-worker@system.gserviceaccount.com>	Fri May 29 20:39:29 2020 +0000
tree	1f182d619c80835b42155acf299b3dc3dccf9b9c
parent	c750912d9355f64e91224a0de7d954edefb1434f [diff]
parent	f556113acd1b2ffd9a676c22a02ab3730326e95a [diff]