Fixes crashes by making the native library thread-safe, makes Annotate calls much faster by re-using tokens, and fixes the default values of enums in the FlatBuffer schema.

Test: bit FrameworksCoreTests:android.view.textclassifier.TextClassificationManagerTest
Test: bit CtsViewTestCases:android.view.textclassifier.cts.TextClassificationManagerTest
Bug: 74193987
Bug: 68239358
Change-Id: Ic5ca42b628280bece59d31203748072084ac452c
(cherry picked from commit 2191547d7109587d73077f9d4818c691f7d7dafb)
Merged-In: Ic5ca42b628280bece59d31203748072084ac452c

commit: ba849e7b63cdf4a38e6ef1a5a9ffd60567d7c40b [log] [tgz]
author: Lukas Zilka <zilka@google.com> Thu Mar 08 14:48:21 2018 +0100
committer: Lukas Zilka <zilka@google.com> Tue Mar 13 11:12:47 2018 +0000
tree: c8d542bc273a2afbd4858a4d61eea1daf87175c8
parent: df710db0da01c5f470ead4f7518ba142c4117dae [diff] [blame]
diff --git a/feature-processor_test.cc b/feature-processor_test.cc
index 78977d4..70ef0a7 100644
--- a/feature-processor_test.cc
+++ b/feature-processor_test.cc

@@ -27,6 +27,7 @@
 
 using testing::ElementsAreArray;
 using testing::FloatEq;
+using testing::Matcher;
 
 flatbuffers::DetachedBuffer PackFeatureProcessorOptions(
     const FeatureProcessorOptionsT& options) {
@@ -35,6 +36,19 @@
   return builder.Release();
 }
 
+template <typename T>
+std::vector<T> Subvector(const std::vector<T>& vector, int start, int end) {
+  return std::vector<T>(vector.begin() + start, vector.begin() + end);
+}
+
+Matcher<std::vector<float>> ElementsAreFloat(const std::vector<float>& values) {
+  std::vector<Matcher<float>> matchers;
+  for (const float value : values) {
+    matchers.push_back(FloatEq(value));
+  }
+  return ElementsAreArray(matchers);
+}
+
 class TestingFeatureProcessor : public FeatureProcessor {
  public:
   using FeatureProcessor::CountIgnoredSpanBoundaryCodepoints;
@@ -51,7 +65,7 @@
 class FakeEmbeddingExecutor : public EmbeddingExecutor {
  public:
   bool AddEmbedding(const TensorView<int>& sparse_features, float* dest,
-                    int dest_size) override {
+                    int dest_size) const override {
     TC_CHECK_GE(dest_size, 4);
     EXPECT_EQ(sparse_features.size(), 1);
     dest[0] = sparse_features.data()[0];
@@ -147,7 +161,7 @@
 }
 
 TEST(FeatureProcessorTest, KeepLineWithClickFirst) {
-  CREATE_UNILIB_FOR_TESTING
+  CREATE_UNILIB_FOR_TESTING;
   FeatureProcessorOptionsT options;
   options.only_use_line_with_click = true;
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
@@ -173,7 +187,7 @@
 }
 
 TEST(FeatureProcessorTest, KeepLineWithClickSecond) {
-  CREATE_UNILIB_FOR_TESTING
+  CREATE_UNILIB_FOR_TESTING;
   FeatureProcessorOptionsT options;
   options.only_use_line_with_click = true;
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
@@ -199,7 +213,7 @@
 }
 
 TEST(FeatureProcessorTest, KeepLineWithClickThird) {
-  CREATE_UNILIB_FOR_TESTING
+  CREATE_UNILIB_FOR_TESTING;
   FeatureProcessorOptionsT options;
   options.only_use_line_with_click = true;
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
@@ -225,7 +239,7 @@
 }
 
 TEST(FeatureProcessorTest, KeepLineWithClickSecondWithPipe) {
-  CREATE_UNILIB_FOR_TESTING
+  CREATE_UNILIB_FOR_TESTING;
   FeatureProcessorOptionsT options;
   options.only_use_line_with_click = true;
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
@@ -251,7 +265,7 @@
 }
 
 TEST(FeatureProcessorTest, KeepLineWithCrosslineClick) {
-  CREATE_UNILIB_FOR_TESTING
+  CREATE_UNILIB_FOR_TESTING;
   FeatureProcessorOptionsT options;
   options.only_use_line_with_click = true;
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
@@ -279,7 +293,7 @@
 }
 
 TEST(FeatureProcessorTest, SpanToLabel) {
-  CREATE_UNILIB_FOR_TESTING
+  CREATE_UNILIB_FOR_TESTING;
   FeatureProcessorOptionsT options;
   options.context_size = 1;
   options.max_selection_span = 1;
@@ -354,7 +368,7 @@
 }
 
 TEST(FeatureProcessorTest, SpanToLabelIgnoresPunctuation) {
-  CREATE_UNILIB_FOR_TESTING
+  CREATE_UNILIB_FOR_TESTING;
   FeatureProcessorOptionsT options;
   options.context_size = 1;
   options.max_selection_span = 1;
@@ -542,7 +556,7 @@
   }
 
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
-  CREATE_UNILIB_FOR_TESTING
+  CREATE_UNILIB_FOR_TESTING;
   TestingFeatureProcessor feature_processor(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
       &unilib);
@@ -590,7 +604,7 @@
   EXPECT_TRUE(feature_processor2.ExtractFeatures(
       tokens, /*token_span=*/{0, 3},
       /*selection_span_for_feature=*/{kInvalidIndex, kInvalidIndex},
-      &embedding_executor,
+      &embedding_executor, /*embedding_cache=*/nullptr,
       /*feature_vector_size=*/4, &cached_features));
 
   options.min_supported_codepoint_ratio = 0.2;
@@ -602,7 +616,7 @@
   EXPECT_TRUE(feature_processor3.ExtractFeatures(
       tokens, /*token_span=*/{0, 3},
       /*selection_span_for_feature=*/{kInvalidIndex, kInvalidIndex},
-      &embedding_executor,
+      &embedding_executor, /*embedding_cache=*/nullptr,
       /*feature_vector_size=*/4, &cached_features));
 
   options.min_supported_codepoint_ratio = 0.5;
@@ -614,7 +628,7 @@
   EXPECT_FALSE(feature_processor4.ExtractFeatures(
       tokens, /*token_span=*/{0, 3},
       /*selection_span_for_feature=*/{kInvalidIndex, kInvalidIndex},
-      &embedding_executor,
+      &embedding_executor, /*embedding_cache=*/nullptr,
       /*feature_vector_size=*/4, &cached_features));
 }
 
@@ -628,7 +642,7 @@
   options.extract_selection_mask_feature = true;
 
   flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
-  CREATE_UNILIB_FOR_TESTING
+  CREATE_UNILIB_FOR_TESTING;
   TestingFeatureProcessor feature_processor(
       flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
       &unilib);
@@ -643,7 +657,8 @@
   EXPECT_TRUE(feature_processor.ExtractFeatures(
       tokens, /*token_span=*/{0, 4},
       /*selection_span_for_feature=*/{4, 11}, &embedding_executor,
-      /*feature_vector_size=*/5, &cached_features));
+      /*embedding_cache=*/nullptr, /*feature_vector_size=*/5,
+      &cached_features));
   std::vector<float> features;
   cached_features->AppendClickContextFeaturesForClick(1, &features);
   ASSERT_EQ(features.size(), 25);
@@ -654,6 +669,76 @@
   EXPECT_THAT(features[24], FloatEq(0.0));
 }
 
+TEST(FeatureProcessorTest, EmbeddingCache) {
+  FeatureProcessorOptionsT options;
+  options.context_size = 2;
+  options.max_selection_span = 2;
+  options.snap_label_span_boundaries_to_containing_tokens = false;
+  options.feature_version = 2;
+  options.embedding_size = 4;
+  options.bounds_sensitive_features.reset(
+      new FeatureProcessorOptions_::BoundsSensitiveFeaturesT());
+  options.bounds_sensitive_features->enabled = true;
+  options.bounds_sensitive_features->num_tokens_before = 3;
+  options.bounds_sensitive_features->num_tokens_inside_left = 2;
+  options.bounds_sensitive_features->num_tokens_inside_right = 2;
+  options.bounds_sensitive_features->num_tokens_after = 3;
+
+  flatbuffers::DetachedBuffer options_fb = PackFeatureProcessorOptions(options);
+  CREATE_UNILIB_FOR_TESTING;
+  TestingFeatureProcessor feature_processor(
+      flatbuffers::GetRoot<FeatureProcessorOptions>(options_fb.data()),
+      &unilib);
+
+  std::unique_ptr<CachedFeatures> cached_features;
+
+  FakeEmbeddingExecutor embedding_executor;
+
+  const std::vector<Token> tokens = {
+      Token("aaa", 0, 3),   Token("bbb", 4, 7),   Token("ccc", 8, 11),
+      Token("ddd", 12, 15), Token("eee", 16, 19), Token("fff", 20, 23)};
+
+  // We pre-populate the cache with dummy embeddings, to make sure they are
+  // used when populating the features vector.
+  const std::vector<float> cached_padding_features = {10.0, -10.0, 10.0, -10.0};
+  const std::vector<float> cached_features1 = {1.0, 2.0, 3.0, 4.0};
+  const std::vector<float> cached_features2 = {5.0, 6.0, 7.0, 8.0};
+  FeatureProcessor::EmbeddingCache embedding_cache = {
+      {{kInvalidIndex, kInvalidIndex}, cached_padding_features},
+      {{4, 7}, cached_features1},
+      {{12, 15}, cached_features2},
+  };
+
+  EXPECT_TRUE(feature_processor.ExtractFeatures(
+      tokens, /*token_span=*/{0, 6},
+      /*selection_span_for_feature=*/{kInvalidIndex, kInvalidIndex},
+      &embedding_executor, &embedding_cache, /*feature_vector_size=*/4,
+      &cached_features));
+  std::vector<float> features;
+  cached_features->AppendBoundsSensitiveFeaturesForSpan({2, 4}, &features);
+  ASSERT_EQ(features.size(), 40);
+  // Check that the dummy embeddings were used.
+  EXPECT_THAT(Subvector(features, 0, 4),
+              ElementsAreFloat(cached_padding_features));
+  EXPECT_THAT(Subvector(features, 8, 12), ElementsAreFloat(cached_features1));
+  EXPECT_THAT(Subvector(features, 16, 20), ElementsAreFloat(cached_features2));
+  EXPECT_THAT(Subvector(features, 24, 28), ElementsAreFloat(cached_features2));
+  EXPECT_THAT(Subvector(features, 36, 40),
+              ElementsAreFloat(cached_padding_features));
+  // Check that the real embeddings were cached.
+  EXPECT_EQ(embedding_cache.size(), 7);
+  EXPECT_THAT(Subvector(features, 4, 8),
+              ElementsAreFloat(embedding_cache.at({0, 3})));
+  EXPECT_THAT(Subvector(features, 12, 16),
+              ElementsAreFloat(embedding_cache.at({8, 11})));
+  EXPECT_THAT(Subvector(features, 20, 24),
+              ElementsAreFloat(embedding_cache.at({8, 11})));
+  EXPECT_THAT(Subvector(features, 28, 32),
+              ElementsAreFloat(embedding_cache.at({16, 19})));
+  EXPECT_THAT(Subvector(features, 32, 36),
+              ElementsAreFloat(embedding_cache.at({20, 23})));
+}
+
 TEST(FeatureProcessorTest, StripUnusedTokensWithNoRelativeClick) {
   std::vector<Token> tokens_orig{
       Token("0", 0, 0), Token("1", 0, 0), Token("2", 0, 0),  Token("3", 0, 0),
@@ -767,7 +852,7 @@
 }
 
 TEST(FeatureProcessorTest, InternalTokenizeOnScriptChange) {
-  CREATE_UNILIB_FOR_TESTING
+  CREATE_UNILIB_FOR_TESTING;
   FeatureProcessorOptionsT options;
   options.tokenization_codepoint_config.emplace_back(
       new TokenizationCodepointRangeT());
@@ -907,7 +992,7 @@
 #endif
 
 TEST(FeatureProcessorTest, IgnoredSpanBoundaryCodepoints) {
-  CREATE_UNILIB_FOR_TESTING
+  CREATE_UNILIB_FOR_TESTING;
   FeatureProcessorOptionsT options;
   options.ignored_span_boundary_codepoints.push_back('.');
   options.ignored_span_boundary_codepoints.push_back(',');
commit	ba849e7b63cdf4a38e6ef1a5a9ffd60567d7c40b	[log] [tgz]
author	Lukas Zilka <zilka@google.com>	Thu Mar 08 14:48:21 2018 +0100
committer	Lukas Zilka <zilka@google.com>	Tue Mar 13 11:12:47 2018 +0000
tree	c8d542bc273a2afbd4858a4d61eea1daf87175c8
parent	df710db0da01c5f470ead4f7518ba142c4117dae [diff] [blame]