[smart-select:] import recent changes from google3. (1) Strip some Google-specific links (http://cr/150751377) (2) Improve and add tests for registration mechanism: http://cr/150679054, http://cr/150792974, http://cr/150899682 (3) Extra regression tests: http://cr/150939660 Test: everything builds, no significant change of .so size. Change-Id: I177ba12b8d0cdcd615619f8ecd960cbcb0b26a77

commit: 9087f1f23d476f5ab26cd11a09cfa05b95659ceb [log] [tgz]
author: Alex Salcianu <salcianu@google.com> Wed Mar 22 21:22:39 2017 -0400
committer: Alex Salcianu <salcianu@google.com> Wed Mar 22 21:22:39 2017 -0400
tree: d370abdff951d41f18269d3d2fdcd8a6601c5398
parent: 29fc69fe417ba1fb67a8a6731c5ff894023a089c [diff]
diff --git a/common/memory_image/data-store.proto b/common/memory_image/data-store.proto
index 808d3a6..68e914a 100644
--- a/common/memory_image/data-store.proto
+++ b/common/memory_image/data-store.proto

@@ -19,9 +19,6 @@
 // string, with minimal parsing; after deserialization, all chunks of bytes
 // start at aligned addresses (aligned = multiple of an address specified at
 // build time).
-//
-// Note: underlying implementation uses the memory images from
-// http://g3doc/nlp/saft/components/common/mobile/memory_image/g3doc/index.md
 
 syntax = "proto2";
 option optimize_for = LITE_RUNTIME;

diff --git a/common/memory_image/embedding-network-params-from-image.h b/common/memory_image/embedding-network-params-from-image.h
index 0a410ff..feb4817 100644
--- a/common/memory_image/embedding-network-params-from-image.h
+++ b/common/memory_image/embedding-network-params-from-image.h

@@ -30,8 +30,7 @@
 //
 // In this context, a memory image is like an EmbeddingNetworkProto, but with
 // all repeated weights (>99% of the size) directly usable (with no parsing
-// required).  For general info on memory images, see
-// http://g3doc/nlp/saft/components/common/mobile/memory_image/g3doc/index.md
+// required).
 class EmbeddingNetworkParamsFromImage : public EmbeddingNetworkParams {
  public:
   // Constructs an EmbeddingNetworkParamsFromImage, using the memory image that

diff --git a/common/memory_image/memory-image-common.h b/common/memory_image/memory-image-common.h
index d49cdd3..2e84116 100644
--- a/common/memory_image/memory-image-common.h
+++ b/common/memory_image/memory-image-common.h

@@ -14,8 +14,7 @@
  * limitations under the License.
  */
 
-// Common utils for memory images.  For more info on memory images, see
-// http://g3doc/common/memory_image/g3doc/index.md
+// Common utils for memory images.
 
 #ifndef LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_MEMORY_IMAGE_COMMON_H_
 #define LIBTEXTCLASSIFIER_COMMON_MEMORY_IMAGE_MEMORY_IMAGE_COMMON_H_

diff --git a/common/memory_image/memory-image.proto b/common/memory_image/memory-image.proto
index 9ff12db..f6b624c 100644
--- a/common/memory_image/memory-image.proto
+++ b/common/memory_image/memory-image.proto

@@ -13,9 +13,6 @@
 // limitations under the License.
 
 // Protos for "memory images".
-//
-// For info on memory images, see
-// http://g3doc/nlp/saft/components/common/mobile/memory_image/g3doc/index.md
 
 syntax = "proto2";
 option optimize_for = LITE_RUNTIME;

diff --git a/common/registry.h b/common/registry.h
index 6f77b28..377e5fb 100644
--- a/common/registry.h
+++ b/common/registry.h

@@ -28,9 +28,13 @@
 //  // Abstract function that takes a double and returns a double.
 //  class Function : public RegisterableClass<Function> {
 //   public:
+//    virtual ~Function() {}
 //    virtual double Evaluate(double x) = 0;
 //  };
 //
+//  // Should be inside namespace libtextclassifier::nlp_core.
+//  TC_DECLARE_CLASS_REGISTRY_NAME(Function);
+//
 // Notice the inheritance from RegisterableClass<Function>.  RegisterableClass
 // is defined by this file (registry.h).  Under the hood, this inheritance
 // defines a "registry" that maps names (zero-terminated arrays of chars) to
@@ -39,26 +43,27 @@
 // to be a .cc file, as it defines some static data):
 //
 //  // Inside function.cc
+//  // Should be inside namespace libtextclassifier::nlp_core.
 //  TC_DEFINE_CLASS_REGISTRY_NAME("function", Function);
 //
 // Now, let's define a few concrete Functions: e.g.,
 //
 //   class Cos : public Function {
 //    public:
-//     double Evaluate(double x) { return cos(x); }
+//     double Evaluate(double x) override { return cos(x); }
 //     TC_DEFINE_REGISTRATION_METHOD("cos", Cos);
 //   };
 //
 //   class Exp : public Function {
 //    public:
-//     double Evaluate(double x) { return exp(x); }
+//     double Evaluate(double x) override { return exp(x); }
 //     TC_DEFINE_REGISTRATION_METHOD("exp", Exp);
 //   };
 //
 // Each concrete Function implementation should have (in the public section) the
 // macro
 //
-//   TC_DEFINE_REGISTRATION_METHOD(base_class, "name", implementation_class);
+//   TC_DEFINE_REGISTRATION_METHOD("name", implementation_class);
 //
 // This defines a RegisterClass static method that, when invoked, associates
 // "name" with a factory method that creates instances of implementation_class.
@@ -76,22 +81,25 @@
 // interesting if the Function name is not statically known (i.e.,
 // read from an input proto):
 //
-//   std::unique_ptr<Function> f.reset(Function::Create("cos"));
+//   std::unique_ptr<Function> f(Function::Create("cos"));
 //   double result = f->Evaluate(arg);
 //
 // NOTE: the same binary can use this mechanism for different APIs.  E.g., one
 // can also have (in the binary with Function, Cos, Exp, etc.):
 //
-// class IntFunction : RegisterableClass<IntFunction> {
+// class IntFunction : public RegisterableClass<IntFunction> {
 //  public:
+//   virtual ~IntFunction() {}
 //   virtual int Evaluate(int k) = 0;
 // };
 //
+// TC_DECLARE_CLASS_REGISTRY_NAME(IntFunction);
+//
 // TC_DEFINE_CLASS_REGISTRY_NAME("int function", IntFunction);
 //
 // class Inc : public IntFunction {
 //  public:
-//   int Evaluate(int k) { return k + 1; }
+//   int Evaluate(int k) override { return k + 1; }
 //   TC_DEFINE_REGISTRATION_METHOD("inc", Inc);
 // };
 //
@@ -259,6 +267,12 @@
   }
 
 // Declares the human-readable name of the registry associated with base_class.
+#define TC_DECLARE_CLASS_REGISTRY_NAME(base_class)             \
+  template <>                                                  \
+  const char ::libtextclassifier::nlp_core::RegisterableClass< \
+      base_class>::kRegistryName[]
+
+// Defines the human-readable name of the registry associated with base_class.
 #define TC_DEFINE_CLASS_REGISTRY_NAME(registry_name, base_class) \
   template <>                                                    \
   const char ::libtextclassifier::nlp_core::RegisterableClass<   \

diff --git a/lang_id/light-sentence-features.h b/lang_id/light-sentence-features.h
index 758f23d..a140f65 100644
--- a/lang_id/light-sentence-features.h
+++ b/lang_id/light-sentence-features.h

@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_LANG_ID_LIGH_SENTENCE_FEATURES_H_
-#define LIBTEXTCLASSIFIER_LANG_ID_LIGH_SENTENCE_FEATURES_H_
+#ifndef LIBTEXTCLASSIFIER_LANG_ID_LIGHT_SENTENCE_FEATURES_H_
+#define LIBTEXTCLASSIFIER_LANG_ID_LIGHT_SENTENCE_FEATURES_H_
 
 #include "common/feature-extractor.h"
 #include "lang_id/light-sentence.h"
@@ -31,7 +31,11 @@
 typedef FeatureExtractor<LightSentence> LightSentenceExtractor;
 
 }  // namespace lang_id
+
+// Should be used in namespace libtextclassifier::nlp_core.
+TC_DECLARE_CLASS_REGISTRY_NAME(lang_id::LightSentenceFeature);
+
 }  // namespace nlp_core
 }  // namespace libtextclassifier
 
-#endif  // LIBTEXTCLASSIFIER_LANG_ID_LIGH_SENTENCE_FEATURES_H_
+#endif  // LIBTEXTCLASSIFIER_LANG_ID_LIGHT_SENTENCE_FEATURES_H_

diff --git a/lang_id/light-sentence.h b/lang_id/light-sentence.h
index 98aca3d..e8451be 100644
--- a/lang_id/light-sentence.h
+++ b/lang_id/light-sentence.h

@@ -14,8 +14,8 @@
  * limitations under the License.
  */
 
-#ifndef LIBTEXTCLASSIFIER_LANG_ID_LIGH_SENTENCE_H_
-#define LIBTEXTCLASSIFIER_LANG_ID_LIGH_SENTENCE_H_
+#ifndef LIBTEXTCLASSIFIER_LANG_ID_LIGHT_SENTENCE_H_
+#define LIBTEXTCLASSIFIER_LANG_ID_LIGHT_SENTENCE_H_
 
 #include <string>
 #include <vector>
@@ -63,4 +63,4 @@
 }  // namespace nlp_core
 }  // namespace libtextclassifier
 
-#endif  // LIBTEXTCLASSIFIER_LANG_ID_LIGH_SENTENCE_H_
+#endif  // LIBTEXTCLASSIFIER_LANG_ID_LIGHT_SENTENCE_H_

diff --git a/smartselect/feature-processor.cc b/smartselect/feature-processor.cc
index 9e357bd..90ed241 100644
--- a/smartselect/feature-processor.cc
+++ b/smartselect/feature-processor.cc

@@ -163,6 +163,31 @@
   }
 }
 
+std::vector<Token> FindTokensInSelection(  // Keeps tokens passing any test below.
+    const std::vector<Token>& selectable_tokens,
+    const SelectionWithContext& selection_with_context) {
+  std::vector<Token> tokens_in_selection;
+  for (const Token& token : selectable_tokens) {
+    const bool selection_start_in_token =  // start <= selection_start < end
+        token.start <= selection_with_context.selection_start &&
+        token.end > selection_with_context.selection_start;
+
+    const bool token_contained_in_selection =  // start >= selection_start, end < selection_end
+        token.start >= selection_with_context.selection_start &&
+        token.end < selection_with_context.selection_end;
+
+    const bool selection_end_in_token =  // start < selection_end <= end
+        token.start < selection_with_context.selection_end &&
+        token.end >= selection_with_context.selection_end;
+
+    if (selection_start_in_token || token_contained_in_selection ||
+        selection_end_in_token) {
+      tokens_in_selection.push_back(token);  // Copied, in input order.
+    }
+  }
+  return tokens_in_selection;
+}
+
 }  // namespace internal
 
 const char* const FeatureProcessor::kFeatureTypeName = "chargram_continuous";
@@ -372,36 +397,11 @@
   }
 }
 
-std::vector<Token> FeatureProcessor::FindTokensInSelection(
-    const std::vector<Token>& selectable_tokens,
-    const SelectionWithContext& selection_with_context) const {
-  std::vector<Token> tokens_in_selection;
-  for (const Token& token : selectable_tokens) {
-    const bool selection_start_in_token =
-        token.start <= selection_with_context.selection_start &&
-        token.end > selection_with_context.selection_start;
-
-    const bool token_contained_in_selection =
-        token.start >= selection_with_context.selection_start &&
-        token.end < selection_with_context.selection_end;
-
-    const bool selection_end_in_token =
-        token.start < selection_with_context.selection_end &&
-        token.end >= selection_with_context.selection_end;
-
-    if (selection_start_in_token || token_contained_in_selection ||
-        selection_end_in_token) {
-      tokens_in_selection.push_back(token);
-    }
-  }
-  return tokens_in_selection;
-}
-
 CodepointSpan FeatureProcessor::ClickRandomTokenInSelection(
     const SelectionWithContext& selection_with_context) const {
   const std::vector<Token> tokens = Tokenize(selection_with_context.context);
   const std::vector<Token> tokens_in_selection =
-      FindTokensInSelection(tokens, selection_with_context);
+      internal::FindTokensInSelection(tokens, selection_with_context);
 
   if (!tokens_in_selection.empty()) {
     std::uniform_int_distribution<> selection_token_draw(

diff --git a/smartselect/feature-processor.h b/smartselect/feature-processor.h
index 311be3e..619ea4d 100644
--- a/smartselect/feature-processor.h
+++ b/smartselect/feature-processor.h

@@ -61,6 +61,12 @@
 int CenterTokenFromMiddleOfSelection(
     CodepointSpan span, const std::vector<Token>& selectable_tokens);
 
+// Finds tokens that are part of the selection.
+// NOTE: Will select all tokens that somehow overlap with the selection.
+std::vector<Token> FindTokensInSelection(
+    const std::vector<Token>& selectable_tokens,
+    const SelectionWithContext& selection_with_context);
+
 }  // namespace internal
 
 TokenSpan CodepointSpanToTokenSpan(const std::vector<Token>& selectable_tokens,
@@ -186,12 +192,6 @@
   // Converts a token span to the corresponding label.
   int TokenSpanToLabel(const std::pair<TokenIndex, TokenIndex>& span) const;
 
-  // Finds tokens that are part of the selection.
-  // NOTE: Will select all tokens that somehow overlap with the selection.
-  std::vector<Token> FindTokensInSelection(
-      const std::vector<Token>& selectable_tokens,
-      const SelectionWithContext& selection_with_context) const;
-
   // Finds the center token index in tokens vector, using the method defined
   // in options_.
   int FindCenterToken(CodepointSpan span,

diff --git a/smartselect/types.h b/smartselect/types.h
index d7c1b82..7367ed0 100644
--- a/smartselect/types.h
+++ b/smartselect/types.h

@@ -126,4 +126,4 @@
 
 }  // namespace libtextclassifier
 
-#endif  // LIBTEXTCLASSIFIER_TYPES_H_
+#endif  // LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_

diff --git a/tests/feature-processor_test.cc b/tests/feature-processor_test.cc
index 652db84..27cac6a 100644
--- a/tests/feature-processor_test.cc
+++ b/tests/feature-processor_test.cc

@@ -270,12 +270,6 @@
   EXPECT_EQ(19, features.size());
 }
 
-class TestingFeatureProcessor : public FeatureProcessor {
- public:
-  using FeatureProcessor::FeatureProcessor;
-  using FeatureProcessor::FindTokensInSelection;
-};
-
 TEST(FeatureProcessorTest, FindTokensInSelectionSingleCharacter) {
   FeatureProcessorOptions options;
   options.set_num_buckets(10);
@@ -288,7 +282,7 @@
   config->set_start(32);
   config->set_end(33);
   config->set_role(TokenizationCodepointRange::WHITESPACE_SEPARATOR);
-  TestingFeatureProcessor feature_processor(options);
+  FeatureProcessor feature_processor(options);
 
   SelectionWithContext selection_with_context;
   selection_with_context.context = "1 2 3 c o n t e x t X c o n t e x t 1 2 3";
@@ -297,7 +291,7 @@
   selection_with_context.selection_start = 20;
   selection_with_context.selection_end = 21;
   // clang-format off
-  EXPECT_THAT(feature_processor.FindTokensInSelection(
+  EXPECT_THAT(internal::FindTokensInSelection(
                   feature_processor.Tokenize(selection_with_context.context),
                   selection_with_context),
               ElementsAreArray({Token("X", 20, 21, false)}));
@@ -316,7 +310,7 @@
   config->set_start(32);
   config->set_end(33);
   config->set_role(TokenizationCodepointRange::WHITESPACE_SEPARATOR);
-  TestingFeatureProcessor feature_processor(options);
+  FeatureProcessor feature_processor(options);
 
   SelectionWithContext selection_with_context;
   selection_with_context.context = "I live at 350 Third Street, today.";
@@ -332,7 +326,7 @@
   // Selection: I live at {350 Third Str}eet, today.
   selection_with_context.selection_start = 10;
   selection_with_context.selection_end = 23;
-  EXPECT_THAT(feature_processor.FindTokensInSelection(
+  EXPECT_THAT(internal::FindTokensInSelection(
                   feature_processor.Tokenize(selection_with_context.context),
                   selection_with_context),
               ElementsAreArray(expected_selection));
@@ -340,7 +334,7 @@
   // Selection: I live at {350 Third Street,} today.
   selection_with_context.selection_start = 10;
   selection_with_context.selection_end = 27;
-  EXPECT_THAT(feature_processor.FindTokensInSelection(
+  EXPECT_THAT(internal::FindTokensInSelection(
                   feature_processor.Tokenize(selection_with_context.context),
                   selection_with_context),
               ElementsAreArray(expected_selection));
@@ -348,7 +342,7 @@
   // Selection: I live at {350 Third Street, }today.
   selection_with_context.selection_start = 10;
   selection_with_context.selection_end = 28;
-  EXPECT_THAT(feature_processor.FindTokensInSelection(
+  EXPECT_THAT(internal::FindTokensInSelection(
                   feature_processor.Tokenize(selection_with_context.context),
                   selection_with_context),
               ElementsAreArray(expected_selection));
@@ -356,7 +350,7 @@
   // Selection: I live at {350 Third S}treet, today.
   selection_with_context.selection_start = 10;
   selection_with_context.selection_end = 21;
-  EXPECT_THAT(feature_processor.FindTokensInSelection(
+  EXPECT_THAT(internal::FindTokensInSelection(
                   feature_processor.Tokenize(selection_with_context.context),
                   selection_with_context),
               ElementsAreArray(expected_selection));
@@ -366,7 +360,7 @@
   // Selection: I live at {350 Third} Street, today.
   selection_with_context.selection_start = 10;
   selection_with_context.selection_end = 19;
-  EXPECT_THAT(feature_processor.FindTokensInSelection(
+  EXPECT_THAT(internal::FindTokensInSelection(
                   feature_processor.Tokenize(selection_with_context.context),
                   selection_with_context),
               ElementsAreArray({
@@ -380,7 +374,7 @@
   selection_with_context.selection_start = 10;
   selection_with_context.selection_end = 29;
   EXPECT_THAT(
-      feature_processor.FindTokensInSelection(
+      internal::FindTokensInSelection(
           feature_processor.Tokenize(selection_with_context.context),
           selection_with_context),
       ElementsAreArray({
@@ -453,6 +447,46 @@
                  Token("Token3", 14, 20, false), Token("Token4", 21, 27, false),
                  Token("Token5", 28, 34, false)});
   EXPECT_EQ(token_index, 4);
+
+  // Some invalid ones.
+  token_index = internal::CenterTokenFromMiddleOfSelection({7, 27}, {});
+  EXPECT_EQ(token_index, -1);
+}
+
+TEST(FeatureProcessorTest, GetFeaturesForSharing) {
+  FeatureProcessorOptions options;
+  options.set_num_buckets(10);
+  options.set_context_size(9);
+  options.set_max_selection_span(7);
+  options.add_chargram_orders(1);
+  options.set_tokenize_on_space(true);
+  options.set_center_token_selection_method(
+      FeatureProcessorOptions::CENTER_TOKEN_MIDDLE_OF_SELECTION);
+  options.set_only_use_line_with_click(true);
+  options.set_split_tokens_on_selection_boundaries(true);
+  options.set_extract_selection_mask_feature(true);
+  TokenizationCodepointRange* config =
+      options.add_tokenization_codepoint_config();
+  config->set_start(32);
+  config->set_end(33);
+  config->set_role(TokenizationCodepointRange::WHITESPACE_SEPARATOR);
+  config = options.add_tokenization_codepoint_config();
+  config->set_start(10);
+  config->set_end(11);
+  config->set_role(TokenizationCodepointRange::WHITESPACE_SEPARATOR);
+  FeatureProcessor feature_processor(options);
+
+  std::vector<std::vector<std::pair<int, float>>> features;
+  std::vector<float> extra_features;
+  std::vector<CodepointSpan> selection_label_spans;
+  int selection_label;
+  CodepointSpan selection_codepoint_label;
+  int classification_label;
+  EXPECT_TRUE(feature_processor.GetFeaturesAndLabels(
+      "line 1\nline2\nsome entity\n line 4", {13, 24}, {13, 24}, "", &features,
+      &extra_features, &selection_label_spans, &selection_label,
+      &selection_codepoint_label, &classification_label));
+  EXPECT_EQ(19, features.size());
 }
 
 }  // namespace

diff --git a/tests/functions.cc b/tests/functions.cc
new file mode 100644
index 0000000..8ea5a8d
--- /dev/null
+++ b/tests/functions.cc

@@ -0,0 +1,29 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tests/functions.h"
+
+#include "common/registry.h"
+
+namespace libtextclassifier {
+namespace nlp_core {
+
+TC_DEFINE_CLASS_REGISTRY_NAME("function", functions::Function);
+
+TC_DEFINE_CLASS_REGISTRY_NAME("int-function", functions::IntFunction);
+
+}  // namespace nlp_core
+}  // namespace libtextclassifier

diff --git a/tests/functions.h b/tests/functions.h
new file mode 100644
index 0000000..b96fe2d
--- /dev/null
+++ b/tests/functions.h

@@ -0,0 +1,73 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_TESTS_FUNCTIONS_H_
+#define LIBTEXTCLASSIFIER_TESTS_FUNCTIONS_H_
+
+#include <math.h>
+
+#include "common/registry.h"
+
+namespace libtextclassifier {
+namespace nlp_core {
+namespace functions {
+// Abstract double -> double function.
+class Function : public RegisterableClass<Function> {
+ public:
+  virtual ~Function() {}
+  virtual double Evaluate(double x) = 0;
+};
+
+class Cos : public Function {
+ public:
+  double Evaluate(double x) override { return cos(x); }
+  TC_DEFINE_REGISTRATION_METHOD("cos", Cos);
+};
+
+class Exp : public Function {
+ public:
+  double Evaluate(double x) override { return exp(x); }
+  TC_DEFINE_REGISTRATION_METHOD("exp", Exp);
+};
+
+// Abstract int -> int function.
+class IntFunction : public RegisterableClass<IntFunction> {
+ public:
+  virtual ~IntFunction() {}
+  virtual int Evaluate(int k) = 0;
+};
+
+class Inc : public IntFunction {
+ public:
+  int Evaluate(int k) override { return k + 1; }
+  TC_DEFINE_REGISTRATION_METHOD("inc", Inc);
+};
+
+// Integer decrement: Evaluate(k) returns k - 1 (the counterpart of Inc).
+// TC_DEFINE_REGISTRATION_METHOD supplies the static Dec::RegisterClass(), which
+// associates the name "dec" with a factory for this class; until that is
+// called, IntFunction::Create("dec") does not know about Dec.
+class Dec : public IntFunction {
+ public:
+  int Evaluate(int k) override { return k - 1; }
+  TC_DEFINE_REGISTRATION_METHOD("dec", Dec);
+};
+}  // namespace functions
+
+// Should be inside namespace libtextclassifier::nlp_core.
+TC_DECLARE_CLASS_REGISTRY_NAME(functions::Function);
+TC_DECLARE_CLASS_REGISTRY_NAME(functions::IntFunction);
+
+}  // namespace nlp_core
+}  // namespace libtextclassifier
+
+#endif  // LIBTEXTCLASSIFIER_TESTS_FUNCTIONS_H_

diff --git a/tests/registry_test.cc b/tests/registry_test.cc
new file mode 100644
index 0000000..7de4163
--- /dev/null
+++ b/tests/registry_test.cc

@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <memory>
+
+#include "tests/functions.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier {
+namespace nlp_core {
+namespace functions {
+
+TEST(RegistryTest, InstantiateFunctionsByName) {
+  // First, we need to register the functions we are interested in:
+  Exp::RegisterClass();
+  Inc::RegisterClass();
+  Cos::RegisterClass();
+
+  // RegisterClass methods can be called in any order, even multiple times :)
+  Cos::RegisterClass();
+  Inc::RegisterClass();
+  Inc::RegisterClass();
+  Cos::RegisterClass();
+  Inc::RegisterClass();
+
+  // NOTE: we intentionally do not register Dec.  Attempts to create an instance
+  // of that function by name should fail.
+
+  // Instantiate a few functions and check that the created functions produce
+  // the expected results for a few sample values.
+  std::unique_ptr<Function> f1(Function::Create("cos"));
+  ASSERT_NE(f1, nullptr);
+  std::unique_ptr<Function> f2(Function::Create("exp"));
+  ASSERT_NE(f2, nullptr);
+  EXPECT_NEAR(f1->Evaluate(-3), -0.9899, 0.0001);
+  EXPECT_NEAR(f2->Evaluate(2.3), 9.9741, 0.0001);
+
+  std::unique_ptr<IntFunction> f3(IntFunction::Create("inc"));
+  ASSERT_NE(f3, nullptr);
+  EXPECT_EQ(f3->Evaluate(7), 8);
+
+  // Instantiating unknown functions should return nullptr, but not crash
+  // anything.
+  EXPECT_EQ(Function::Create("mambo"), nullptr);
+
+  // Functions that are defined in the code, but are not registered are unknown.
+  EXPECT_EQ(IntFunction::Create("dec"), nullptr);
+
+  // Function and IntFunction use different registries.
+  EXPECT_EQ(IntFunction::Create("exp"), nullptr);
+}
+
+}  // namespace functions
+}  // namespace nlp_core
+}  // namespace libtextclassifier

diff --git a/tests/text-classification-model_test.cc b/tests/text-classification-model_test.cc
index 2e2e841..20351c6 100644
--- a/tests/text-classification-model_test.cc
+++ b/tests/text-classification-model_test.cc

@@ -257,6 +257,12 @@
   EXPECT_EQ("other", FindBestResult(model->ClassifyText("asdf", {0, 4})));
   EXPECT_EQ("<INVALID RESULTS>",
             FindBestResult(model->ClassifyText("asdf", {0, 0})));
+
+  // Junk.
+  EXPECT_EQ("<INVALID RESULTS>",
+            FindBestResult(model->ClassifyText("", {0, 0})));
+  EXPECT_EQ("<INVALID RESULTS>", FindBestResult(model->ClassifyText(
+                                     "a\n\n\n\nx x x\n\n\n\n\n\n", {1, 5})));
 }
 
 }  // namespace
commit	9087f1f23d476f5ab26cd11a09cfa05b95659ceb	[log] [tgz]
author	Alex Salcianu <salcianu@google.com>	Wed Mar 22 21:22:39 2017 -0400
committer	Alex Salcianu <salcianu@google.com>	Wed Mar 22 21:22:39 2017 -0400
tree	d370abdff951d41f18269d3d2fdcd8a6601c5398
parent	29fc69fe417ba1fb67a8a6731c5ff894023a089c [diff]