Export libtextclassifier to Android
Test: atest android.view.textclassifier.TextClassificationManagerTest
Change-Id: Id7a31dc60c8f6625ff8f2a9c85689e13b121a5a4
diff --git a/utils/base/logging.h b/utils/base/logging.h
index e197780..e8bde39 100644
--- a/utils/base/logging.h
+++ b/utils/base/logging.h
@@ -155,7 +155,7 @@
#endif // NDEBUG
-#ifdef LIBTEXTCLASSIFIER_VLOG
+#ifdef TC3_VLOG
#define TC3_VLOG(severity) \
::libtextclassifier3::logging::LogMessage( \
::libtextclassifier3::logging::INFO, __FILE__, __LINE__) \
diff --git a/utils/calendar/calendar_test.cc b/utils/calendar/calendar_test.cc
index 02ce63f..a8c3af8 100644
--- a/utils/calendar/calendar_test.cc
+++ b/utils/calendar/calendar_test.cc
@@ -45,7 +45,7 @@
TC3_LOG(INFO) << result;
}
-#ifdef LIBTEXTCLASSIFIER_CALENDAR_ICU
+#ifdef TC3_CALENDAR_ICU
TEST_F(CalendarTest, RoundingToGranularity) {
int64 time;
DateParseData data;
@@ -238,7 +238,7 @@
/*granularity=*/GRANULARITY_DAY, &time));
EXPECT_EQ(time, 1523397600000L /* 11 April 2018 00:00:00 */);
}
-#endif // LIBTEXTCLASSIFIER_UNILIB_DUMMY
+#endif  // TC3_CALENDAR_ICU
} // namespace
} // namespace libtextclassifier3
diff --git a/utils/intents/intent-config.fbs b/utils/intents/intent-config.fbs
new file mode 100755
index 0000000..d350ae4
--- /dev/null
+++ b/utils/intents/intent-config.fbs
@@ -0,0 +1,174 @@
+//
+// Copyright (C) 2018 The Android Open Source Project
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+
+// The type of variable to fetch.
+namespace libtextclassifier3;
+enum AndroidSimpleIntentGeneratorVariableType : int {
+ INVALID_VARIABLE = 0,
+
+ // The raw text that was classified.
+ RAW_TEXT = 1,
+
+ // Text as a URL with explicit protocol. If no protocol was specified, http
+ // is prepended.
+ URL_TEXT = 2,
+
+ // The raw text, but URL encoded.
+ URL_ENCODED_TEXT = 3,
+
+ // For dates/times: the instant of the event in UTC millis.
+ EVENT_TIME_MS_UTC = 4,
+
+ // For dates/times: the start of the event in UTC millis.
+ EVENT_START_MS_UTC = 5,
+
+ // For dates/times: the end of the event in UTC millis.
+ EVENT_END_MS_UTC = 6,
+
+ // Name of the package that's running the classifier.
+ PACKAGE_NAME = 7,
+}
+
+// Enumerates the possible extra types for the simple intent generator.
+namespace libtextclassifier3;
+enum AndroidSimpleIntentGeneratorExtraType : int {
+ INVALID_EXTRA_TYPE = 0,
+ STRING = 1,
+ BOOL = 2,
+ VARIABLE_AS_LONG = 3,
+}
+
+// Enumerates the possible condition types for the simple intent generator.
+namespace libtextclassifier3;
+enum AndroidSimpleIntentGeneratorConditionType : int {
+ INVALID_CONDITION_TYPE = 0,
+
+ // Queries the UserManager for the given boolean restriction. The condition
+ // passes if the result of getBoolean is false. The name of the
+ // restriction to check is in the string_ field.
+ USER_RESTRICTION_NOT_SET = 1,
+
+ // Checks that the parsed event start time is at least a given number of
+ // milliseconds in the future. (Only valid if there is a parsed event
+ // time.) The offset is stored in the int64_ field.
+ EVENT_START_IN_FUTURE_MS = 2,
+}
+
+// Describes how intents for the various entity types should be generated on
+// Android. This is distributed through the model, but not used by
+// libtextclassifier yet - rather, it's passed to the calling Java code, which
+// implements the Intent generation logic.
+namespace libtextclassifier3;
+table AndroidIntentFactoryOptions {
+ entity:[libtextclassifier3.AndroidIntentFactoryEntityOptions];
+}
+
+// Describes how intents should be generated for a particular entity type.
+namespace libtextclassifier3;
+table AndroidIntentFactoryEntityOptions {
+ // The entity type as defined by one of the TextClassifier ENTITY_TYPE
+ // constants. (e.g. "address", "phone", etc.)
+ entity_type:string;
+
+ // List of generators for all the different types of intents that should
+ // be made available for the entity type.
+ generator:[libtextclassifier3.AndroidIntentGeneratorOptions];
+}
+
+// Configures a single Android Intent generator.
+namespace libtextclassifier3;
+table AndroidIntentGeneratorOptions {
+ // Strings for UI elements.
+ strings:[libtextclassifier3.AndroidIntentGeneratorStrings];
+
+ // Generator specific configuration.
+ simple:libtextclassifier3.AndroidSimpleIntentGeneratorOptions;
+}
+
+// Language dependent configuration for an Android Intent generator.
+namespace libtextclassifier3;
+table AndroidIntentGeneratorStrings {
+ // BCP 47 tag for the supported locale. Note that because of API level
+ // restrictions, this must /not/ use wildcards. To e.g. match all English
+ // locales, use only "en" and not "en_*". Reference the java.util.Locale
+ // constructor for details.
+ language_tag:string;
+
+ // Title shown for the action (see RemoteAction.getTitle).
+ title:string;
+
+ // Description shown for the action (see
+ // RemoteAction.getContentDescription).
+ description:string;
+}
+
+// An extra to set on a simple intent generator Intent.
+namespace libtextclassifier3;
+table AndroidSimpleIntentGeneratorExtra {
+ // The name of the extra to set.
+ name:string;
+
+ // The type of the extra to set.
+ type:libtextclassifier3.AndroidSimpleIntentGeneratorExtraType;
+
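+ // The value of the extra. Which of the following fields is read presumably
+ // depends on |type| above (e.g. string_ for STRING, bool_ for BOOL).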
+ string_:string;
+
+ bool_:bool;
+ int32_:int;
+}
+
+// A condition that needs to be fulfilled for an Intent to get generated.
+namespace libtextclassifier3;
+table AndroidSimpleIntentGeneratorCondition {
+ type:libtextclassifier3.AndroidSimpleIntentGeneratorConditionType;
+
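+ // Arguments for the condition. Which field is read depends on |type|; see
+ // the documentation of the condition types above (e.g. string_ for
+ // USER_RESTRICTION_NOT_SET, int64_ for EVENT_START_IN_FUTURE_MS).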
+ string_:string;
+
+ int32_:int;
+ int64_:long;
+}
+
+// Configures an intent generator whose logic is simple enough to be expressed
+// with basic rules - which covers the vast majority of use cases and is
+// analogous to Android Actions.
+// Most strings (action, data, type, ...) may contain variable references. To
+// use them, the generator must first declare all the variables it wishes to use
+// in the variables field. The values then become available as numbered
+// arguments (using the normal java.util.Formatter syntax) in the order they
+// were specified.
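+//
+// For illustration only (hypothetical values, not part of this change): a
+// generator with variable = [URL_ENCODED_TEXT] could use
+//   action = "android.intent.action.VIEW"
+//   data = "https://www.google.com/search?q=%0$s"
+// to open a web search for the classified text, using the "%0$s"
+// substitution convention described above.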
+namespace libtextclassifier3;
+table AndroidSimpleIntentGeneratorOptions {
+ // The action to set on the Intent (see Intent.setAction). Supports variables.
+ action:string;
+
+ // The data to set on the Intent (see Intent.setData). Supports variables.
+ data:string;
+
+ // The type to set on the Intent (see Intent.setType). Supports variables.
+ type:string;
+
+ // The list of all the extras to add to the Intent.
+ extra:[libtextclassifier3.AndroidSimpleIntentGeneratorExtra];
+
+ // The list of all the variables that become available for substitution in
+ // the action, data, type and extra strings. To e.g. set a field to the value
+ // of the first variable, use "%0$s".
+ variable:[libtextclassifier3.AndroidSimpleIntentGeneratorVariableType];
+
+ // The list of all conditions that need to be fulfilled for Intent generation.
+ condition:[libtextclassifier3.AndroidSimpleIntentGeneratorCondition];
+}
+
diff --git a/utils/sentencepiece/encoder.cc b/utils/sentencepiece/encoder.cc
index 96fb868..6ffb0c7 100644
--- a/utils/sentencepiece/encoder.cc
+++ b/utils/sentencepiece/encoder.cc
@@ -35,6 +35,17 @@
normalized_text.RemovePrefix(1);
continue;
}
+ // Check whether we can use the unknown token.
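+ // The unknown token covers a single position; it becomes the candidate
+ // for the segmentation ending at the next position if no candidate has
+ // been recorded there yet or if it scores better than the existing one.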
+ if (unknown_code_ >= 0) {
+ const int pos = i + 1;
+ const float unknown_penalty = segmentation[i].score + unknown_score_;
+ if (segmentation[pos].previous_pos < 0 ||
+ segmentation[pos].score < unknown_penalty) {
+ segmentation[pos] = {/*score=*/unknown_penalty, /*previous_pos=*/i,
+ /*piece_id=*/unknown_code_,
+ /*num_pieces=*/segmentation[i].num_pieces + 1};
+ }
+ }
for (const auto& match : matcher_->FindAllPrefixMatches(normalized_text)) {
TC3_CHECK(match.id >= 0 && match.id < num_pieces_);
const int pos = i + match.match_length;
@@ -42,7 +53,7 @@
if (segmentation[pos].previous_pos < 0 ||
segmentation[pos].score < candidate_score) {
segmentation[pos] = {/*score=*/candidate_score, /*previous_pos=*/i,
- /*piece_id=*/match.id,
+ /*piece_id=*/match.id + encoding_offset_,
/*num_pieces=*/segmentation[i].num_pieces + 1};
}
}
@@ -57,7 +68,7 @@
result[num_pieces + 1] = end_code_;
int pos = len;
for (int i = num_pieces; i > 0; i--) {
- result[i] = segmentation[pos].piece_id + encoding_offset_;
+ result[i] = segmentation[pos].piece_id;
pos = segmentation[pos].previous_pos;
}
result[0] = start_code_;
diff --git a/utils/sentencepiece/encoder.h b/utils/sentencepiece/encoder.h
index fffd86f..0f1bfd3 100644
--- a/utils/sentencepiece/encoder.h
+++ b/utils/sentencepiece/encoder.h
@@ -33,19 +33,24 @@
// a trie.
// num_pieces: the number of pieces in the trie.
// pieces_scores: the scores of the individual pieces.
- // start_code: Code that is used as encoding of the start of input.
- // end_code: Code that is used as encoding of the end of input.
- // encoding_offset: Value added to the sentence piece ids to make them
+ // start_code: code that is used as encoding of the start of input.
+ // end_code: code that is used as encoding of the end of input.
+ // encoding_offset: value added to the sentence piece ids to make them
// not interesecting with start_code and end_code.
+ // unknown_code: code that is used for out-of-dictionary characters.
+ //   unknown_score: the penalty score associated with the unknown code.
Encoder(const SentencePieceMatcher* matcher, const int num_pieces,
const float* pieces_scores, int start_code = 0, int end_code = 1,
- int encoding_offset = 2)
+ int encoding_offset = 2, int unknown_code = -1,
+ float unknown_score = 0.f)
: num_pieces_(num_pieces),
scores_(pieces_scores),
matcher_(matcher),
start_code_(start_code),
end_code_(end_code),
- encoding_offset_(encoding_offset) {}
+ encoding_offset_(encoding_offset),
+ unknown_code_(unknown_code),
+ unknown_score_(unknown_score) {}
// Segment the input so that the total score of the pieces used is maximized.
// This is a simplified implementation of the general Viterbi algorithm,
@@ -74,6 +79,8 @@
const int start_code_;
const int end_code_;
const int encoding_offset_;
+ const int unknown_code_;
+ const float unknown_score_;
};
} // namespace libtextclassifier3
diff --git a/utils/sentencepiece/encoder_test.cc b/utils/sentencepiece/encoder_test.cc
index 59c12ad..6bc9aeb 100644
--- a/utils/sentencepiece/encoder_test.cc
+++ b/utils/sentencepiece/encoder_test.cc
@@ -26,7 +26,7 @@
namespace libtextclassifier3 {
namespace {
-using testing::ElementsAreArray;
+using testing::ElementsAre;
using testing::IsEmpty;
TEST(EncoderTest, SimpleTokenization) {
@@ -38,12 +38,12 @@
const Encoder encoder(matcher.get(),
/*num_pieces=*/4, scores);
- EXPECT_THAT(encoder.Encode("hellothere"), ElementsAreArray({0, 3, 5, 1}));
+ EXPECT_THAT(encoder.Encode("hellothere"), ElementsAre(0, 3, 5, 1));
// Make probability of hello very low:
// hello gets now tokenized as hell + o.
scores[1] = -100.0;
- EXPECT_THAT(encoder.Encode("hellothere"), ElementsAreArray({0, 2, 4, 5, 1}));
+ EXPECT_THAT(encoder.Encode("hellothere"), ElementsAre(0, 2, 4, 5, 1));
}
TEST(EncoderTest, HandlesEdgeCases) {
@@ -54,10 +54,28 @@
/*num_pieces=*/4, offsets, StringPiece(pieces, 18)));
const Encoder encoder(matcher.get(),
/*num_pieces=*/4, scores);
- EXPECT_THAT(encoder.Encode("hellhello"), ElementsAreArray({0, 2, 3, 1}));
- EXPECT_THAT(encoder.Encode("hellohell"), ElementsAreArray({0, 3, 2, 1}));
- EXPECT_THAT(encoder.Encode(""), ElementsAreArray({0, 1}));
- EXPECT_THAT(encoder.Encode("hellathere"), ElementsAreArray({0, 1}));
+ EXPECT_THAT(encoder.Encode("hellhello"), ElementsAre(0, 2, 3, 1));
+ EXPECT_THAT(encoder.Encode("hellohell"), ElementsAre(0, 3, 2, 1));
+ EXPECT_THAT(encoder.Encode(""), ElementsAre(0, 1));
+ EXPECT_THAT(encoder.Encode("hellathere"), ElementsAre(0, 1));
+}
+
+TEST(EncoderTest, HandlesOutOfDictionary) {
+ const char pieces[] = "hell\0hello\0o\0there\0";
+ const int offsets[] = {0, 5, 11, 13};
+ float scores[] = {-0.5, -1.0, -10.0, -1.0};
+ std::unique_ptr<SentencePieceMatcher> matcher(new SortedStringsTable(
+ /*num_pieces=*/4, offsets, StringPiece(pieces, 18)));
+ const Encoder encoder(matcher.get(),
+ /*num_pieces=*/4, scores,
+ /*start_code=*/0, /*end_code=*/1,
+ /*encoding_offset=*/3, /*unknown_code=*/2,
+ /*unknown_score=*/-100.0);
+ EXPECT_THAT(encoder.Encode("hellhello"), ElementsAre(0, 3, 4, 1));
+ EXPECT_THAT(encoder.Encode("hellohell"), ElementsAre(0, 4, 3, 1));
+ EXPECT_THAT(encoder.Encode(""), ElementsAre(0, 1));
+ EXPECT_THAT(encoder.Encode("hellathere"),
+ ElementsAre(0, /*hell*/ 3, /*unknown*/ 2, /*there*/ 6, 1));
}
} // namespace
diff --git a/utils/sentencepiece/normalizer.cc b/utils/sentencepiece/normalizer.cc
index 9fcc1e5..1dd20da 100644
--- a/utils/sentencepiece/normalizer.cc
+++ b/utils/sentencepiece/normalizer.cc
@@ -21,7 +21,7 @@
namespace libtextclassifier3 {
-std::string Normalizer::Normalize(StringPiece input) const {
+std::string SentencePieceNormalizer::Normalize(StringPiece input) const {
std::string normalized;
// Ignores heading space.
@@ -106,7 +106,7 @@
return normalized;
}
-std::pair<StringPiece, int> Normalizer::NormalizePrefix(
+std::pair<StringPiece, int> SentencePieceNormalizer::NormalizePrefix(
StringPiece input) const {
std::pair<StringPiece, int> result;
if (input.empty()) return result;
diff --git a/utils/sentencepiece/normalizer.h b/utils/sentencepiece/normalizer.h
index 582d563..227e09b 100644
--- a/utils/sentencepiece/normalizer.h
+++ b/utils/sentencepiece/normalizer.h
@@ -27,7 +27,7 @@
// Normalizer implements a simple text normalizer with user-defined
// string-to-string rules and leftmost longest matching.
-class Normalizer {
+class SentencePieceNormalizer {
public:
// charsmap_trie and charsmap_normalized specify the normalization/replacement
// string-to-string rules in the following way:
@@ -41,10 +41,11 @@
// internal whitespace.
//
// escape_whitespaces: Whether to replace whitespace with a meta symbol.
- Normalizer(const DoubleArrayTrie &charsmap_trie,
- StringPiece charsmap_normalized, bool add_dummy_prefix = true,
- bool remove_extra_whitespaces = true,
- bool escape_whitespaces = true)
+ SentencePieceNormalizer(const DoubleArrayTrie &charsmap_trie,
+ StringPiece charsmap_normalized,
+ bool add_dummy_prefix = true,
+ bool remove_extra_whitespaces = true,
+ bool escape_whitespaces = true)
: charsmap_trie_(charsmap_trie),
charsmap_normalized_(charsmap_normalized),
add_dummy_prefix_(add_dummy_prefix),
diff --git a/utils/sentencepiece/normalizer_test.cc b/utils/sentencepiece/normalizer_test.cc
index 143e795..f6018ab 100644
--- a/utils/sentencepiece/normalizer_test.cc
+++ b/utils/sentencepiece/normalizer_test.cc
@@ -36,9 +36,10 @@
std::ifstream test_config_stream(GetTestConfigPath());
std::string config((std::istreambuf_iterator<char>(test_config_stream)),
(std::istreambuf_iterator<char>()));
- Normalizer normalizer = NormalizerFromSpec(config, /*add_dummy_prefix=*/true,
- /*remove_extra_whitespaces=*/true,
- /*escape_whitespaces=*/true);
+ SentencePieceNormalizer normalizer =
+ NormalizerFromSpec(config, /*add_dummy_prefix=*/true,
+ /*remove_extra_whitespaces=*/true,
+ /*escape_whitespaces=*/true);
EXPECT_EQ(normalizer.Normalize("hello there"), "▁hello▁there");
@@ -63,9 +64,10 @@
std::ifstream test_config_stream(GetTestConfigPath());
std::string config((std::istreambuf_iterator<char>(test_config_stream)),
(std::istreambuf_iterator<char>()));
- Normalizer normalizer = NormalizerFromSpec(config, /*add_dummy_prefix=*/false,
- /*remove_extra_whitespaces=*/true,
- /*escape_whitespaces=*/true);
+ SentencePieceNormalizer normalizer =
+ NormalizerFromSpec(config, /*add_dummy_prefix=*/false,
+ /*remove_extra_whitespaces=*/true,
+ /*escape_whitespaces=*/true);
EXPECT_EQ(normalizer.Normalize("hello there"), "hello▁there");
@@ -90,9 +92,10 @@
std::ifstream test_config_stream(GetTestConfigPath());
std::string config((std::istreambuf_iterator<char>(test_config_stream)),
(std::istreambuf_iterator<char>()));
- Normalizer normalizer = NormalizerFromSpec(config, /*add_dummy_prefix=*/false,
- /*remove_extra_whitespaces=*/false,
- /*escape_whitespaces=*/true);
+ SentencePieceNormalizer normalizer =
+ NormalizerFromSpec(config, /*add_dummy_prefix=*/false,
+ /*remove_extra_whitespaces=*/false,
+ /*escape_whitespaces=*/true);
EXPECT_EQ(normalizer.Normalize("hello there"), "hello▁there");
@@ -108,9 +111,10 @@
std::ifstream test_config_stream(GetTestConfigPath());
std::string config((std::istreambuf_iterator<char>(test_config_stream)),
(std::istreambuf_iterator<char>()));
- Normalizer normalizer = NormalizerFromSpec(config, /*add_dummy_prefix=*/false,
- /*remove_extra_whitespaces=*/false,
- /*escape_whitespaces=*/false);
+ SentencePieceNormalizer normalizer =
+ NormalizerFromSpec(config, /*add_dummy_prefix=*/false,
+ /*remove_extra_whitespaces=*/false,
+ /*escape_whitespaces=*/false);
EXPECT_EQ(normalizer.Normalize("hello there"), "hello there");
diff --git a/utils/sentencepiece/test_utils.cc b/utils/sentencepiece/test_utils.cc
index 1b766ac..1ed2bf3 100644
--- a/utils/sentencepiece/test_utils.cc
+++ b/utils/sentencepiece/test_utils.cc
@@ -24,15 +24,16 @@
namespace libtextclassifier3 {
-Normalizer NormalizerFromSpec(StringPiece spec, bool add_dummy_prefix,
- bool remove_extra_whitespaces,
- bool escape_whitespaces) {
+SentencePieceNormalizer NormalizerFromSpec(StringPiece spec,
+ bool add_dummy_prefix,
+ bool remove_extra_whitespaces,
+ bool escape_whitespaces) {
const uint32 trie_blob_size = reinterpret_cast<const uint32*>(spec.data())[0];
spec.RemovePrefix(sizeof(trie_blob_size));
const TrieNode* trie_blob = reinterpret_cast<const TrieNode*>(spec.data());
spec.RemovePrefix(trie_blob_size);
const int num_nodes = trie_blob_size / sizeof(TrieNode);
- return Normalizer(
+ return SentencePieceNormalizer(
DoubleArrayTrie(trie_blob, num_nodes),
/*charsmap_normalized=*/StringPiece(spec.data(), spec.size()),
add_dummy_prefix, remove_extra_whitespaces, escape_whitespaces);
diff --git a/utils/sentencepiece/test_utils.h b/utils/sentencepiece/test_utils.h
index 71a4994..0c833da 100644
--- a/utils/sentencepiece/test_utils.h
+++ b/utils/sentencepiece/test_utils.h
@@ -25,9 +25,10 @@
namespace libtextclassifier3 {
-Normalizer NormalizerFromSpec(StringPiece spec, bool add_dummy_prefix,
- bool remove_extra_whitespaces,
- bool escape_whitespaces);
+SentencePieceNormalizer NormalizerFromSpec(StringPiece spec,
+ bool add_dummy_prefix,
+ bool remove_extra_whitespaces,
+ bool escape_whitespaces);
} // namespace libtextclassifier3
diff --git a/utils/tflite/text_encoder.cc b/utils/tflite/text_encoder.cc
index 9554283..734b5b0 100644
--- a/utils/tflite/text_encoder.cc
+++ b/utils/tflite/text_encoder.cc
@@ -35,7 +35,7 @@
namespace {
struct TextEncoderOp {
- std::unique_ptr<Normalizer> normalizer;
+ std::unique_ptr<SentencePieceNormalizer> normalizer;
std::unique_ptr<Encoder> encoder;
std::unique_ptr<SentencePieceMatcher> matcher;
};
@@ -81,7 +81,7 @@
config->normalization_charsmap()->Data());
const int charsmap_trie_nodes_length =
config->normalization_charsmap()->Length() / sizeof(TrieNode);
- encoder_op->normalizer.reset(new Normalizer(
+ encoder_op->normalizer.reset(new SentencePieceNormalizer(
DoubleArrayTrie(charsmap_trie_nodes, charsmap_trie_nodes_length),
StringPiece(config->normalization_charsmap_values()->data(),
config->normalization_charsmap_values()->size()),
@@ -113,7 +113,8 @@
}
encoder_op->encoder.reset(new Encoder(
encoder_op->matcher.get(), num_pieces, config->pieces_scores()->data(),
- config->start_code(), config->end_code(), config->encoding_offset()));
+ config->start_code(), config->end_code(), config->encoding_offset(),
+ config->unknown_code(), config->unknown_score()));
return encoder_op.release();
}
diff --git a/utils/tflite/text_encoder_config.fbs b/utils/tflite/text_encoder_config.fbs
index 462da21..8ae8fc5 100644
--- a/utils/tflite/text_encoder_config.fbs
+++ b/utils/tflite/text_encoder_config.fbs
@@ -34,6 +34,12 @@
// `start_code` and `end_code`.
encoding_offset:int32 = 2;
+ // Code that is used for out-of-dictionary characters.
+ unknown_code:int32 = -1;
+
+ // Penalty associated with the unknown code.
+ unknown_score:float;
+
// Normalization options.
// Serialized normalization charsmap.
normalization_charsmap:string;
diff --git a/utils/tflite/text_encoder_test.cc b/utils/tflite/text_encoder_test.cc
index 0b6ff71..0cd67ce 100644
--- a/utils/tflite/text_encoder_test.cc
+++ b/utils/tflite/text_encoder_test.cc
@@ -20,6 +20,7 @@
#include "utils/tflite/text_encoder.h"
#include "gtest/gtest.h"
+#include "third_party/absl/flags/flag.h"
#include "flatbuffers/flexbuffers.h"
#include "tensorflow/contrib/lite/interpreter.h"
#include "tensorflow/contrib/lite/kernels/register.h"
diff --git a/utils/utf8/unilib_test.cc b/utils/utf8/unilib_test.cc
index e2ad26b..96b2c2d 100644
--- a/utils/utf8/unilib_test.cc
+++ b/utils/utf8/unilib_test.cc
@@ -50,7 +50,7 @@
EXPECT_EQ(unilib_.GetPairedBracket('}'), '{');
}
-#ifndef LIBTEXTCLASSIFIER_UNILIB_DUMMY
+#ifndef TC3_UNILIB_DUMMY
TEST_F(UniLibTest, CharacterClassesUnicode) {
EXPECT_TRUE(unilib_.IsOpeningBracket(0x0F3C)); // TIBET ANG KHANG GYON
EXPECT_TRUE(unilib_.IsClosingBracket(0x0F3D)); // TIBET ANG KHANG GYAS
@@ -72,7 +72,7 @@
EXPECT_EQ(unilib_.GetPairedBracket(0x0F3C), 0x0F3D);
EXPECT_EQ(unilib_.GetPairedBracket(0x0F3D), 0x0F3C);
}
-#endif // ndef LIBTEXTCLASSIFIER_UNILIB_DUMMY
+#endif // ndef TC3_UNILIB_DUMMY
TEST_F(UniLibTest, RegexInterface) {
const UnicodeText regex_pattern =
@@ -89,7 +89,7 @@
TC3_LOG(INFO) << matcher->Group(0, &status).size_codepoints();
}
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
+#ifdef TC3_UNILIB_ICU
TEST_F(UniLibTest, Regex) {
// The smiley face is a 4-byte UTF8 codepoint 0x1F60B, and it's important to
// test the regex functionality with it to verify we are handling the indices
@@ -126,9 +126,9 @@
EXPECT_EQ(matcher->Group(0, &status).ToUTF8String(), "0123😋");
EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
}
-#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif // TC3_UNILIB_ICU
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
+#ifdef TC3_UNILIB_ICU
TEST_F(UniLibTest, RegexGroups) {
// The smiley face is a 4-byte UTF8 codepoint 0x1F60B, and it's important to
// test the regex functionality with it to verify we are handling the indices
@@ -163,9 +163,9 @@
EXPECT_EQ(matcher->Group(2, &status).ToUTF8String(), "123");
EXPECT_EQ(status, UniLib::RegexMatcher::kNoError);
}
-#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif // TC3_UNILIB_ICU
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
+#ifdef TC3_UNILIB_ICU
TEST_F(UniLibTest, BreakIterator) {
const UnicodeText text = UTF8ToUnicodeText("some text", /*do_copy=*/false);
@@ -178,9 +178,9 @@
}
EXPECT_THAT(break_indices, ElementsAre(4, 5, 9));
}
-#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif // TC3_UNILIB_ICU
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
+#ifdef TC3_UNILIB_ICU
TEST_F(UniLibTest, BreakIterator4ByteUTF8) {
const UnicodeText text = UTF8ToUnicodeText("😀😂😋", /*do_copy=*/false);
std::unique_ptr<UniLib::BreakIterator> iterator =
@@ -192,18 +192,18 @@
}
EXPECT_THAT(break_indices, ElementsAre(1, 2, 3));
}
-#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif // TC3_UNILIB_ICU
-#ifndef LIBTEXTCLASSIFIER_UNILIB_JAVAICU
+#ifndef TC3_UNILIB_JAVAICU
TEST_F(UniLibTest, IntegerParse) {
int result;
EXPECT_TRUE(
unilib_.ParseInt32(UTF8ToUnicodeText("123", /*do_copy=*/false), &result));
EXPECT_EQ(result, 123);
}
-#endif // ndef LIBTEXTCLASSIFIER_UNILIB_JAVAICU
+#endif // ndef TC3_UNILIB_JAVAICU
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
+#ifdef TC3_UNILIB_ICU
TEST_F(UniLibTest, IntegerParseFullWidth) {
int result;
// The input string here is full width
@@ -211,16 +211,16 @@
&result));
EXPECT_EQ(result, 123);
}
-#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif // TC3_UNILIB_ICU
-#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
+#ifdef TC3_UNILIB_ICU
TEST_F(UniLibTest, IntegerParseFullWidthWithAlpha) {
int result;
// The input string here is full width
EXPECT_FALSE(unilib_.ParseInt32(UTF8ToUnicodeText("1a3", /*do_copy=*/false),
&result));
}
-#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
+#endif // TC3_UNILIB_ICU
} // namespace
} // namespace libtextclassifier3