Blame - text-classifier_test.cc - platform/external/libtextclassifier

blob: 74534e2adb99dce6be6e1d6ebca5f614978a4018 [file] [log] [blame]

Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	1	/*
				2	* Copyright (C) 2017 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	#include "text-classifier.h"
				18
				19	#include <fstream>
				20	#include <iostream>
				21	#include <memory>
				22	#include <string>
				23
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	24	#include "model_generated.h"
				25	#include "types-test-util.h"
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	26	#include "gmock/gmock.h"
				27	#include "gtest/gtest.h"
				28
				29	namespace libtextclassifier2 {
				30	namespace {
				31
				32	using testing::ElementsAreArray;
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	33	using testing::IsEmpty;
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	34	using testing::Pair;
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	35	using testing::Values;
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	36
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	37	std::string FirstResult(const std::vector<ClassificationResult>& results) {
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	38	if (results.empty()) {
				39	return "<INVALID RESULTS>";
				40	}
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	41	return results[0].collection;
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	42	}
				43
				44	MATCHER_P3(IsAnnotatedSpan, start, end, best_class, "") {
				45	return testing::Value(arg.span, Pair(start, end)) &&
				46	testing::Value(FirstResult(arg.classification), best_class);
				47	}
				48
				49	std::string ReadFile(const std::string& file_name) {
				50	std::ifstream file_stream(file_name);
				51	return std::string(std::istreambuf_iterator<char>(file_stream), {});
				52	}
				53
				54	std::string GetModelPath() {
				55	return LIBTEXTCLASSIFIER_TEST_DATA_DIR;
				56	}
				57
				58	TEST(TextClassifierTest, EmbeddingExecutorLoadingFails) {
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	59	CREATE_UNILIB_FOR_TESTING;
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	60	std::unique_ptr<TextClassifier> classifier =
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	61	TextClassifier::FromPath(GetModelPath() + "wrong_embeddings.fb", &unilib);
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	62	EXPECT_FALSE(classifier);
				63	}
				64
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	65	class TextClassifierTest : public ::testing::TestWithParam<const char*> {};
				66
				67	INSTANTIATE_TEST_CASE_P(ClickContext, TextClassifierTest,
				68	Values("test_model_cc.fb"));
				69	INSTANTIATE_TEST_CASE_P(BoundsSensitive, TextClassifierTest,
				70	Values("test_model.fb"));
				71
				72	TEST_P(TextClassifierTest, ClassifyText) {
				73	CREATE_UNILIB_FOR_TESTING;
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	74	std::unique_ptr<TextClassifier> classifier =
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	75	TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	76	ASSERT_TRUE(classifier);
				77
				78	EXPECT_EQ("other",
				79	FirstResult(classifier->ClassifyText(
				80	"this afternoon Barack Obama gave a speech at", {15, 27})));
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	81	EXPECT_EQ("phone", FirstResult(classifier->ClassifyText(
				82	"Call me at (800) 123-456 today", {11, 24})));
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	83
				84	// More lines.
				85	EXPECT_EQ("other",
				86	FirstResult(classifier->ClassifyText(
				87	"this afternoon Barack Obama gave a speech at\|Visit "
				88	"www.google.com every today!\|Call me at (800) 123-456 today.",
				89	{15, 27})));
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	90	EXPECT_EQ("phone",
				91	FirstResult(classifier->ClassifyText(
				92	"this afternoon Barack Obama gave a speech at\|Visit "
				93	"www.google.com every today!\|Call me at (800) 123-456 today.",
				94	{90, 103})));
				95
				96	// Single word.
				97	EXPECT_EQ("other", FirstResult(classifier->ClassifyText("obama", {0, 5})));
				98	EXPECT_EQ("other", FirstResult(classifier->ClassifyText("asdf", {0, 4})));
				99	EXPECT_EQ("<INVALID RESULTS>",
				100	FirstResult(classifier->ClassifyText("asdf", {0, 0})));
				101
				102	// Junk.
				103	EXPECT_EQ("<INVALID RESULTS>",
				104	FirstResult(classifier->ClassifyText("", {0, 0})));
				105	EXPECT_EQ("<INVALID RESULTS>", FirstResult(classifier->ClassifyText(
				106	"a\n\n\n\nx x x\n\n\n\n\n\n", {1, 5})));
				107	}
				108
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	109	TEST_P(TextClassifierTest, ClassifyTextDisabledFail) {
				110	CREATE_UNILIB_FOR_TESTING;
				111	const std::string test_model = ReadFile(GetModelPath() + GetParam());
				112	std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
				113
				114	unpacked_model->classification_model.clear();
				115	unpacked_model->triggering_options.reset(new ModelTriggeringOptionsT);
				116	unpacked_model->triggering_options->enabled_modes = ModeFlag_SELECTION;
				117
				118	flatbuffers::FlatBufferBuilder builder;
				119	builder.Finish(Model::Pack(builder, unpacked_model.get()));
				120
				121	std::unique_ptr<TextClassifier> classifier =
				122	TextClassifier::FromUnownedBuffer(
				123	reinterpret_cast<const char*>(builder.GetBufferPointer()),
				124	builder.GetSize(), &unilib);
				125
				126	// The classification model is still needed for selection scores.
				127	ASSERT_FALSE(classifier);
				128	}
				129
				130	TEST_P(TextClassifierTest, ClassifyTextDisabled) {
				131	CREATE_UNILIB_FOR_TESTING;
				132	const std::string test_model = ReadFile(GetModelPath() + GetParam());
				133	std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
				134
				135	unpacked_model->triggering_options.reset(new ModelTriggeringOptionsT);
				136	unpacked_model->triggering_options->enabled_modes =
				137	ModeFlag_ANNOTATION_AND_SELECTION;
				138
				139	flatbuffers::FlatBufferBuilder builder;
				140	builder.Finish(Model::Pack(builder, unpacked_model.get()));
				141
				142	std::unique_ptr<TextClassifier> classifier =
				143	TextClassifier::FromUnownedBuffer(
				144	reinterpret_cast<const char*>(builder.GetBufferPointer()),
				145	builder.GetSize(), &unilib);
				146	ASSERT_TRUE(classifier);
				147
				148	EXPECT_THAT(
				149	classifier->ClassifyText("Call me at (800) 123-456 today", {11, 24}),
				150	IsEmpty());
				151	}
				152
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	153	std::unique_ptr<RegexModel_::PatternT> MakePattern(
				154	const std::string& collection_name, const std::string& pattern,
				155	const bool enabled_for_classification, const bool enabled_for_selection,
				156	const bool enabled_for_annotation, const float score) {
				157	std::unique_ptr<RegexModel_::PatternT> result(new RegexModel_::PatternT);
				158	result->collection_name = collection_name;
				159	result->pattern = pattern;
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	160	// We cannot directly operate with \|= on the flag, so use an int here.
				161	int enabled_modes = ModeFlag_NONE;
				162	if (enabled_for_annotation) enabled_modes \|= ModeFlag_ANNOTATION;
				163	if (enabled_for_classification) enabled_modes \|= ModeFlag_CLASSIFICATION;
				164	if (enabled_for_selection) enabled_modes \|= ModeFlag_SELECTION;
				165	result->enabled_modes = static_cast<ModeFlag>(enabled_modes);
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	166	result->target_classification_score = score;
				167	result->priority_score = score;
				168	return result;
				169	}
				170
				171	#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
				172	TEST_P(TextClassifierTest, ClassifyTextRegularExpression) {
				173	CREATE_UNILIB_FOR_TESTING;
				174	const std::string test_model = ReadFile(GetModelPath() + GetParam());
				175	std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
				176
				177	// Add test regex models.
				178	unpacked_model->regex_model->patterns.push_back(MakePattern(
				179	"person", "Barack Obama", /enabled_for_classification=/true,
				180	/enabled_for_selection=/false, /enabled_for_annotation=/false, 1.0));
				181	unpacked_model->regex_model->patterns.push_back(MakePattern(
				182	"flight", "[a-zA-Z]{2}\\d{2,4}", /enabled_for_classification=/true,
				183	/enabled_for_selection=/false, /enabled_for_annotation=/false, 0.5));
				184
				185	flatbuffers::FlatBufferBuilder builder;
				186	builder.Finish(Model::Pack(builder, unpacked_model.get()));
				187
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	188	std::unique_ptr<TextClassifier> classifier =
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	189	TextClassifier::FromUnownedBuffer(
				190	reinterpret_cast<const char*>(builder.GetBufferPointer()),
				191	builder.GetSize(), &unilib);
				192	ASSERT_TRUE(classifier);
				193
				194	EXPECT_EQ("flight",
				195	FirstResult(classifier->ClassifyText(
				196	"Your flight LX373 is delayed by 3 hours.", {12, 17})));
				197	EXPECT_EQ("person",
				198	FirstResult(classifier->ClassifyText(
				199	"this afternoon Barack Obama gave a speech at", {15, 27})));
				200	EXPECT_EQ("email",
				201	FirstResult(classifier->ClassifyText("you@android.com", {0, 15})));
				202	EXPECT_EQ("email", FirstResult(classifier->ClassifyText(
				203	"Contact me at you@android.com", {14, 29})));
				204
				205	EXPECT_EQ("url", FirstResult(classifier->ClassifyText(
				206	"Visit www.google.com every today!", {6, 20})));
				207
				208	EXPECT_EQ("flight", FirstResult(classifier->ClassifyText("LX 37", {0, 5})));
				209	EXPECT_EQ("flight", FirstResult(classifier->ClassifyText("flight LX 37 abcd",
				210	{7, 12})));
				211
				212	// More lines.
				213	EXPECT_EQ("url",
				214	FirstResult(classifier->ClassifyText(
				215	"this afternoon Barack Obama gave a speech at\|Visit "
				216	"www.google.com every today!\|Call me at (800) 123-456 today.",
				217	{51, 65})));
				218	}
				219	#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
				220
				221	#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	222	TEST_P(TextClassifierTest, SuggestSelectionRegularExpression) {
				223	CREATE_UNILIB_FOR_TESTING;
				224	const std::string test_model = ReadFile(GetModelPath() + GetParam());
				225	std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
				226
				227	// Add test regex models.
				228	unpacked_model->regex_model.reset(new RegexModelT);
				229	unpacked_model->regex_model->patterns.push_back(MakePattern(
				230	"person", " (Barack Obama) ", /enabled_for_classification=/false,
				231	/enabled_for_selection=/true, /enabled_for_annotation=/false, 1.0));
				232	unpacked_model->regex_model->patterns.push_back(MakePattern(
				233	"flight", "([a-zA-Z]{2} ?\\d{2,4})", /enabled_for_classification=/false,
				234	/enabled_for_selection=/true, /enabled_for_annotation=/false, 1.0));
				235	unpacked_model->regex_model->patterns.back()->priority_score = 1.1;
				236
				237	flatbuffers::FlatBufferBuilder builder;
				238	builder.Finish(Model::Pack(builder, unpacked_model.get()));
				239
				240	std::unique_ptr<TextClassifier> classifier =
				241	TextClassifier::FromUnownedBuffer(
				242	reinterpret_cast<const char*>(builder.GetBufferPointer()),
				243	builder.GetSize(), &unilib);
				244	ASSERT_TRUE(classifier);
				245
				246	// Check regular expression selection.
				247	EXPECT_EQ(classifier->SuggestSelection(
				248	"Your flight MA 0123 is delayed by 3 hours.", {12, 14}),
				249	std::make_pair(12, 19));
				250	EXPECT_EQ(classifier->SuggestSelection(
				251	"this afternoon Barack Obama gave a speech at", {15, 21}),
				252	std::make_pair(15, 27));
				253	}
				254	#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
				255
				256	#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
				257	TEST_P(TextClassifierTest,
				258	SuggestSelectionRegularExpressionConflictsModelWins) {
				259	const std::string test_model = ReadFile(GetModelPath() + GetParam());
				260	std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
				261
				262	// Add test regex models.
				263	unpacked_model->regex_model.reset(new RegexModelT);
				264	unpacked_model->regex_model->patterns.push_back(MakePattern(
				265	"person", " (Barack Obama) ", /enabled_for_classification=/false,
				266	/enabled_for_selection=/true, /enabled_for_annotation=/false, 1.0));
				267	unpacked_model->regex_model->patterns.push_back(MakePattern(
				268	"flight", "([a-zA-Z]{2} ?\\d{2,4})", /enabled_for_classification=/false,
				269	/enabled_for_selection=/true, /enabled_for_annotation=/false, 1.0));
				270	unpacked_model->regex_model->patterns.back()->priority_score = 0.5;
				271
				272	flatbuffers::FlatBufferBuilder builder;
				273	builder.Finish(Model::Pack(builder, unpacked_model.get()));
				274
				275	std::unique_ptr<TextClassifier> classifier =
				276	TextClassifier::FromUnownedBuffer(
				277	reinterpret_cast<const char*>(builder.GetBufferPointer()),
				278	builder.GetSize());
				279	ASSERT_TRUE(classifier);
				280
				281	// Check conflict resolution.
				282	EXPECT_EQ(
				283	classifier->SuggestSelection(
				284	"saw Barack Obama today .. 350 Third Street, Cambridge, MA 0123",
				285	{55, 57}),
				286	std::make_pair(26, 62));
				287	}
				288	#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
				289
				290	#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
				291	TEST_P(TextClassifierTest,
				292	SuggestSelectionRegularExpressionConflictsRegexWins) {
				293	const std::string test_model = ReadFile(GetModelPath() + GetParam());
				294	std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
				295
				296	// Add test regex models.
				297	unpacked_model->regex_model.reset(new RegexModelT);
				298	unpacked_model->regex_model->patterns.push_back(MakePattern(
				299	"person", " (Barack Obama) ", /enabled_for_classification=/false,
				300	/enabled_for_selection=/true, /enabled_for_annotation=/false, 1.0));
				301	unpacked_model->regex_model->patterns.push_back(MakePattern(
				302	"flight", "([a-zA-Z]{2} ?\\d{2,4})", /enabled_for_classification=/false,
				303	/enabled_for_selection=/true, /enabled_for_annotation=/false, 1.0));
				304	unpacked_model->regex_model->patterns.back()->priority_score = 1.1;
				305
				306	flatbuffers::FlatBufferBuilder builder;
				307	builder.Finish(Model::Pack(builder, unpacked_model.get()));
				308
				309	std::unique_ptr<TextClassifier> classifier =
				310	TextClassifier::FromUnownedBuffer(
				311	reinterpret_cast<const char*>(builder.GetBufferPointer()),
				312	builder.GetSize());
				313	ASSERT_TRUE(classifier);
				314
				315	// Check conflict resolution.
				316	EXPECT_EQ(
				317	classifier->SuggestSelection(
				318	"saw Barack Obama today .. 350 Third Street, Cambridge, MA 0123",
				319	{55, 57}),
				320	std::make_pair(55, 62));
				321	}
				322	#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
				323
				324	#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
				325	TEST_P(TextClassifierTest, AnnotateRegex) {
				326	CREATE_UNILIB_FOR_TESTING;
				327	const std::string test_model = ReadFile(GetModelPath() + GetParam());
				328	std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
				329
				330	// Add test regex models.
				331	unpacked_model->regex_model.reset(new RegexModelT);
				332	unpacked_model->regex_model->patterns.push_back(MakePattern(
				333	"person", " (Barack Obama) ", /enabled_for_classification=/false,
				334	/enabled_for_selection=/false, /enabled_for_annotation=/true, 1.0));
				335	unpacked_model->regex_model->patterns.push_back(MakePattern(
				336	"flight", "([a-zA-Z]{2} ?\\d{2,4})", /enabled_for_classification=/false,
				337	/enabled_for_selection=/false, /enabled_for_annotation=/true, 0.5));
				338	flatbuffers::FlatBufferBuilder builder;
				339	builder.Finish(Model::Pack(builder, unpacked_model.get()));
				340
				341	std::unique_ptr<TextClassifier> classifier =
				342	TextClassifier::FromUnownedBuffer(
				343	reinterpret_cast<const char*>(builder.GetBufferPointer()),
				344	builder.GetSize(), &unilib);
				345	ASSERT_TRUE(classifier);
				346
				347	const std::string test_string =
				348	"& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
				349	"number is 853 225 3556";
				350	EXPECT_THAT(classifier->Annotate(test_string),
				351	ElementsAreArray({
				352	IsAnnotatedSpan(6, 18, "person"),
				353	IsAnnotatedSpan(19, 24, "date"),
				354	IsAnnotatedSpan(28, 55, "address"),
				355	IsAnnotatedSpan(79, 91, "phone"),
				356	}));
				357	}
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	358	#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
				359
				360	TEST_P(TextClassifierTest, PhoneFiltering) {
				361	CREATE_UNILIB_FOR_TESTING;
				362	std::unique_ptr<TextClassifier> classifier =
				363	TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	364	ASSERT_TRUE(classifier);
				365
				366	EXPECT_EQ("phone", FirstResult(classifier->ClassifyText(
				367	"phone: (123) 456 789", {7, 20})));
				368	EXPECT_EQ("phone", FirstResult(classifier->ClassifyText(
				369	"phone: (123) 456 789,0001112", {7, 25})));
				370	EXPECT_EQ("other", FirstResult(classifier->ClassifyText(
				371	"phone: (123) 456 789,0001112", {7, 28})));
				372	}
				373
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	374	TEST_P(TextClassifierTest, SuggestSelection) {
				375	CREATE_UNILIB_FOR_TESTING;
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	376	std::unique_ptr<TextClassifier> classifier =
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	377	TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	378	ASSERT_TRUE(classifier);
				379
				380	EXPECT_EQ(classifier->SuggestSelection(
				381	"this afternoon Barack Obama gave a speech at", {15, 21}),
				382	std::make_pair(15, 21));
				383
				384	// Try passing whole string.
				385	// If more than 1 token is specified, we should return back what entered.
				386	EXPECT_EQ(
				387	classifier->SuggestSelection("350 Third Street, Cambridge", {0, 27}),
				388	std::make_pair(0, 27));
				389
				390	// Single letter.
				391	EXPECT_EQ(classifier->SuggestSelection("a", {0, 1}), std::make_pair(0, 1));
				392
				393	// Single word.
				394	EXPECT_EQ(classifier->SuggestSelection("asdf", {0, 4}), std::make_pair(0, 4));
				395
				396	EXPECT_EQ(
				397	classifier->SuggestSelection("call me at 857 225 3556 today", {11, 14}),
				398	std::make_pair(11, 23));
				399
				400	// Unpaired bracket stripping.
				401	EXPECT_EQ(
				402	classifier->SuggestSelection("call me at (857) 225 3556 today", {11, 16}),
				403	std::make_pair(11, 25));
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	404	EXPECT_EQ(classifier->SuggestSelection("call me at (857 today", {11, 15}),
				405	std::make_pair(12, 15));
				406	EXPECT_EQ(classifier->SuggestSelection("call me at 3556) today", {11, 16}),
				407	std::make_pair(11, 15));
				408	EXPECT_EQ(classifier->SuggestSelection("call me at )857( today", {11, 16}),
				409	std::make_pair(12, 15));
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	410
				411	// If the resulting selection would be empty, the original span is returned.
				412	EXPECT_EQ(classifier->SuggestSelection("call me at )( today", {11, 13}),
				413	std::make_pair(11, 13));
				414	EXPECT_EQ(classifier->SuggestSelection("call me at ( today", {11, 12}),
				415	std::make_pair(11, 12));
				416	EXPECT_EQ(classifier->SuggestSelection("call me at ) today", {11, 12}),
				417	std::make_pair(11, 12));
				418	}
				419
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	420	TEST_P(TextClassifierTest, SuggestSelectionDisabledFail) {
				421	CREATE_UNILIB_FOR_TESTING;
				422	const std::string test_model = ReadFile(GetModelPath() + GetParam());
				423	std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
				424
				425	// Disable the selection model.
				426	unpacked_model->selection_model.clear();
				427	unpacked_model->triggering_options.reset(new ModelTriggeringOptionsT);
				428	unpacked_model->triggering_options->enabled_modes = ModeFlag_ANNOTATION;
				429
				430	flatbuffers::FlatBufferBuilder builder;
				431	builder.Finish(Model::Pack(builder, unpacked_model.get()));
				432
				433	std::unique_ptr<TextClassifier> classifier =
				434	TextClassifier::FromUnownedBuffer(
				435	reinterpret_cast<const char*>(builder.GetBufferPointer()),
				436	builder.GetSize(), &unilib);
				437	// Selection model needs to be present for annotation.
				438	ASSERT_FALSE(classifier);
				439	}
				440
				441	TEST_P(TextClassifierTest, SuggestSelectionDisabled) {
				442	CREATE_UNILIB_FOR_TESTING;
				443	const std::string test_model = ReadFile(GetModelPath() + GetParam());
				444	std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
				445
				446	// Disable the selection model.
				447	unpacked_model->selection_model.clear();
				448	unpacked_model->triggering_options.reset(new ModelTriggeringOptionsT);
				449	unpacked_model->triggering_options->enabled_modes = ModeFlag_CLASSIFICATION;
				450	unpacked_model->enabled_modes = ModeFlag_CLASSIFICATION;
				451
				452	flatbuffers::FlatBufferBuilder builder;
				453	builder.Finish(Model::Pack(builder, unpacked_model.get()));
				454
				455	std::unique_ptr<TextClassifier> classifier =
				456	TextClassifier::FromUnownedBuffer(
				457	reinterpret_cast<const char*>(builder.GetBufferPointer()),
				458	builder.GetSize(), &unilib);
				459	ASSERT_TRUE(classifier);
				460
				461	EXPECT_EQ(
				462	classifier->SuggestSelection("call me at 857 225 3556 today", {11, 14}),
				463	std::make_pair(11, 14));
				464
				465	EXPECT_EQ("phone", FirstResult(classifier->ClassifyText(
				466	"call me at (800) 123-456 today", {11, 24})));
				467
				468	EXPECT_THAT(classifier->Annotate("call me at (800) 123-456 today"),
				469	IsEmpty());
				470	}
				471
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	472	TEST_P(TextClassifierTest, SuggestSelectionsAreSymmetric) {
				473	CREATE_UNILIB_FOR_TESTING;
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	474	std::unique_ptr<TextClassifier> classifier =
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	475	TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	476	ASSERT_TRUE(classifier);
				477
				478	EXPECT_EQ(classifier->SuggestSelection("350 Third Street, Cambridge", {0, 3}),
				479	std::make_pair(0, 27));
				480	EXPECT_EQ(classifier->SuggestSelection("350 Third Street, Cambridge", {4, 9}),
				481	std::make_pair(0, 27));
				482	EXPECT_EQ(
				483	classifier->SuggestSelection("350 Third Street, Cambridge", {10, 16}),
				484	std::make_pair(0, 27));
				485	EXPECT_EQ(classifier->SuggestSelection("a\nb\nc\n350 Third Street, Cambridge",
				486	{16, 22}),
				487	std::make_pair(6, 33));
				488	}
				489
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	490	TEST_P(TextClassifierTest, SuggestSelectionWithNewLine) {
				491	CREATE_UNILIB_FOR_TESTING;
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	492	std::unique_ptr<TextClassifier> classifier =
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	493	TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	494	ASSERT_TRUE(classifier);
				495
				496	EXPECT_EQ(classifier->SuggestSelection("abc\n857 225 3556", {4, 7}),
				497	std::make_pair(4, 16));
				498	EXPECT_EQ(classifier->SuggestSelection("857 225 3556\nabc", {0, 3}),
				499	std::make_pair(0, 12));
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	500
				501	SelectionOptions options;
				502	EXPECT_EQ(classifier->SuggestSelection("857 225\n3556\nabc", {0, 3}, options),
				503	std::make_pair(0, 7));
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	504	}
				505
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	506	TEST_P(TextClassifierTest, SuggestSelectionWithPunctuation) {
				507	CREATE_UNILIB_FOR_TESTING;
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	508	std::unique_ptr<TextClassifier> classifier =
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	509	TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	510	ASSERT_TRUE(classifier);
				511
				512	// From the right.
				513	EXPECT_EQ(classifier->SuggestSelection(
				514	"this afternoon BarackObama, gave a speech at", {15, 26}),
				515	std::make_pair(15, 26));
				516
				517	// From the right multiple.
				518	EXPECT_EQ(classifier->SuggestSelection(
				519	"this afternoon BarackObama,.,.,, gave a speech at", {15, 26}),
				520	std::make_pair(15, 26));
				521
				522	// From the left multiple.
				523	EXPECT_EQ(classifier->SuggestSelection(
				524	"this afternoon ,.,.,,BarackObama gave a speech at", {21, 32}),
				525	std::make_pair(21, 32));
				526
				527	// From both sides.
				528	EXPECT_EQ(classifier->SuggestSelection(
				529	"this afternoon !BarackObama,- gave a speech at", {16, 27}),
				530	std::make_pair(16, 27));
				531	}
				532
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	533	TEST_P(TextClassifierTest, SuggestSelectionNoCrashWithJunk) {
				534	CREATE_UNILIB_FOR_TESTING;
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	535	std::unique_ptr<TextClassifier> classifier =
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	536	TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	537	ASSERT_TRUE(classifier);
				538
				539	// Try passing in bunch of invalid selections.
				540	EXPECT_EQ(classifier->SuggestSelection("", {0, 27}), std::make_pair(0, 27));
				541	EXPECT_EQ(classifier->SuggestSelection("", {-10, 27}),
				542	std::make_pair(-10, 27));
				543	EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {0, 27}),
				544	std::make_pair(0, 27));
				545	EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {-30, 300}),
				546	std::make_pair(-30, 300));
				547	EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {-10, -1}),
				548	std::make_pair(-10, -1));
				549	EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {100, 17}),
				550	std::make_pair(100, 17));
				551	}
				552
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	553	TEST_P(TextClassifierTest, Annotate) {
				554	CREATE_UNILIB_FOR_TESTING;
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	555	std::unique_ptr<TextClassifier> classifier =
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	556	TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	557	ASSERT_TRUE(classifier);
				558
				559	const std::string test_string =
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	560	"& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
				561	"number is 853 225 3556";
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	562	EXPECT_THAT(classifier->Annotate(test_string),
				563	ElementsAreArray({
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	564	#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
				565	IsAnnotatedSpan(19, 24, "date"),
				566	#endif
				567	IsAnnotatedSpan(28, 55, "address"),
				568	IsAnnotatedSpan(79, 91, "phone"),
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	569	}));
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	570
				571	AnnotationOptions options;
				572	EXPECT_THAT(classifier->Annotate("853 225 3556", options),
				573	ElementsAreArray({IsAnnotatedSpan(0, 12, "phone")}));
				574	EXPECT_TRUE(classifier->Annotate("853 225\n3556", options).empty());
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	575	}
				576
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	577	TEST_P(TextClassifierTest, AnnotateSmallBatches) {
				578	CREATE_UNILIB_FOR_TESTING;
				579	const std::string test_model = ReadFile(GetModelPath() + GetParam());
				580	std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
				581
				582	// Set the batch size.
				583	unpacked_model->selection_options->batch_size = 4;
				584	flatbuffers::FlatBufferBuilder builder;
				585	builder.Finish(Model::Pack(builder, unpacked_model.get()));
				586
				587	std::unique_ptr<TextClassifier> classifier =
				588	TextClassifier::FromUnownedBuffer(
				589	reinterpret_cast<const char*>(builder.GetBufferPointer()),
				590	builder.GetSize(), &unilib);
				591	ASSERT_TRUE(classifier);
				592
				593	const std::string test_string =
				594	"& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
				595	"number is 853 225 3556";
				596	EXPECT_THAT(classifier->Annotate(test_string),
				597	ElementsAreArray({
				598	#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
				599	IsAnnotatedSpan(19, 24, "date"),
				600	#endif
				601	IsAnnotatedSpan(28, 55, "address"),
				602	IsAnnotatedSpan(79, 91, "phone"),
				603	}));
				604
				605	AnnotationOptions options;
				606	EXPECT_THAT(classifier->Annotate("853 225 3556", options),
				607	ElementsAreArray({IsAnnotatedSpan(0, 12, "phone")}));
				608	EXPECT_TRUE(classifier->Annotate("853 225\n3556", options).empty());
				609	}
				610
#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
// Sets the minimum annotation confidence to 2 and expects exactly one span to
// remain in ICU builds.
TEST_P(TextClassifierTest, AnnotateFilteringDiscardAll) {
  CREATE_UNILIB_FOR_TESTING;
  const std::string serialized_model = ReadFile(GetModelPath() + GetParam());
  std::unique_ptr<ModelT> model = UnPackModel(serialized_model.c_str());

  model->triggering_options.reset(new ModelTriggeringOptionsT);
  // Add test threshold.
  model->triggering_options->min_annotate_confidence =
      2.f;  // Discards all results.
  flatbuffers::FlatBufferBuilder fbb;
  fbb.Finish(Model::Pack(fbb, model.get()));
  const char* const buffer =
      reinterpret_cast<const char*>(fbb.GetBufferPointer());

  std::unique_ptr<TextClassifier> classifier =
      TextClassifier::FromUnownedBuffer(buffer, fbb.GetSize(), &unilib);
  ASSERT_TRUE(classifier);

  const std::string text =
      "& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
      "number is 853 225 3556";

  // NOTE(review): the one surviving span is presumably the ICU-only "date"
  // result, which would then not be subject to this threshold - confirm.
  EXPECT_EQ(classifier->Annotate(text).size(), 1);
}
#endif
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	637
				638	TEST_P(TextClassifierTest, AnnotateFilteringKeepAll) {
				639	CREATE_UNILIB_FOR_TESTING;
				640	const std::string test_model = ReadFile(GetModelPath() + GetParam());
				641	std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
				642
				643	// Add test thresholds.
				644	unpacked_model->triggering_options.reset(new ModelTriggeringOptionsT);
				645	unpacked_model->triggering_options->min_annotate_confidence =
				646	0.f; // Keeps all results.
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	647	unpacked_model->triggering_options->enabled_modes = ModeFlag_ALL;
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	648	flatbuffers::FlatBufferBuilder builder;
				649	builder.Finish(Model::Pack(builder, unpacked_model.get()));
				650
				651	std::unique_ptr<TextClassifier> classifier =
				652	TextClassifier::FromUnownedBuffer(
				653	reinterpret_cast<const char*>(builder.GetBufferPointer()),
				654	builder.GetSize(), &unilib);
				655	ASSERT_TRUE(classifier);
				656
				657	const std::string test_string =
				658	"& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
				659	"number is 853 225 3556";
				660	#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
				661	EXPECT_EQ(classifier->Annotate(test_string).size(), 3);
				662	#else
				663	// In non-ICU mode there is no "date" result.
				664	EXPECT_EQ(classifier->Annotate(test_string).size(), 2);
				665	#endif
				666	}
				667
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	668	TEST_P(TextClassifierTest, AnnotateDisabled) {
				669	CREATE_UNILIB_FOR_TESTING;
				670	const std::string test_model = ReadFile(GetModelPath() + GetParam());
				671	std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
				672
				673	// Disable the model for annotation.
				674	unpacked_model->enabled_modes = ModeFlag_CLASSIFICATION_AND_SELECTION;
				675	flatbuffers::FlatBufferBuilder builder;
				676	builder.Finish(Model::Pack(builder, unpacked_model.get()));
				677
				678	std::unique_ptr<TextClassifier> classifier =
				679	TextClassifier::FromUnownedBuffer(
				680	reinterpret_cast<const char*>(builder.GetBufferPointer()),
				681	builder.GetSize(), &unilib);
				682	ASSERT_TRUE(classifier);
				683	const std::string test_string =
				684	"& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
				685	"number is 853 225 3556";
				686	EXPECT_THAT(classifier->Annotate(test_string), IsEmpty());
				687	}
				688
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	689	#ifdef LIBTEXTCLASSIFIER_CALENDAR_ICU
				690	TEST_P(TextClassifierTest, ClassifyTextDate) {
				691	std::unique_ptr<TextClassifier> classifier =
				692	TextClassifier::FromPath(GetModelPath() + GetParam());
				693	EXPECT_TRUE(classifier);
				694
				695	std::vector<ClassificationResult> result;
				696	ClassificationOptions options;
				697
				698	options.reference_timezone = "Europe/Zurich";
				699	result = classifier->ClassifyText("january 1, 2017", {0, 15}, options);
				700
				701	ASSERT_EQ(result.size(), 1);
				702	EXPECT_THAT(result[0].collection, "date");
				703	EXPECT_EQ(result[0].datetime_parse_result.time_ms_utc, 1483225200000);
				704	EXPECT_EQ(result[0].datetime_parse_result.granularity,
				705	DatetimeGranularity::GRANULARITY_DAY);
				706	result.clear();
				707
				708	options.reference_timezone = "America/Los_Angeles";
				709	result = classifier->ClassifyText("march 1, 2017", {0, 13}, options);
				710	ASSERT_EQ(result.size(), 1);
				711	EXPECT_THAT(result[0].collection, "date");
				712	EXPECT_EQ(result[0].datetime_parse_result.time_ms_utc, 1488355200000);
				713	EXPECT_EQ(result[0].datetime_parse_result.granularity,
				714	DatetimeGranularity::GRANULARITY_DAY);
				715	result.clear();
				716
				717	options.reference_timezone = "America/Los_Angeles";
				718	result = classifier->ClassifyText("2018/01/01 10:30:20", {0, 19}, options);
				719	ASSERT_EQ(result.size(), 1);
				720	EXPECT_THAT(result[0].collection, "date");
				721	EXPECT_EQ(result[0].datetime_parse_result.time_ms_utc, 1514831420000);
				722	EXPECT_EQ(result[0].datetime_parse_result.granularity,
				723	DatetimeGranularity::GRANULARITY_SECOND);
				724	result.clear();
				725
				726	// Date on another line.
				727	options.reference_timezone = "Europe/Zurich";
				728	result = classifier->ClassifyText(
				729	"hello world this is the first line\n"
				730	"january 1, 2017",
				731	{35, 50}, options);
				732	ASSERT_EQ(result.size(), 1);
				733	EXPECT_THAT(result[0].collection, "date");
				734	EXPECT_EQ(result[0].datetime_parse_result.time_ms_utc, 1483225200000);
				735	EXPECT_EQ(result[0].datetime_parse_result.granularity,
				736	DatetimeGranularity::GRANULARITY_DAY);
				737	result.clear();
				738	}
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	739
				740	TEST_P(TextClassifierTest, SuggestTextDateDisabled) {
				741	CREATE_UNILIB_FOR_TESTING;
				742	const std::string test_model = ReadFile(GetModelPath() + GetParam());
				743	std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
				744
				745	// Disable the patterns for selection.
				746	for (int i = 0; i < unpacked_model->datetime_model->patterns.size(); i++) {
				747	unpacked_model->datetime_model->patterns[i]->enabled_modes =
				748	ModeFlag_ANNOTATION_AND_CLASSIFICATION;
				749	}
				750	flatbuffers::FlatBufferBuilder builder;
				751	builder.Finish(Model::Pack(builder, unpacked_model.get()));
				752
				753	std::unique_ptr<TextClassifier> classifier =
				754	TextClassifier::FromUnownedBuffer(
				755	reinterpret_cast<const char*>(builder.GetBufferPointer()),
				756	builder.GetSize(), &unilib);
				757	ASSERT_TRUE(classifier);
				758	EXPECT_EQ("date",
				759	FirstResult(classifier->ClassifyText("january 1, 2017", {0, 15})));
				760	EXPECT_EQ(classifier->SuggestSelection("january 1, 2017", {0, 7}),
				761	std::make_pair(0, 7));
				762	EXPECT_THAT(classifier->Annotate("january 1, 2017"),
				763	ElementsAreArray({IsAnnotatedSpan(0, 15, "date")}));
				764	}
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	765	#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
				766
				767	class TestingTextClassifier : public TextClassifier {
				768	public:
				769	TestingTextClassifier(const std::string& model, const UniLib* unilib)
				770	: TextClassifier(ViewModel(model.data(), model.size()), unilib) {}
				771
				772	using TextClassifier::ResolveConflicts;
				773	};
				774
				775	AnnotatedSpan MakeAnnotatedSpan(CodepointSpan span,
				776	const std::string& collection,
				777	const float score) {
				778	AnnotatedSpan result;
				779	result.span = span;
				780	result.classification.push_back({collection, score});
				781	return result;
				782	}
				783
				784	TEST(TextClassifierTest, ResolveConflictsTrivial) {
				785	CREATE_UNILIB_FOR_TESTING;
				786	TestingTextClassifier classifier("", &unilib);
				787
				788	std::vector<AnnotatedSpan> candidates{
				789	{MakeAnnotatedSpan({0, 1}, "phone", 1.0)}};
				790
				791	std::vector<int> chosen;
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	792	classifier.ResolveConflicts(candidates, /context=/"",
				793	/interpreter_manager=/nullptr, &chosen);
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	794	EXPECT_THAT(chosen, ElementsAreArray({0}));
				795	}
				796
				797	TEST(TextClassifierTest, ResolveConflictsSequence) {
				798	CREATE_UNILIB_FOR_TESTING;
				799	TestingTextClassifier classifier("", &unilib);
				800
				801	std::vector<AnnotatedSpan> candidates{{
				802	MakeAnnotatedSpan({0, 1}, "phone", 1.0),
				803	MakeAnnotatedSpan({1, 2}, "phone", 1.0),
				804	MakeAnnotatedSpan({2, 3}, "phone", 1.0),
				805	MakeAnnotatedSpan({3, 4}, "phone", 1.0),
				806	MakeAnnotatedSpan({4, 5}, "phone", 1.0),
				807	}};
				808
				809	std::vector<int> chosen;
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	810	classifier.ResolveConflicts(candidates, /context=/"",
				811	/interpreter_manager=/nullptr, &chosen);
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	812	EXPECT_THAT(chosen, ElementsAreArray({0, 1, 2, 3, 4}));
				813	}
				814
				815	TEST(TextClassifierTest, ResolveConflictsThreeSpans) {
				816	CREATE_UNILIB_FOR_TESTING;
				817	TestingTextClassifier classifier("", &unilib);
				818
				819	std::vector<AnnotatedSpan> candidates{{
				820	MakeAnnotatedSpan({0, 3}, "phone", 1.0),
				821	MakeAnnotatedSpan({1, 5}, "phone", 0.5), // Looser!
				822	MakeAnnotatedSpan({3, 7}, "phone", 1.0),
				823	}};
				824
				825	std::vector<int> chosen;
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	826	classifier.ResolveConflicts(candidates, /context=/"",
				827	/interpreter_manager=/nullptr, &chosen);
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	828	EXPECT_THAT(chosen, ElementsAreArray({0, 2}));
				829	}
				830
				831	TEST(TextClassifierTest, ResolveConflictsThreeSpansReversed) {
				832	CREATE_UNILIB_FOR_TESTING;
				833	TestingTextClassifier classifier("", &unilib);
				834
				835	std::vector<AnnotatedSpan> candidates{{
				836	MakeAnnotatedSpan({0, 3}, "phone", 0.5), // Looser!
				837	MakeAnnotatedSpan({1, 5}, "phone", 1.0),
				838	MakeAnnotatedSpan({3, 7}, "phone", 0.6), // Looser!
				839	}};
				840
				841	std::vector<int> chosen;
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	842	classifier.ResolveConflicts(candidates, /context=/"",
				843	/interpreter_manager=/nullptr, &chosen);
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	844	EXPECT_THAT(chosen, ElementsAreArray({1}));
				845	}
				846
				847	TEST(TextClassifierTest, ResolveConflictsFiveSpans) {
				848	CREATE_UNILIB_FOR_TESTING;
				849	TestingTextClassifier classifier("", &unilib);
				850
				851	std::vector<AnnotatedSpan> candidates{{
				852	MakeAnnotatedSpan({0, 3}, "phone", 0.5),
				853	MakeAnnotatedSpan({1, 5}, "other", 1.0), // Looser!
				854	MakeAnnotatedSpan({3, 7}, "phone", 0.6),
				855	MakeAnnotatedSpan({8, 12}, "phone", 0.6), // Looser!
				856	MakeAnnotatedSpan({11, 15}, "phone", 0.9),
				857	}};
				858
				859	std::vector<int> chosen;
Lukas Zilka	ba849e7	2018-03-08 14:48:21 +0100	[diff] [blame^]	860	classifier.ResolveConflicts(candidates, /context=/"",
				861	/interpreter_manager=/nullptr, &chosen);
Lukas Zilka	b23e212	2018-02-09 10:25:19 +0100	[diff] [blame]	862	EXPECT_THAT(chosen, ElementsAreArray({0, 2, 4}));
				863	}
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	864
#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
// Embeds one entity of each listed collection in the middle of a ~100k
// character input (50000 spaces on either side) and checks that annotation,
// selection and classification all locate it at the expected offsets.
TEST_P(TextClassifierTest, LongInput) {
  CREATE_UNILIB_FOR_TESTING;
  std::unique_ptr<TextClassifier> classifier =
      TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
  ASSERT_TRUE(classifier);

  // Number of spaces placed before and after the entity; this is also the
  // codepoint offset at which the entity starts.
  const int padding = 50000;

  // (collection, entity text) pairs to embed.
  const std::vector<std::pair<std::string, std::string>> cases = {
      {"address", "350 Third Street, Cambridge"},
      {"phone", "123 456-7890"},
      {"url", "www.google.com"},
      {"email", "someone@gmail.com"},
      {"flight", "LX 38"},
      {"date", "September 1, 2018"}};

  for (const auto& entry : cases) {
    const std::string& collection = entry.first;
    const std::string& value = entry.second;

    const std::string input_100k =
        std::string(padding, ' ') + value + std::string(padding, ' ');
    const int value_length = value.size();
    const int value_end = padding + value_length;

    EXPECT_THAT(
        classifier->Annotate(input_100k),
        ElementsAreArray({IsAnnotatedSpan(padding, value_end, collection)}));
    EXPECT_EQ(classifier->SuggestSelection(input_100k, {padding, padding + 1}),
              std::make_pair(padding, value_end));
    EXPECT_EQ(collection, FirstResult(classifier->ClassifyText(
                              input_100k, {padding, value_end})));
  }
}
#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
				896
#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
// Coarse test that only makes sure execution finishes in a reasonable amount
// of time on a ~100k character input; the results of the calls are
// deliberately not checked.
TEST_P(TextClassifierTest, LongInputNoResultCheck) {
  CREATE_UNILIB_FOR_TESTING;
  std::unique_ptr<TextClassifier> classifier =
      TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
  ASSERT_TRUE(classifier);

  // Number of spaces placed before and after each value; this is also the
  // codepoint offset at which the value starts.
  const int padding = 50000;

  const std::vector<std::string> values = {
      "http://www.aaaaaaaaaaaaaaaaaaaa.com "};

  for (const std::string& value : values) {
    const std::string input_100k =
        std::string(padding, ' ') + value + std::string(padding, ' ');
    const int value_length = value.size();
    const int value_end = padding + value_length;

    classifier->Annotate(input_100k);
    classifier->SuggestSelection(input_100k, {padding, padding + 1});
    classifier->ClassifyText(input_100k, {padding, value_end});
  }
}
#endif  // LIBTEXTCLASSIFIER_UNILIB_ICU
				918
Lukas Zilka	21d8c98	2018-01-24 11:11:20 +0100	[diff] [blame]	919	} // namespace
				920	} // namespace libtextclassifier2