blob: 74534e2adb99dce6be6e1d6ebca5f614978a4018 [file] [log] [blame]
Lukas Zilka21d8c982018-01-24 11:11:20 +01001/*
2 * Copyright (C) 2017 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "text-classifier.h"
18
19#include <fstream>
20#include <iostream>
21#include <memory>
22#include <string>
23
Lukas Zilkab23e2122018-02-09 10:25:19 +010024#include "model_generated.h"
25#include "types-test-util.h"
Lukas Zilka21d8c982018-01-24 11:11:20 +010026#include "gmock/gmock.h"
27#include "gtest/gtest.h"
28
29namespace libtextclassifier2 {
30namespace {
31
32using testing::ElementsAreArray;
Lukas Zilkaba849e72018-03-08 14:48:21 +010033using testing::IsEmpty;
Lukas Zilka21d8c982018-01-24 11:11:20 +010034using testing::Pair;
Lukas Zilkab23e2122018-02-09 10:25:19 +010035using testing::Values;
Lukas Zilka21d8c982018-01-24 11:11:20 +010036
Lukas Zilkab23e2122018-02-09 10:25:19 +010037std::string FirstResult(const std::vector<ClassificationResult>& results) {
Lukas Zilka21d8c982018-01-24 11:11:20 +010038 if (results.empty()) {
39 return "<INVALID RESULTS>";
40 }
Lukas Zilkab23e2122018-02-09 10:25:19 +010041 return results[0].collection;
Lukas Zilka21d8c982018-01-24 11:11:20 +010042}
43
44MATCHER_P3(IsAnnotatedSpan, start, end, best_class, "") {
45 return testing::Value(arg.span, Pair(start, end)) &&
46 testing::Value(FirstResult(arg.classification), best_class);
47}
48
// Reads the entire contents of the file `file_name` and returns them as a
// string. If the file cannot be opened the stream yields no characters and an
// empty string is returned.
std::string ReadFile(const std::string& file_name) {
  // The tests use this to load serialized (binary) flatbuffer models, so the
  // stream must be opened in binary mode: text mode may translate line endings
  // or stop at an end-of-file marker byte on some platforms and thereby
  // corrupt the buffer.
  std::ifstream file_stream(file_name, std::ios::in | std::ios::binary);
  return std::string(std::istreambuf_iterator<char>(file_stream), {});
}
53
54std::string GetModelPath() {
55 return LIBTEXTCLASSIFIER_TEST_DATA_DIR;
56}
57
58TEST(TextClassifierTest, EmbeddingExecutorLoadingFails) {
Lukas Zilkab23e2122018-02-09 10:25:19 +010059 CREATE_UNILIB_FOR_TESTING;
Lukas Zilka21d8c982018-01-24 11:11:20 +010060 std::unique_ptr<TextClassifier> classifier =
Lukas Zilkab23e2122018-02-09 10:25:19 +010061 TextClassifier::FromPath(GetModelPath() + "wrong_embeddings.fb", &unilib);
Lukas Zilka21d8c982018-01-24 11:11:20 +010062 EXPECT_FALSE(classifier);
63}
64
Lukas Zilkab23e2122018-02-09 10:25:19 +010065class TextClassifierTest : public ::testing::TestWithParam<const char*> {};
66
67INSTANTIATE_TEST_CASE_P(ClickContext, TextClassifierTest,
68 Values("test_model_cc.fb"));
69INSTANTIATE_TEST_CASE_P(BoundsSensitive, TextClassifierTest,
70 Values("test_model.fb"));
71
72TEST_P(TextClassifierTest, ClassifyText) {
73 CREATE_UNILIB_FOR_TESTING;
Lukas Zilka21d8c982018-01-24 11:11:20 +010074 std::unique_ptr<TextClassifier> classifier =
Lukas Zilkab23e2122018-02-09 10:25:19 +010075 TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka21d8c982018-01-24 11:11:20 +010076 ASSERT_TRUE(classifier);
77
78 EXPECT_EQ("other",
79 FirstResult(classifier->ClassifyText(
80 "this afternoon Barack Obama gave a speech at", {15, 27})));
Lukas Zilka21d8c982018-01-24 11:11:20 +010081 EXPECT_EQ("phone", FirstResult(classifier->ClassifyText(
82 "Call me at (800) 123-456 today", {11, 24})));
Lukas Zilka21d8c982018-01-24 11:11:20 +010083
84 // More lines.
85 EXPECT_EQ("other",
86 FirstResult(classifier->ClassifyText(
87 "this afternoon Barack Obama gave a speech at|Visit "
88 "www.google.com every today!|Call me at (800) 123-456 today.",
89 {15, 27})));
Lukas Zilka21d8c982018-01-24 11:11:20 +010090 EXPECT_EQ("phone",
91 FirstResult(classifier->ClassifyText(
92 "this afternoon Barack Obama gave a speech at|Visit "
93 "www.google.com every today!|Call me at (800) 123-456 today.",
94 {90, 103})));
95
96 // Single word.
97 EXPECT_EQ("other", FirstResult(classifier->ClassifyText("obama", {0, 5})));
98 EXPECT_EQ("other", FirstResult(classifier->ClassifyText("asdf", {0, 4})));
99 EXPECT_EQ("<INVALID RESULTS>",
100 FirstResult(classifier->ClassifyText("asdf", {0, 0})));
101
102 // Junk.
103 EXPECT_EQ("<INVALID RESULTS>",
104 FirstResult(classifier->ClassifyText("", {0, 0})));
105 EXPECT_EQ("<INVALID RESULTS>", FirstResult(classifier->ClassifyText(
106 "a\n\n\n\nx x x\n\n\n\n\n\n", {1, 5})));
107}
108
Lukas Zilkaba849e72018-03-08 14:48:21 +0100109TEST_P(TextClassifierTest, ClassifyTextDisabledFail) {
110 CREATE_UNILIB_FOR_TESTING;
111 const std::string test_model = ReadFile(GetModelPath() + GetParam());
112 std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
113
114 unpacked_model->classification_model.clear();
115 unpacked_model->triggering_options.reset(new ModelTriggeringOptionsT);
116 unpacked_model->triggering_options->enabled_modes = ModeFlag_SELECTION;
117
118 flatbuffers::FlatBufferBuilder builder;
119 builder.Finish(Model::Pack(builder, unpacked_model.get()));
120
121 std::unique_ptr<TextClassifier> classifier =
122 TextClassifier::FromUnownedBuffer(
123 reinterpret_cast<const char*>(builder.GetBufferPointer()),
124 builder.GetSize(), &unilib);
125
126 // The classification model is still needed for selection scores.
127 ASSERT_FALSE(classifier);
128}
129
130TEST_P(TextClassifierTest, ClassifyTextDisabled) {
131 CREATE_UNILIB_FOR_TESTING;
132 const std::string test_model = ReadFile(GetModelPath() + GetParam());
133 std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
134
135 unpacked_model->triggering_options.reset(new ModelTriggeringOptionsT);
136 unpacked_model->triggering_options->enabled_modes =
137 ModeFlag_ANNOTATION_AND_SELECTION;
138
139 flatbuffers::FlatBufferBuilder builder;
140 builder.Finish(Model::Pack(builder, unpacked_model.get()));
141
142 std::unique_ptr<TextClassifier> classifier =
143 TextClassifier::FromUnownedBuffer(
144 reinterpret_cast<const char*>(builder.GetBufferPointer()),
145 builder.GetSize(), &unilib);
146 ASSERT_TRUE(classifier);
147
148 EXPECT_THAT(
149 classifier->ClassifyText("Call me at (800) 123-456 today", {11, 24}),
150 IsEmpty());
151}
152
Lukas Zilkab23e2122018-02-09 10:25:19 +0100153std::unique_ptr<RegexModel_::PatternT> MakePattern(
154 const std::string& collection_name, const std::string& pattern,
155 const bool enabled_for_classification, const bool enabled_for_selection,
156 const bool enabled_for_annotation, const float score) {
157 std::unique_ptr<RegexModel_::PatternT> result(new RegexModel_::PatternT);
158 result->collection_name = collection_name;
159 result->pattern = pattern;
Lukas Zilkaba849e72018-03-08 14:48:21 +0100160 // We cannot directly operate with |= on the flag, so use an int here.
161 int enabled_modes = ModeFlag_NONE;
162 if (enabled_for_annotation) enabled_modes |= ModeFlag_ANNOTATION;
163 if (enabled_for_classification) enabled_modes |= ModeFlag_CLASSIFICATION;
164 if (enabled_for_selection) enabled_modes |= ModeFlag_SELECTION;
165 result->enabled_modes = static_cast<ModeFlag>(enabled_modes);
Lukas Zilkab23e2122018-02-09 10:25:19 +0100166 result->target_classification_score = score;
167 result->priority_score = score;
168 return result;
169}
170
171#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
172TEST_P(TextClassifierTest, ClassifyTextRegularExpression) {
173 CREATE_UNILIB_FOR_TESTING;
174 const std::string test_model = ReadFile(GetModelPath() + GetParam());
175 std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
176
177 // Add test regex models.
178 unpacked_model->regex_model->patterns.push_back(MakePattern(
179 "person", "Barack Obama", /*enabled_for_classification=*/true,
180 /*enabled_for_selection=*/false, /*enabled_for_annotation=*/false, 1.0));
181 unpacked_model->regex_model->patterns.push_back(MakePattern(
182 "flight", "[a-zA-Z]{2}\\d{2,4}", /*enabled_for_classification=*/true,
183 /*enabled_for_selection=*/false, /*enabled_for_annotation=*/false, 0.5));
184
185 flatbuffers::FlatBufferBuilder builder;
186 builder.Finish(Model::Pack(builder, unpacked_model.get()));
187
Lukas Zilka21d8c982018-01-24 11:11:20 +0100188 std::unique_ptr<TextClassifier> classifier =
Lukas Zilkab23e2122018-02-09 10:25:19 +0100189 TextClassifier::FromUnownedBuffer(
190 reinterpret_cast<const char*>(builder.GetBufferPointer()),
191 builder.GetSize(), &unilib);
192 ASSERT_TRUE(classifier);
193
194 EXPECT_EQ("flight",
195 FirstResult(classifier->ClassifyText(
196 "Your flight LX373 is delayed by 3 hours.", {12, 17})));
197 EXPECT_EQ("person",
198 FirstResult(classifier->ClassifyText(
199 "this afternoon Barack Obama gave a speech at", {15, 27})));
200 EXPECT_EQ("email",
201 FirstResult(classifier->ClassifyText("you@android.com", {0, 15})));
202 EXPECT_EQ("email", FirstResult(classifier->ClassifyText(
203 "Contact me at you@android.com", {14, 29})));
204
205 EXPECT_EQ("url", FirstResult(classifier->ClassifyText(
206 "Visit www.google.com every today!", {6, 20})));
207
208 EXPECT_EQ("flight", FirstResult(classifier->ClassifyText("LX 37", {0, 5})));
209 EXPECT_EQ("flight", FirstResult(classifier->ClassifyText("flight LX 37 abcd",
210 {7, 12})));
211
212 // More lines.
213 EXPECT_EQ("url",
214 FirstResult(classifier->ClassifyText(
215 "this afternoon Barack Obama gave a speech at|Visit "
216 "www.google.com every today!|Call me at (800) 123-456 today.",
217 {51, 65})));
218}
219#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
220
221#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
Lukas Zilkab23e2122018-02-09 10:25:19 +0100222TEST_P(TextClassifierTest, SuggestSelectionRegularExpression) {
223 CREATE_UNILIB_FOR_TESTING;
224 const std::string test_model = ReadFile(GetModelPath() + GetParam());
225 std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
226
227 // Add test regex models.
228 unpacked_model->regex_model.reset(new RegexModelT);
229 unpacked_model->regex_model->patterns.push_back(MakePattern(
230 "person", " (Barack Obama) ", /*enabled_for_classification=*/false,
231 /*enabled_for_selection=*/true, /*enabled_for_annotation=*/false, 1.0));
232 unpacked_model->regex_model->patterns.push_back(MakePattern(
233 "flight", "([a-zA-Z]{2} ?\\d{2,4})", /*enabled_for_classification=*/false,
234 /*enabled_for_selection=*/true, /*enabled_for_annotation=*/false, 1.0));
235 unpacked_model->regex_model->patterns.back()->priority_score = 1.1;
236
237 flatbuffers::FlatBufferBuilder builder;
238 builder.Finish(Model::Pack(builder, unpacked_model.get()));
239
240 std::unique_ptr<TextClassifier> classifier =
241 TextClassifier::FromUnownedBuffer(
242 reinterpret_cast<const char*>(builder.GetBufferPointer()),
243 builder.GetSize(), &unilib);
244 ASSERT_TRUE(classifier);
245
246 // Check regular expression selection.
247 EXPECT_EQ(classifier->SuggestSelection(
248 "Your flight MA 0123 is delayed by 3 hours.", {12, 14}),
249 std::make_pair(12, 19));
250 EXPECT_EQ(classifier->SuggestSelection(
251 "this afternoon Barack Obama gave a speech at", {15, 21}),
252 std::make_pair(15, 27));
253}
254#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
255
256#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
257TEST_P(TextClassifierTest,
258 SuggestSelectionRegularExpressionConflictsModelWins) {
259 const std::string test_model = ReadFile(GetModelPath() + GetParam());
260 std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
261
262 // Add test regex models.
263 unpacked_model->regex_model.reset(new RegexModelT);
264 unpacked_model->regex_model->patterns.push_back(MakePattern(
265 "person", " (Barack Obama) ", /*enabled_for_classification=*/false,
266 /*enabled_for_selection=*/true, /*enabled_for_annotation=*/false, 1.0));
267 unpacked_model->regex_model->patterns.push_back(MakePattern(
268 "flight", "([a-zA-Z]{2} ?\\d{2,4})", /*enabled_for_classification=*/false,
269 /*enabled_for_selection=*/true, /*enabled_for_annotation=*/false, 1.0));
270 unpacked_model->regex_model->patterns.back()->priority_score = 0.5;
271
272 flatbuffers::FlatBufferBuilder builder;
273 builder.Finish(Model::Pack(builder, unpacked_model.get()));
274
275 std::unique_ptr<TextClassifier> classifier =
276 TextClassifier::FromUnownedBuffer(
277 reinterpret_cast<const char*>(builder.GetBufferPointer()),
278 builder.GetSize());
279 ASSERT_TRUE(classifier);
280
281 // Check conflict resolution.
282 EXPECT_EQ(
283 classifier->SuggestSelection(
284 "saw Barack Obama today .. 350 Third Street, Cambridge, MA 0123",
285 {55, 57}),
286 std::make_pair(26, 62));
287}
288#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
289
290#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
291TEST_P(TextClassifierTest,
292 SuggestSelectionRegularExpressionConflictsRegexWins) {
293 const std::string test_model = ReadFile(GetModelPath() + GetParam());
294 std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
295
296 // Add test regex models.
297 unpacked_model->regex_model.reset(new RegexModelT);
298 unpacked_model->regex_model->patterns.push_back(MakePattern(
299 "person", " (Barack Obama) ", /*enabled_for_classification=*/false,
300 /*enabled_for_selection=*/true, /*enabled_for_annotation=*/false, 1.0));
301 unpacked_model->regex_model->patterns.push_back(MakePattern(
302 "flight", "([a-zA-Z]{2} ?\\d{2,4})", /*enabled_for_classification=*/false,
303 /*enabled_for_selection=*/true, /*enabled_for_annotation=*/false, 1.0));
304 unpacked_model->regex_model->patterns.back()->priority_score = 1.1;
305
306 flatbuffers::FlatBufferBuilder builder;
307 builder.Finish(Model::Pack(builder, unpacked_model.get()));
308
309 std::unique_ptr<TextClassifier> classifier =
310 TextClassifier::FromUnownedBuffer(
311 reinterpret_cast<const char*>(builder.GetBufferPointer()),
312 builder.GetSize());
313 ASSERT_TRUE(classifier);
314
315 // Check conflict resolution.
316 EXPECT_EQ(
317 classifier->SuggestSelection(
318 "saw Barack Obama today .. 350 Third Street, Cambridge, MA 0123",
319 {55, 57}),
320 std::make_pair(55, 62));
321}
322#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
323
324#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
325TEST_P(TextClassifierTest, AnnotateRegex) {
326 CREATE_UNILIB_FOR_TESTING;
327 const std::string test_model = ReadFile(GetModelPath() + GetParam());
328 std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
329
330 // Add test regex models.
331 unpacked_model->regex_model.reset(new RegexModelT);
332 unpacked_model->regex_model->patterns.push_back(MakePattern(
333 "person", " (Barack Obama) ", /*enabled_for_classification=*/false,
334 /*enabled_for_selection=*/false, /*enabled_for_annotation=*/true, 1.0));
335 unpacked_model->regex_model->patterns.push_back(MakePattern(
336 "flight", "([a-zA-Z]{2} ?\\d{2,4})", /*enabled_for_classification=*/false,
337 /*enabled_for_selection=*/false, /*enabled_for_annotation=*/true, 0.5));
338 flatbuffers::FlatBufferBuilder builder;
339 builder.Finish(Model::Pack(builder, unpacked_model.get()));
340
341 std::unique_ptr<TextClassifier> classifier =
342 TextClassifier::FromUnownedBuffer(
343 reinterpret_cast<const char*>(builder.GetBufferPointer()),
344 builder.GetSize(), &unilib);
345 ASSERT_TRUE(classifier);
346
347 const std::string test_string =
348 "& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
349 "number is 853 225 3556";
350 EXPECT_THAT(classifier->Annotate(test_string),
351 ElementsAreArray({
352 IsAnnotatedSpan(6, 18, "person"),
353 IsAnnotatedSpan(19, 24, "date"),
354 IsAnnotatedSpan(28, 55, "address"),
355 IsAnnotatedSpan(79, 91, "phone"),
356 }));
357}
Lukas Zilkab23e2122018-02-09 10:25:19 +0100358#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
359
360TEST_P(TextClassifierTest, PhoneFiltering) {
361 CREATE_UNILIB_FOR_TESTING;
362 std::unique_ptr<TextClassifier> classifier =
363 TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka21d8c982018-01-24 11:11:20 +0100364 ASSERT_TRUE(classifier);
365
366 EXPECT_EQ("phone", FirstResult(classifier->ClassifyText(
367 "phone: (123) 456 789", {7, 20})));
368 EXPECT_EQ("phone", FirstResult(classifier->ClassifyText(
369 "phone: (123) 456 789,0001112", {7, 25})));
370 EXPECT_EQ("other", FirstResult(classifier->ClassifyText(
371 "phone: (123) 456 789,0001112", {7, 28})));
372}
373
Lukas Zilkab23e2122018-02-09 10:25:19 +0100374TEST_P(TextClassifierTest, SuggestSelection) {
375 CREATE_UNILIB_FOR_TESTING;
Lukas Zilka21d8c982018-01-24 11:11:20 +0100376 std::unique_ptr<TextClassifier> classifier =
Lukas Zilkab23e2122018-02-09 10:25:19 +0100377 TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka21d8c982018-01-24 11:11:20 +0100378 ASSERT_TRUE(classifier);
379
380 EXPECT_EQ(classifier->SuggestSelection(
381 "this afternoon Barack Obama gave a speech at", {15, 21}),
382 std::make_pair(15, 21));
383
384 // Try passing whole string.
385 // If more than 1 token is specified, we should return back what entered.
386 EXPECT_EQ(
387 classifier->SuggestSelection("350 Third Street, Cambridge", {0, 27}),
388 std::make_pair(0, 27));
389
390 // Single letter.
391 EXPECT_EQ(classifier->SuggestSelection("a", {0, 1}), std::make_pair(0, 1));
392
393 // Single word.
394 EXPECT_EQ(classifier->SuggestSelection("asdf", {0, 4}), std::make_pair(0, 4));
395
396 EXPECT_EQ(
397 classifier->SuggestSelection("call me at 857 225 3556 today", {11, 14}),
398 std::make_pair(11, 23));
399
400 // Unpaired bracket stripping.
401 EXPECT_EQ(
402 classifier->SuggestSelection("call me at (857) 225 3556 today", {11, 16}),
403 std::make_pair(11, 25));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100404 EXPECT_EQ(classifier->SuggestSelection("call me at (857 today", {11, 15}),
405 std::make_pair(12, 15));
406 EXPECT_EQ(classifier->SuggestSelection("call me at 3556) today", {11, 16}),
407 std::make_pair(11, 15));
408 EXPECT_EQ(classifier->SuggestSelection("call me at )857( today", {11, 16}),
409 std::make_pair(12, 15));
Lukas Zilka21d8c982018-01-24 11:11:20 +0100410
411 // If the resulting selection would be empty, the original span is returned.
412 EXPECT_EQ(classifier->SuggestSelection("call me at )( today", {11, 13}),
413 std::make_pair(11, 13));
414 EXPECT_EQ(classifier->SuggestSelection("call me at ( today", {11, 12}),
415 std::make_pair(11, 12));
416 EXPECT_EQ(classifier->SuggestSelection("call me at ) today", {11, 12}),
417 std::make_pair(11, 12));
418}
419
Lukas Zilkaba849e72018-03-08 14:48:21 +0100420TEST_P(TextClassifierTest, SuggestSelectionDisabledFail) {
421 CREATE_UNILIB_FOR_TESTING;
422 const std::string test_model = ReadFile(GetModelPath() + GetParam());
423 std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
424
425 // Disable the selection model.
426 unpacked_model->selection_model.clear();
427 unpacked_model->triggering_options.reset(new ModelTriggeringOptionsT);
428 unpacked_model->triggering_options->enabled_modes = ModeFlag_ANNOTATION;
429
430 flatbuffers::FlatBufferBuilder builder;
431 builder.Finish(Model::Pack(builder, unpacked_model.get()));
432
433 std::unique_ptr<TextClassifier> classifier =
434 TextClassifier::FromUnownedBuffer(
435 reinterpret_cast<const char*>(builder.GetBufferPointer()),
436 builder.GetSize(), &unilib);
437 // Selection model needs to be present for annotation.
438 ASSERT_FALSE(classifier);
439}
440
441TEST_P(TextClassifierTest, SuggestSelectionDisabled) {
442 CREATE_UNILIB_FOR_TESTING;
443 const std::string test_model = ReadFile(GetModelPath() + GetParam());
444 std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
445
446 // Disable the selection model.
447 unpacked_model->selection_model.clear();
448 unpacked_model->triggering_options.reset(new ModelTriggeringOptionsT);
449 unpacked_model->triggering_options->enabled_modes = ModeFlag_CLASSIFICATION;
450 unpacked_model->enabled_modes = ModeFlag_CLASSIFICATION;
451
452 flatbuffers::FlatBufferBuilder builder;
453 builder.Finish(Model::Pack(builder, unpacked_model.get()));
454
455 std::unique_ptr<TextClassifier> classifier =
456 TextClassifier::FromUnownedBuffer(
457 reinterpret_cast<const char*>(builder.GetBufferPointer()),
458 builder.GetSize(), &unilib);
459 ASSERT_TRUE(classifier);
460
461 EXPECT_EQ(
462 classifier->SuggestSelection("call me at 857 225 3556 today", {11, 14}),
463 std::make_pair(11, 14));
464
465 EXPECT_EQ("phone", FirstResult(classifier->ClassifyText(
466 "call me at (800) 123-456 today", {11, 24})));
467
468 EXPECT_THAT(classifier->Annotate("call me at (800) 123-456 today"),
469 IsEmpty());
470}
471
Lukas Zilkab23e2122018-02-09 10:25:19 +0100472TEST_P(TextClassifierTest, SuggestSelectionsAreSymmetric) {
473 CREATE_UNILIB_FOR_TESTING;
Lukas Zilka21d8c982018-01-24 11:11:20 +0100474 std::unique_ptr<TextClassifier> classifier =
Lukas Zilkab23e2122018-02-09 10:25:19 +0100475 TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka21d8c982018-01-24 11:11:20 +0100476 ASSERT_TRUE(classifier);
477
478 EXPECT_EQ(classifier->SuggestSelection("350 Third Street, Cambridge", {0, 3}),
479 std::make_pair(0, 27));
480 EXPECT_EQ(classifier->SuggestSelection("350 Third Street, Cambridge", {4, 9}),
481 std::make_pair(0, 27));
482 EXPECT_EQ(
483 classifier->SuggestSelection("350 Third Street, Cambridge", {10, 16}),
484 std::make_pair(0, 27));
485 EXPECT_EQ(classifier->SuggestSelection("a\nb\nc\n350 Third Street, Cambridge",
486 {16, 22}),
487 std::make_pair(6, 33));
488}
489
Lukas Zilkab23e2122018-02-09 10:25:19 +0100490TEST_P(TextClassifierTest, SuggestSelectionWithNewLine) {
491 CREATE_UNILIB_FOR_TESTING;
Lukas Zilka21d8c982018-01-24 11:11:20 +0100492 std::unique_ptr<TextClassifier> classifier =
Lukas Zilkab23e2122018-02-09 10:25:19 +0100493 TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka21d8c982018-01-24 11:11:20 +0100494 ASSERT_TRUE(classifier);
495
496 EXPECT_EQ(classifier->SuggestSelection("abc\n857 225 3556", {4, 7}),
497 std::make_pair(4, 16));
498 EXPECT_EQ(classifier->SuggestSelection("857 225 3556\nabc", {0, 3}),
499 std::make_pair(0, 12));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100500
501 SelectionOptions options;
502 EXPECT_EQ(classifier->SuggestSelection("857 225\n3556\nabc", {0, 3}, options),
503 std::make_pair(0, 7));
Lukas Zilka21d8c982018-01-24 11:11:20 +0100504}
505
Lukas Zilkab23e2122018-02-09 10:25:19 +0100506TEST_P(TextClassifierTest, SuggestSelectionWithPunctuation) {
507 CREATE_UNILIB_FOR_TESTING;
Lukas Zilka21d8c982018-01-24 11:11:20 +0100508 std::unique_ptr<TextClassifier> classifier =
Lukas Zilkab23e2122018-02-09 10:25:19 +0100509 TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka21d8c982018-01-24 11:11:20 +0100510 ASSERT_TRUE(classifier);
511
512 // From the right.
513 EXPECT_EQ(classifier->SuggestSelection(
514 "this afternoon BarackObama, gave a speech at", {15, 26}),
515 std::make_pair(15, 26));
516
517 // From the right multiple.
518 EXPECT_EQ(classifier->SuggestSelection(
519 "this afternoon BarackObama,.,.,, gave a speech at", {15, 26}),
520 std::make_pair(15, 26));
521
522 // From the left multiple.
523 EXPECT_EQ(classifier->SuggestSelection(
524 "this afternoon ,.,.,,BarackObama gave a speech at", {21, 32}),
525 std::make_pair(21, 32));
526
527 // From both sides.
528 EXPECT_EQ(classifier->SuggestSelection(
529 "this afternoon !BarackObama,- gave a speech at", {16, 27}),
530 std::make_pair(16, 27));
531}
532
Lukas Zilkab23e2122018-02-09 10:25:19 +0100533TEST_P(TextClassifierTest, SuggestSelectionNoCrashWithJunk) {
534 CREATE_UNILIB_FOR_TESTING;
Lukas Zilka21d8c982018-01-24 11:11:20 +0100535 std::unique_ptr<TextClassifier> classifier =
Lukas Zilkab23e2122018-02-09 10:25:19 +0100536 TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka21d8c982018-01-24 11:11:20 +0100537 ASSERT_TRUE(classifier);
538
539 // Try passing in bunch of invalid selections.
540 EXPECT_EQ(classifier->SuggestSelection("", {0, 27}), std::make_pair(0, 27));
541 EXPECT_EQ(classifier->SuggestSelection("", {-10, 27}),
542 std::make_pair(-10, 27));
543 EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {0, 27}),
544 std::make_pair(0, 27));
545 EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {-30, 300}),
546 std::make_pair(-30, 300));
547 EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {-10, -1}),
548 std::make_pair(-10, -1));
549 EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {100, 17}),
550 std::make_pair(100, 17));
551}
552
Lukas Zilkab23e2122018-02-09 10:25:19 +0100553TEST_P(TextClassifierTest, Annotate) {
554 CREATE_UNILIB_FOR_TESTING;
Lukas Zilka21d8c982018-01-24 11:11:20 +0100555 std::unique_ptr<TextClassifier> classifier =
Lukas Zilkab23e2122018-02-09 10:25:19 +0100556 TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
Lukas Zilka21d8c982018-01-24 11:11:20 +0100557 ASSERT_TRUE(classifier);
558
559 const std::string test_string =
Lukas Zilkab23e2122018-02-09 10:25:19 +0100560 "& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
561 "number is 853 225 3556";
Lukas Zilka21d8c982018-01-24 11:11:20 +0100562 EXPECT_THAT(classifier->Annotate(test_string),
563 ElementsAreArray({
Lukas Zilkab23e2122018-02-09 10:25:19 +0100564#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
565 IsAnnotatedSpan(19, 24, "date"),
566#endif
567 IsAnnotatedSpan(28, 55, "address"),
568 IsAnnotatedSpan(79, 91, "phone"),
Lukas Zilka21d8c982018-01-24 11:11:20 +0100569 }));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100570
571 AnnotationOptions options;
572 EXPECT_THAT(classifier->Annotate("853 225 3556", options),
573 ElementsAreArray({IsAnnotatedSpan(0, 12, "phone")}));
574 EXPECT_TRUE(classifier->Annotate("853 225\n3556", options).empty());
Lukas Zilka21d8c982018-01-24 11:11:20 +0100575}
576
Lukas Zilkab23e2122018-02-09 10:25:19 +0100577TEST_P(TextClassifierTest, AnnotateSmallBatches) {
578 CREATE_UNILIB_FOR_TESTING;
579 const std::string test_model = ReadFile(GetModelPath() + GetParam());
580 std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
581
582 // Set the batch size.
583 unpacked_model->selection_options->batch_size = 4;
584 flatbuffers::FlatBufferBuilder builder;
585 builder.Finish(Model::Pack(builder, unpacked_model.get()));
586
587 std::unique_ptr<TextClassifier> classifier =
588 TextClassifier::FromUnownedBuffer(
589 reinterpret_cast<const char*>(builder.GetBufferPointer()),
590 builder.GetSize(), &unilib);
591 ASSERT_TRUE(classifier);
592
593 const std::string test_string =
594 "& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
595 "number is 853 225 3556";
596 EXPECT_THAT(classifier->Annotate(test_string),
597 ElementsAreArray({
598#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
599 IsAnnotatedSpan(19, 24, "date"),
600#endif
601 IsAnnotatedSpan(28, 55, "address"),
602 IsAnnotatedSpan(79, 91, "phone"),
603 }));
604
605 AnnotationOptions options;
606 EXPECT_THAT(classifier->Annotate("853 225 3556", options),
607 ElementsAreArray({IsAnnotatedSpan(0, 12, "phone")}));
608 EXPECT_TRUE(classifier->Annotate("853 225\n3556", options).empty());
609}
610
Lukas Zilkaba849e72018-03-08 14:48:21 +0100611#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
Lukas Zilkab23e2122018-02-09 10:25:19 +0100612TEST_P(TextClassifierTest, AnnotateFilteringDiscardAll) {
613 CREATE_UNILIB_FOR_TESTING;
614 const std::string test_model = ReadFile(GetModelPath() + GetParam());
615 std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
616
Lukas Zilkab23e2122018-02-09 10:25:19 +0100617 unpacked_model->triggering_options.reset(new ModelTriggeringOptionsT);
Lukas Zilkaba849e72018-03-08 14:48:21 +0100618 // Add test threshold.
Lukas Zilkab23e2122018-02-09 10:25:19 +0100619 unpacked_model->triggering_options->min_annotate_confidence =
620 2.f; // Discards all results.
621 flatbuffers::FlatBufferBuilder builder;
622 builder.Finish(Model::Pack(builder, unpacked_model.get()));
623
624 std::unique_ptr<TextClassifier> classifier =
625 TextClassifier::FromUnownedBuffer(
626 reinterpret_cast<const char*>(builder.GetBufferPointer()),
627 builder.GetSize(), &unilib);
628 ASSERT_TRUE(classifier);
629
630 const std::string test_string =
631 "& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
632 "number is 853 225 3556";
Lukas Zilkaba849e72018-03-08 14:48:21 +0100633
634 EXPECT_EQ(classifier->Annotate(test_string).size(), 1);
Lukas Zilkab23e2122018-02-09 10:25:19 +0100635}
Lukas Zilkaba849e72018-03-08 14:48:21 +0100636#endif
Lukas Zilkab23e2122018-02-09 10:25:19 +0100637
638TEST_P(TextClassifierTest, AnnotateFilteringKeepAll) {
639 CREATE_UNILIB_FOR_TESTING;
640 const std::string test_model = ReadFile(GetModelPath() + GetParam());
641 std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
642
643 // Add test thresholds.
644 unpacked_model->triggering_options.reset(new ModelTriggeringOptionsT);
645 unpacked_model->triggering_options->min_annotate_confidence =
646 0.f; // Keeps all results.
Lukas Zilkaba849e72018-03-08 14:48:21 +0100647 unpacked_model->triggering_options->enabled_modes = ModeFlag_ALL;
Lukas Zilkab23e2122018-02-09 10:25:19 +0100648 flatbuffers::FlatBufferBuilder builder;
649 builder.Finish(Model::Pack(builder, unpacked_model.get()));
650
651 std::unique_ptr<TextClassifier> classifier =
652 TextClassifier::FromUnownedBuffer(
653 reinterpret_cast<const char*>(builder.GetBufferPointer()),
654 builder.GetSize(), &unilib);
655 ASSERT_TRUE(classifier);
656
657 const std::string test_string =
658 "& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
659 "number is 853 225 3556";
660#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
661 EXPECT_EQ(classifier->Annotate(test_string).size(), 3);
662#else
663 // In non-ICU mode there is no "date" result.
664 EXPECT_EQ(classifier->Annotate(test_string).size(), 2);
665#endif
666}
667
Lukas Zilkaba849e72018-03-08 14:48:21 +0100668TEST_P(TextClassifierTest, AnnotateDisabled) {
669 CREATE_UNILIB_FOR_TESTING;
670 const std::string test_model = ReadFile(GetModelPath() + GetParam());
671 std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
672
673 // Disable the model for annotation.
674 unpacked_model->enabled_modes = ModeFlag_CLASSIFICATION_AND_SELECTION;
675 flatbuffers::FlatBufferBuilder builder;
676 builder.Finish(Model::Pack(builder, unpacked_model.get()));
677
678 std::unique_ptr<TextClassifier> classifier =
679 TextClassifier::FromUnownedBuffer(
680 reinterpret_cast<const char*>(builder.GetBufferPointer()),
681 builder.GetSize(), &unilib);
682 ASSERT_TRUE(classifier);
683 const std::string test_string =
684 "& saw Barack Obama today .. 350 Third Street, Cambridge\nand my phone "
685 "number is 853 225 3556";
686 EXPECT_THAT(classifier->Annotate(test_string), IsEmpty());
687}
688
Lukas Zilkab23e2122018-02-09 10:25:19 +0100689#ifdef LIBTEXTCLASSIFIER_CALENDAR_ICU
690TEST_P(TextClassifierTest, ClassifyTextDate) {
691 std::unique_ptr<TextClassifier> classifier =
692 TextClassifier::FromPath(GetModelPath() + GetParam());
693 EXPECT_TRUE(classifier);
694
695 std::vector<ClassificationResult> result;
696 ClassificationOptions options;
697
698 options.reference_timezone = "Europe/Zurich";
699 result = classifier->ClassifyText("january 1, 2017", {0, 15}, options);
700
701 ASSERT_EQ(result.size(), 1);
702 EXPECT_THAT(result[0].collection, "date");
703 EXPECT_EQ(result[0].datetime_parse_result.time_ms_utc, 1483225200000);
704 EXPECT_EQ(result[0].datetime_parse_result.granularity,
705 DatetimeGranularity::GRANULARITY_DAY);
706 result.clear();
707
708 options.reference_timezone = "America/Los_Angeles";
709 result = classifier->ClassifyText("march 1, 2017", {0, 13}, options);
710 ASSERT_EQ(result.size(), 1);
711 EXPECT_THAT(result[0].collection, "date");
712 EXPECT_EQ(result[0].datetime_parse_result.time_ms_utc, 1488355200000);
713 EXPECT_EQ(result[0].datetime_parse_result.granularity,
714 DatetimeGranularity::GRANULARITY_DAY);
715 result.clear();
716
717 options.reference_timezone = "America/Los_Angeles";
718 result = classifier->ClassifyText("2018/01/01 10:30:20", {0, 19}, options);
719 ASSERT_EQ(result.size(), 1);
720 EXPECT_THAT(result[0].collection, "date");
721 EXPECT_EQ(result[0].datetime_parse_result.time_ms_utc, 1514831420000);
722 EXPECT_EQ(result[0].datetime_parse_result.granularity,
723 DatetimeGranularity::GRANULARITY_SECOND);
724 result.clear();
725
726 // Date on another line.
727 options.reference_timezone = "Europe/Zurich";
728 result = classifier->ClassifyText(
729 "hello world this is the first line\n"
730 "january 1, 2017",
731 {35, 50}, options);
732 ASSERT_EQ(result.size(), 1);
733 EXPECT_THAT(result[0].collection, "date");
734 EXPECT_EQ(result[0].datetime_parse_result.time_ms_utc, 1483225200000);
735 EXPECT_EQ(result[0].datetime_parse_result.granularity,
736 DatetimeGranularity::GRANULARITY_DAY);
737 result.clear();
738}
Lukas Zilkaba849e72018-03-08 14:48:21 +0100739
740TEST_P(TextClassifierTest, SuggestTextDateDisabled) {
741 CREATE_UNILIB_FOR_TESTING;
742 const std::string test_model = ReadFile(GetModelPath() + GetParam());
743 std::unique_ptr<ModelT> unpacked_model = UnPackModel(test_model.c_str());
744
745 // Disable the patterns for selection.
746 for (int i = 0; i < unpacked_model->datetime_model->patterns.size(); i++) {
747 unpacked_model->datetime_model->patterns[i]->enabled_modes =
748 ModeFlag_ANNOTATION_AND_CLASSIFICATION;
749 }
750 flatbuffers::FlatBufferBuilder builder;
751 builder.Finish(Model::Pack(builder, unpacked_model.get()));
752
753 std::unique_ptr<TextClassifier> classifier =
754 TextClassifier::FromUnownedBuffer(
755 reinterpret_cast<const char*>(builder.GetBufferPointer()),
756 builder.GetSize(), &unilib);
757 ASSERT_TRUE(classifier);
758 EXPECT_EQ("date",
759 FirstResult(classifier->ClassifyText("january 1, 2017", {0, 15})));
760 EXPECT_EQ(classifier->SuggestSelection("january 1, 2017", {0, 7}),
761 std::make_pair(0, 7));
762 EXPECT_THAT(classifier->Annotate("january 1, 2017"),
763 ElementsAreArray({IsAnnotatedSpan(0, 15, "date")}));
764}
#endif  // LIBTEXTCLASSIFIER_CALENDAR_ICU
766
767class TestingTextClassifier : public TextClassifier {
768 public:
769 TestingTextClassifier(const std::string& model, const UniLib* unilib)
770 : TextClassifier(ViewModel(model.data(), model.size()), unilib) {}
771
772 using TextClassifier::ResolveConflicts;
773};
774
775AnnotatedSpan MakeAnnotatedSpan(CodepointSpan span,
776 const std::string& collection,
777 const float score) {
778 AnnotatedSpan result;
779 result.span = span;
780 result.classification.push_back({collection, score});
781 return result;
782}
783
784TEST(TextClassifierTest, ResolveConflictsTrivial) {
785 CREATE_UNILIB_FOR_TESTING;
786 TestingTextClassifier classifier("", &unilib);
787
788 std::vector<AnnotatedSpan> candidates{
789 {MakeAnnotatedSpan({0, 1}, "phone", 1.0)}};
790
791 std::vector<int> chosen;
Lukas Zilkaba849e72018-03-08 14:48:21 +0100792 classifier.ResolveConflicts(candidates, /*context=*/"",
793 /*interpreter_manager=*/nullptr, &chosen);
Lukas Zilkab23e2122018-02-09 10:25:19 +0100794 EXPECT_THAT(chosen, ElementsAreArray({0}));
795}
796
797TEST(TextClassifierTest, ResolveConflictsSequence) {
798 CREATE_UNILIB_FOR_TESTING;
799 TestingTextClassifier classifier("", &unilib);
800
801 std::vector<AnnotatedSpan> candidates{{
802 MakeAnnotatedSpan({0, 1}, "phone", 1.0),
803 MakeAnnotatedSpan({1, 2}, "phone", 1.0),
804 MakeAnnotatedSpan({2, 3}, "phone", 1.0),
805 MakeAnnotatedSpan({3, 4}, "phone", 1.0),
806 MakeAnnotatedSpan({4, 5}, "phone", 1.0),
807 }};
808
809 std::vector<int> chosen;
Lukas Zilkaba849e72018-03-08 14:48:21 +0100810 classifier.ResolveConflicts(candidates, /*context=*/"",
811 /*interpreter_manager=*/nullptr, &chosen);
Lukas Zilkab23e2122018-02-09 10:25:19 +0100812 EXPECT_THAT(chosen, ElementsAreArray({0, 1, 2, 3, 4}));
813}
814
815TEST(TextClassifierTest, ResolveConflictsThreeSpans) {
816 CREATE_UNILIB_FOR_TESTING;
817 TestingTextClassifier classifier("", &unilib);
818
819 std::vector<AnnotatedSpan> candidates{{
820 MakeAnnotatedSpan({0, 3}, "phone", 1.0),
821 MakeAnnotatedSpan({1, 5}, "phone", 0.5), // Looser!
822 MakeAnnotatedSpan({3, 7}, "phone", 1.0),
823 }};
824
825 std::vector<int> chosen;
Lukas Zilkaba849e72018-03-08 14:48:21 +0100826 classifier.ResolveConflicts(candidates, /*context=*/"",
827 /*interpreter_manager=*/nullptr, &chosen);
Lukas Zilkab23e2122018-02-09 10:25:19 +0100828 EXPECT_THAT(chosen, ElementsAreArray({0, 2}));
829}
830
831TEST(TextClassifierTest, ResolveConflictsThreeSpansReversed) {
832 CREATE_UNILIB_FOR_TESTING;
833 TestingTextClassifier classifier("", &unilib);
834
835 std::vector<AnnotatedSpan> candidates{{
836 MakeAnnotatedSpan({0, 3}, "phone", 0.5), // Looser!
837 MakeAnnotatedSpan({1, 5}, "phone", 1.0),
838 MakeAnnotatedSpan({3, 7}, "phone", 0.6), // Looser!
839 }};
840
841 std::vector<int> chosen;
Lukas Zilkaba849e72018-03-08 14:48:21 +0100842 classifier.ResolveConflicts(candidates, /*context=*/"",
843 /*interpreter_manager=*/nullptr, &chosen);
Lukas Zilkab23e2122018-02-09 10:25:19 +0100844 EXPECT_THAT(chosen, ElementsAreArray({1}));
845}
846
847TEST(TextClassifierTest, ResolveConflictsFiveSpans) {
848 CREATE_UNILIB_FOR_TESTING;
849 TestingTextClassifier classifier("", &unilib);
850
851 std::vector<AnnotatedSpan> candidates{{
852 MakeAnnotatedSpan({0, 3}, "phone", 0.5),
853 MakeAnnotatedSpan({1, 5}, "other", 1.0), // Looser!
854 MakeAnnotatedSpan({3, 7}, "phone", 0.6),
855 MakeAnnotatedSpan({8, 12}, "phone", 0.6), // Looser!
856 MakeAnnotatedSpan({11, 15}, "phone", 0.9),
857 }};
858
859 std::vector<int> chosen;
Lukas Zilkaba849e72018-03-08 14:48:21 +0100860 classifier.ResolveConflicts(candidates, /*context=*/"",
861 /*interpreter_manager=*/nullptr, &chosen);
Lukas Zilkab23e2122018-02-09 10:25:19 +0100862 EXPECT_THAT(chosen, ElementsAreArray({0, 2, 4}));
863}
Lukas Zilka21d8c982018-01-24 11:11:20 +0100864
Lukas Zilkadf710db2018-02-27 12:44:09 +0100865#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
866TEST_P(TextClassifierTest, LongInput) {
867 CREATE_UNILIB_FOR_TESTING;
868 std::unique_ptr<TextClassifier> classifier =
869 TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
870 ASSERT_TRUE(classifier);
871
872 for (const auto& type_value_pair :
873 std::vector<std::pair<std::string, std::string>>{
874 {"address", "350 Third Street, Cambridge"},
875 {"phone", "123 456-7890"},
876 {"url", "www.google.com"},
877 {"email", "someone@gmail.com"},
878 {"flight", "LX 38"},
879 {"date", "September 1, 2018"}}) {
880 const std::string input_100k = std::string(50000, ' ') +
881 type_value_pair.second +
882 std::string(50000, ' ');
883 const int value_length = type_value_pair.second.size();
884
885 EXPECT_THAT(classifier->Annotate(input_100k),
886 ElementsAreArray({IsAnnotatedSpan(50000, 50000 + value_length,
887 type_value_pair.first)}));
888 EXPECT_EQ(classifier->SuggestSelection(input_100k, {50000, 50001}),
889 std::make_pair(50000, 50000 + value_length));
890 EXPECT_EQ(type_value_pair.first,
891 FirstResult(classifier->ClassifyText(
892 input_100k, {50000, 50000 + value_length})));
893 }
894}
895#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
896
Lukas Zilkaba849e72018-03-08 14:48:21 +0100897#ifdef LIBTEXTCLASSIFIER_UNILIB_ICU
898// These coarse tests are there only to make sure the execution happens in
899// reasonable amount of time.
900TEST_P(TextClassifierTest, LongInputNoResultCheck) {
901 CREATE_UNILIB_FOR_TESTING;
902 std::unique_ptr<TextClassifier> classifier =
903 TextClassifier::FromPath(GetModelPath() + GetParam(), &unilib);
904 ASSERT_TRUE(classifier);
905
906 for (const std::string& value :
907 std::vector<std::string>{"http://www.aaaaaaaaaaaaaaaaaaaa.com "}) {
908 const std::string input_100k =
909 std::string(50000, ' ') + value + std::string(50000, ' ');
910 const int value_length = value.size();
911
912 classifier->Annotate(input_100k);
913 classifier->SuggestSelection(input_100k, {50000, 50001});
914 classifier->ClassifyText(input_100k, {50000, 50000 + value_length});
915 }
916}
917#endif // LIBTEXTCLASSIFIER_UNILIB_ICU
918
Lukas Zilka21d8c982018-01-24 11:11:20 +0100919} // namespace
920} // namespace libtextclassifier2