/*
 * Copyright (C) 2018 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
#include <time.h>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <string>
#include <vector>

#include "gmock/gmock.h"
#include "gtest/gtest.h"

#include "annotator/annotator.h"
#include "annotator/datetime/parser.h"
#include "annotator/model_generated.h"
#include "annotator/types-test-util.h"

using testing::ElementsAreArray;
32
Tony Mak6c4cc672018-09-17 11:48:50 +010033namespace libtextclassifier3 {
Lukas Zilkab23e2122018-02-09 10:25:19 +010034namespace {
35
36std::string GetModelPath() {
Tony Maka0f598b2018-11-20 20:39:04 +000037 return TC3_TEST_DATA_DIR;
Lukas Zilkab23e2122018-02-09 10:25:19 +010038}
39
// Reads the whole file `file_name` and returns its raw bytes.
//
// The stream is opened in binary mode because the callers load a serialized
// model; text mode may translate line endings or stop at control characters on
// some platforms. Returns an empty string if the file cannot be opened.
std::string ReadFile(const std::string& file_name) {
  std::ifstream file_stream(file_name, std::ios::in | std::ios::binary);
  return std::string(std::istreambuf_iterator<char>(file_stream),
                     std::istreambuf_iterator<char>());
}
44
45std::string FormatMillis(int64 time_ms_utc) {
46 long time_seconds = time_ms_utc / 1000; // NOLINT
47 // Format time, "ddd yyyy-mm-dd hh:mm:ss zzz"
48 char buffer[512];
49 strftime(buffer, sizeof(buffer), "%a %Y-%m-%d %H:%M:%S %Z",
50 localtime(&time_seconds));
51 return std::string(buffer);
52}
53
54class ParserTest : public testing::Test {
55 public:
56 void SetUp() override {
57 model_buffer_ = ReadFile(GetModelPath() + "test_model.fb");
Tony Mak6c4cc672018-09-17 11:48:50 +010058 classifier_ = Annotator::FromUnownedBuffer(model_buffer_.data(),
59 model_buffer_.size(), &unilib_);
60 TC3_CHECK(classifier_);
Lukas Zilkae7962cc2018-03-28 18:09:48 +020061 parser_ = classifier_->DatetimeParserForTests();
62 }
63
64 bool HasNoResult(const std::string& text, bool anchor_start_end = false,
65 const std::string& timezone = "Europe/Zurich") {
66 std::vector<DatetimeParseResultSpan> results;
67 if (!parser_->Parse(text, 0, timezone, /*locales=*/"", ModeFlag_ANNOTATION,
68 anchor_start_end, &results)) {
Tony Mak6c4cc672018-09-17 11:48:50 +010069 TC3_LOG(ERROR) << text;
70 TC3_CHECK(false);
Lukas Zilkae7962cc2018-03-28 18:09:48 +020071 }
72 return results.empty();
Lukas Zilkab23e2122018-02-09 10:25:19 +010073 }
74
75 bool ParsesCorrectly(const std::string& marked_text,
76 const int64 expected_ms_utc,
77 DatetimeGranularity expected_granularity,
Lukas Zilkae7962cc2018-03-28 18:09:48 +020078 bool anchor_start_end = false,
Lukas Zilka434442d2018-04-25 11:38:51 +020079 const std::string& timezone = "Europe/Zurich",
80 const std::string& locales = "en-US") {
81 const UnicodeText marked_text_unicode =
82 UTF8ToUnicodeText(marked_text, /*do_copy=*/false);
83 auto brace_open_it =
84 std::find(marked_text_unicode.begin(), marked_text_unicode.end(), '{');
85 auto brace_end_it =
86 std::find(marked_text_unicode.begin(), marked_text_unicode.end(), '}');
Tony Mak6c4cc672018-09-17 11:48:50 +010087 TC3_CHECK(brace_open_it != marked_text_unicode.end());
88 TC3_CHECK(brace_end_it != marked_text_unicode.end());
Lukas Zilkab23e2122018-02-09 10:25:19 +010089
90 std::string text;
Lukas Zilka434442d2018-04-25 11:38:51 +020091 text +=
92 UnicodeText::UTF8Substring(marked_text_unicode.begin(), brace_open_it);
93 text += UnicodeText::UTF8Substring(std::next(brace_open_it), brace_end_it);
94 text += UnicodeText::UTF8Substring(std::next(brace_end_it),
95 marked_text_unicode.end());
Lukas Zilkab23e2122018-02-09 10:25:19 +010096
97 std::vector<DatetimeParseResultSpan> results;
98
Lukas Zilka434442d2018-04-25 11:38:51 +020099 if (!parser_->Parse(text, 0, timezone, locales, ModeFlag_ANNOTATION,
Lukas Zilkae7962cc2018-03-28 18:09:48 +0200100 anchor_start_end, &results)) {
Tony Mak6c4cc672018-09-17 11:48:50 +0100101 TC3_LOG(ERROR) << text;
102 TC3_CHECK(false);
Lukas Zilkab23e2122018-02-09 10:25:19 +0100103 }
Lukas Zilka434442d2018-04-25 11:38:51 +0200104 if (results.empty()) {
Tony Mak6c4cc672018-09-17 11:48:50 +0100105 TC3_LOG(ERROR) << "No results.";
Lukas Zilka434442d2018-04-25 11:38:51 +0200106 return false;
107 }
108
109 const int expected_start_index =
110 std::distance(marked_text_unicode.begin(), brace_open_it);
111 // The -1 bellow is to account for the opening bracket character.
112 const int expected_end_index =
113 std::distance(marked_text_unicode.begin(), brace_end_it) - 1;
Lukas Zilkab23e2122018-02-09 10:25:19 +0100114
115 std::vector<DatetimeParseResultSpan> filtered_results;
116 for (const DatetimeParseResultSpan& result : results) {
117 if (SpansOverlap(result.span,
118 {expected_start_index, expected_end_index})) {
119 filtered_results.push_back(result);
120 }
121 }
122
123 const std::vector<DatetimeParseResultSpan> expected{
Lukas Zilka434442d2018-04-25 11:38:51 +0200124 {{expected_start_index, expected_end_index},
Lukas Zilkab23e2122018-02-09 10:25:19 +0100125 {expected_ms_utc, expected_granularity},
126 /*target_classification_score=*/1.0,
Tony Mak51a9e542018-11-02 13:36:22 +0000127 /*priority_score=*/0.1}};
Lukas Zilkab23e2122018-02-09 10:25:19 +0100128 const bool matches =
129 testing::Matches(ElementsAreArray(expected))(filtered_results);
130 if (!matches) {
Tony Mak6c4cc672018-09-17 11:48:50 +0100131 TC3_LOG(ERROR) << "Expected: " << expected[0] << " which corresponds to: "
132 << FormatMillis(expected[0].data.time_ms_utc);
Lukas Zilkab23e2122018-02-09 10:25:19 +0100133 for (int i = 0; i < filtered_results.size(); ++i) {
Tony Mak6c4cc672018-09-17 11:48:50 +0100134 TC3_LOG(ERROR) << "Actual[" << i << "]: " << filtered_results[i]
135 << " which corresponds to: "
136 << FormatMillis(filtered_results[i].data.time_ms_utc);
Lukas Zilkab23e2122018-02-09 10:25:19 +0100137 }
138 }
139 return matches;
140 }
141
Lukas Zilka434442d2018-04-25 11:38:51 +0200142 bool ParsesCorrectlyGerman(const std::string& marked_text,
143 const int64 expected_ms_utc,
144 DatetimeGranularity expected_granularity) {
145 return ParsesCorrectly(marked_text, expected_ms_utc, expected_granularity,
146 /*anchor_start_end=*/false,
147 /*timezone=*/"Europe/Zurich", /*locales=*/"de");
148 }
149
Lukas Zilkab23e2122018-02-09 10:25:19 +0100150 protected:
151 std::string model_buffer_;
Tony Mak6c4cc672018-09-17 11:48:50 +0100152 std::unique_ptr<Annotator> classifier_;
Lukas Zilkae7962cc2018-03-28 18:09:48 +0200153 const DatetimeParser* parser_;
Lukas Zilkab23e2122018-02-09 10:25:19 +0100154 UniLib unilib_;
155};
156
157// Test with just a few cases to make debugging of general failures easier.
158TEST_F(ParserTest, ParseShort) {
159 EXPECT_TRUE(
160 ParsesCorrectly("{January 1, 1988}", 567990000000, GRANULARITY_DAY));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100161}
162
163TEST_F(ParserTest, Parse) {
164 EXPECT_TRUE(
165 ParsesCorrectly("{January 1, 1988}", 567990000000, GRANULARITY_DAY));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100166 EXPECT_TRUE(
167 ParsesCorrectly("{january 31 2018}", 1517353200000, GRANULARITY_DAY));
168 EXPECT_TRUE(ParsesCorrectly("lorem {1 january 2018} ipsum", 1514761200000,
169 GRANULARITY_DAY));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100170 EXPECT_TRUE(ParsesCorrectly("{09/Mar/2004 22:02:40}", 1078866160000,
171 GRANULARITY_SECOND));
172 EXPECT_TRUE(ParsesCorrectly("{Dec 2, 2010 2:39:58 AM}", 1291253998000,
173 GRANULARITY_SECOND));
174 EXPECT_TRUE(ParsesCorrectly("{Jun 09 2011 15:28:14}", 1307626094000,
175 GRANULARITY_SECOND));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100176 EXPECT_TRUE(
177 ParsesCorrectly("{Mar 16 08:12:04}", 6419524000, GRANULARITY_SECOND));
Tony Mak6c4cc672018-09-17 11:48:50 +0100178 EXPECT_TRUE(ParsesCorrectly("{2010-06-26 02:31:29}", 1277512289000,
Lukas Zilkab23e2122018-02-09 10:25:19 +0100179 GRANULARITY_SECOND));
180 EXPECT_TRUE(ParsesCorrectly("{2006/01/22 04:11:05}", 1137899465000,
181 GRANULARITY_SECOND));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100182 EXPECT_TRUE(ParsesCorrectly("{11:42:35}", 38555000, GRANULARITY_SECOND));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100183 EXPECT_TRUE(
Tony Mak6c4cc672018-09-17 11:48:50 +0100184 ParsesCorrectly("{23/Apr 11:42:35}", 9715355000, GRANULARITY_SECOND));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100185 EXPECT_TRUE(ParsesCorrectly("{23/Apr/2015 11:42:35}", 1429782155000,
186 GRANULARITY_SECOND));
187 EXPECT_TRUE(ParsesCorrectly("{23-Apr-2015 11:42:35}", 1429782155000,
188 GRANULARITY_SECOND));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100189 EXPECT_TRUE(ParsesCorrectly("{23 Apr 2015 11:42:35}", 1429782155000,
190 GRANULARITY_SECOND));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100191 EXPECT_TRUE(ParsesCorrectly("{04/23/15 11:42:35}", 1429782155000,
192 GRANULARITY_SECOND));
193 EXPECT_TRUE(ParsesCorrectly("{04/23/2015 11:42:35}", 1429782155000,
194 GRANULARITY_SECOND));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100195 EXPECT_TRUE(ParsesCorrectly("{9/28/2011 2:23:15 PM}", 1317212595000,
196 GRANULARITY_SECOND));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100197 EXPECT_TRUE(ParsesCorrectly(
198 "Are sentiments apartments decisively the especially alteration. "
199 "Thrown shy denote ten ladies though ask saw. Or by to he going "
200 "think order event music. Incommode so intention defective at "
201 "convinced. Led income months itself and houses you. After nor "
Lukas Zilka434442d2018-04-25 11:38:51 +0200202 "you leave might share court balls. {19/apr/2010 06:36:15} Are "
Lukas Zilkab23e2122018-02-09 10:25:19 +0100203 "sentiments apartments decisively the especially alteration. "
204 "Thrown shy denote ten ladies though ask saw. Or by to he going "
205 "think order event music. Incommode so intention defective at "
206 "convinced. Led income months itself and houses you. After nor "
207 "you leave might share court balls. ",
208 1271651775000, GRANULARITY_SECOND));
209 EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4:30}", 1514777400000,
210 GRANULARITY_MINUTE));
Lukas Zilka434442d2018-04-25 11:38:51 +0200211 EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4:30 am}", 1514777400000,
212 GRANULARITY_MINUTE));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100213 EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4pm}", 1514818800000,
214 GRANULARITY_HOUR));
215
Tony Mak6c4cc672018-09-17 11:48:50 +0100216 EXPECT_TRUE(ParsesCorrectly("{today at 0:00}", -3600000, GRANULARITY_MINUTE));
217 EXPECT_TRUE(ParsesCorrectly("{today at 0:00}", -57600000, GRANULARITY_MINUTE,
Lukas Zilkae7962cc2018-03-28 18:09:48 +0200218 /*anchor_start_end=*/false,
Lukas Zilkab23e2122018-02-09 10:25:19 +0100219 "America/Los_Angeles"));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100220 EXPECT_TRUE(
221 ParsesCorrectly("{tomorrow at 4:00}", 97200000, GRANULARITY_MINUTE));
Tony Mak6c4cc672018-09-17 11:48:50 +0100222 EXPECT_TRUE(ParsesCorrectly("{tomorrow at 4am}", 97200000, GRANULARITY_HOUR));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100223 EXPECT_TRUE(
Tony Mak6c4cc672018-09-17 11:48:50 +0100224 ParsesCorrectly("{wednesday at 4am}", 529200000, GRANULARITY_HOUR));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100225 EXPECT_TRUE(ParsesCorrectly("last seen {today at 9:01 PM}", 72060000,
226 GRANULARITY_MINUTE));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100227}
228
Lukas Zilkae7962cc2018-03-28 18:09:48 +0200229TEST_F(ParserTest, ParseWithAnchor) {
230 EXPECT_TRUE(ParsesCorrectly("{January 1, 1988}", 567990000000,
231 GRANULARITY_DAY, /*anchor_start_end=*/false));
232 EXPECT_TRUE(ParsesCorrectly("{January 1, 1988}", 567990000000,
233 GRANULARITY_DAY, /*anchor_start_end=*/true));
234 EXPECT_TRUE(ParsesCorrectly("lorem {1 january 2018} ipsum", 1514761200000,
235 GRANULARITY_DAY, /*anchor_start_end=*/false));
236 EXPECT_TRUE(HasNoResult("lorem 1 january 2018 ipsum",
237 /*anchor_start_end=*/true));
238}
239
Lukas Zilka434442d2018-04-25 11:38:51 +0200240TEST_F(ParserTest, ParseGerman) {
241 EXPECT_TRUE(
242 ParsesCorrectlyGerman("{Januar 1 2018}", 1514761200000, GRANULARITY_DAY));
243 EXPECT_TRUE(
244 ParsesCorrectlyGerman("{1 2 2018}", 1517439600000, GRANULARITY_DAY));
245 EXPECT_TRUE(ParsesCorrectlyGerman("lorem {1 Januar 2018} ipsum",
246 1514761200000, GRANULARITY_DAY));
247 EXPECT_TRUE(ParsesCorrectlyGerman("{19/Apr/2010:06:36:15}", 1271651775000,
248 GRANULARITY_SECOND));
249 EXPECT_TRUE(ParsesCorrectlyGerman("{09/März/2004 22:02:40}", 1078866160000,
250 GRANULARITY_SECOND));
251 EXPECT_TRUE(ParsesCorrectlyGerman("{Dez 2, 2010 2:39:58}", 1291253998000,
252 GRANULARITY_SECOND));
253 EXPECT_TRUE(ParsesCorrectlyGerman("{Juni 09 2011 15:28:14}", 1307626094000,
254 GRANULARITY_SECOND));
255 EXPECT_TRUE(ParsesCorrectlyGerman("{März 16 08:12:04}", 6419524000,
256 GRANULARITY_SECOND));
Tony Mak6c4cc672018-09-17 11:48:50 +0100257 EXPECT_TRUE(ParsesCorrectlyGerman("{2010-06-26 02:31:29}", 1277512289000,
Lukas Zilka434442d2018-04-25 11:38:51 +0200258 GRANULARITY_SECOND));
259 EXPECT_TRUE(ParsesCorrectlyGerman("{2006/01/22 04:11:05}", 1137899465000,
260 GRANULARITY_SECOND));
261 EXPECT_TRUE(
262 ParsesCorrectlyGerman("{11:42:35}", 38555000, GRANULARITY_SECOND));
Tony Mak6c4cc672018-09-17 11:48:50 +0100263 EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr 11:42:35}", 9715355000,
Lukas Zilka434442d2018-04-25 11:38:51 +0200264 GRANULARITY_SECOND));
265 EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr/2015:11:42:35}", 1429782155000,
266 GRANULARITY_SECOND));
267 EXPECT_TRUE(ParsesCorrectlyGerman("{23/Apr/2015 11:42:35}", 1429782155000,
268 GRANULARITY_SECOND));
269 EXPECT_TRUE(ParsesCorrectlyGerman("{23-Apr-2015 11:42:35}", 1429782155000,
270 GRANULARITY_SECOND));
Lukas Zilka434442d2018-04-25 11:38:51 +0200271 EXPECT_TRUE(ParsesCorrectlyGerman("{23 Apr 2015 11:42:35}", 1429782155000,
272 GRANULARITY_SECOND));
Lukas Zilka434442d2018-04-25 11:38:51 +0200273 EXPECT_TRUE(ParsesCorrectlyGerman("{04/23/15 11:42:35}", 1429782155000,
274 GRANULARITY_SECOND));
275 EXPECT_TRUE(ParsesCorrectlyGerman("{04/23/2015 11:42:35}", 1429782155000,
276 GRANULARITY_SECOND));
Lukas Zilka434442d2018-04-25 11:38:51 +0200277 EXPECT_TRUE(ParsesCorrectlyGerman("{19/apr/2010:06:36:15}", 1271651775000,
278 GRANULARITY_SECOND));
279 EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4:30}", 1514777400000,
280 GRANULARITY_MINUTE));
281 EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4:30 nachm}",
282 1514820600000, GRANULARITY_MINUTE));
283 EXPECT_TRUE(ParsesCorrectlyGerman("{januar 1 2018 um 4 nachm}", 1514818800000,
284 GRANULARITY_HOUR));
285 EXPECT_TRUE(
286 ParsesCorrectlyGerman("{14.03.2017}", 1489446000000, GRANULARITY_DAY));
Lukas Zilka434442d2018-04-25 11:38:51 +0200287 EXPECT_TRUE(
Tony Mak6c4cc672018-09-17 11:48:50 +0100288 ParsesCorrectlyGerman("{morgen 0:00}", 82800000, GRANULARITY_MINUTE));
Lukas Zilka434442d2018-04-25 11:38:51 +0200289 EXPECT_TRUE(
290 ParsesCorrectlyGerman("{morgen um 4:00}", 97200000, GRANULARITY_MINUTE));
291 EXPECT_TRUE(
Tony Mak6c4cc672018-09-17 11:48:50 +0100292 ParsesCorrectlyGerman("{morgen um 4 vorm}", 97200000, GRANULARITY_HOUR));
Lukas Zilka434442d2018-04-25 11:38:51 +0200293}
294
295TEST_F(ParserTest, ParseNonUs) {
296 EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1430431200000, GRANULARITY_DAY,
297 /*anchor_start_end=*/false,
298 /*timezone=*/"Europe/Zurich",
299 /*locales=*/"en-GB"));
300 EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1430431200000, GRANULARITY_DAY,
301 /*anchor_start_end=*/false,
302 /*timezone=*/"Europe/Zurich", /*locales=*/"en"));
303}
304
305TEST_F(ParserTest, ParseUs) {
306 EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1420412400000, GRANULARITY_DAY,
307 /*anchor_start_end=*/false,
308 /*timezone=*/"Europe/Zurich",
309 /*locales=*/"en-US"));
310 EXPECT_TRUE(ParsesCorrectly("{1/5/15}", 1420412400000, GRANULARITY_DAY,
311 /*anchor_start_end=*/false,
312 /*timezone=*/"Europe/Zurich",
313 /*locales=*/"es-US"));
314}
315
316TEST_F(ParserTest, ParseUnknownLanguage) {
317 EXPECT_TRUE(ParsesCorrectly("bylo to {31. 12. 2015} v 6 hodin", 1451516400000,
318 GRANULARITY_DAY,
319 /*anchor_start_end=*/false,
320 /*timezone=*/"Europe/Zurich", /*locales=*/"xx"));
321}
322
Lukas Zilkae7962cc2018-03-28 18:09:48 +0200323class ParserLocaleTest : public testing::Test {
324 public:
325 void SetUp() override;
326 bool HasResult(const std::string& input, const std::string& locales);
327
328 protected:
329 UniLib unilib_;
Tony Mak6c4cc672018-09-17 11:48:50 +0100330 CalendarLib calendarlib_;
Lukas Zilkae7962cc2018-03-28 18:09:48 +0200331 flatbuffers::FlatBufferBuilder builder_;
332 std::unique_ptr<DatetimeParser> parser_;
333};
334
335void AddPattern(const std::string& regex, int locale,
336 std::vector<std::unique_ptr<DatetimeModelPatternT>>* patterns) {
337 patterns->emplace_back(new DatetimeModelPatternT);
338 patterns->back()->regexes.emplace_back(new DatetimeModelPattern_::RegexT);
339 patterns->back()->regexes.back()->pattern = regex;
340 patterns->back()->regexes.back()->groups.push_back(
341 DatetimeGroupType_GROUP_UNUSED);
342 patterns->back()->locales.push_back(locale);
343}
344
345void ParserLocaleTest::SetUp() {
346 DatetimeModelT model;
347 model.use_extractors_for_locating = false;
348 model.locales.clear();
349 model.locales.push_back("en-US");
350 model.locales.push_back("en-CH");
351 model.locales.push_back("zh-Hant");
352 model.locales.push_back("en-*");
353 model.locales.push_back("zh-Hant-*");
354 model.locales.push_back("*-CH");
Lukas Zilka434442d2018-04-25 11:38:51 +0200355 model.locales.push_back("default");
356 model.default_locales.push_back(6);
Lukas Zilkae7962cc2018-03-28 18:09:48 +0200357
358 AddPattern(/*regex=*/"en-US", /*locale=*/0, &model.patterns);
359 AddPattern(/*regex=*/"en-CH", /*locale=*/1, &model.patterns);
360 AddPattern(/*regex=*/"zh-Hant", /*locale=*/2, &model.patterns);
361 AddPattern(/*regex=*/"en-all", /*locale=*/3, &model.patterns);
362 AddPattern(/*regex=*/"zh-Hant-all", /*locale=*/4, &model.patterns);
363 AddPattern(/*regex=*/"all-CH", /*locale=*/5, &model.patterns);
364 AddPattern(/*regex=*/"default", /*locale=*/6, &model.patterns);
365
366 builder_.Finish(DatetimeModel::Pack(builder_, &model));
367 const DatetimeModel* model_fb =
368 flatbuffers::GetRoot<DatetimeModel>(builder_.GetBufferPointer());
369 ASSERT_TRUE(model_fb);
370
Tony Mak6c4cc672018-09-17 11:48:50 +0100371 parser_ = DatetimeParser::Instance(model_fb, unilib_, calendarlib_,
Lukas Zilkae7962cc2018-03-28 18:09:48 +0200372 /*decompressor=*/nullptr);
373 ASSERT_TRUE(parser_);
374}
375
376bool ParserLocaleTest::HasResult(const std::string& input,
377 const std::string& locales) {
378 std::vector<DatetimeParseResultSpan> results;
379 EXPECT_TRUE(parser_->Parse(input, /*reference_time_ms_utc=*/0,
380 /*reference_timezone=*/"", locales,
381 ModeFlag_ANNOTATION, false, &results));
382 return results.size() == 1;
383}
384
385TEST_F(ParserLocaleTest, English) {
386 EXPECT_TRUE(HasResult("en-US", /*locales=*/"en-US"));
387 EXPECT_FALSE(HasResult("en-CH", /*locales=*/"en-US"));
388 EXPECT_FALSE(HasResult("en-US", /*locales=*/"en-CH"));
389 EXPECT_TRUE(HasResult("en-CH", /*locales=*/"en-CH"));
390 EXPECT_TRUE(HasResult("default", /*locales=*/"en-CH"));
391}
392
393TEST_F(ParserLocaleTest, TraditionalChinese) {
394 EXPECT_TRUE(HasResult("zh-Hant-all", /*locales=*/"zh-Hant"));
395 EXPECT_TRUE(HasResult("zh-Hant-all", /*locales=*/"zh-Hant-TW"));
396 EXPECT_TRUE(HasResult("zh-Hant-all", /*locales=*/"zh-Hant-SG"));
397 EXPECT_FALSE(HasResult("zh-Hant-all", /*locales=*/"zh-SG"));
398 EXPECT_FALSE(HasResult("zh-Hant-all", /*locales=*/"zh"));
399 EXPECT_TRUE(HasResult("default", /*locales=*/"zh"));
400 EXPECT_TRUE(HasResult("default", /*locales=*/"zh-Hant-SG"));
401}
402
403TEST_F(ParserLocaleTest, SwissEnglish) {
404 EXPECT_TRUE(HasResult("all-CH", /*locales=*/"de-CH"));
405 EXPECT_TRUE(HasResult("all-CH", /*locales=*/"en-CH"));
406 EXPECT_TRUE(HasResult("en-all", /*locales=*/"en-CH"));
407 EXPECT_FALSE(HasResult("all-CH", /*locales=*/"de-DE"));
408 EXPECT_TRUE(HasResult("default", /*locales=*/"de-CH"));
409 EXPECT_TRUE(HasResult("default", /*locales=*/"en-CH"));
410}
Lukas Zilkab23e2122018-02-09 10:25:19 +0100411
412} // namespace
Tony Mak6c4cc672018-09-17 11:48:50 +0100413} // namespace libtextclassifier3