/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17#include <time.h>
18#include <fstream>
19#include <iostream>
20#include <memory>
21#include <string>
22
23#include "gmock/gmock.h"
24#include "gtest/gtest.h"
25
26#include "datetime/parser.h"
27#include "model_generated.h"
28#include "types-test-util.h"
29
30using testing::ElementsAreArray;
31
32namespace libtextclassifier2 {
33namespace {
34
35std::string GetModelPath() {
36 return LIBTEXTCLASSIFIER_TEST_DATA_DIR;
37}
38
// Reads the entire file at `file_name` and returns its content as a string.
//
// The stream is opened in binary mode so that the content (for example a
// serialized model) is returned byte-for-byte, including on platforms whose
// text mode translates line endings or stops at an end-of-file marker.
//
// Returns an empty string if the file cannot be opened.
std::string ReadFile(const std::string& file_name) {
  std::ifstream file_stream(file_name, std::ios::in | std::ios::binary);
  if (!file_stream.is_open()) {
    return std::string();
  }
  // A default-constructed istreambuf_iterator is the end-of-stream sentinel.
  return std::string(std::istreambuf_iterator<char>(file_stream),
                     std::istreambuf_iterator<char>());
}
43
44std::string FormatMillis(int64 time_ms_utc) {
45 long time_seconds = time_ms_utc / 1000; // NOLINT
46 // Format time, "ddd yyyy-mm-dd hh:mm:ss zzz"
47 char buffer[512];
48 strftime(buffer, sizeof(buffer), "%a %Y-%m-%d %H:%M:%S %Z",
49 localtime(&time_seconds));
50 return std::string(buffer);
51}
52
53class ParserTest : public testing::Test {
54 public:
55 void SetUp() override {
56 model_buffer_ = ReadFile(GetModelPath() + "test_model.fb");
57 const Model* model = GetModel(model_buffer_.data());
58 ASSERT_TRUE(model != nullptr);
59 ASSERT_TRUE(model->datetime_model() != nullptr);
60 parser_ = DatetimeParser::Instance(model->datetime_model(), unilib_);
61 }
62
63 bool ParsesCorrectly(const std::string& marked_text,
64 const int64 expected_ms_utc,
65 DatetimeGranularity expected_granularity,
66 const std::string& timezone = "Europe/Zurich") {
67 auto expected_start_index = marked_text.find("{");
68 EXPECT_TRUE(expected_start_index != std::string::npos);
69 auto expected_end_index = marked_text.find("}");
70 EXPECT_TRUE(expected_end_index != std::string::npos);
71
72 std::string text;
73 text += std::string(marked_text.begin(),
74 marked_text.begin() + expected_start_index);
75 text += std::string(marked_text.begin() + expected_start_index + 1,
76 marked_text.begin() + expected_end_index);
77 text += std::string(marked_text.begin() + expected_end_index + 1,
78 marked_text.end());
79
80 std::vector<DatetimeParseResultSpan> results;
81
Lukas Zilkaba849e72018-03-08 14:48:21 +010082 if (!parser_->Parse(text, 0, timezone, /*locales=*/"", ModeFlag_ANNOTATION,
83 &results)) {
Lukas Zilkab23e2122018-02-09 10:25:19 +010084 TC_LOG(ERROR) << text;
85 TC_CHECK(false);
86 }
87 EXPECT_TRUE(!results.empty());
88
89 std::vector<DatetimeParseResultSpan> filtered_results;
90 for (const DatetimeParseResultSpan& result : results) {
91 if (SpansOverlap(result.span,
92 {expected_start_index, expected_end_index})) {
93 filtered_results.push_back(result);
94 }
95 }
96
97 const std::vector<DatetimeParseResultSpan> expected{
98 {{expected_start_index, expected_end_index - 1},
99 {expected_ms_utc, expected_granularity},
100 /*target_classification_score=*/1.0,
101 /*priority_score=*/0.0}};
102 const bool matches =
103 testing::Matches(ElementsAreArray(expected))(filtered_results);
104 if (!matches) {
105 TC_LOG(ERROR) << "Expected: " << expected[0] << " which corresponds to: "
106 << FormatMillis(expected[0].data.time_ms_utc);
107 for (int i = 0; i < filtered_results.size(); ++i) {
108 TC_LOG(ERROR) << "Actual[" << i << "]: " << filtered_results[i]
109 << " which corresponds to: "
110 << FormatMillis(filtered_results[i].data.time_ms_utc);
111 }
112 }
113 return matches;
114 }
115
116 protected:
117 std::string model_buffer_;
118 std::unique_ptr<DatetimeParser> parser_;
119 UniLib unilib_;
120};
121
122// Test with just a few cases to make debugging of general failures easier.
123TEST_F(ParserTest, ParseShort) {
124 EXPECT_TRUE(
125 ParsesCorrectly("{January 1, 1988}", 567990000000, GRANULARITY_DAY));
Lukas Zilkaba849e72018-03-08 14:48:21 +0100126 EXPECT_TRUE(ParsesCorrectly("{three days ago}", -262800000, GRANULARITY_DAY));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100127}
128
129TEST_F(ParserTest, Parse) {
130 EXPECT_TRUE(
131 ParsesCorrectly("{January 1, 1988}", 567990000000, GRANULARITY_DAY));
132 EXPECT_TRUE(ParsesCorrectly("{1 2 2018}", 1517439600000, GRANULARITY_DAY));
133 EXPECT_TRUE(
134 ParsesCorrectly("{january 31 2018}", 1517353200000, GRANULARITY_DAY));
135 EXPECT_TRUE(ParsesCorrectly("lorem {1 january 2018} ipsum", 1514761200000,
136 GRANULARITY_DAY));
137 EXPECT_TRUE(ParsesCorrectly("{19/apr/2010:06:36:15}", 1271651775000,
138 GRANULARITY_SECOND));
139 EXPECT_TRUE(ParsesCorrectly("{09/Mar/2004 22:02:40}", 1078866160000,
140 GRANULARITY_SECOND));
141 EXPECT_TRUE(ParsesCorrectly("{Dec 2, 2010 2:39:58 AM}", 1291253998000,
142 GRANULARITY_SECOND));
143 EXPECT_TRUE(ParsesCorrectly("{Jun 09 2011 15:28:14}", 1307626094000,
144 GRANULARITY_SECOND));
145 EXPECT_TRUE(ParsesCorrectly("{Apr 20 00:00:35 2010}", 1271714435000,
146 GRANULARITY_SECOND));
147 EXPECT_TRUE(
148 ParsesCorrectly("{Mar 16 08:12:04}", 6419524000, GRANULARITY_SECOND));
149 EXPECT_TRUE(ParsesCorrectly("{2012-10-14T22:11:20}", 1350245480000,
150 GRANULARITY_SECOND));
Lukas Zilkaba849e72018-03-08 14:48:21 +0100151 EXPECT_TRUE(ParsesCorrectly("{2014-07-01T14:59:55}.711Z", 1404219595000,
Lukas Zilkab23e2122018-02-09 10:25:19 +0100152 GRANULARITY_SECOND));
Lukas Zilkaba849e72018-03-08 14:48:21 +0100153 EXPECT_TRUE(ParsesCorrectly("{2010-06-26 02:31:29},573", 1277512289000,
Lukas Zilkab23e2122018-02-09 10:25:19 +0100154 GRANULARITY_SECOND));
155 EXPECT_TRUE(ParsesCorrectly("{2006/01/22 04:11:05}", 1137899465000,
156 GRANULARITY_SECOND));
157 EXPECT_TRUE(
158 ParsesCorrectly("{150423 11:42:35}", 1429782155000, GRANULARITY_SECOND));
159 EXPECT_TRUE(ParsesCorrectly("{11:42:35}", 38555000, GRANULARITY_SECOND));
Lukas Zilkaba849e72018-03-08 14:48:21 +0100160 EXPECT_TRUE(ParsesCorrectly("{11:42:35}.173", 38555000, GRANULARITY_SECOND));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100161 EXPECT_TRUE(
Lukas Zilkaba849e72018-03-08 14:48:21 +0100162 ParsesCorrectly("{23/Apr 11:42:35},173", 9715355000, GRANULARITY_SECOND));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100163 EXPECT_TRUE(ParsesCorrectly("{23/Apr/2015:11:42:35}", 1429782155000,
164 GRANULARITY_SECOND));
165 EXPECT_TRUE(ParsesCorrectly("{23/Apr/2015 11:42:35}", 1429782155000,
166 GRANULARITY_SECOND));
167 EXPECT_TRUE(ParsesCorrectly("{23-Apr-2015 11:42:35}", 1429782155000,
168 GRANULARITY_SECOND));
Lukas Zilkaba849e72018-03-08 14:48:21 +0100169 EXPECT_TRUE(ParsesCorrectly("{23-Apr-2015 11:42:35}.883", 1429782155000,
Lukas Zilkab23e2122018-02-09 10:25:19 +0100170 GRANULARITY_SECOND));
171 EXPECT_TRUE(ParsesCorrectly("{23 Apr 2015 11:42:35}", 1429782155000,
172 GRANULARITY_SECOND));
Lukas Zilkaba849e72018-03-08 14:48:21 +0100173 EXPECT_TRUE(ParsesCorrectly("{23 Apr 2015 11:42:35}.883", 1429782155000,
Lukas Zilkab23e2122018-02-09 10:25:19 +0100174 GRANULARITY_SECOND));
175 EXPECT_TRUE(ParsesCorrectly("{04/23/15 11:42:35}", 1429782155000,
176 GRANULARITY_SECOND));
177 EXPECT_TRUE(ParsesCorrectly("{04/23/2015 11:42:35}", 1429782155000,
178 GRANULARITY_SECOND));
Lukas Zilkaba849e72018-03-08 14:48:21 +0100179 EXPECT_TRUE(ParsesCorrectly("{04/23/2015 11:42:35}.883", 1429782155000,
Lukas Zilkab23e2122018-02-09 10:25:19 +0100180 GRANULARITY_SECOND));
Lukas Zilkaba849e72018-03-08 14:48:21 +0100181 EXPECT_TRUE(ParsesCorrectly("{8/5/2011 3:31:18 AM}:234}", 1312507878000,
Lukas Zilkab23e2122018-02-09 10:25:19 +0100182 GRANULARITY_SECOND));
183 EXPECT_TRUE(ParsesCorrectly("{9/28/2011 2:23:15 PM}", 1317212595000,
184 GRANULARITY_SECOND));
185 EXPECT_TRUE(ParsesCorrectly("{19/apr/2010:06:36:15}", 1271651775000,
186 GRANULARITY_SECOND));
187 EXPECT_TRUE(ParsesCorrectly(
188 "Are sentiments apartments decisively the especially alteration. "
189 "Thrown shy denote ten ladies though ask saw. Or by to he going "
190 "think order event music. Incommode so intention defective at "
191 "convinced. Led income months itself and houses you. After nor "
192 "you leave might share court balls. {19/apr/2010:06:36:15} Are "
193 "sentiments apartments decisively the especially alteration. "
194 "Thrown shy denote ten ladies though ask saw. Or by to he going "
195 "think order event music. Incommode so intention defective at "
196 "convinced. Led income months itself and houses you. After nor "
197 "you leave might share court balls. ",
198 1271651775000, GRANULARITY_SECOND));
199 EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4:30}", 1514777400000,
200 GRANULARITY_MINUTE));
201 EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4}", 1514775600000,
202 GRANULARITY_HOUR));
203 EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4pm}", 1514818800000,
204 GRANULARITY_HOUR));
205
Lukas Zilkaba849e72018-03-08 14:48:21 +0100206 EXPECT_TRUE(ParsesCorrectly("{today}", -3600000, GRANULARITY_DAY));
207 EXPECT_TRUE(ParsesCorrectly("{today}", -57600000, GRANULARITY_DAY,
Lukas Zilkab23e2122018-02-09 10:25:19 +0100208 "America/Los_Angeles"));
Lukas Zilkaba849e72018-03-08 14:48:21 +0100209 EXPECT_TRUE(ParsesCorrectly("{next week}", 255600000, GRANULARITY_WEEK));
210 EXPECT_TRUE(ParsesCorrectly("{next day}", 82800000, GRANULARITY_DAY));
211 EXPECT_TRUE(ParsesCorrectly("{in three days}", 255600000, GRANULARITY_DAY));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100212 EXPECT_TRUE(
Lukas Zilkaba849e72018-03-08 14:48:21 +0100213 ParsesCorrectly("{in three weeks}", 1465200000, GRANULARITY_WEEK));
214 EXPECT_TRUE(ParsesCorrectly("{tomorrow}", 82800000, GRANULARITY_DAY));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100215 EXPECT_TRUE(
216 ParsesCorrectly("{tomorrow at 4:00}", 97200000, GRANULARITY_MINUTE));
217 EXPECT_TRUE(ParsesCorrectly("{tomorrow at 4}", 97200000, GRANULARITY_HOUR));
Lukas Zilkaba849e72018-03-08 14:48:21 +0100218 EXPECT_TRUE(ParsesCorrectly("{next wednesday}", 514800000, GRANULARITY_DAY));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100219 EXPECT_TRUE(
220 ParsesCorrectly("{next wednesday at 4}", 529200000, GRANULARITY_HOUR));
221 EXPECT_TRUE(ParsesCorrectly("last seen {today at 9:01 PM}", 72060000,
222 GRANULARITY_MINUTE));
Lukas Zilkaba849e72018-03-08 14:48:21 +0100223 EXPECT_TRUE(ParsesCorrectly("{Three days ago}", -262800000, GRANULARITY_DAY));
224 EXPECT_TRUE(ParsesCorrectly("{three days ago}", -262800000, GRANULARITY_DAY));
Lukas Zilkab23e2122018-02-09 10:25:19 +0100225}
226
// TODO(zilka): Add a test that tests multiple locales.
228
229} // namespace
230} // namespace libtextclassifier2