Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2017 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include <time.h> |
| 18 | #include <fstream> |
| 19 | #include <iostream> |
| 20 | #include <memory> |
| 21 | #include <string> |
| 22 | |
| 23 | #include "gmock/gmock.h" |
| 24 | #include "gtest/gtest.h" |
| 25 | |
| 26 | #include "datetime/parser.h" |
| 27 | #include "model_generated.h" |
| 28 | #include "types-test-util.h" |
| 29 | |
| 30 | using testing::ElementsAreArray; |
| 31 | |
| 32 | namespace libtextclassifier2 { |
| 33 | namespace { |
| 34 | |
| 35 | std::string GetModelPath() { |
| 36 | return LIBTEXTCLASSIFIER_TEST_DATA_DIR; |
| 37 | } |
| 38 | |
// Returns the complete contents of `file_name`, or an empty string when the
// file cannot be opened.
//
// The stream is opened in binary mode: the file read by this test is a
// serialized model, and a text-mode stream may translate line endings or stop
// at an end-of-file marker on some platforms, silently corrupting the buffer.
std::string ReadFile(const std::string& file_name) {
  std::ifstream file_stream(file_name, std::ios::in | std::ios::binary);
  return std::string(std::istreambuf_iterator<char>(file_stream),
                     std::istreambuf_iterator<char>());
}
| 43 | |
| 44 | std::string FormatMillis(int64 time_ms_utc) { |
| 45 | long time_seconds = time_ms_utc / 1000; // NOLINT |
| 46 | // Format time, "ddd yyyy-mm-dd hh:mm:ss zzz" |
| 47 | char buffer[512]; |
| 48 | strftime(buffer, sizeof(buffer), "%a %Y-%m-%d %H:%M:%S %Z", |
| 49 | localtime(&time_seconds)); |
| 50 | return std::string(buffer); |
| 51 | } |
| 52 | |
| 53 | class ParserTest : public testing::Test { |
| 54 | public: |
| 55 | void SetUp() override { |
| 56 | model_buffer_ = ReadFile(GetModelPath() + "test_model.fb"); |
| 57 | const Model* model = GetModel(model_buffer_.data()); |
| 58 | ASSERT_TRUE(model != nullptr); |
| 59 | ASSERT_TRUE(model->datetime_model() != nullptr); |
| 60 | parser_ = DatetimeParser::Instance(model->datetime_model(), unilib_); |
| 61 | } |
| 62 | |
| 63 | bool ParsesCorrectly(const std::string& marked_text, |
| 64 | const int64 expected_ms_utc, |
| 65 | DatetimeGranularity expected_granularity, |
| 66 | const std::string& timezone = "Europe/Zurich") { |
| 67 | auto expected_start_index = marked_text.find("{"); |
| 68 | EXPECT_TRUE(expected_start_index != std::string::npos); |
| 69 | auto expected_end_index = marked_text.find("}"); |
| 70 | EXPECT_TRUE(expected_end_index != std::string::npos); |
| 71 | |
| 72 | std::string text; |
| 73 | text += std::string(marked_text.begin(), |
| 74 | marked_text.begin() + expected_start_index); |
| 75 | text += std::string(marked_text.begin() + expected_start_index + 1, |
| 76 | marked_text.begin() + expected_end_index); |
| 77 | text += std::string(marked_text.begin() + expected_end_index + 1, |
| 78 | marked_text.end()); |
| 79 | |
| 80 | std::vector<DatetimeParseResultSpan> results; |
| 81 | |
Lukas Zilka | ba849e7 | 2018-03-08 14:48:21 +0100 | [diff] [blame^] | 82 | if (!parser_->Parse(text, 0, timezone, /*locales=*/"", ModeFlag_ANNOTATION, |
| 83 | &results)) { |
Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 84 | TC_LOG(ERROR) << text; |
| 85 | TC_CHECK(false); |
| 86 | } |
| 87 | EXPECT_TRUE(!results.empty()); |
| 88 | |
| 89 | std::vector<DatetimeParseResultSpan> filtered_results; |
| 90 | for (const DatetimeParseResultSpan& result : results) { |
| 91 | if (SpansOverlap(result.span, |
| 92 | {expected_start_index, expected_end_index})) { |
| 93 | filtered_results.push_back(result); |
| 94 | } |
| 95 | } |
| 96 | |
| 97 | const std::vector<DatetimeParseResultSpan> expected{ |
| 98 | {{expected_start_index, expected_end_index - 1}, |
| 99 | {expected_ms_utc, expected_granularity}, |
| 100 | /*target_classification_score=*/1.0, |
| 101 | /*priority_score=*/0.0}}; |
| 102 | const bool matches = |
| 103 | testing::Matches(ElementsAreArray(expected))(filtered_results); |
| 104 | if (!matches) { |
| 105 | TC_LOG(ERROR) << "Expected: " << expected[0] << " which corresponds to: " |
| 106 | << FormatMillis(expected[0].data.time_ms_utc); |
| 107 | for (int i = 0; i < filtered_results.size(); ++i) { |
| 108 | TC_LOG(ERROR) << "Actual[" << i << "]: " << filtered_results[i] |
| 109 | << " which corresponds to: " |
| 110 | << FormatMillis(filtered_results[i].data.time_ms_utc); |
| 111 | } |
| 112 | } |
| 113 | return matches; |
| 114 | } |
| 115 | |
| 116 | protected: |
| 117 | std::string model_buffer_; |
| 118 | std::unique_ptr<DatetimeParser> parser_; |
| 119 | UniLib unilib_; |
| 120 | }; |
| 121 | |
| 122 | // Test with just a few cases to make debugging of general failures easier. |
| 123 | TEST_F(ParserTest, ParseShort) { |
| 124 | EXPECT_TRUE( |
| 125 | ParsesCorrectly("{January 1, 1988}", 567990000000, GRANULARITY_DAY)); |
Lukas Zilka | ba849e7 | 2018-03-08 14:48:21 +0100 | [diff] [blame^] | 126 | EXPECT_TRUE(ParsesCorrectly("{three days ago}", -262800000, GRANULARITY_DAY)); |
Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 127 | } |
| 128 | |
| 129 | TEST_F(ParserTest, Parse) { |
| 130 | EXPECT_TRUE( |
| 131 | ParsesCorrectly("{January 1, 1988}", 567990000000, GRANULARITY_DAY)); |
| 132 | EXPECT_TRUE(ParsesCorrectly("{1 2 2018}", 1517439600000, GRANULARITY_DAY)); |
| 133 | EXPECT_TRUE( |
| 134 | ParsesCorrectly("{january 31 2018}", 1517353200000, GRANULARITY_DAY)); |
| 135 | EXPECT_TRUE(ParsesCorrectly("lorem {1 january 2018} ipsum", 1514761200000, |
| 136 | GRANULARITY_DAY)); |
| 137 | EXPECT_TRUE(ParsesCorrectly("{19/apr/2010:06:36:15}", 1271651775000, |
| 138 | GRANULARITY_SECOND)); |
| 139 | EXPECT_TRUE(ParsesCorrectly("{09/Mar/2004 22:02:40}", 1078866160000, |
| 140 | GRANULARITY_SECOND)); |
| 141 | EXPECT_TRUE(ParsesCorrectly("{Dec 2, 2010 2:39:58 AM}", 1291253998000, |
| 142 | GRANULARITY_SECOND)); |
| 143 | EXPECT_TRUE(ParsesCorrectly("{Jun 09 2011 15:28:14}", 1307626094000, |
| 144 | GRANULARITY_SECOND)); |
| 145 | EXPECT_TRUE(ParsesCorrectly("{Apr 20 00:00:35 2010}", 1271714435000, |
| 146 | GRANULARITY_SECOND)); |
| 147 | EXPECT_TRUE( |
| 148 | ParsesCorrectly("{Mar 16 08:12:04}", 6419524000, GRANULARITY_SECOND)); |
| 149 | EXPECT_TRUE(ParsesCorrectly("{2012-10-14T22:11:20}", 1350245480000, |
| 150 | GRANULARITY_SECOND)); |
Lukas Zilka | ba849e7 | 2018-03-08 14:48:21 +0100 | [diff] [blame^] | 151 | EXPECT_TRUE(ParsesCorrectly("{2014-07-01T14:59:55}.711Z", 1404219595000, |
Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 152 | GRANULARITY_SECOND)); |
Lukas Zilka | ba849e7 | 2018-03-08 14:48:21 +0100 | [diff] [blame^] | 153 | EXPECT_TRUE(ParsesCorrectly("{2010-06-26 02:31:29},573", 1277512289000, |
Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 154 | GRANULARITY_SECOND)); |
| 155 | EXPECT_TRUE(ParsesCorrectly("{2006/01/22 04:11:05}", 1137899465000, |
| 156 | GRANULARITY_SECOND)); |
| 157 | EXPECT_TRUE( |
| 158 | ParsesCorrectly("{150423 11:42:35}", 1429782155000, GRANULARITY_SECOND)); |
| 159 | EXPECT_TRUE(ParsesCorrectly("{11:42:35}", 38555000, GRANULARITY_SECOND)); |
Lukas Zilka | ba849e7 | 2018-03-08 14:48:21 +0100 | [diff] [blame^] | 160 | EXPECT_TRUE(ParsesCorrectly("{11:42:35}.173", 38555000, GRANULARITY_SECOND)); |
Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 161 | EXPECT_TRUE( |
Lukas Zilka | ba849e7 | 2018-03-08 14:48:21 +0100 | [diff] [blame^] | 162 | ParsesCorrectly("{23/Apr 11:42:35},173", 9715355000, GRANULARITY_SECOND)); |
Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 163 | EXPECT_TRUE(ParsesCorrectly("{23/Apr/2015:11:42:35}", 1429782155000, |
| 164 | GRANULARITY_SECOND)); |
| 165 | EXPECT_TRUE(ParsesCorrectly("{23/Apr/2015 11:42:35}", 1429782155000, |
| 166 | GRANULARITY_SECOND)); |
| 167 | EXPECT_TRUE(ParsesCorrectly("{23-Apr-2015 11:42:35}", 1429782155000, |
| 168 | GRANULARITY_SECOND)); |
Lukas Zilka | ba849e7 | 2018-03-08 14:48:21 +0100 | [diff] [blame^] | 169 | EXPECT_TRUE(ParsesCorrectly("{23-Apr-2015 11:42:35}.883", 1429782155000, |
Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 170 | GRANULARITY_SECOND)); |
| 171 | EXPECT_TRUE(ParsesCorrectly("{23 Apr 2015 11:42:35}", 1429782155000, |
| 172 | GRANULARITY_SECOND)); |
Lukas Zilka | ba849e7 | 2018-03-08 14:48:21 +0100 | [diff] [blame^] | 173 | EXPECT_TRUE(ParsesCorrectly("{23 Apr 2015 11:42:35}.883", 1429782155000, |
Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 174 | GRANULARITY_SECOND)); |
| 175 | EXPECT_TRUE(ParsesCorrectly("{04/23/15 11:42:35}", 1429782155000, |
| 176 | GRANULARITY_SECOND)); |
| 177 | EXPECT_TRUE(ParsesCorrectly("{04/23/2015 11:42:35}", 1429782155000, |
| 178 | GRANULARITY_SECOND)); |
Lukas Zilka | ba849e7 | 2018-03-08 14:48:21 +0100 | [diff] [blame^] | 179 | EXPECT_TRUE(ParsesCorrectly("{04/23/2015 11:42:35}.883", 1429782155000, |
Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 180 | GRANULARITY_SECOND)); |
Lukas Zilka | ba849e7 | 2018-03-08 14:48:21 +0100 | [diff] [blame^] | 181 | EXPECT_TRUE(ParsesCorrectly("{8/5/2011 3:31:18 AM}:234}", 1312507878000, |
Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 182 | GRANULARITY_SECOND)); |
| 183 | EXPECT_TRUE(ParsesCorrectly("{9/28/2011 2:23:15 PM}", 1317212595000, |
| 184 | GRANULARITY_SECOND)); |
| 185 | EXPECT_TRUE(ParsesCorrectly("{19/apr/2010:06:36:15}", 1271651775000, |
| 186 | GRANULARITY_SECOND)); |
| 187 | EXPECT_TRUE(ParsesCorrectly( |
| 188 | "Are sentiments apartments decisively the especially alteration. " |
| 189 | "Thrown shy denote ten ladies though ask saw. Or by to he going " |
| 190 | "think order event music. Incommode so intention defective at " |
| 191 | "convinced. Led income months itself and houses you. After nor " |
| 192 | "you leave might share court balls. {19/apr/2010:06:36:15} Are " |
| 193 | "sentiments apartments decisively the especially alteration. " |
| 194 | "Thrown shy denote ten ladies though ask saw. Or by to he going " |
| 195 | "think order event music. Incommode so intention defective at " |
| 196 | "convinced. Led income months itself and houses you. After nor " |
| 197 | "you leave might share court balls. ", |
| 198 | 1271651775000, GRANULARITY_SECOND)); |
| 199 | EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4:30}", 1514777400000, |
| 200 | GRANULARITY_MINUTE)); |
| 201 | EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4}", 1514775600000, |
| 202 | GRANULARITY_HOUR)); |
| 203 | EXPECT_TRUE(ParsesCorrectly("{january 1 2018 at 4pm}", 1514818800000, |
| 204 | GRANULARITY_HOUR)); |
| 205 | |
Lukas Zilka | ba849e7 | 2018-03-08 14:48:21 +0100 | [diff] [blame^] | 206 | EXPECT_TRUE(ParsesCorrectly("{today}", -3600000, GRANULARITY_DAY)); |
| 207 | EXPECT_TRUE(ParsesCorrectly("{today}", -57600000, GRANULARITY_DAY, |
Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 208 | "America/Los_Angeles")); |
Lukas Zilka | ba849e7 | 2018-03-08 14:48:21 +0100 | [diff] [blame^] | 209 | EXPECT_TRUE(ParsesCorrectly("{next week}", 255600000, GRANULARITY_WEEK)); |
| 210 | EXPECT_TRUE(ParsesCorrectly("{next day}", 82800000, GRANULARITY_DAY)); |
| 211 | EXPECT_TRUE(ParsesCorrectly("{in three days}", 255600000, GRANULARITY_DAY)); |
Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 212 | EXPECT_TRUE( |
Lukas Zilka | ba849e7 | 2018-03-08 14:48:21 +0100 | [diff] [blame^] | 213 | ParsesCorrectly("{in three weeks}", 1465200000, GRANULARITY_WEEK)); |
| 214 | EXPECT_TRUE(ParsesCorrectly("{tomorrow}", 82800000, GRANULARITY_DAY)); |
Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 215 | EXPECT_TRUE( |
| 216 | ParsesCorrectly("{tomorrow at 4:00}", 97200000, GRANULARITY_MINUTE)); |
| 217 | EXPECT_TRUE(ParsesCorrectly("{tomorrow at 4}", 97200000, GRANULARITY_HOUR)); |
Lukas Zilka | ba849e7 | 2018-03-08 14:48:21 +0100 | [diff] [blame^] | 218 | EXPECT_TRUE(ParsesCorrectly("{next wednesday}", 514800000, GRANULARITY_DAY)); |
Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 219 | EXPECT_TRUE( |
| 220 | ParsesCorrectly("{next wednesday at 4}", 529200000, GRANULARITY_HOUR)); |
| 221 | EXPECT_TRUE(ParsesCorrectly("last seen {today at 9:01 PM}", 72060000, |
| 222 | GRANULARITY_MINUTE)); |
Lukas Zilka | ba849e7 | 2018-03-08 14:48:21 +0100 | [diff] [blame^] | 223 | EXPECT_TRUE(ParsesCorrectly("{Three days ago}", -262800000, GRANULARITY_DAY)); |
| 224 | EXPECT_TRUE(ParsesCorrectly("{three days ago}", -262800000, GRANULARITY_DAY)); |
Lukas Zilka | b23e212 | 2018-02-09 10:25:19 +0100 | [diff] [blame] | 225 | } |
| 226 | |
| 227 | // TODO(zilka): Add a test that tests multiple locales. |
| 228 | |
| 229 | } // namespace |
| 230 | } // namespace libtextclassifier2 |