/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
#include "text-classifier.h"

#include <fstream>
#include <iostream>
#include <iterator>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "gmock/gmock.h"
#include "gtest/gtest.h"
26
27namespace libtextclassifier2 {
28namespace {
29
30using testing::ElementsAreArray;
31using testing::Pair;
32
// Returns the label (the `first` member) of the first entry in `results`.
//
// When `results` is empty there is no entry to read, so the sentinel string
// "<INVALID RESULTS>" is returned instead; tests compare against that literal
// to assert that no classification was produced.
std::string FirstResult(
    const std::vector<std::pair<std::string, float>>& results) {
  if (results.empty()) {
    return "<INVALID RESULTS>";
  }
  return results.front().first;
}
40
41MATCHER_P3(IsAnnotatedSpan, start, end, best_class, "") {
42 return testing::Value(arg.span, Pair(start, end)) &&
43 testing::Value(FirstResult(arg.classification), best_class);
44}
45
// Reads the entire contents of the file `file_name` and returns them as a
// string.
//
// The stream is opened in binary mode so the bytes are returned untranslated
// on platforms that distinguish text and binary streams. If the file cannot
// be opened, the stream has nothing to read and an empty string is returned.
std::string ReadFile(const std::string& file_name) {
  std::ifstream file_stream(file_name, std::ios::binary);
  return std::string(std::istreambuf_iterator<char>(file_stream),
                     std::istreambuf_iterator<char>());
}
50
51std::string GetModelPath() {
52 return LIBTEXTCLASSIFIER_TEST_DATA_DIR;
53}
54
55TEST(TextClassifierTest, EmbeddingExecutorLoadingFails) {
56 std::unique_ptr<TextClassifier> classifier =
57 TextClassifier::FromPath(GetModelPath() + "wrong_embeddings.fb");
58 EXPECT_FALSE(classifier);
59}
60
61TEST(TextClassifierTest, ClassifyText) {
62 std::unique_ptr<TextClassifier> classifier =
63 TextClassifier::FromPath(GetModelPath() + "test_model.fb");
64 ASSERT_TRUE(classifier);
65
66 EXPECT_EQ("other",
67 FirstResult(classifier->ClassifyText(
68 "this afternoon Barack Obama gave a speech at", {15, 27})));
69 EXPECT_EQ("other",
70 FirstResult(classifier->ClassifyText("you@android.com", {0, 15})));
71 EXPECT_EQ("other", FirstResult(classifier->ClassifyText(
72 "Contact me at you@android.com", {14, 29})));
73 EXPECT_EQ("phone", FirstResult(classifier->ClassifyText(
74 "Call me at (800) 123-456 today", {11, 24})));
75 EXPECT_EQ("other", FirstResult(classifier->ClassifyText(
76 "Visit www.google.com every today!", {6, 20})));
77
78 // More lines.
79 EXPECT_EQ("other",
80 FirstResult(classifier->ClassifyText(
81 "this afternoon Barack Obama gave a speech at|Visit "
82 "www.google.com every today!|Call me at (800) 123-456 today.",
83 {15, 27})));
84 EXPECT_EQ("other",
85 FirstResult(classifier->ClassifyText(
86 "this afternoon Barack Obama gave a speech at|Visit "
87 "www.google.com every today!|Call me at (800) 123-456 today.",
88 {51, 65})));
89 EXPECT_EQ("phone",
90 FirstResult(classifier->ClassifyText(
91 "this afternoon Barack Obama gave a speech at|Visit "
92 "www.google.com every today!|Call me at (800) 123-456 today.",
93 {90, 103})));
94
95 // Single word.
96 EXPECT_EQ("other", FirstResult(classifier->ClassifyText("obama", {0, 5})));
97 EXPECT_EQ("other", FirstResult(classifier->ClassifyText("asdf", {0, 4})));
98 EXPECT_EQ("<INVALID RESULTS>",
99 FirstResult(classifier->ClassifyText("asdf", {0, 0})));
100
101 // Junk.
102 EXPECT_EQ("<INVALID RESULTS>",
103 FirstResult(classifier->ClassifyText("", {0, 0})));
104 EXPECT_EQ("<INVALID RESULTS>", FirstResult(classifier->ClassifyText(
105 "a\n\n\n\nx x x\n\n\n\n\n\n", {1, 5})));
106}
107
108TEST(TextClassifierTest, PhoneFiltering) {
109 std::unique_ptr<TextClassifier> classifier =
110 TextClassifier::FromPath(GetModelPath() + "test_model.fb");
111 ASSERT_TRUE(classifier);
112
113 EXPECT_EQ("phone", FirstResult(classifier->ClassifyText(
114 "phone: (123) 456 789", {7, 20})));
115 EXPECT_EQ("phone", FirstResult(classifier->ClassifyText(
116 "phone: (123) 456 789,0001112", {7, 25})));
117 EXPECT_EQ("other", FirstResult(classifier->ClassifyText(
118 "phone: (123) 456 789,0001112", {7, 28})));
119}
120
121TEST(TextClassifierTest, SuggestSelection) {
122 std::unique_ptr<TextClassifier> classifier =
123 TextClassifier::FromPath(GetModelPath() + "test_model.fb");
124 ASSERT_TRUE(classifier);
125
126 EXPECT_EQ(classifier->SuggestSelection(
127 "this afternoon Barack Obama gave a speech at", {15, 21}),
128 std::make_pair(15, 21));
129
130 // Try passing whole string.
131 // If more than 1 token is specified, we should return back what entered.
132 EXPECT_EQ(
133 classifier->SuggestSelection("350 Third Street, Cambridge", {0, 27}),
134 std::make_pair(0, 27));
135
136 // Single letter.
137 EXPECT_EQ(classifier->SuggestSelection("a", {0, 1}), std::make_pair(0, 1));
138
139 // Single word.
140 EXPECT_EQ(classifier->SuggestSelection("asdf", {0, 4}), std::make_pair(0, 4));
141
142 EXPECT_EQ(
143 classifier->SuggestSelection("call me at 857 225 3556 today", {11, 14}),
144 std::make_pair(11, 23));
145
146 // Unpaired bracket stripping.
147 EXPECT_EQ(
148 classifier->SuggestSelection("call me at (857) 225 3556 today", {11, 16}),
149 std::make_pair(11, 25));
150 EXPECT_EQ(
151 classifier->SuggestSelection("call me at (857 225 3556 today", {11, 15}),
152 std::make_pair(12, 24));
153 EXPECT_EQ(
154 classifier->SuggestSelection("call me at 857 225 3556) today", {11, 14}),
155 std::make_pair(11, 23));
156 EXPECT_EQ(
157 classifier->SuggestSelection("call me at )857 225 3556( today", {11, 15}),
158 std::make_pair(12, 24));
159
160 // If the resulting selection would be empty, the original span is returned.
161 EXPECT_EQ(classifier->SuggestSelection("call me at )( today", {11, 13}),
162 std::make_pair(11, 13));
163 EXPECT_EQ(classifier->SuggestSelection("call me at ( today", {11, 12}),
164 std::make_pair(11, 12));
165 EXPECT_EQ(classifier->SuggestSelection("call me at ) today", {11, 12}),
166 std::make_pair(11, 12));
167}
168
169TEST(TextClassifierTest, SuggestSelectionsAreSymmetric) {
170 std::unique_ptr<TextClassifier> classifier =
171 TextClassifier::FromPath(GetModelPath() + "test_model.fb");
172 ASSERT_TRUE(classifier);
173
174 EXPECT_EQ(classifier->SuggestSelection("350 Third Street, Cambridge", {0, 3}),
175 std::make_pair(0, 27));
176 EXPECT_EQ(classifier->SuggestSelection("350 Third Street, Cambridge", {4, 9}),
177 std::make_pair(0, 27));
178 EXPECT_EQ(
179 classifier->SuggestSelection("350 Third Street, Cambridge", {10, 16}),
180 std::make_pair(0, 27));
181 EXPECT_EQ(classifier->SuggestSelection("a\nb\nc\n350 Third Street, Cambridge",
182 {16, 22}),
183 std::make_pair(6, 33));
184}
185
186TEST(TextClassifierTest, SuggestSelectionWithNewLine) {
187 std::unique_ptr<TextClassifier> classifier =
188 TextClassifier::FromPath(GetModelPath() + "test_model.fb");
189 ASSERT_TRUE(classifier);
190
191 EXPECT_EQ(classifier->SuggestSelection("abc\n857 225 3556", {4, 7}),
192 std::make_pair(4, 16));
193 EXPECT_EQ(classifier->SuggestSelection("857 225 3556\nabc", {0, 3}),
194 std::make_pair(0, 12));
195}
196
197TEST(TextClassifierTest, SuggestSelectionWithPunctuation) {
198 std::unique_ptr<TextClassifier> classifier =
199 TextClassifier::FromPath(GetModelPath() + "test_model.fb");
200 ASSERT_TRUE(classifier);
201
202 // From the right.
203 EXPECT_EQ(classifier->SuggestSelection(
204 "this afternoon BarackObama, gave a speech at", {15, 26}),
205 std::make_pair(15, 26));
206
207 // From the right multiple.
208 EXPECT_EQ(classifier->SuggestSelection(
209 "this afternoon BarackObama,.,.,, gave a speech at", {15, 26}),
210 std::make_pair(15, 26));
211
212 // From the left multiple.
213 EXPECT_EQ(classifier->SuggestSelection(
214 "this afternoon ,.,.,,BarackObama gave a speech at", {21, 32}),
215 std::make_pair(21, 32));
216
217 // From both sides.
218 EXPECT_EQ(classifier->SuggestSelection(
219 "this afternoon !BarackObama,- gave a speech at", {16, 27}),
220 std::make_pair(16, 27));
221}
222
223TEST(TextClassifierTest, SuggestSelectionNoCrashWithJunk) {
224 std::unique_ptr<TextClassifier> classifier =
225 TextClassifier::FromPath(GetModelPath() + "test_model.fb");
226 ASSERT_TRUE(classifier);
227
228 // Try passing in bunch of invalid selections.
229 EXPECT_EQ(classifier->SuggestSelection("", {0, 27}), std::make_pair(0, 27));
230 EXPECT_EQ(classifier->SuggestSelection("", {-10, 27}),
231 std::make_pair(-10, 27));
232 EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {0, 27}),
233 std::make_pair(0, 27));
234 EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {-30, 300}),
235 std::make_pair(-30, 300));
236 EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {-10, -1}),
237 std::make_pair(-10, -1));
238 EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {100, 17}),
239 std::make_pair(100, 17));
240}
241
242TEST(TextClassifierTest, Annotate) {
243 std::unique_ptr<TextClassifier> classifier =
244 TextClassifier::FromPath(GetModelPath() + "test_model.fb");
245 ASSERT_TRUE(classifier);
246
247 const std::string test_string =
248 "& saw Barak Obama today .. 350 Third Street, Cambridge\nand my phone "
249 "number is 853 225 3556.";
250 EXPECT_THAT(classifier->Annotate(test_string),
251 ElementsAreArray({
252 IsAnnotatedSpan(0, 0, "<INVALID RESULTS>"),
253 IsAnnotatedSpan(2, 5, "other"),
254 IsAnnotatedSpan(6, 11, "other"),
255 IsAnnotatedSpan(12, 17, "other"),
256 IsAnnotatedSpan(18, 23, "other"),
257 IsAnnotatedSpan(24, 24, "<INVALID RESULTS>"),
258 IsAnnotatedSpan(27, 54, "address"),
259 IsAnnotatedSpan(55, 58, "other"),
260 IsAnnotatedSpan(59, 61, "other"),
261 IsAnnotatedSpan(62, 67, "other"),
262 IsAnnotatedSpan(68, 74, "other"),
263 IsAnnotatedSpan(75, 77, "other"),
264 IsAnnotatedSpan(78, 90, "phone"),
265 }));
266}
267
// TODO(jacekj): Test the regex functionality.
269
270} // namespace
271} // namespace libtextclassifier2