Lukas Zilka | 21d8c98 | 2018-01-24 11:11:20 +0100 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (C) 2017 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #include "text-classifier.h" |
| 18 | |
| 19 | #include <fstream> |
| 20 | #include <iostream> |
| 21 | #include <memory> |
| 22 | #include <string> |
| 23 | |
| 24 | #include "gmock/gmock.h" |
| 25 | #include "gtest/gtest.h" |
| 26 | |
| 27 | namespace libtextclassifier2 { |
| 28 | namespace { |
| 29 | |
| 30 | using testing::ElementsAreArray; |
| 31 | using testing::Pair; |
| 32 | |
// Returns the label (the `first` member) of the first entry in `results`.
// When `results` is empty, the sentinel "<INVALID RESULTS>" is returned
// instead, so callers can compare against it directly.
std::string FirstResult(
    const std::vector<std::pair<std::string, float>>& results) {
  return results.empty() ? std::string("<INVALID RESULTS>")
                         : results.front().first;
}
| 40 | |
| 41 | MATCHER_P3(IsAnnotatedSpan, start, end, best_class, "") { |
| 42 | return testing::Value(arg.span, Pair(start, end)) && |
| 43 | testing::Value(FirstResult(arg.classification), best_class); |
| 44 | } |
| 45 | |
// Reads the entire contents of `file_name` and returns them as a string.
//
// The stream is opened in binary mode so the bytes come back verbatim, with no
// newline translation on platforms that distinguish text from binary streams.
// If the file cannot be opened, an empty string is returned.
std::string ReadFile(const std::string& file_name) {
  std::ifstream file_stream(file_name, std::ios::in | std::ios::binary);
  if (!file_stream) {
    return std::string();
  }
  return std::string(std::istreambuf_iterator<char>(file_stream),
                     std::istreambuf_iterator<char>());
}
| 50 | |
| 51 | std::string GetModelPath() { |
| 52 | return LIBTEXTCLASSIFIER_TEST_DATA_DIR; |
| 53 | } |
| 54 | |
| 55 | TEST(TextClassifierTest, EmbeddingExecutorLoadingFails) { |
| 56 | std::unique_ptr<TextClassifier> classifier = |
| 57 | TextClassifier::FromPath(GetModelPath() + "wrong_embeddings.fb"); |
| 58 | EXPECT_FALSE(classifier); |
| 59 | } |
| 60 | |
| 61 | TEST(TextClassifierTest, ClassifyText) { |
| 62 | std::unique_ptr<TextClassifier> classifier = |
| 63 | TextClassifier::FromPath(GetModelPath() + "test_model.fb"); |
| 64 | ASSERT_TRUE(classifier); |
| 65 | |
| 66 | EXPECT_EQ("other", |
| 67 | FirstResult(classifier->ClassifyText( |
| 68 | "this afternoon Barack Obama gave a speech at", {15, 27}))); |
| 69 | EXPECT_EQ("other", |
| 70 | FirstResult(classifier->ClassifyText("you@android.com", {0, 15}))); |
| 71 | EXPECT_EQ("other", FirstResult(classifier->ClassifyText( |
| 72 | "Contact me at you@android.com", {14, 29}))); |
| 73 | EXPECT_EQ("phone", FirstResult(classifier->ClassifyText( |
| 74 | "Call me at (800) 123-456 today", {11, 24}))); |
| 75 | EXPECT_EQ("other", FirstResult(classifier->ClassifyText( |
| 76 | "Visit www.google.com every today!", {6, 20}))); |
| 77 | |
| 78 | // More lines. |
| 79 | EXPECT_EQ("other", |
| 80 | FirstResult(classifier->ClassifyText( |
| 81 | "this afternoon Barack Obama gave a speech at|Visit " |
| 82 | "www.google.com every today!|Call me at (800) 123-456 today.", |
| 83 | {15, 27}))); |
| 84 | EXPECT_EQ("other", |
| 85 | FirstResult(classifier->ClassifyText( |
| 86 | "this afternoon Barack Obama gave a speech at|Visit " |
| 87 | "www.google.com every today!|Call me at (800) 123-456 today.", |
| 88 | {51, 65}))); |
| 89 | EXPECT_EQ("phone", |
| 90 | FirstResult(classifier->ClassifyText( |
| 91 | "this afternoon Barack Obama gave a speech at|Visit " |
| 92 | "www.google.com every today!|Call me at (800) 123-456 today.", |
| 93 | {90, 103}))); |
| 94 | |
| 95 | // Single word. |
| 96 | EXPECT_EQ("other", FirstResult(classifier->ClassifyText("obama", {0, 5}))); |
| 97 | EXPECT_EQ("other", FirstResult(classifier->ClassifyText("asdf", {0, 4}))); |
| 98 | EXPECT_EQ("<INVALID RESULTS>", |
| 99 | FirstResult(classifier->ClassifyText("asdf", {0, 0}))); |
| 100 | |
| 101 | // Junk. |
| 102 | EXPECT_EQ("<INVALID RESULTS>", |
| 103 | FirstResult(classifier->ClassifyText("", {0, 0}))); |
| 104 | EXPECT_EQ("<INVALID RESULTS>", FirstResult(classifier->ClassifyText( |
| 105 | "a\n\n\n\nx x x\n\n\n\n\n\n", {1, 5}))); |
| 106 | } |
| 107 | |
| 108 | TEST(TextClassifierTest, PhoneFiltering) { |
| 109 | std::unique_ptr<TextClassifier> classifier = |
| 110 | TextClassifier::FromPath(GetModelPath() + "test_model.fb"); |
| 111 | ASSERT_TRUE(classifier); |
| 112 | |
| 113 | EXPECT_EQ("phone", FirstResult(classifier->ClassifyText( |
| 114 | "phone: (123) 456 789", {7, 20}))); |
| 115 | EXPECT_EQ("phone", FirstResult(classifier->ClassifyText( |
| 116 | "phone: (123) 456 789,0001112", {7, 25}))); |
| 117 | EXPECT_EQ("other", FirstResult(classifier->ClassifyText( |
| 118 | "phone: (123) 456 789,0001112", {7, 28}))); |
| 119 | } |
| 120 | |
| 121 | TEST(TextClassifierTest, SuggestSelection) { |
| 122 | std::unique_ptr<TextClassifier> classifier = |
| 123 | TextClassifier::FromPath(GetModelPath() + "test_model.fb"); |
| 124 | ASSERT_TRUE(classifier); |
| 125 | |
| 126 | EXPECT_EQ(classifier->SuggestSelection( |
| 127 | "this afternoon Barack Obama gave a speech at", {15, 21}), |
| 128 | std::make_pair(15, 21)); |
| 129 | |
| 130 | // Try passing whole string. |
| 131 | // If more than 1 token is specified, we should return back what entered. |
| 132 | EXPECT_EQ( |
| 133 | classifier->SuggestSelection("350 Third Street, Cambridge", {0, 27}), |
| 134 | std::make_pair(0, 27)); |
| 135 | |
| 136 | // Single letter. |
| 137 | EXPECT_EQ(classifier->SuggestSelection("a", {0, 1}), std::make_pair(0, 1)); |
| 138 | |
| 139 | // Single word. |
| 140 | EXPECT_EQ(classifier->SuggestSelection("asdf", {0, 4}), std::make_pair(0, 4)); |
| 141 | |
| 142 | EXPECT_EQ( |
| 143 | classifier->SuggestSelection("call me at 857 225 3556 today", {11, 14}), |
| 144 | std::make_pair(11, 23)); |
| 145 | |
| 146 | // Unpaired bracket stripping. |
| 147 | EXPECT_EQ( |
| 148 | classifier->SuggestSelection("call me at (857) 225 3556 today", {11, 16}), |
| 149 | std::make_pair(11, 25)); |
| 150 | EXPECT_EQ( |
| 151 | classifier->SuggestSelection("call me at (857 225 3556 today", {11, 15}), |
| 152 | std::make_pair(12, 24)); |
| 153 | EXPECT_EQ( |
| 154 | classifier->SuggestSelection("call me at 857 225 3556) today", {11, 14}), |
| 155 | std::make_pair(11, 23)); |
| 156 | EXPECT_EQ( |
| 157 | classifier->SuggestSelection("call me at )857 225 3556( today", {11, 15}), |
| 158 | std::make_pair(12, 24)); |
| 159 | |
| 160 | // If the resulting selection would be empty, the original span is returned. |
| 161 | EXPECT_EQ(classifier->SuggestSelection("call me at )( today", {11, 13}), |
| 162 | std::make_pair(11, 13)); |
| 163 | EXPECT_EQ(classifier->SuggestSelection("call me at ( today", {11, 12}), |
| 164 | std::make_pair(11, 12)); |
| 165 | EXPECT_EQ(classifier->SuggestSelection("call me at ) today", {11, 12}), |
| 166 | std::make_pair(11, 12)); |
| 167 | } |
| 168 | |
| 169 | TEST(TextClassifierTest, SuggestSelectionsAreSymmetric) { |
| 170 | std::unique_ptr<TextClassifier> classifier = |
| 171 | TextClassifier::FromPath(GetModelPath() + "test_model.fb"); |
| 172 | ASSERT_TRUE(classifier); |
| 173 | |
| 174 | EXPECT_EQ(classifier->SuggestSelection("350 Third Street, Cambridge", {0, 3}), |
| 175 | std::make_pair(0, 27)); |
| 176 | EXPECT_EQ(classifier->SuggestSelection("350 Third Street, Cambridge", {4, 9}), |
| 177 | std::make_pair(0, 27)); |
| 178 | EXPECT_EQ( |
| 179 | classifier->SuggestSelection("350 Third Street, Cambridge", {10, 16}), |
| 180 | std::make_pair(0, 27)); |
| 181 | EXPECT_EQ(classifier->SuggestSelection("a\nb\nc\n350 Third Street, Cambridge", |
| 182 | {16, 22}), |
| 183 | std::make_pair(6, 33)); |
| 184 | } |
| 185 | |
| 186 | TEST(TextClassifierTest, SuggestSelectionWithNewLine) { |
| 187 | std::unique_ptr<TextClassifier> classifier = |
| 188 | TextClassifier::FromPath(GetModelPath() + "test_model.fb"); |
| 189 | ASSERT_TRUE(classifier); |
| 190 | |
| 191 | EXPECT_EQ(classifier->SuggestSelection("abc\n857 225 3556", {4, 7}), |
| 192 | std::make_pair(4, 16)); |
| 193 | EXPECT_EQ(classifier->SuggestSelection("857 225 3556\nabc", {0, 3}), |
| 194 | std::make_pair(0, 12)); |
| 195 | } |
| 196 | |
| 197 | TEST(TextClassifierTest, SuggestSelectionWithPunctuation) { |
| 198 | std::unique_ptr<TextClassifier> classifier = |
| 199 | TextClassifier::FromPath(GetModelPath() + "test_model.fb"); |
| 200 | ASSERT_TRUE(classifier); |
| 201 | |
| 202 | // From the right. |
| 203 | EXPECT_EQ(classifier->SuggestSelection( |
| 204 | "this afternoon BarackObama, gave a speech at", {15, 26}), |
| 205 | std::make_pair(15, 26)); |
| 206 | |
| 207 | // From the right multiple. |
| 208 | EXPECT_EQ(classifier->SuggestSelection( |
| 209 | "this afternoon BarackObama,.,.,, gave a speech at", {15, 26}), |
| 210 | std::make_pair(15, 26)); |
| 211 | |
| 212 | // From the left multiple. |
| 213 | EXPECT_EQ(classifier->SuggestSelection( |
| 214 | "this afternoon ,.,.,,BarackObama gave a speech at", {21, 32}), |
| 215 | std::make_pair(21, 32)); |
| 216 | |
| 217 | // From both sides. |
| 218 | EXPECT_EQ(classifier->SuggestSelection( |
| 219 | "this afternoon !BarackObama,- gave a speech at", {16, 27}), |
| 220 | std::make_pair(16, 27)); |
| 221 | } |
| 222 | |
| 223 | TEST(TextClassifierTest, SuggestSelectionNoCrashWithJunk) { |
| 224 | std::unique_ptr<TextClassifier> classifier = |
| 225 | TextClassifier::FromPath(GetModelPath() + "test_model.fb"); |
| 226 | ASSERT_TRUE(classifier); |
| 227 | |
| 228 | // Try passing in bunch of invalid selections. |
| 229 | EXPECT_EQ(classifier->SuggestSelection("", {0, 27}), std::make_pair(0, 27)); |
| 230 | EXPECT_EQ(classifier->SuggestSelection("", {-10, 27}), |
| 231 | std::make_pair(-10, 27)); |
| 232 | EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {0, 27}), |
| 233 | std::make_pair(0, 27)); |
| 234 | EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {-30, 300}), |
| 235 | std::make_pair(-30, 300)); |
| 236 | EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {-10, -1}), |
| 237 | std::make_pair(-10, -1)); |
| 238 | EXPECT_EQ(classifier->SuggestSelection("Word 1 2 3 hello!", {100, 17}), |
| 239 | std::make_pair(100, 17)); |
| 240 | } |
| 241 | |
| 242 | TEST(TextClassifierTest, Annotate) { |
| 243 | std::unique_ptr<TextClassifier> classifier = |
| 244 | TextClassifier::FromPath(GetModelPath() + "test_model.fb"); |
| 245 | ASSERT_TRUE(classifier); |
| 246 | |
| 247 | const std::string test_string = |
| 248 | "& saw Barak Obama today .. 350 Third Street, Cambridge\nand my phone " |
| 249 | "number is 853 225 3556."; |
| 250 | EXPECT_THAT(classifier->Annotate(test_string), |
| 251 | ElementsAreArray({ |
| 252 | IsAnnotatedSpan(0, 0, "<INVALID RESULTS>"), |
| 253 | IsAnnotatedSpan(2, 5, "other"), |
| 254 | IsAnnotatedSpan(6, 11, "other"), |
| 255 | IsAnnotatedSpan(12, 17, "other"), |
| 256 | IsAnnotatedSpan(18, 23, "other"), |
| 257 | IsAnnotatedSpan(24, 24, "<INVALID RESULTS>"), |
| 258 | IsAnnotatedSpan(27, 54, "address"), |
| 259 | IsAnnotatedSpan(55, 58, "other"), |
| 260 | IsAnnotatedSpan(59, 61, "other"), |
| 261 | IsAnnotatedSpan(62, 67, "other"), |
| 262 | IsAnnotatedSpan(68, 74, "other"), |
| 263 | IsAnnotatedSpan(75, 77, "other"), |
| 264 | IsAnnotatedSpan(78, 90, "phone"), |
| 265 | })); |
| 266 | } |
| 267 | |
| 268 | // TODO(jacekj): Test the regex functionality. |
| 269 | |
| 270 | } // namespace |
| 271 | } // namespace libtextclassifier2 |