// Copyright 2020 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

#include "pw_tokenizer/detokenize.h"

#include <array>
#include <string_view>
#include <utility>

#include "gtest/gtest.h"
| 20 | |
namespace pw::tokenizer {
namespace {

// Brings in the ""sv suffix, which builds std::string_views with an explicit
// length. This is required here because the test data contains embedded null
// bytes that would truncate a plain C-string literal.
using namespace std::literals::string_view_literals;

// Use a shorter name for the error string macro.
#define ERR PW_TOKENIZER_ARG_DECODING_ERROR
// A minimal in-memory token database with four argument-free entries.
//
// Use alignas to ensure that the data is properly aligned to be read from a
// token database entry struct. This avoids unaligned memory reads.
alignas(TokenDatabase::RawEntry) constexpr char kBasicData[] =
    "TOKENS\0\0"            // 8-byte header magic
    "\x04\x00\x00\x00"      // entry count: 4 (little-endian)
    "\0\0\0\0"              // remaining header bytes — presumably
                            // reserved/flags; TODO confirm against the
                            // TokenDatabase format docs
    // Entries: a 4-byte little-endian token followed by a 4-byte field
    // ("----" is filler here; collision tests below use this field as a
    // date-removed marker).
    "\x01\x00\x00\x00----"
    "\x05\x00\x00\x00----"
    "\xFF\x00\x00\x00----"
    "\xFF\xEE\xEE\xDD----"
    // String table: null-separated strings, one per entry, in entry order.
    // The final terminator comes from the char-array literal itself.
    "One\0"
    "TWO\0"
    "333\0"
    "FOUR";
| 43 | |
| 44 | class Detokenize : public ::testing::Test { |
| 45 | protected: |
| 46 | Detokenize() : detok_(TokenDatabase::Create<kBasicData>()) {} |
| 47 | Detokenizer detok_; |
| 48 | }; |
| 49 | |
// Each 4-byte little-endian token maps directly to its database string; no
// printf-style arguments are involved.
TEST_F(Detokenize, NoFormatting) {
  EXPECT_EQ(detok_.Detokenize("\1\0\0\0"sv).BestString(), "One");
  EXPECT_EQ(detok_.Detokenize("\5\0\0\0"sv).BestString(), "TWO");
  EXPECT_EQ(detok_.Detokenize("\xff\x00\x00\x00"sv).BestString(), "333");
  EXPECT_EQ(detok_.Detokenize("\xff\xee\xee\xdd"sv).BestString(), "FOUR");
}
| 56 | |
// Inputs shorter than a full 4-byte token cannot contain a token at all:
// the result is not ok() and BestString() is empty.
TEST_F(Detokenize, BestString_MissingToken_IsEmpty) {
  EXPECT_FALSE(detok_.Detokenize("").ok());
  // The (data, size) overload is exercised alongside the string_view one.
  EXPECT_TRUE(detok_.Detokenize("", 0u).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\1", 1u).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\1\0"sv).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\1\0\0"sv).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\0\0\0"sv).BestString().empty());
}
| 65 | |
// Well-formed 4-byte tokens that are not present in the database yield an
// empty BestString(). The 5-byte case shows that only the first 4 bytes are
// interpreted as the token; the rest would be argument data.
TEST_F(Detokenize, BestString_UnknownToken_IsEmpty) {
  EXPECT_FALSE(detok_.Detokenize("\0\0\0\0"sv).ok());
  EXPECT_TRUE(detok_.Detokenize("\0\0\0\0"sv).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\2\0\0\0"sv).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\x10\x32\x54\x76\x99"sv).BestString().empty());
  EXPECT_TRUE(detok_.Detokenize("\x98\xba\xdc\xfe"sv).BestString().empty());
}
| 73 | |
// With BestStringWithErrors(), a too-short input produces the "missing token"
// error string (wrapped by the ERR decoding-error macro) instead of silence.
TEST_F(Detokenize, BestStringWithErrors_MissingToken_ErrorMessage) {
  EXPECT_FALSE(detok_.Detokenize("").ok());
  EXPECT_EQ(detok_.Detokenize("", 0u).BestStringWithErrors(),
            ERR("missing token"));
  EXPECT_EQ(detok_.Detokenize("\1", 1u).BestStringWithErrors(),
            ERR("missing token"));
  EXPECT_EQ(detok_.Detokenize("\1\0"sv).BestStringWithErrors(),
            ERR("missing token"));
  EXPECT_EQ(detok_.Detokenize("\1\0\0"sv).BestStringWithErrors(),
            ERR("missing token"));
  EXPECT_EQ(detok_.Detokenize("\0\0\0"sv).BestStringWithErrors(),
            ERR("missing token"));
}
| 87 | |
// Unknown tokens are reported with the token value rendered as 8 lowercase
// hex digits. Note the little-endian decode: bytes 10 32 54 76 -> 76543210.
TEST_F(Detokenize, BestStringWithErrors_UnknownToken_ErrorMessage) {
  EXPECT_FALSE(detok_.Detokenize("\0\0\0\0"sv).ok());
  EXPECT_EQ(detok_.Detokenize("\0\0\0\0"sv).BestStringWithErrors(),
            ERR("unknown token 00000000"));
  EXPECT_EQ(detok_.Detokenize("\2\0\0\0"sv).BestStringWithErrors(),
            ERR("unknown token 00000002"));
  EXPECT_EQ(detok_.Detokenize("\x10\x32\x54\x76\x99"sv).BestStringWithErrors(),
            ERR("unknown token 76543210"));
  EXPECT_EQ(detok_.Detokenize("\x98\xba\xdc\xfe"sv).BestStringWithErrors(),
            ERR("unknown token fedcba98"));
}
| 99 | |
// A token database whose strings contain printf-style format specifiers, used
// to exercise argument decoding. Same layout as kBasicData: header, 9 entries
// of {4-byte little-endian token, 4 filler bytes}, then the string table.
alignas(TokenDatabase::RawEntry) constexpr char kDataWithArguments[] =
    "TOKENS\0\0"
    "\x09\x00\x00\x00"  // entry count: 9 (little-endian)
    "\0\0\0\0"
    "\x00\x00\x00\x00----"
    "\x0A\x0B\x0C\x0D----"
    "\x0E\x0F\x00\x01----"
    "\xAA\xAA\xAA\xAA----"
    "\xBB\xBB\xBB\xBB----"
    "\xCC\xCC\xCC\xCC----"
    "\xDD\xDD\xDD\xDD----"
    "\xEE\xEE\xEE\xEE----"
    "\xFF\xFF\xFF\xFF----"
    // String table; trailing comments give the matching token bytes.
    "\0"                        // 00000000: the empty format string
    "Use the %s, %s.\0"         // 0D0C0B0A
    "Now there are %d of %s!\0" // 01000F0E
    "%c!\0"    // AA
    "%hhu!\0"  // BB
    "%hu!\0"   // CC
    "%u!\0"    // DD
    "%lu!\0"   // EE
    "%llu!";   // FF

// Materialized database view over the raw bytes above.
constexpr TokenDatabase kWithArgs = TokenDatabase::Create<kDataWithArguments>();
| 124 | |
// A single test case: {encoded input bytes, expected detokenized output}.
using Case = std::pair<std::string_view, std::string_view>;

// Gathers any number of Cases into a fixed-size std::array so the tests can
// walk them with a range-based for loop and structured bindings.
template <typename... CaseArgs>
auto TestCases(CaseArgs... cases) {
  std::array<Case, sizeof...(CaseArgs)> table{cases...};
  return table;
}
| 131 | |
| 132 | class DetokenizeWithArgs : public ::testing::Test { |
| 133 | protected: |
| 134 | DetokenizeWithArgs() : detok_(kWithArgs) {} |
| 135 | |
| 136 | Detokenizer detok_; |
| 137 | }; |
| 138 | |
// A token absent from the database produces no match entries at all.
TEST_F(DetokenizeWithArgs, NoMatches) {
  EXPECT_TRUE(detok_.Detokenize("\x23\xab\xc9\x87"sv).matches().empty());
}
| 142 | |
// Token 0x00000000 appears exactly once in kWithArgs, so there is exactly one
// match.
TEST_F(DetokenizeWithArgs, SingleMatch) {
  EXPECT_EQ(detok_.Detokenize("\x00\x00\x00\x00"sv).matches().size(), 1u);
}
| 146 | |
// Token 0x00000000 maps to the empty format string, which detokenizes to "".
TEST_F(DetokenizeWithArgs, Empty) {
  EXPECT_EQ(detok_.Detokenize("\x00\x00\x00\x00"sv).BestString(), "");
}
| 150 | |
TEST_F(DetokenizeWithArgs, Successful) {
  // Run through test cases, but don't include cases that use %hhu or %llu since
  // these are not currently supported in arm-none-eabi-gcc.
  //
  // Encoding reminders: string arguments are length-prefixed ("\5force" is the
  // 5-char "force"), and integer arguments are zigzag-encoded varints ("\4"
  // decodes to 2; "\xfc\x01" decodes to 126, i.e. '~').
  for (auto [data, expected] : TestCases(
           Case{"\x0A\x0B\x0C\x0D\5force\4Luke"sv, "Use the force, Luke."},
           Case{"\x0E\x0F\x00\x01\4\4them"sv, "Now there are 2 of them!"},
           Case{"\xAA\xAA\xAA\xAA\xfc\x01"sv, "~!"},
           Case{"\xCC\xCC\xCC\xCC\xfe\xff\x07"sv, "65535!"},
           Case{"\xDD\xDD\xDD\xDD\xfe\xff\x07"sv, "65535!"},
           Case{"\xDD\xDD\xDD\xDD\xfe\xff\xff\xff\x1f"sv, "4294967295!"},
           Case{"\xEE\xEE\xEE\xEE\xfe\xff\x07"sv, "65535!"},
           Case{"\xEE\xEE\xEE\xEE\xfe\xff\xff\xff\x1f"sv, "4294967295!"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}
| 166 | |
// Leftover bytes after a format string with no arguments make the decode
// not-ok; BestString() falls back to empty.
TEST_F(DetokenizeWithArgs, ExtraDataError) {
  auto error = detok_.Detokenize("\x00\x00\x00\x00MORE data"sv);
  EXPECT_FALSE(error.ok());
  EXPECT_EQ("", error.BestString());
}
| 172 | |
// Only the first %s has data ("\5force"); the second %s has none. BestString()
// leaves the unfilled specifier in place, while BestStringWithErrors() marks
// it with the ERR decoding-error wrapper.
TEST_F(DetokenizeWithArgs, MissingArgumentError) {
  auto error = detok_.Detokenize("\x0A\x0B\x0C\x0D\5force"sv);
  EXPECT_FALSE(error.ok());
  EXPECT_EQ(error.BestString(), "Use the force, %s.");
  EXPECT_EQ(error.BestStringWithErrors(),
            "Use the force, " ERR("%s MISSING") ".");
}
| 180 | |
// "\xFF" is an incomplete varint for %d, so that argument is an ERROR; the
// following %s is then reported as SKIPPED rather than decoded.
TEST_F(DetokenizeWithArgs, DecodingError) {
  auto error = detok_.Detokenize("\x0E\x0F\x00\x01\xFF"sv);
  EXPECT_FALSE(error.ok());
  EXPECT_EQ(error.BestString(), "Now there are %d of %s!");
  EXPECT_EQ(error.BestStringWithErrors(),
            "Now there are " ERR("%d ERROR") " of " ERR("%s SKIPPED") "!");
}
| 188 | |
// A token database with deliberately colliding tokens (the same token mapped
// to multiple strings), used to test how the detokenizer ranks candidates.
// 15 entries of {4-byte little-endian token, 4-byte date field}; a date of
// \xff\xff\xff\xff means the entry is still present, while a concrete date
// (e.g. entry 2) marks the string as removed.
alignas(TokenDatabase::RawEntry) constexpr char kDataWithCollisions[] =
    "TOKENS\0\0"
    "\x0F\x00\x00\x00"  // entry count: 15 (little-endian)
    "\0\0\0\0"
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 1
    "\x00\x00\x00\x00\x01\x02\x03\x04"  // 2 (dated, i.e. removed)
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 3
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 4
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 5
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 6
    "\x00\x00\x00\x00\xff\xff\xff\xff"  // 7
    "\xAA\xAA\xAA\xAA\x00\x00\x00\x00"  // 8 (dated, i.e. removed)
    "\xAA\xAA\xAA\xAA\xff\xff\xff\xff"  // 9
    "\xBB\xBB\xBB\xBB\xff\xff\xff\xff"  // A
    "\xBB\xBB\xBB\xBB\xff\xff\xff\xff"  // B
    "\xCC\xCC\xCC\xCC\xff\xff\xff\xff"  // C
    "\xCC\xCC\xCC\xCC\xff\xff\xff\xff"  // D
    "\xDD\xDD\xDD\xDD\xff\xff\xff\xff"  // E
    "\xDD\xDD\xDD\xDD\xff\xff\xff\xff"  // F
    // String table
    "This string is present\0"   // 1
    "This string is removed\0"   // 2
    "One arg %d\0"               // 3
    "One arg %s\0"               // 4
    "Two args %s %u\0"           // 5
    "Two args %s %s %% %% %%\0"  // 6
    "Four args %d %d %d %d\0"    // 7
    "This one is removed\0"      // 8
    "This one is present\0"      // 9
    "Two ints %d %d\0"           // A
    "Three ints %d %d %d\0"      // B
    "Three strings %s %s %s\0"   // C
    "Two strings %s %s\0"        // D
    "Three %s %s %s\0"           // E
    "Five %d %d %d %d %s\0";     // F

// Materialized database view over the raw bytes above.
constexpr TokenDatabase kWithCollisions =
    TokenDatabase::Create<kDataWithCollisions>();
| 227 | |
| 228 | class DetokenizeWithCollisions : public ::testing::Test { |
| 229 | protected: |
| 230 | DetokenizeWithCollisions() : detok_(kWithCollisions) {} |
| 231 | |
| 232 | Detokenizer detok_; |
| 233 | }; |
| 234 | |
// Among colliding candidates, a string whose arguments all decode cleanly is
// preferred over one that errors, regardless of other tie-breakers.
TEST_F(DetokenizeWithCollisions, Collision_AlwaysPreferSuccessfulDecode) {
  for (auto [data, expected] :
       TestCases(Case{"\0\0\0\0"sv, "This string is present"},
                 Case{"\0\0\0\0\x01"sv, "One arg -1"},
                 Case{"\0\0\0\0\x80"sv, "One arg [...]"},
                 Case{"\0\0\0\0\4Hey!\x04"sv, "Two args Hey! 2"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}
| 244 | |
// When several candidates decode, prefer the one that consumes every byte of
// the argument data rather than leaving trailing bytes unexplained.
TEST_F(DetokenizeWithCollisions, Collision_PreferDecodingAllBytes) {
  for (auto [data, expected] :
       TestCases(Case{"\0\0\0\0\x80\x80\x80\x80\x00"sv, "Two args [...] 0"},
                 Case{"\0\0\0\0\x08?"sv, "One arg %s"},
                 Case{"\0\0\0\0\x01!\x01\x80"sv, "Two args ! \x80 % % %"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}
| 253 | |
// When no candidate decodes cleanly, the one with the fewest argument
// decoding errors wins (e.g. "Two ints" over "Three ints").
TEST_F(DetokenizeWithCollisions, Collision_PreferFewestDecodingErrors) {
  for (auto [data, expected] :
       TestCases(Case{"\xBB\xBB\xBB\xBB\x00"sv, "Two ints 0 %d"},
                 Case{"\xCC\xCC\xCC\xCC\2Yo\5?"sv, "Two strings Yo %s"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}
| 261 | |
// matches() is ordered best-first: the candidate that decoded more arguments
// ("Five ... %s", 4 decoded) ranks above the one that decoded fewer
// ("Three ... %s", 2 decoded).
TEST_F(DetokenizeWithCollisions, Collision_PreferMostDecodedArgs) {
  auto result = detok_.Detokenize("\xDD\xDD\xDD\xDD\x01\x02\x01\x04\x05"sv);
  EXPECT_EQ((std::string_view)result.matches()[0].value(), "Five -1 1 -1 2 %s");
  EXPECT_EQ((std::string_view)result.matches()[1].value(), "Three \2 \4 %s"sv);
}
| 267 | |
TEST_F(DetokenizeWithCollisions, Collision_PreferMostDecodedArgs_NoPercent) {
  // The "Two args %s %s ..." string successfully decodes this, and has more
  // "arguments", because of %%, but %% doesn't count as as a decoded argument.
  // Hence the four-%d string wins with four genuinely decoded arguments.
  EXPECT_EQ(detok_.Detokenize("\0\0\0\0\x01\x00\x01\x02"sv).BestString(),
            "Four args -1 0 -1 1");
}
| 274 | |
// When candidates are otherwise tied, an entry whose date field marks it as
// still present beats one whose date marks it as removed.
TEST_F(DetokenizeWithCollisions, Collision_PreferStillPresentString) {
  for (auto [data, expected] :
       TestCases(Case{"\x00\x00\x00\x00"sv, "This string is present"},
                 Case{"\xAA\xAA\xAA\xAA"sv, "This one is present"})) {
    EXPECT_EQ(detok_.Detokenize(data).BestString(), expected);
  }
}
| 282 | |
// Token 0x00000000 appears in 7 database entries (entries 1-7); all of them
// are reported as matches, not just the best one.
TEST_F(DetokenizeWithCollisions, Collision_TracksAllMatches) {
  auto result = detok_.Detokenize("\0\0\0\0"sv);
  EXPECT_EQ(result.matches().size(), 7u);
}
| 287 | |
| 288 | } // namespace |
| 289 | } // namespace pw::tokenizer |