Wyatt Hepler | 80c6ee5 | 2020-01-03 09:54:58 -0800 | [diff] [blame] | 1 | // Copyright 2020 The Pigweed Authors |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); you may not |
| 4 | // use this file except in compliance with the License. You may obtain a copy of |
| 5 | // the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT |
| 11 | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the |
| 12 | // License for the specific language governing permissions and limitations under |
| 13 | // the License. |
| 14 | |
| 15 | #include "pw_tokenizer/token_database.h" |
| 16 | |
| 17 | #include <cstring> |
| 18 | #include <string> |
| 19 | #include <string_view> |
| 20 | |
| 21 | #include "gtest/gtest.h" |
| 22 | |
| 23 | namespace pw::tokenizer { |
| 24 | namespace { |
| 25 | |
| 26 | using namespace std::literals::string_view_literals; |
| 27 | |
| 28 | // Use alignas to ensure that the data is properly aligned for database entries. |
| 29 | // This avoids unaligned memory reads. |
| 30 | alignas(TokenDatabase::RawEntry) constexpr char kBasicData[] = |
| 31 | "TOKENS\0\0\x03\x00\x00\x00\0\0\0\0" |
| 32 | "\x01\0\0\0\0\0\0\0" |
| 33 | "\x02\0\0\0\0\0\0\0" |
| 34 | "\xFF\0\0\0\0\0\0\0" |
| 35 | "hi!\0" |
| 36 | "goodbye\0" |
| 37 | ":)"; |
| 38 | |
| 39 | alignas(TokenDatabase::RawEntry) constexpr char kEmptyData[] = |
| 40 | "TOKENS\0\0\x00\x00\x00\x00\0\0\0"; // Last byte is null terminator. |
| 41 | |
| 42 | alignas(TokenDatabase::RawEntry) constexpr char kBadMagic[] = |
| 43 | "TOKENs\0\0\x03\x00\x00\x00\0\0\0\0" |
| 44 | "\x01\0\0\0\0\0\0\0" |
| 45 | "hi!\0"; |
| 46 | |
| 47 | alignas(TokenDatabase::RawEntry) constexpr char kBadVersion[] = |
| 48 | "TOKENS\0\1\x00\0\0\0\0\0\0\0"; |
| 49 | |
| 50 | alignas(TokenDatabase::RawEntry) constexpr char kBadEntryCount[] = |
| 51 | "TOKENS\0\0\xff\x00\x00\x00\0\0\0\0"; |
| 52 | |
| 53 | // Use signed data and a size with the top bit set to test that the entry count |
| 54 | // is read correctly, without per-byte sign extension. |
| 55 | alignas(TokenDatabase::RawEntry) constexpr signed char kSignedWithTopBit[] = |
| 56 | "TOKENS\0\0\x80\x00\x00\x00\0\0\0\0" |
| 57 | // Entries |
| 58 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" |
| 59 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" |
| 60 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" |
| 61 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 32 |
| 62 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" |
| 63 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" |
| 64 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" |
| 65 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 64 |
| 66 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" |
| 67 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" |
| 68 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" |
| 69 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 96 |
| 70 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" |
| 71 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" |
| 72 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" |
| 73 | "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 128 |
| 74 | // Strings (empty) |
| 75 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" // 32 |
| 76 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" // 64 |
| 77 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" // 96 |
| 78 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; // 128 |
| 79 | |
| 80 | constexpr TokenDatabase kBasicDatabase = TokenDatabase::Create<kBasicData>(); |
| 81 | static_assert(kBasicDatabase.size() == 3u); |
| 82 | |
| 83 | TEST(TokenDatabase, EntryCount) { |
| 84 | static_assert(TokenDatabase::Create<kBasicData>().size() == 3u); |
| 85 | static_assert(TokenDatabase::Create(kEmptyData).size() == 0u); |
| 86 | EXPECT_EQ(TokenDatabase::Create<kSignedWithTopBit>().size(), 0x80u); |
| 87 | } |
| 88 | |
| 89 | TEST(TokenDatabase, ValidCheck) { |
| 90 | char basic_data[sizeof(kBasicData)]; |
| 91 | std::memcpy(basic_data, kBasicData, sizeof(basic_data)); |
| 92 | EXPECT_TRUE(TokenDatabase::IsValid(basic_data)); |
| 93 | |
| 94 | static_assert(TokenDatabase::IsValid(kBasicData)); |
| 95 | static_assert(TokenDatabase::IsValid(kEmptyData)); |
| 96 | static_assert(TokenDatabase::IsValid(kSignedWithTopBit)); |
| 97 | |
| 98 | static_assert(!TokenDatabase::IsValid(kBadMagic)); |
| 99 | static_assert(!TokenDatabase::IsValid(kBadVersion)); |
| 100 | static_assert(!TokenDatabase::IsValid(kBadEntryCount)); |
| 101 | |
| 102 | static_assert(!TokenDatabase::IsValid("TOKENS\0\0\0\0")); // too short |
| 103 | static_assert(!TokenDatabase::IsValid("TOKENS\0\1\0\0\0\0\0\0\0\0")); |
| 104 | static_assert(!TokenDatabase::IsValid("TOKENSv0\0\0\0\0\0\0\0\0")); |
| 105 | static_assert(!TokenDatabase::IsValid("tokens\0\0\0\0\0\0\0\0\0\0")); |
| 106 | |
| 107 | // No string table; this is one byte too short. |
| 108 | static_assert( |
| 109 | !TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate"sv)); |
| 110 | |
| 111 | // Add one byte for the string table. |
| 112 | static_assert( |
| 113 | TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate\0"sv)); |
| 114 | |
| 115 | static_assert( |
| 116 | !TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0" |
| 117 | "WXYZdate" |
| 118 | "WXYZdate" |
| 119 | "\0"sv)); |
| 120 | static_assert( |
| 121 | TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0" |
| 122 | "WXYZdate" |
| 123 | "WXYZdate" |
| 124 | "hi\0\0"sv)); |
| 125 | static_assert( |
| 126 | TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0" |
| 127 | "WXYZdate" |
| 128 | "WXYZdate" |
| 129 | "hi\0hello\0"sv)); |
| 130 | } |
| 131 | |
| 132 | TEST(TokenDatabase, Iterator) { |
| 133 | auto it = kBasicDatabase.begin(); |
| 134 | EXPECT_EQ(it->token, 1u); |
| 135 | EXPECT_STREQ(it.entry().string, "hi!"); |
| 136 | |
| 137 | ++it; |
| 138 | EXPECT_EQ(it->token, 2u); |
| 139 | EXPECT_STREQ(it.entry().string, "goodbye"); |
| 140 | EXPECT_EQ(it - kBasicDatabase.begin(), 1); |
| 141 | |
| 142 | ++it; |
| 143 | EXPECT_EQ(it->token, 0xFFu); |
| 144 | EXPECT_STREQ(it.entry().string, ":)"); |
| 145 | EXPECT_EQ(it - kBasicDatabase.begin(), 2); |
| 146 | |
| 147 | ++it; |
| 148 | EXPECT_EQ(it, kBasicDatabase.end()); |
| 149 | EXPECT_EQ(static_cast<size_t>(it - kBasicDatabase.begin()), |
| 150 | kBasicDatabase.size()); |
| 151 | } |
| 152 | |
Wyatt Hepler | cb7691f | 2020-06-19 12:51:21 -0700 | [diff] [blame] | 153 | TEST(TokenDatabase, Iterator_PreIncrement) { |
| 154 | auto it = kBasicDatabase.begin(); |
| 155 | EXPECT_EQ((++it)->token, 2u); |
| 156 | EXPECT_STREQ(it.entry().string, "goodbye"); |
| 157 | } |
| 158 | |
| 159 | TEST(TokenDatabase, Iterator_PostIncrement) { |
| 160 | auto it = kBasicDatabase.begin(); |
| 161 | EXPECT_EQ((it++)->token, 1u); |
| 162 | |
| 163 | EXPECT_EQ(it->token, 2u); |
| 164 | EXPECT_STREQ(it.entry().string, "goodbye"); |
| 165 | } |
| 166 | |
Wyatt Hepler | 80c6ee5 | 2020-01-03 09:54:58 -0800 | [diff] [blame] | 167 | TEST(TokenDatabase, SingleEntryLookup_FirstEntry) { |
| 168 | auto match = kBasicDatabase.Find(1); |
| 169 | ASSERT_EQ(match.size(), 1u); |
| 170 | EXPECT_FALSE(match.empty()); |
| 171 | EXPECT_STREQ(match[0].string, "hi!"); |
| 172 | |
| 173 | for (const auto& entry : match) { |
| 174 | EXPECT_EQ(entry.token, 1u); |
| 175 | EXPECT_STREQ(entry.string, "hi!"); |
| 176 | } |
| 177 | } |
| 178 | |
| 179 | TEST(TokenDatabase, SingleEntryLookup_MiddleEntry) { |
| 180 | auto match = kBasicDatabase.Find(2); |
| 181 | ASSERT_EQ(match.size(), 1u); |
| 182 | EXPECT_FALSE(match.empty()); |
| 183 | EXPECT_STREQ(match[0].string, "goodbye"); |
| 184 | } |
| 185 | |
| 186 | TEST(TokenDatabase, SingleEntryLookup_LastEntry) { |
| 187 | auto match = kBasicDatabase.Find(0xff); |
| 188 | ASSERT_EQ(match.size(), 1u); |
| 189 | EXPECT_STREQ(match[0].string, ":)"); |
| 190 | EXPECT_FALSE(match.empty()); |
| 191 | } |
| 192 | |
| 193 | TEST(TokenDatabase, SingleEntryLookup_NonPresent) { |
| 194 | EXPECT_TRUE(kBasicDatabase.Find(0).empty()); |
| 195 | EXPECT_TRUE(kBasicDatabase.Find(3).empty()); |
| 196 | EXPECT_TRUE(kBasicDatabase.Find(10239).empty()); |
| 197 | EXPECT_TRUE(kBasicDatabase.Find(0xFFFFFFFFu).empty()); |
| 198 | } |
| 199 | |
| 200 | TEST(TokenDatabase, SingleEntryLookup_NoMatches) { |
| 201 | // Can also create the database at runtime. |
| 202 | TokenDatabase tokens = TokenDatabase::Create(kBasicData); |
| 203 | const auto match = tokens.Find(42); |
| 204 | ASSERT_EQ(match.size(), 0u); |
| 205 | EXPECT_TRUE(match.empty()); |
| 206 | |
| 207 | for (const auto& entry : match) { |
| 208 | FAIL(); // There were no matches, so this code should never execute. |
| 209 | static_cast<void>(entry); |
| 210 | } |
| 211 | } |
| 212 | |
| 213 | alignas(TokenDatabase::RawEntry) constexpr char kCollisionsData[] = |
| 214 | "TOKENS\0\0\x05\0\0\0\0\0\0\0" |
| 215 | "\x01\0\0\0date" |
| 216 | "\x01\0\0\0date" |
| 217 | "\x01\0\0\0date" |
| 218 | "\x02\0\0\0date" |
| 219 | "\xFF\0\0\0date" |
| 220 | "hi!\0goodbye\0:)\0\0"; |
| 221 | |
| 222 | constexpr TokenDatabase kCollisions = TokenDatabase::Create<kCollisionsData>(); |
| 223 | static_assert(kCollisions.size() == 5u); |
| 224 | |
| 225 | TEST(TokenDatabase, MultipleEntriesWithSameToken) { |
| 226 | TokenDatabase::Entries match = kCollisions.Find(1); |
| 227 | |
| 228 | EXPECT_EQ(match.begin()->token, 1u); |
| 229 | EXPECT_EQ(match.end()->token, 2u); |
| 230 | ASSERT_EQ(match.size(), 3u); |
| 231 | |
| 232 | EXPECT_STREQ(match[0].string, "hi!"); |
| 233 | EXPECT_STREQ(match[1].string, "goodbye"); |
| 234 | EXPECT_STREQ(match[2].string, ":)"); |
| 235 | |
| 236 | for (const auto& entry : match) { |
| 237 | EXPECT_EQ(entry.token, 1u); |
| 238 | } |
| 239 | } |
| 240 | |
| 241 | TEST(TokenDatabase, Empty) { |
| 242 | constexpr TokenDatabase empty_db = TokenDatabase::Create<kEmptyData>(); |
| 243 | static_assert(empty_db.size() == 0u); |
| 244 | static_assert(empty_db.ok()); |
| 245 | |
| 246 | EXPECT_TRUE(empty_db.Find(0).empty()); |
| 247 | EXPECT_TRUE(empty_db.Find(123).empty()); |
| 248 | |
| 249 | for (const auto& entry : empty_db) { |
| 250 | FAIL(); // The database is empty; this should never execute. |
| 251 | static_cast<void>(entry); |
| 252 | } |
| 253 | } |
| 254 | |
| 255 | TEST(TokenDatabase, NullDatabase) { |
| 256 | constexpr TokenDatabase empty_db; |
| 257 | |
| 258 | static_assert(empty_db.size() == 0u); |
| 259 | static_assert(!empty_db.ok()); |
| 260 | EXPECT_TRUE(empty_db.Find(0).empty()); |
| 261 | } |
| 262 | |
| 263 | TEST(TokenDatabase, InvalidData) { |
| 264 | constexpr TokenDatabase bad_db = TokenDatabase::Create("TOKENS\0\0"); |
| 265 | |
| 266 | static_assert(!bad_db.ok()); |
| 267 | EXPECT_TRUE(bad_db.Find(0).empty()); |
| 268 | } |
| 269 | |
| 270 | TEST(TokenDatabase, FromString) { |
| 271 | TokenDatabase bad_db = TokenDatabase::Create(std::string("wow!")); |
| 272 | |
| 273 | EXPECT_FALSE(bad_db.ok()); |
| 274 | } |
| 275 | |
| 276 | } // namespace |
| 277 | } // namespace pw::tokenizer |