// Copyright 2020 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
14
15#include "pw_tokenizer/token_database.h"
16
17#include <cstring>
18#include <string>
19#include <string_view>
20
21#include "gtest/gtest.h"
22
23namespace pw::tokenizer {
24namespace {
25
// Enables the ""sv suffix, which builds string_views that keep embedded '\0'
// bytes instead of stopping at the first one.
using namespace std::string_view_literals;
27
28// Use alignas to ensure that the data is properly aligned for database entries.
29// This avoids unaligned memory reads.
30alignas(TokenDatabase::RawEntry) constexpr char kBasicData[] =
31 "TOKENS\0\0\x03\x00\x00\x00\0\0\0\0"
32 "\x01\0\0\0\0\0\0\0"
33 "\x02\0\0\0\0\0\0\0"
34 "\xFF\0\0\0\0\0\0\0"
35 "hi!\0"
36 "goodbye\0"
37 ":)";
38
39alignas(TokenDatabase::RawEntry) constexpr char kEmptyData[] =
40 "TOKENS\0\0\x00\x00\x00\x00\0\0\0"; // Last byte is null terminator.
41
42alignas(TokenDatabase::RawEntry) constexpr char kBadMagic[] =
43 "TOKENs\0\0\x03\x00\x00\x00\0\0\0\0"
44 "\x01\0\0\0\0\0\0\0"
45 "hi!\0";
46
47alignas(TokenDatabase::RawEntry) constexpr char kBadVersion[] =
48 "TOKENS\0\1\x00\0\0\0\0\0\0\0";
49
50alignas(TokenDatabase::RawEntry) constexpr char kBadEntryCount[] =
51 "TOKENS\0\0\xff\x00\x00\x00\0\0\0\0";
52
53// Use signed data and a size with the top bit set to test that the entry count
54// is read correctly, without per-byte sign extension.
55alignas(TokenDatabase::RawEntry) constexpr signed char kSignedWithTopBit[] =
56 "TOKENS\0\0\x80\x00\x00\x00\0\0\0\0"
57 // Entries
58 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
59 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
60 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
61 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 32
62 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
63 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
64 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
65 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 64
66 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
67 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
68 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
69 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 96
70 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
71 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
72 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate"
73 "TOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdateTOKNdate" // 128
74 // Strings (empty)
75 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" // 32
76 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" // 64
77 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0" // 96
78 "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; // 128
79
80constexpr TokenDatabase kBasicDatabase = TokenDatabase::Create<kBasicData>();
81static_assert(kBasicDatabase.size() == 3u);
82
83TEST(TokenDatabase, EntryCount) {
84 static_assert(TokenDatabase::Create<kBasicData>().size() == 3u);
85 static_assert(TokenDatabase::Create(kEmptyData).size() == 0u);
86 EXPECT_EQ(TokenDatabase::Create<kSignedWithTopBit>().size(), 0x80u);
87}
88
89TEST(TokenDatabase, ValidCheck) {
90 char basic_data[sizeof(kBasicData)];
91 std::memcpy(basic_data, kBasicData, sizeof(basic_data));
92 EXPECT_TRUE(TokenDatabase::IsValid(basic_data));
93
94 static_assert(TokenDatabase::IsValid(kBasicData));
95 static_assert(TokenDatabase::IsValid(kEmptyData));
96 static_assert(TokenDatabase::IsValid(kSignedWithTopBit));
97
98 static_assert(!TokenDatabase::IsValid(kBadMagic));
99 static_assert(!TokenDatabase::IsValid(kBadVersion));
100 static_assert(!TokenDatabase::IsValid(kBadEntryCount));
101
102 static_assert(!TokenDatabase::IsValid("TOKENS\0\0\0\0")); // too short
103 static_assert(!TokenDatabase::IsValid("TOKENS\0\1\0\0\0\0\0\0\0\0"));
104 static_assert(!TokenDatabase::IsValid("TOKENSv0\0\0\0\0\0\0\0\0"));
105 static_assert(!TokenDatabase::IsValid("tokens\0\0\0\0\0\0\0\0\0\0"));
106
107 // No string table; this is one byte too short.
108 static_assert(
109 !TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate"sv));
110
111 // Add one byte for the string table.
112 static_assert(
113 TokenDatabase::IsValid("TOKENS\0\0\x01\x00\x00\x00\0\0\0\0WXYZdate\0"sv));
114
115 static_assert(
116 !TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
117 "WXYZdate"
118 "WXYZdate"
119 "\0"sv));
120 static_assert(
121 TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
122 "WXYZdate"
123 "WXYZdate"
124 "hi\0\0"sv));
125 static_assert(
126 TokenDatabase::IsValid("TOKENS\0\0\x02\x00\x00\x00\0\0\0\0"
127 "WXYZdate"
128 "WXYZdate"
129 "hi\0hello\0"sv));
130}
131
132TEST(TokenDatabase, Iterator) {
133 auto it = kBasicDatabase.begin();
134 EXPECT_EQ(it->token, 1u);
135 EXPECT_STREQ(it.entry().string, "hi!");
136
137 ++it;
138 EXPECT_EQ(it->token, 2u);
139 EXPECT_STREQ(it.entry().string, "goodbye");
140 EXPECT_EQ(it - kBasicDatabase.begin(), 1);
141
142 ++it;
143 EXPECT_EQ(it->token, 0xFFu);
144 EXPECT_STREQ(it.entry().string, ":)");
145 EXPECT_EQ(it - kBasicDatabase.begin(), 2);
146
147 ++it;
148 EXPECT_EQ(it, kBasicDatabase.end());
149 EXPECT_EQ(static_cast<size_t>(it - kBasicDatabase.begin()),
150 kBasicDatabase.size());
151}
152
Wyatt Heplercb7691f2020-06-19 12:51:21 -0700153TEST(TokenDatabase, Iterator_PreIncrement) {
154 auto it = kBasicDatabase.begin();
155 EXPECT_EQ((++it)->token, 2u);
156 EXPECT_STREQ(it.entry().string, "goodbye");
157}
158
159TEST(TokenDatabase, Iterator_PostIncrement) {
160 auto it = kBasicDatabase.begin();
161 EXPECT_EQ((it++)->token, 1u);
162
163 EXPECT_EQ(it->token, 2u);
164 EXPECT_STREQ(it.entry().string, "goodbye");
165}
166
Wyatt Hepler80c6ee52020-01-03 09:54:58 -0800167TEST(TokenDatabase, SingleEntryLookup_FirstEntry) {
168 auto match = kBasicDatabase.Find(1);
169 ASSERT_EQ(match.size(), 1u);
170 EXPECT_FALSE(match.empty());
171 EXPECT_STREQ(match[0].string, "hi!");
172
173 for (const auto& entry : match) {
174 EXPECT_EQ(entry.token, 1u);
175 EXPECT_STREQ(entry.string, "hi!");
176 }
177}
178
179TEST(TokenDatabase, SingleEntryLookup_MiddleEntry) {
180 auto match = kBasicDatabase.Find(2);
181 ASSERT_EQ(match.size(), 1u);
182 EXPECT_FALSE(match.empty());
183 EXPECT_STREQ(match[0].string, "goodbye");
184}
185
186TEST(TokenDatabase, SingleEntryLookup_LastEntry) {
187 auto match = kBasicDatabase.Find(0xff);
188 ASSERT_EQ(match.size(), 1u);
189 EXPECT_STREQ(match[0].string, ":)");
190 EXPECT_FALSE(match.empty());
191}
192
193TEST(TokenDatabase, SingleEntryLookup_NonPresent) {
194 EXPECT_TRUE(kBasicDatabase.Find(0).empty());
195 EXPECT_TRUE(kBasicDatabase.Find(3).empty());
196 EXPECT_TRUE(kBasicDatabase.Find(10239).empty());
197 EXPECT_TRUE(kBasicDatabase.Find(0xFFFFFFFFu).empty());
198}
199
200TEST(TokenDatabase, SingleEntryLookup_NoMatches) {
201 // Can also create the database at runtime.
202 TokenDatabase tokens = TokenDatabase::Create(kBasicData);
203 const auto match = tokens.Find(42);
204 ASSERT_EQ(match.size(), 0u);
205 EXPECT_TRUE(match.empty());
206
207 for (const auto& entry : match) {
208 FAIL(); // There were no matches, so this code should never execute.
209 static_cast<void>(entry);
210 }
211}
212
213alignas(TokenDatabase::RawEntry) constexpr char kCollisionsData[] =
214 "TOKENS\0\0\x05\0\0\0\0\0\0\0"
215 "\x01\0\0\0date"
216 "\x01\0\0\0date"
217 "\x01\0\0\0date"
218 "\x02\0\0\0date"
219 "\xFF\0\0\0date"
220 "hi!\0goodbye\0:)\0\0";
221
222constexpr TokenDatabase kCollisions = TokenDatabase::Create<kCollisionsData>();
223static_assert(kCollisions.size() == 5u);
224
225TEST(TokenDatabase, MultipleEntriesWithSameToken) {
226 TokenDatabase::Entries match = kCollisions.Find(1);
227
228 EXPECT_EQ(match.begin()->token, 1u);
229 EXPECT_EQ(match.end()->token, 2u);
230 ASSERT_EQ(match.size(), 3u);
231
232 EXPECT_STREQ(match[0].string, "hi!");
233 EXPECT_STREQ(match[1].string, "goodbye");
234 EXPECT_STREQ(match[2].string, ":)");
235
236 for (const auto& entry : match) {
237 EXPECT_EQ(entry.token, 1u);
238 }
239}
240
241TEST(TokenDatabase, Empty) {
242 constexpr TokenDatabase empty_db = TokenDatabase::Create<kEmptyData>();
243 static_assert(empty_db.size() == 0u);
244 static_assert(empty_db.ok());
245
246 EXPECT_TRUE(empty_db.Find(0).empty());
247 EXPECT_TRUE(empty_db.Find(123).empty());
248
249 for (const auto& entry : empty_db) {
250 FAIL(); // The database is empty; this should never execute.
251 static_cast<void>(entry);
252 }
253}
254
255TEST(TokenDatabase, NullDatabase) {
256 constexpr TokenDatabase empty_db;
257
258 static_assert(empty_db.size() == 0u);
259 static_assert(!empty_db.ok());
260 EXPECT_TRUE(empty_db.Find(0).empty());
261}
262
263TEST(TokenDatabase, InvalidData) {
264 constexpr TokenDatabase bad_db = TokenDatabase::Create("TOKENS\0\0");
265
266 static_assert(!bad_db.ok());
267 EXPECT_TRUE(bad_db.Find(0).empty());
268}
269
270TEST(TokenDatabase, FromString) {
271 TokenDatabase bad_db = TokenDatabase::Create(std::string("wow!"));
272
273 EXPECT_FALSE(bad_db.ok());
274}
275
276} // namespace
277} // namespace pw::tokenizer