blob: bc552bcfee16784b53c30f91305cbd8e6f0be65b [file] [log] [blame]
// Copyright 2020 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
14
#include "pw_tokenizer/tokenize.h"

#include <algorithm>
#include <array>
#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <string_view>

#include "gtest/gtest.h"
#include "pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h"
#include "pw_tokenizer_private/tokenize_test.h"
#include "pw_varint/varint.h"
26
27namespace pw::tokenizer {
28namespace {
29
30// The hash to use for this test. This makes sure the strings are shorter than
31// the configured max length to ensure this test works with any reasonable
32// configuration.
33template <size_t kSize>
34constexpr uint32_t TestHash(const char (&string)[kSize]) {
35 constexpr unsigned kTestHashLength = 48;
36 static_assert(kTestHashLength <= PW_TOKENIZER_CFG_HASH_LENGTH);
37 static_assert(kSize <= kTestHashLength + 1);
38 return PwTokenizer65599FixedLengthHash(std::string_view(string, kSize - 1),
39 kTestHashLength);
40}
41
// Builds the expected encoded message: the 32-bit TestHash() of `format`,
// least-significant byte first, followed by the kData bytes in order.
template <uint8_t... kData, size_t kSize>
constexpr auto ExpectedData(const char (&format)[kSize]) {
  using Message = std::array<uint8_t, sizeof(uint32_t) + sizeof...(kData)>;

  const uint32_t token = TestHash(format);
  return Message{static_cast<uint8_t>(token & 0xff),
                 static_cast<uint8_t>((token >> 8) & 0xff),
                 static_cast<uint8_t>((token >> 16) & 0xff),
                 static_cast<uint8_t>((token >> 24) & 0xff),
                 kData...};
}
53
54TEST(TokenizeStringLiteral, EmptyString_IsZero) {
55 constexpr pw_TokenizerStringToken token = PW_TOKENIZE_STRING("");
56 EXPECT_EQ(0u, token);
57}
58
59TEST(TokenizeStringLiteral, String_MatchesHash) {
60 constexpr uint32_t token = PW_TOKENIZE_STRING("[:-)");
61 EXPECT_EQ(TestHash("[:-)"), token);
62}
63
64constexpr uint32_t kGlobalToken = PW_TOKENIZE_STRING(">:-[]");
65
66TEST(TokenizeStringLiteral, GlobalVariable_MatchesHash) {
67 EXPECT_EQ(TestHash(">:-[]"), kGlobalToken);
68}
69
// Tokenizes three strings within a single macro expansion (and therefore on a
// single source line). The resulting tokens are stored in the local constants
// token_1, token_2 and token_3, which callers are not required to use.
#define THREE_FOR_ONE(str_a, str_b, str_c)          \
  [[maybe_unused]] constexpr uint32_t token_1 =     \
      PW_TOKENIZE_STRING_DOMAIN("ignored", str_a);  \
  [[maybe_unused]] constexpr uint32_t token_2 =     \
      PW_TOKENIZE_STRING_DOMAIN("ignored", str_b);  \
  [[maybe_unused]] constexpr uint32_t token_3 =     \
      PW_TOKENIZE_STRING_DOMAIN("ignored", str_c);
78
79TEST(TokenizeStringLiteral, MultipleTokenizationsInOneMacroExpansion) {
80 // This verifies that we can safely tokenize multiple times in a single macro
81 // expansion. This can be useful when for example a name and description are
82 // both tokenized after being passed into a macro.
83 //
84 // This test only verifies that this compiles correctly; it does not test
85 // that the tokenizations make it to the final token database.
86 THREE_FOR_ONE("hello", "yes", "something");
87}
88
Wyatt Hepler80c6ee52020-01-03 09:54:58 -080089class TokenizeToBuffer : public ::testing::Test {
90 public:
91 TokenizeToBuffer() : buffer_{} {}
92
93 protected:
94 uint8_t buffer_[64];
95};
96
97TEST_F(TokenizeToBuffer, Integer64) {
98 size_t message_size = 14;
99 PW_TOKENIZE_TO_BUFFER(
100 buffer_,
101 &message_size,
102 "%" PRIu64,
103 static_cast<uint64_t>(0x55555555'55555555ull)); // 0xAAAAAAAA'AAAAAAAA
104
105 // Pattern becomes 10101010'11010101'10101010 ...
106 constexpr std::array<uint8_t, 14> expected =
107 ExpectedData<0xAA, 0xD5, 0xAA, 0xD5, 0xAA, 0xD5, 0xAA, 0xD5, 0xAA, 0x01>(
108 "%" PRIu64);
109 ASSERT_EQ(expected.size(), message_size);
110 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
111}
112
113TEST_F(TokenizeToBuffer, Integer64Overflow) {
114 size_t message_size;
115
116 for (size_t size = 4; size < 20; ++size) {
117 message_size = size;
118
119 PW_TOKENIZE_TO_BUFFER(
120 buffer_,
121 &message_size,
122 "%" PRIx64,
123 static_cast<uint64_t>(std::numeric_limits<int64_t>::min()));
124
125 if (size < 14) {
126 constexpr std::array<uint8_t, 4> empty = ExpectedData("%" PRIx64);
127 ASSERT_EQ(sizeof(uint32_t), message_size);
128 EXPECT_EQ(std::memcmp(empty.data(), &buffer_, empty.size()), 0);
129
130 // Make sure nothing was written past the end of the buffer.
131 EXPECT_TRUE(std::all_of(&buffer_[size], std::end(buffer_), [](uint8_t v) {
132 return v == '\0';
133 }));
134 } else {
135 constexpr std::array<uint8_t, 14> expected =
136 ExpectedData<0xff,
137 0xff,
138 0xff,
139 0xff,
140 0xff,
141 0xff,
142 0xff,
143 0xff,
144 0xff,
145 0x01>("%" PRIx64);
146 ASSERT_EQ(expected.size(), message_size);
147 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
148 }
149 }
150}
151
152TEST_F(TokenizeToBuffer, IntegerNegative) {
153 size_t message_size = 9;
154 PW_TOKENIZE_TO_BUFFER(
155 buffer_, &message_size, "%" PRId32, std::numeric_limits<int32_t>::min());
156
157 // 0x8000'0000 -zig-zag-> 0xff'ff'ff'ff'0f
158 constexpr std::array<uint8_t, 9> expected =
159 ExpectedData<0xff, 0xff, 0xff, 0xff, 0x0f>("%" PRId32);
160 ASSERT_EQ(expected.size(), message_size);
161 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
162}
163
164TEST_F(TokenizeToBuffer, IntegerMin) {
165 size_t message_size = 9;
166 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "%d", -1);
167
168 constexpr std::array<uint8_t, 5> expected = ExpectedData<0x01>("%d");
169 ASSERT_EQ(expected.size(), message_size);
170 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
171}
172
173TEST_F(TokenizeToBuffer, IntegerDoesntFit) {
174 size_t message_size = 8;
175 PW_TOKENIZE_TO_BUFFER(
176 buffer_, &message_size, "%" PRId32, std::numeric_limits<int32_t>::min());
177
178 constexpr std::array<uint8_t, 4> expected = ExpectedData<>("%" PRId32);
179 ASSERT_EQ(expected.size(), message_size);
180 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
181}
182
183TEST_F(TokenizeToBuffer, String) {
184 size_t message_size = sizeof(buffer_);
185
186 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
187 constexpr std::array<uint8_t, 10> expected =
188 ExpectedData<5, '5', '4', '3', '2', '!'>("The answer is: %s");
189
190 ASSERT_EQ(expected.size(), message_size);
191 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
192}
193
194TEST_F(TokenizeToBuffer, String_BufferTooSmall_TruncatesAndSetsTopStatusBit) {
195 size_t message_size = 8;
196 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
197
198 constexpr std::array<uint8_t, 8> truncated_1 =
199 ExpectedData<0x83, '5', '4', '3'>("The answer is: %s");
200
201 ASSERT_EQ(truncated_1.size(), message_size);
202 EXPECT_EQ(std::memcmp(truncated_1.data(), buffer_, truncated_1.size()), 0);
203}
204
205TEST_F(TokenizeToBuffer, String_TwoBytesLeft_TruncatesToOneCharacter) {
206 size_t message_size = 6;
207 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
208
209 constexpr std::array<uint8_t, 6> truncated_2 =
210 ExpectedData<0x81, '5'>("The answer is: %s");
211
212 ASSERT_EQ(truncated_2.size(), message_size);
213 EXPECT_EQ(std::memcmp(truncated_2.data(), buffer_, truncated_2.size()), 0);
214}
215
216TEST_F(TokenizeToBuffer, String_OneByteLeft_OnlyWritesTruncatedStatusByte) {
217 size_t message_size = 5;
218 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
219
220 std::array<uint8_t, 5> result = ExpectedData<0x80>("The answer is: %s");
221 ASSERT_EQ(result.size(), message_size);
222 EXPECT_EQ(std::memcmp(result.data(), buffer_, result.size()), 0);
223}
224
225TEST_F(TokenizeToBuffer, EmptyString_OneByteLeft_EncodesCorrectly) {
226 size_t message_size = 5;
227 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "");
228
229 std::array<uint8_t, 5> result = ExpectedData<0>("The answer is: %s");
230 ASSERT_EQ(result.size(), message_size);
231 EXPECT_EQ(std::memcmp(result.data(), buffer_, result.size()), 0);
232}
233
234TEST_F(TokenizeToBuffer, String_ZeroBytesLeft_WritesNothing) {
235 size_t message_size = 4;
236 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
237
238 constexpr std::array<uint8_t, 4> empty = ExpectedData<>("The answer is: %s");
239 ASSERT_EQ(empty.size(), message_size);
240 EXPECT_EQ(std::memcmp(empty.data(), buffer_, empty.size()), 0);
241}
242
243TEST_F(TokenizeToBuffer, NullptrString_EncodesNull) {
244 char* string = nullptr;
245 size_t message_size = 9;
246 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", string);
247
248 std::array<uint8_t, 9> result =
249 ExpectedData<4, 'N', 'U', 'L', 'L'>("The answer is: %s");
250 ASSERT_EQ(result.size(), message_size);
251 EXPECT_EQ(std::memcmp(result.data(), buffer_, result.size()), 0);
252}
253
254TEST_F(TokenizeToBuffer, NullptrString_BufferTooSmall_EncodesTruncatedNull) {
255 char* string = nullptr;
256 size_t message_size = 6;
257 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", string);
258
259 std::array<uint8_t, 6> result = ExpectedData<0x81, 'N'>("The answer is: %s");
260 ASSERT_EQ(result.size(), message_size);
261 EXPECT_EQ(std::memcmp(result.data(), buffer_, result.size()), 0);
262}
263
Wyatt Heplerd58eef92020-05-08 10:39:56 -0700264TEST_F(TokenizeToBuffer, Domain_String) {
265 size_t message_size = sizeof(buffer_);
266
267 PW_TOKENIZE_TO_BUFFER_DOMAIN(
268 "TEST_DOMAIN", buffer_, &message_size, "The answer was: %s", "5432!");
269 constexpr std::array<uint8_t, 10> expected =
270 ExpectedData<5, '5', '4', '3', '2', '!'>("The answer was: %s");
271
272 ASSERT_EQ(expected.size(), message_size);
273 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
274}
275
Wyatt Hepler80c6ee52020-01-03 09:54:58 -0800276TEST_F(TokenizeToBuffer, TruncateArgs) {
277 // Args that can't fit are dropped completely
278 size_t message_size = 6;
279 PW_TOKENIZE_TO_BUFFER(buffer_,
280 &message_size,
281 "%u %d",
282 static_cast<uint8_t>(0b0010'1010u),
283 0xffffff);
284
285 constexpr std::array<uint8_t, 5> expected =
286 ExpectedData<0b0101'0100u>("%u %d");
287 ASSERT_EQ(expected.size(), message_size);
288 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
289}
290
291TEST_F(TokenizeToBuffer, NoRoomForToken) {
292 // Nothing is written if there isn't room for the token.
293 std::memset(buffer_, '$', sizeof(buffer_));
294 auto is_untouched = [](uint8_t v) { return v == '$'; };
295
296 size_t message_size = 3;
297 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer: \"%s\"", "5432!");
298 EXPECT_EQ(0u, message_size);
299 EXPECT_TRUE(std::all_of(buffer_, std::end(buffer_), is_untouched));
300
301 message_size = 2;
302 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "Jello, world!");
303 EXPECT_EQ(0u, message_size);
304 EXPECT_TRUE(std::all_of(buffer_, std::end(buffer_), is_untouched));
305
306 message_size = 1;
307 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "Jello!");
308 EXPECT_EQ(0u, message_size);
309 EXPECT_TRUE(std::all_of(buffer_, std::end(buffer_), is_untouched));
310
311 message_size = 0;
312 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "Jello?");
313 EXPECT_EQ(0u, message_size);
314 EXPECT_TRUE(std::all_of(buffer_, std::end(buffer_), is_untouched));
315}
316
317TEST_F(TokenizeToBuffer, C_StringShortFloat) {
318 size_t size = sizeof(buffer_);
319 pw_TokenizeToBufferTest_StringShortFloat(buffer_, &size);
320 constexpr std::array<uint8_t, 11> expected = // clang-format off
321 ExpectedData<1, '1', // string '1'
322 3, // -2 (zig-zag encoded)
323 0x00, 0x00, 0x40, 0x40 // 3.0 in floating point
324 >(TEST_FORMAT_STRING_SHORT_FLOAT);
325 ASSERT_EQ(expected.size(), size); // clang-format on
326 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
327}
328
329TEST_F(TokenizeToBuffer, C_SequentialZigZag) {
330 size_t size = sizeof(buffer_);
331 pw_TokenizeToBufferTest_SequentialZigZag(buffer_, &size);
332 constexpr std::array<uint8_t, 18> expected =
333 ExpectedData<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13>(
334 TEST_FORMAT_SEQUENTIAL_ZIG_ZAG);
335
336 ASSERT_EQ(expected.size(), size);
337 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
338}
339
340TEST_F(TokenizeToBuffer, C_Overflow) {
341 std::memset(buffer_, '$', sizeof(buffer_));
342
343 {
344 size_t size = 7;
345 pw_TokenizeToBufferTest_Requires8(buffer_, &size);
346 constexpr std::array<uint8_t, 7> expected =
347 ExpectedData<2, 'h', 'i'>(TEST_FORMAT_REQUIRES_8);
348 ASSERT_EQ(expected.size(), size);
349 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
350 EXPECT_EQ(buffer_[7], '$');
351 }
352
353 {
354 size_t size = 8;
355 pw_TokenizeToBufferTest_Requires8(buffer_, &size);
356 constexpr std::array<uint8_t, 8> expected =
357 ExpectedData<2, 'h', 'i', 13>(TEST_FORMAT_REQUIRES_8);
358 ASSERT_EQ(expected.size(), size);
359 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
360 EXPECT_EQ(buffer_[8], '$');
361 }
362}
363
364// Test fixture for callback and global handler. Both of these need a global
365// message buffer. To keep the message buffers separate, template this on the
366// derived class type.
367template <typename Impl>
368class GlobalMessage : public ::testing::Test {
369 public:
370 static void SetMessage(const uint8_t* message, size_t size) {
371 ASSERT_LE(size, sizeof(message_));
372 std::memcpy(message_, message, size);
373 message_size_bytes_ = size;
374 }
375
376 protected:
377 GlobalMessage() {
378 std::memset(message_, 0, sizeof(message_));
379 message_size_bytes_ = 0;
380 }
381
382 static uint8_t message_[256];
383 static size_t message_size_bytes_;
384};
385
386template <typename Impl>
387uint8_t GlobalMessage<Impl>::message_[256] = {};
388template <typename Impl>
389size_t GlobalMessage<Impl>::message_size_bytes_ = 0;
390
391class TokenizeToCallback : public GlobalMessage<TokenizeToCallback> {};
392
393TEST_F(TokenizeToCallback, Variety) {
394 PW_TOKENIZE_TO_CALLBACK(
395 SetMessage, "%s there are %x (%.2f) of them%c", "Now", 2u, 2.0f, '.');
396 const auto expected = // clang-format off
397 ExpectedData<3, 'N', 'o', 'w', // string "Now"
398 0x04, // unsigned 2 (zig-zag encoded)
399 0x00, 0x00, 0x00, 0x40, // float 2.0
400 0x5C // char '.' (0x2E, zig-zag encoded)
401 >("%s there are %x (%.2f) of them%c");
402 // clang-format on
403 ASSERT_EQ(expected.size(), message_size_bytes_);
404 EXPECT_EQ(std::memcmp(expected.data(), message_, expected.size()), 0);
405}
406
407TEST_F(TokenizeToCallback, Strings) {
408 PW_TOKENIZE_TO_CALLBACK(SetMessage, "The answer is: %s", "5432!");
409 constexpr std::array<uint8_t, 10> expected =
410 ExpectedData<5, '5', '4', '3', '2', '!'>("The answer is: %s");
411 ASSERT_EQ(expected.size(), message_size_bytes_);
412 EXPECT_EQ(std::memcmp(expected.data(), message_, expected.size()), 0);
413}
414
Wyatt Heplerd58eef92020-05-08 10:39:56 -0700415TEST_F(TokenizeToCallback, Domain_Strings) {
416 PW_TOKENIZE_TO_CALLBACK_DOMAIN(
417 "TEST_DOMAIN", SetMessage, "The answer is: %s", "5432!");
418 constexpr std::array<uint8_t, 10> expected =
419 ExpectedData<5, '5', '4', '3', '2', '!'>("The answer is: %s");
420 ASSERT_EQ(expected.size(), message_size_bytes_);
421 EXPECT_EQ(std::memcmp(expected.data(), message_, expected.size()), 0);
422}
423
Wyatt Hepler80c6ee52020-01-03 09:54:58 -0800424TEST_F(TokenizeToCallback, C_SequentialZigZag) {
425 pw_TokenizeToCallbackTest_SequentialZigZag(SetMessage);
426
427 constexpr std::array<uint8_t, 18> expected =
428 ExpectedData<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13>(
429 TEST_FORMAT_SEQUENTIAL_ZIG_ZAG);
430 ASSERT_EQ(expected.size(), message_size_bytes_);
431 EXPECT_EQ(std::memcmp(expected.data(), message_, expected.size()), 0);
432}
433
// Redefine PW_TOKENIZE_STRING_DOMAIN for the tests that follow so that the
// domain and the string literal it receives can be inspected. In addition to
// producing the token, the replacement assigns its two arguments to variables
// named tokenizer_domain and string_literal, which must be declared in the
// scope where a tokenization macro is used.
#undef PW_TOKENIZE_STRING_DOMAIN
#define PW_TOKENIZE_STRING_DOMAIN(domain_name, format)            \
  /* assigned to a variable */ PW_TOKENIZER_STRING_TOKEN(format); \
  tokenizer_domain = domain_name;                                 \
  string_literal = format
440
441TEST_F(TokenizeToBuffer, Domain_Default) {
442 const char* tokenizer_domain = nullptr;
443 const char* string_literal = nullptr;
444
445 size_t message_size = sizeof(buffer_);
446
447 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
448
449 EXPECT_STREQ(tokenizer_domain, PW_TOKENIZER_DEFAULT_DOMAIN);
450 EXPECT_STREQ(string_literal, "The answer is: %s");
451}
452
453TEST_F(TokenizeToBuffer, Domain_Specified) {
454 const char* tokenizer_domain = nullptr;
455 const char* string_literal = nullptr;
456
457 size_t message_size = sizeof(buffer_);
458
459 PW_TOKENIZE_TO_BUFFER_DOMAIN(
460 "._.", buffer_, &message_size, "The answer is: %s", "5432!");
461
462 EXPECT_STREQ(tokenizer_domain, "._.");
463 EXPECT_STREQ(string_literal, "The answer is: %s");
464}
465
466TEST_F(TokenizeToCallback, Domain_Default) {
467 const char* tokenizer_domain = nullptr;
468 const char* string_literal = nullptr;
469
470 PW_TOKENIZE_TO_CALLBACK(SetMessage, "The answer is: %s", "5432!");
471
472 EXPECT_STREQ(tokenizer_domain, PW_TOKENIZER_DEFAULT_DOMAIN);
473 EXPECT_STREQ(string_literal, "The answer is: %s");
474}
475
476TEST_F(TokenizeToCallback, Domain_Specified) {
477 const char* tokenizer_domain = nullptr;
478 const char* string_literal = nullptr;
479
480 PW_TOKENIZE_TO_CALLBACK_DOMAIN(
481 "ThisIsTheDomain", SetMessage, "The answer is: %s", "5432!");
482
483 EXPECT_STREQ(tokenizer_domain, "ThisIsTheDomain");
484 EXPECT_STREQ(string_literal, "The answer is: %s");
485}
486
Wyatt Hepler80c6ee52020-01-03 09:54:58 -0800487} // namespace
488} // namespace pw::tokenizer