blob: 18987fb0909a14f15b01902c9953aec23d10d70e [file] [log] [blame]
// Copyright 2020 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
14
#include "pw_tokenizer/tokenize.h"

#include <algorithm>
#include <array>
#include <cinttypes>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <limits>
#include <string_view>

#include "gtest/gtest.h"
#include "pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h"
#include "pw_tokenizer_private/tokenize_test.h"
#include "pw_varint/varint.h"
26
27namespace pw::tokenizer {
28namespace {
29
30// The hash to use for this test. This makes sure the strings are shorter than
31// the configured max length to ensure this test works with any reasonable
32// configuration.
33template <size_t kSize>
34constexpr uint32_t TestHash(const char (&string)[kSize]) {
35 constexpr unsigned kTestHashLength = 48;
36 static_assert(kTestHashLength <= PW_TOKENIZER_CFG_HASH_LENGTH);
37 static_assert(kSize <= kTestHashLength + 1);
38 return PwTokenizer65599FixedLengthHash(std::string_view(string, kSize - 1),
39 kTestHashLength);
40}
41
// Builds the expected encoded message: the 32-bit TestHash of the format
// string, least significant byte first, followed by the kData bytes.
template <uint8_t... kData, size_t kSize>
constexpr auto ExpectedData(const char (&format)[kSize]) {
  constexpr size_t kTotalBytes = sizeof(uint32_t) + sizeof...(kData);
  const uint32_t token = TestHash(format);

  return std::array<uint8_t, kTotalBytes>{
      static_cast<uint8_t>((token >> 0) & 0xff),
      static_cast<uint8_t>((token >> 8) & 0xff),
      static_cast<uint8_t>((token >> 16) & 0xff),
      static_cast<uint8_t>((token >> 24) & 0xff),
      kData...};
}
53
54TEST(TokenizeStringLiteral, EmptyString_IsZero) {
55 constexpr pw_TokenizerStringToken token = PW_TOKENIZE_STRING("");
56 EXPECT_EQ(0u, token);
57}
58
59TEST(TokenizeStringLiteral, String_MatchesHash) {
60 constexpr uint32_t token = PW_TOKENIZE_STRING("[:-)");
61 EXPECT_EQ(TestHash("[:-)"), token);
62}
63
64constexpr uint32_t kGlobalToken = PW_TOKENIZE_STRING(">:-[]");
65
66TEST(TokenizeStringLiteral, GlobalVariable_MatchesHash) {
67 EXPECT_EQ(TestHash(">:-[]"), kGlobalToken);
68}
69
70class TokenizeToBuffer : public ::testing::Test {
71 public:
72 TokenizeToBuffer() : buffer_{} {}
73
74 protected:
75 uint8_t buffer_[64];
76};
77
78TEST_F(TokenizeToBuffer, Integer64) {
79 size_t message_size = 14;
80 PW_TOKENIZE_TO_BUFFER(
81 buffer_,
82 &message_size,
83 "%" PRIu64,
84 static_cast<uint64_t>(0x55555555'55555555ull)); // 0xAAAAAAAA'AAAAAAAA
85
86 // Pattern becomes 10101010'11010101'10101010 ...
87 constexpr std::array<uint8_t, 14> expected =
88 ExpectedData<0xAA, 0xD5, 0xAA, 0xD5, 0xAA, 0xD5, 0xAA, 0xD5, 0xAA, 0x01>(
89 "%" PRIu64);
90 ASSERT_EQ(expected.size(), message_size);
91 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
92}
93
94TEST_F(TokenizeToBuffer, Integer64Overflow) {
95 size_t message_size;
96
97 for (size_t size = 4; size < 20; ++size) {
98 message_size = size;
99
100 PW_TOKENIZE_TO_BUFFER(
101 buffer_,
102 &message_size,
103 "%" PRIx64,
104 static_cast<uint64_t>(std::numeric_limits<int64_t>::min()));
105
106 if (size < 14) {
107 constexpr std::array<uint8_t, 4> empty = ExpectedData("%" PRIx64);
108 ASSERT_EQ(sizeof(uint32_t), message_size);
109 EXPECT_EQ(std::memcmp(empty.data(), &buffer_, empty.size()), 0);
110
111 // Make sure nothing was written past the end of the buffer.
112 EXPECT_TRUE(std::all_of(&buffer_[size], std::end(buffer_), [](uint8_t v) {
113 return v == '\0';
114 }));
115 } else {
116 constexpr std::array<uint8_t, 14> expected =
117 ExpectedData<0xff,
118 0xff,
119 0xff,
120 0xff,
121 0xff,
122 0xff,
123 0xff,
124 0xff,
125 0xff,
126 0x01>("%" PRIx64);
127 ASSERT_EQ(expected.size(), message_size);
128 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
129 }
130 }
131}
132
133TEST_F(TokenizeToBuffer, IntegerNegative) {
134 size_t message_size = 9;
135 PW_TOKENIZE_TO_BUFFER(
136 buffer_, &message_size, "%" PRId32, std::numeric_limits<int32_t>::min());
137
138 // 0x8000'0000 -zig-zag-> 0xff'ff'ff'ff'0f
139 constexpr std::array<uint8_t, 9> expected =
140 ExpectedData<0xff, 0xff, 0xff, 0xff, 0x0f>("%" PRId32);
141 ASSERT_EQ(expected.size(), message_size);
142 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
143}
144
145TEST_F(TokenizeToBuffer, IntegerMin) {
146 size_t message_size = 9;
147 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "%d", -1);
148
149 constexpr std::array<uint8_t, 5> expected = ExpectedData<0x01>("%d");
150 ASSERT_EQ(expected.size(), message_size);
151 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
152}
153
154TEST_F(TokenizeToBuffer, IntegerDoesntFit) {
155 size_t message_size = 8;
156 PW_TOKENIZE_TO_BUFFER(
157 buffer_, &message_size, "%" PRId32, std::numeric_limits<int32_t>::min());
158
159 constexpr std::array<uint8_t, 4> expected = ExpectedData<>("%" PRId32);
160 ASSERT_EQ(expected.size(), message_size);
161 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
162}
163
164TEST_F(TokenizeToBuffer, String) {
165 size_t message_size = sizeof(buffer_);
166
167 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
168 constexpr std::array<uint8_t, 10> expected =
169 ExpectedData<5, '5', '4', '3', '2', '!'>("The answer is: %s");
170
171 ASSERT_EQ(expected.size(), message_size);
172 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
173}
174
175TEST_F(TokenizeToBuffer, String_BufferTooSmall_TruncatesAndSetsTopStatusBit) {
176 size_t message_size = 8;
177 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
178
179 constexpr std::array<uint8_t, 8> truncated_1 =
180 ExpectedData<0x83, '5', '4', '3'>("The answer is: %s");
181
182 ASSERT_EQ(truncated_1.size(), message_size);
183 EXPECT_EQ(std::memcmp(truncated_1.data(), buffer_, truncated_1.size()), 0);
184}
185
186TEST_F(TokenizeToBuffer, String_TwoBytesLeft_TruncatesToOneCharacter) {
187 size_t message_size = 6;
188 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
189
190 constexpr std::array<uint8_t, 6> truncated_2 =
191 ExpectedData<0x81, '5'>("The answer is: %s");
192
193 ASSERT_EQ(truncated_2.size(), message_size);
194 EXPECT_EQ(std::memcmp(truncated_2.data(), buffer_, truncated_2.size()), 0);
195}
196
197TEST_F(TokenizeToBuffer, String_OneByteLeft_OnlyWritesTruncatedStatusByte) {
198 size_t message_size = 5;
199 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
200
201 std::array<uint8_t, 5> result = ExpectedData<0x80>("The answer is: %s");
202 ASSERT_EQ(result.size(), message_size);
203 EXPECT_EQ(std::memcmp(result.data(), buffer_, result.size()), 0);
204}
205
206TEST_F(TokenizeToBuffer, EmptyString_OneByteLeft_EncodesCorrectly) {
207 size_t message_size = 5;
208 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "");
209
210 std::array<uint8_t, 5> result = ExpectedData<0>("The answer is: %s");
211 ASSERT_EQ(result.size(), message_size);
212 EXPECT_EQ(std::memcmp(result.data(), buffer_, result.size()), 0);
213}
214
215TEST_F(TokenizeToBuffer, String_ZeroBytesLeft_WritesNothing) {
216 size_t message_size = 4;
217 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
218
219 constexpr std::array<uint8_t, 4> empty = ExpectedData<>("The answer is: %s");
220 ASSERT_EQ(empty.size(), message_size);
221 EXPECT_EQ(std::memcmp(empty.data(), buffer_, empty.size()), 0);
222}
223
224TEST_F(TokenizeToBuffer, NullptrString_EncodesNull) {
225 char* string = nullptr;
226 size_t message_size = 9;
227 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", string);
228
229 std::array<uint8_t, 9> result =
230 ExpectedData<4, 'N', 'U', 'L', 'L'>("The answer is: %s");
231 ASSERT_EQ(result.size(), message_size);
232 EXPECT_EQ(std::memcmp(result.data(), buffer_, result.size()), 0);
233}
234
235TEST_F(TokenizeToBuffer, NullptrString_BufferTooSmall_EncodesTruncatedNull) {
236 char* string = nullptr;
237 size_t message_size = 6;
238 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", string);
239
240 std::array<uint8_t, 6> result = ExpectedData<0x81, 'N'>("The answer is: %s");
241 ASSERT_EQ(result.size(), message_size);
242 EXPECT_EQ(std::memcmp(result.data(), buffer_, result.size()), 0);
243}
244
Wyatt Heplerd58eef92020-05-08 10:39:56 -0700245TEST_F(TokenizeToBuffer, Domain_String) {
246 size_t message_size = sizeof(buffer_);
247
248 PW_TOKENIZE_TO_BUFFER_DOMAIN(
249 "TEST_DOMAIN", buffer_, &message_size, "The answer was: %s", "5432!");
250 constexpr std::array<uint8_t, 10> expected =
251 ExpectedData<5, '5', '4', '3', '2', '!'>("The answer was: %s");
252
253 ASSERT_EQ(expected.size(), message_size);
254 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
255}
256
Wyatt Hepler80c6ee52020-01-03 09:54:58 -0800257TEST_F(TokenizeToBuffer, TruncateArgs) {
258 // Args that can't fit are dropped completely
259 size_t message_size = 6;
260 PW_TOKENIZE_TO_BUFFER(buffer_,
261 &message_size,
262 "%u %d",
263 static_cast<uint8_t>(0b0010'1010u),
264 0xffffff);
265
266 constexpr std::array<uint8_t, 5> expected =
267 ExpectedData<0b0101'0100u>("%u %d");
268 ASSERT_EQ(expected.size(), message_size);
269 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
270}
271
272TEST_F(TokenizeToBuffer, NoRoomForToken) {
273 // Nothing is written if there isn't room for the token.
274 std::memset(buffer_, '$', sizeof(buffer_));
275 auto is_untouched = [](uint8_t v) { return v == '$'; };
276
277 size_t message_size = 3;
278 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer: \"%s\"", "5432!");
279 EXPECT_EQ(0u, message_size);
280 EXPECT_TRUE(std::all_of(buffer_, std::end(buffer_), is_untouched));
281
282 message_size = 2;
283 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "Jello, world!");
284 EXPECT_EQ(0u, message_size);
285 EXPECT_TRUE(std::all_of(buffer_, std::end(buffer_), is_untouched));
286
287 message_size = 1;
288 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "Jello!");
289 EXPECT_EQ(0u, message_size);
290 EXPECT_TRUE(std::all_of(buffer_, std::end(buffer_), is_untouched));
291
292 message_size = 0;
293 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "Jello?");
294 EXPECT_EQ(0u, message_size);
295 EXPECT_TRUE(std::all_of(buffer_, std::end(buffer_), is_untouched));
296}
297
298TEST_F(TokenizeToBuffer, C_StringShortFloat) {
299 size_t size = sizeof(buffer_);
300 pw_TokenizeToBufferTest_StringShortFloat(buffer_, &size);
301 constexpr std::array<uint8_t, 11> expected = // clang-format off
302 ExpectedData<1, '1', // string '1'
303 3, // -2 (zig-zag encoded)
304 0x00, 0x00, 0x40, 0x40 // 3.0 in floating point
305 >(TEST_FORMAT_STRING_SHORT_FLOAT);
306 ASSERT_EQ(expected.size(), size); // clang-format on
307 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
308}
309
310TEST_F(TokenizeToBuffer, C_SequentialZigZag) {
311 size_t size = sizeof(buffer_);
312 pw_TokenizeToBufferTest_SequentialZigZag(buffer_, &size);
313 constexpr std::array<uint8_t, 18> expected =
314 ExpectedData<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13>(
315 TEST_FORMAT_SEQUENTIAL_ZIG_ZAG);
316
317 ASSERT_EQ(expected.size(), size);
318 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
319}
320
321TEST_F(TokenizeToBuffer, C_Overflow) {
322 std::memset(buffer_, '$', sizeof(buffer_));
323
324 {
325 size_t size = 7;
326 pw_TokenizeToBufferTest_Requires8(buffer_, &size);
327 constexpr std::array<uint8_t, 7> expected =
328 ExpectedData<2, 'h', 'i'>(TEST_FORMAT_REQUIRES_8);
329 ASSERT_EQ(expected.size(), size);
330 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
331 EXPECT_EQ(buffer_[7], '$');
332 }
333
334 {
335 size_t size = 8;
336 pw_TokenizeToBufferTest_Requires8(buffer_, &size);
337 constexpr std::array<uint8_t, 8> expected =
338 ExpectedData<2, 'h', 'i', 13>(TEST_FORMAT_REQUIRES_8);
339 ASSERT_EQ(expected.size(), size);
340 EXPECT_EQ(std::memcmp(expected.data(), buffer_, expected.size()), 0);
341 EXPECT_EQ(buffer_[8], '$');
342 }
343}
344
345// Test fixture for callback and global handler. Both of these need a global
346// message buffer. To keep the message buffers separate, template this on the
347// derived class type.
348template <typename Impl>
349class GlobalMessage : public ::testing::Test {
350 public:
351 static void SetMessage(const uint8_t* message, size_t size) {
352 ASSERT_LE(size, sizeof(message_));
353 std::memcpy(message_, message, size);
354 message_size_bytes_ = size;
355 }
356
357 protected:
358 GlobalMessage() {
359 std::memset(message_, 0, sizeof(message_));
360 message_size_bytes_ = 0;
361 }
362
363 static uint8_t message_[256];
364 static size_t message_size_bytes_;
365};
366
367template <typename Impl>
368uint8_t GlobalMessage<Impl>::message_[256] = {};
369template <typename Impl>
370size_t GlobalMessage<Impl>::message_size_bytes_ = 0;
371
372class TokenizeToCallback : public GlobalMessage<TokenizeToCallback> {};
373
374TEST_F(TokenizeToCallback, Variety) {
375 PW_TOKENIZE_TO_CALLBACK(
376 SetMessage, "%s there are %x (%.2f) of them%c", "Now", 2u, 2.0f, '.');
377 const auto expected = // clang-format off
378 ExpectedData<3, 'N', 'o', 'w', // string "Now"
379 0x04, // unsigned 2 (zig-zag encoded)
380 0x00, 0x00, 0x00, 0x40, // float 2.0
381 0x5C // char '.' (0x2E, zig-zag encoded)
382 >("%s there are %x (%.2f) of them%c");
383 // clang-format on
384 ASSERT_EQ(expected.size(), message_size_bytes_);
385 EXPECT_EQ(std::memcmp(expected.data(), message_, expected.size()), 0);
386}
387
388TEST_F(TokenizeToCallback, Strings) {
389 PW_TOKENIZE_TO_CALLBACK(SetMessage, "The answer is: %s", "5432!");
390 constexpr std::array<uint8_t, 10> expected =
391 ExpectedData<5, '5', '4', '3', '2', '!'>("The answer is: %s");
392 ASSERT_EQ(expected.size(), message_size_bytes_);
393 EXPECT_EQ(std::memcmp(expected.data(), message_, expected.size()), 0);
394}
395
Wyatt Heplerd58eef92020-05-08 10:39:56 -0700396TEST_F(TokenizeToCallback, Domain_Strings) {
397 PW_TOKENIZE_TO_CALLBACK_DOMAIN(
398 "TEST_DOMAIN", SetMessage, "The answer is: %s", "5432!");
399 constexpr std::array<uint8_t, 10> expected =
400 ExpectedData<5, '5', '4', '3', '2', '!'>("The answer is: %s");
401 ASSERT_EQ(expected.size(), message_size_bytes_);
402 EXPECT_EQ(std::memcmp(expected.data(), message_, expected.size()), 0);
403}
404
Wyatt Hepler80c6ee52020-01-03 09:54:58 -0800405TEST_F(TokenizeToCallback, C_SequentialZigZag) {
406 pw_TokenizeToCallbackTest_SequentialZigZag(SetMessage);
407
408 constexpr std::array<uint8_t, 18> expected =
409 ExpectedData<0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13>(
410 TEST_FORMAT_SEQUENTIAL_ZIG_ZAG);
411 ASSERT_EQ(expected.size(), message_size_bytes_);
412 EXPECT_EQ(std::memcmp(expected.data(), message_, expected.size()), 0);
413}
414
// Redefine PW_TOKENIZE_STRING_DOMAIN so that, in addition to producing the
// token, each expansion records the domain and the format string it was given.
// The expansion assigns to locals named tokenizer_domain and string_literal,
// which the tests that follow declare before tokenizing.
#undef PW_TOKENIZE_STRING_DOMAIN
#define PW_TOKENIZE_STRING_DOMAIN(domain, string)                      \
  /* the token expression is assigned to a variable by the caller */   \
  PW_TOKENIZER_STRING_TOKEN(string);                                   \
  tokenizer_domain = (domain);                                         \
  string_literal = (string)
421
422TEST_F(TokenizeToBuffer, Domain_Default) {
423 const char* tokenizer_domain = nullptr;
424 const char* string_literal = nullptr;
425
426 size_t message_size = sizeof(buffer_);
427
428 PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, "The answer is: %s", "5432!");
429
430 EXPECT_STREQ(tokenizer_domain, PW_TOKENIZER_DEFAULT_DOMAIN);
431 EXPECT_STREQ(string_literal, "The answer is: %s");
432}
433
434TEST_F(TokenizeToBuffer, Domain_Specified) {
435 const char* tokenizer_domain = nullptr;
436 const char* string_literal = nullptr;
437
438 size_t message_size = sizeof(buffer_);
439
440 PW_TOKENIZE_TO_BUFFER_DOMAIN(
441 "._.", buffer_, &message_size, "The answer is: %s", "5432!");
442
443 EXPECT_STREQ(tokenizer_domain, "._.");
444 EXPECT_STREQ(string_literal, "The answer is: %s");
445}
446
447TEST_F(TokenizeToCallback, Domain_Default) {
448 const char* tokenizer_domain = nullptr;
449 const char* string_literal = nullptr;
450
451 PW_TOKENIZE_TO_CALLBACK(SetMessage, "The answer is: %s", "5432!");
452
453 EXPECT_STREQ(tokenizer_domain, PW_TOKENIZER_DEFAULT_DOMAIN);
454 EXPECT_STREQ(string_literal, "The answer is: %s");
455}
456
457TEST_F(TokenizeToCallback, Domain_Specified) {
458 const char* tokenizer_domain = nullptr;
459 const char* string_literal = nullptr;
460
461 PW_TOKENIZE_TO_CALLBACK_DOMAIN(
462 "ThisIsTheDomain", SetMessage, "The answer is: %s", "5432!");
463
464 EXPECT_STREQ(tokenizer_domain, "ThisIsTheDomain");
465 EXPECT_STREQ(string_literal, "The answer is: %s");
466}
467
Wyatt Hepler80c6ee52020-01-03 09:54:58 -0800468} // namespace
469} // namespace pw::tokenizer