pw_tokenizer: Make Base64 encoding easier to use
- Have PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES represent the whole
encoded buffer, including the 4-byte token. This makes the value easier to
use directly since the token is already accounted for.
- Update the Base64 encoding functions to always add a null terminator
to the Base64 output.
- Provide a pw::tokenizer::PrefixedBase64Encode overload that allocates
the buffer using pw::Vector.
Change-Id: Id78ef06a7d2111e7dfe5604ee091975be40ceed4
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/19720
Reviewed-by: Ewout van Bekkum <ewout@google.com>
Commit-Queue: Wyatt Hepler <hepler@google.com>
diff --git a/pw_tokenizer/BUILD.gn b/pw_tokenizer/BUILD.gn
index 26e582f..6250372 100644
--- a/pw_tokenizer/BUILD.gn
+++ b/pw_tokenizer/BUILD.gn
@@ -97,10 +97,12 @@
public = [ "public/pw_tokenizer/base64.h" ]
sources = [ "base64.cc" ]
public_deps = [
+ ":pw_tokenizer",
+ dir_pw_base64,
+ dir_pw_containers,
dir_pw_preprocessor,
dir_pw_span,
]
- deps = [ dir_pw_base64 ]
}
pw_source_set("decoder") {
@@ -164,7 +166,6 @@
group_deps = [
"$dir_pw_preprocessor:tests",
"$dir_pw_span:tests",
- "$dir_pw_status:tests",
]
}
diff --git a/pw_tokenizer/CMakeLists.txt b/pw_tokenizer/CMakeLists.txt
index 34044c4..51cc4a3 100644
--- a/pw_tokenizer/CMakeLists.txt
+++ b/pw_tokenizer/CMakeLists.txt
@@ -28,10 +28,10 @@
SOURCES
base64.cc
PUBLIC_DEPS
+ pw_base64
+ pw_containers
pw_preprocessor
pw_span
- PRIVATE_DEPS
- pw_base64
)
pw_add_module_library(pw_tokenizer.decoder
diff --git a/pw_tokenizer/base64.cc b/pw_tokenizer/base64.cc
index 3404916..7aab4af 100644
--- a/pw_tokenizer/base64.cc
+++ b/pw_tokenizer/base64.cc
@@ -14,10 +14,6 @@
#include "pw_tokenizer/base64.h"
-#include <span>
-
-#include "pw_base64/base64.h"
-
namespace pw::tokenizer {
extern "C" size_t pw_tokenizer_PrefixedBase64Encode(
@@ -25,19 +21,22 @@
size_t binary_size_bytes,
void* output_buffer,
size_t output_buffer_size_bytes) {
- const size_t encoded_size = base64::EncodedSize(binary_size_bytes) + 1;
+ char* output = static_cast<char*>(output_buffer);
+ const size_t encoded_size = Base64EncodedSize(binary_size_bytes);
- if (output_buffer_size_bytes < encoded_size) {
+ if (output_buffer_size_bytes < encoded_size + sizeof('\0')) {
+ if (output_buffer_size_bytes > 0u) {
+ output[0] = '\0';
+ }
+
return 0;
}
- char* output = static_cast<char*>(output_buffer);
output[0] = kBase64Prefix;
-
base64::Encode(std::span(static_cast<const std::byte*>(binary_message),
binary_size_bytes),
&output[1]);
-
+ output[encoded_size] = '\0';
return encoded_size;
}
diff --git a/pw_tokenizer/base64_test.cc b/pw_tokenizer/base64_test.cc
index d6ff56e..e751b81 100644
--- a/pw_tokenizer/base64_test.cc
+++ b/pw_tokenizer/base64_test.cc
@@ -27,7 +27,12 @@
class PrefixedBase64 : public ::testing::Test {
protected:
- PrefixedBase64() : binary_{}, base64_{} {}
+ static constexpr char kUnset = '#';
+
+ PrefixedBase64() {
+ std::memset(binary_, kUnset, sizeof(binary_));
+ std::memset(base64_, kUnset, sizeof(base64_));
+ }
byte binary_[32];
char base64_[32];
@@ -62,6 +67,7 @@
for (auto& [binary, base64] : kTestData) {
EXPECT_EQ(base64.size(), PrefixedBase64Encode(binary, base64_));
ASSERT_EQ(base64, base64_);
+ EXPECT_EQ('\0', base64_[base64.size()]);
}
}
@@ -73,7 +79,47 @@
TEST_F(PrefixedBase64, Encode_EmptyOutput_WritesNothing) {
EXPECT_EQ(0u,
PrefixedBase64Encode(kTestData[5].binary, std::span(base64_, 0)));
+ EXPECT_EQ(kUnset, base64_[0]);
+}
+
+TEST_F(PrefixedBase64, Encode_SingleByteOutput_OnlyNullTerminates) {
+ EXPECT_EQ(0u,
+ PrefixedBase64Encode(kTestData[5].binary, std::span(base64_, 1)));
EXPECT_EQ('\0', base64_[0]);
+ EXPECT_EQ(kUnset, base64_[1]);
+}
+
+// The output span is exactly as long as the encoded text, leaving no room for
+// the null terminator. Encoding must report 0, null terminate at base64_[0],
+// and leave base64_[1] untouched.
+TEST_F(PrefixedBase64, Encode_NoRoomForNullAfterMessage_OnlyNullTerminates) {
+  const auto& message = kTestData[5];
+  const std::span<char> too_small(base64_, message.base64.size());
+
+  EXPECT_EQ(0u, PrefixedBase64Encode(message.binary, too_small));
+  EXPECT_EQ('\0', base64_[0]);
+  EXPECT_EQ(kUnset, base64_[1]);
+}
+
+// Encoding an empty message to a vector yields only the prefix character,
+// with a null terminator stored just past the vector's single element.
+TEST_F(PrefixedBase64, EncodeToVector_EmptyInput_WritesPrefix) {
+  auto buffer = PrefixedBase64Encode(std::span<byte>());
+  ASSERT_EQ(1u, buffer.size());
+  EXPECT_EQ('$', buffer[0]);
+  // Index 1 is past size(), so read the terminator through data() instead of
+  // operator[], whose valid indices are [0, size()).
+  EXPECT_EQ('\0', buffer.data()[1]);
+}
+
+// Encoding a message that fits produces the expected Base64 text in the
+// vector's elements, followed by a null terminator just past the last one.
+TEST_F(PrefixedBase64, EncodeToVector_Successful) {
+  auto buffer = PrefixedBase64Encode(kTestData[5].binary);
+  ASSERT_EQ(buffer.size(), kTestData[5].base64.size());
+  EXPECT_EQ(
+      0, std::memcmp(buffer.data(), kTestData[5].base64.data(), buffer.size()));
+  // The terminator is not one of the vector's elements; read it through
+  // data() rather than indexing the vector at size().
+  EXPECT_EQ('\0', buffer.data()[buffer.size()]);
+}
+
+// A message too large for the default-sized vector yields an empty vector
+// whose storage still begins with a null terminator.
+TEST_F(PrefixedBase64, EncodeToVector_VectorTooSmall_OnlyNullTerminates) {
+  constexpr byte big[Base64EncodedSize(
+      PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES + 1)] = {};
+
+  auto buffer = PrefixedBase64Encode(big);
+  ASSERT_EQ(0u, buffer.size());
+  // The vector is empty, so no index is valid for operator[]; inspect the
+  // underlying storage through data() instead.
+  EXPECT_EQ('\0', buffer.data()[0]);
}
TEST_F(PrefixedBase64, Decode) {
@@ -85,18 +131,18 @@
TEST_F(PrefixedBase64, Decode_EmptyInput_WritesNothing) {
EXPECT_EQ(0u, PrefixedBase64Decode({}, binary_));
- EXPECT_EQ(byte{0}, binary_[0]);
+ EXPECT_EQ(byte{kUnset}, binary_[0]);
}
TEST_F(PrefixedBase64, Decode_OnlyPrefix_WritesNothing) {
EXPECT_EQ(0u, PrefixedBase64Decode("$", binary_));
- EXPECT_EQ(byte{0}, binary_[0]);
+ EXPECT_EQ(byte{kUnset}, binary_[0]);
}
TEST_F(PrefixedBase64, Decode_EmptyOutput_WritesNothing) {
EXPECT_EQ(0u,
PrefixedBase64Decode(kTestData[5].base64, std::span(binary_, 0)));
- EXPECT_EQ(byte{0}, binary_[0]);
+ EXPECT_EQ(byte{kUnset}, binary_[0]);
}
TEST_F(PrefixedBase64, Decode_OutputTooSmall_WritesNothing) {
@@ -104,7 +150,7 @@
EXPECT_EQ(0u,
PrefixedBase64Decode(item.base64,
std::span(binary_, item.binary.size() - 1)));
- EXPECT_EQ(byte{0}, binary_[0]);
+ EXPECT_EQ(byte{kUnset}, binary_[0]);
}
TEST(PrefixedBase64, DecodeInPlace) {
diff --git a/pw_tokenizer/public/pw_tokenizer/base64.h b/pw_tokenizer/public/pw_tokenizer/base64.h
index e91e66d..42d1f5e 100644
--- a/pw_tokenizer/public/pw_tokenizer/base64.h
+++ b/pw_tokenizer/public/pw_tokenizer/base64.h
@@ -38,15 +38,17 @@
PW_EXTERN_C_START
-// Encodes a binary tokenized message as prefixed Base64. Returns the size of
-// the number of characters written to output_buffer. Returns 0 if the buffer is
-// too small.
+// Encodes a binary tokenized message as prefixed Base64 with a null terminator.
+// Returns the encoded string length (excluding the null terminator). Returns 0
+// if the buffer is too small. Always null terminates if the output buffer is
+// not empty.
//
// Equivalent to pw::tokenizer::PrefixedBase64Encode.
size_t pw_tokenizer_PrefixedBase64Encode(const void* binary_message,
size_t binary_size_bytes,
void* output_buffer,
size_t output_buffer_size_bytes);
+
// Decodes a prefixed Base64 tokenized message to binary. Returns the size of
// the decoded binary data. The resulting data is ready to be passed to
// pw::tokenizer::Detokenizer::Detokenize. Returns 0 if the buffer is too small,
@@ -65,13 +67,30 @@
#include <span>
#include <string_view>
+#include "pw_base64/base64.h"
+#include "pw_containers/vector.h"
+#include "pw_tokenizer/config.h"
+
namespace pw::tokenizer {
inline constexpr char kBase64Prefix = PW_TOKENIZER_BASE64_PREFIX;
-// Encodes a binary tokenized message as prefixed Base64. Returns the size of
-// the number of characters written to output_buffer. Returns 0 if the buffer is
-// too small or does not start with kBase64Prefix.
+// Computes the length of a tokenized message once encoded as prefixed Base64:
+// the prefix character ($) plus the Base64 characters for `data` bytes. The
+// null terminator is not counted.
+constexpr size_t Base64EncodedSize(size_t data) {
+  const size_t base64_length = base64::EncodedSize(data);
+  return base64_length + sizeof(kBase64Prefix);
+}
+
+// The minimum buffer size for a prefixed Base64 message and its null terminator
+// when the binary message is PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES long.
+inline constexpr size_t kBase64EncodedBufferSize =
+ Base64EncodedSize(PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES) +
+ sizeof('\0');
+
+// Encodes a binary tokenized message as prefixed Base64 with a null terminator.
+// Returns the encoded string length (excluding the null terminator). Returns 0
+// if the buffer is too small. Always null terminates if the output buffer is
+// not empty.
inline size_t PrefixedBase64Encode(std::span<const std::byte> binary_message,
std::span<char> output_buffer) {
return pw_tokenizer_PrefixedBase64Encode(binary_message.data(),
@@ -86,6 +105,38 @@
return PrefixedBase64Encode(std::as_bytes(binary_message), output_buffer);
}
+// Encodes a binary tokenized message as prefixed Base64 into a pw::Vector that
+// is returned by value. buffer_size is the vector's capacity; by default it
+// fits a message PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES long encoded as
+// prefixed Base64, plus a null terminator.
+//
+// On success, size() is the encoded length (prefix plus Base64 characters) and
+// a null terminator is stored immediately after the last element. If the
+// encoded message and its terminator do not fit in buffer_size, the vector is
+// left empty (size() == 0) and only a null terminator is written.
+template <size_t buffer_size = kBase64EncodedBufferSize>
+Vector<char, buffer_size> PrefixedBase64Encode(
+    std::span<const std::byte> binary_message) {
+  // Require a capacity of at least one encoded 4-byte token.
+  static_assert(buffer_size >= Base64EncodedSize(sizeof(uint32_t)));
+
+  Vector<char, buffer_size> output;
+  const size_t encoded_size = Base64EncodedSize(binary_message.size());
+
+  // Make sure the encoded data and a null terminator can fit.
+  if (encoded_size + sizeof('\0') > buffer_size) {
+    // The vector stays empty, so write the terminator through data() rather
+    // than operator[], whose valid indices are [0, size()).
+    output.data()[0] = '\0';
+    return output;
+  }
+
+  output.resize(encoded_size);
+  char* const encoded = output.data();
+  encoded[0] = kBase64Prefix;
+  base64::Encode(binary_message, &encoded[1]);
+  // The terminator sits one past the last element. The size check above
+  // ensures that slot is still within the vector's buffer_size capacity.
+  encoded[encoded_size] = '\0';
+  return output;
+}
+
+// Encodes to a pw::Vector from std::span<const uint8_t>. The input is viewed
+// as bytes with std::as_bytes, and buffer_size is forwarded explicitly so the
+// std::byte overload encodes into a vector of the capacity the caller chose
+// instead of always using the default kBase64EncodedBufferSize.
+template <size_t buffer_size = kBase64EncodedBufferSize>
+Vector<char, buffer_size> PrefixedBase64Encode(
+    std::span<const uint8_t> binary_message) {
+  return PrefixedBase64Encode<buffer_size>(std::as_bytes(binary_message));
+}
+
// Decodes a prefixed Base64 tokenized message to binary. Returns the size of
// the decoded binary data. The resulting data is ready to be passed to
// pw::tokenizer::Detokenizer::Detokenize.
diff --git a/pw_tokenizer/public/pw_tokenizer/config.h b/pw_tokenizer/public/pw_tokenizer/config.h
index fede273..c4f34c8 100644
--- a/pw_tokenizer/public/pw_tokenizer/config.h
+++ b/pw_tokenizer/public/pw_tokenizer/config.h
@@ -58,9 +58,9 @@
// The size of the stack-allocated argument encoding buffer to use. This only
// affects tokenization macros that stack-allocate the encoding buffer
-// (PW_TOKENIZE_TO_CALLBACK and PW_TOKENIZE_TO_GLOBAL_HANDLER). This buffer size
-// is only allocated for argument encoding and does not include the 4-byte
-// token.
+// (PW_TOKENIZE_TO_CALLBACK and PW_TOKENIZE_TO_GLOBAL_HANDLER). A buffer of this
+// size is allocated and used for the 4-byte token and for encoding all
+// arguments. It must be at least large enough for the token (4 bytes).
//
// This buffer does not need to be large to accommodate a good number of
// tokenized string arguments. Integer arguments are usually encoded smaller
@@ -68,5 +68,5 @@
// point types are encoded as four bytes. Null-terminated strings are encoded
// 1:1 in size.
#ifndef PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES
-#define PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES 48
+#define PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES 52
#endif // PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES
diff --git a/pw_tokenizer/pw_tokenizer_private/encode_args.h b/pw_tokenizer/pw_tokenizer_private/encode_args.h
index 7a85963..d16c3bf 100644
--- a/pw_tokenizer/pw_tokenizer_private/encode_args.h
+++ b/pw_tokenizer/pw_tokenizer_private/encode_args.h
@@ -28,10 +28,19 @@
// Buffer for encoding a tokenized string and arguments.
struct EncodedMessage {
pw_tokenizer_Token token;
- std::array<uint8_t, PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES> args;
+ std::array<uint8_t,
+ PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES - sizeof(token)>
+ args;
};
-static_assert(offsetof(EncodedMessage, args) == sizeof(EncodedMessage::token),
+static_assert(PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES >=
+ sizeof(pw_tokenizer_Token),
+ "PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES must be at least "
+ "large enough for a token (4 bytes)");
+
+static_assert(offsetof(EncodedMessage, args) == sizeof(EncodedMessage::token) &&
+ PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES ==
+ sizeof(EncodedMessage),
"EncodedMessage should not have padding bytes between members");
// Encodes a tokenized string's arguments to a buffer. The