pw_tokenizer: Make Base64 encoding easier to use - Have PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES represent the whole encoded buffer, including the 4-byte token. This makes the value easier to use directly since the token is already accounted for. - Update the Base64 encoding functions to always add a null terminator to the Base64 output. - Provide a pw::tokenizer::PrefixedBase64Encode overload that allocates the buffer using pw::Vector. Change-Id: Id78ef06a7d2111e7dfe5604ee091975be40ceed4 Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/19720 Reviewed-by: Ewout van Bekkum <ewout@google.com> Commit-Queue: Wyatt Hepler <hepler@google.com>

commit: cdafbb4d57162c3fd16df577615ee4716a6586de [log] [tgz]
author: Wyatt Hepler <hepler@google.com> Mon Oct 05 11:51:05 2020 -0700
committer: CQ Bot Account <pigweed-scoped@luci-project-accounts.iam.gserviceaccount.com> Tue Oct 06 21:04:17 2020 +0000
tree: 0b17c3093b0cf0ce30bb06e1a1127ec36106524c
parent: 2cbb313f931a2b91837dc0fca4eaf4dd42fc5ffc [diff]
diff --git a/pw_tokenizer/BUILD.gn b/pw_tokenizer/BUILD.gn
index 26e582f..6250372 100644
--- a/pw_tokenizer/BUILD.gn
+++ b/pw_tokenizer/BUILD.gn

@@ -97,10 +97,12 @@
   public = [ "public/pw_tokenizer/base64.h" ]
   sources = [ "base64.cc" ]
   public_deps = [
+    ":pw_tokenizer",
+    dir_pw_base64,
+    dir_pw_containers,
     dir_pw_preprocessor,
     dir_pw_span,
   ]
-  deps = [ dir_pw_base64 ]
 }
 
 pw_source_set("decoder") {
@@ -164,7 +166,6 @@
   group_deps = [
     "$dir_pw_preprocessor:tests",
     "$dir_pw_span:tests",
-    "$dir_pw_status:tests",
   ]
 }
 

diff --git a/pw_tokenizer/CMakeLists.txt b/pw_tokenizer/CMakeLists.txt
index 34044c4..51cc4a3 100644
--- a/pw_tokenizer/CMakeLists.txt
+++ b/pw_tokenizer/CMakeLists.txt

@@ -28,10 +28,10 @@
   SOURCES
     base64.cc
   PUBLIC_DEPS
+    pw_base64
+    pw_containers
     pw_preprocessor
     pw_span
-  PRIVATE_DEPS
-    pw_base64
 )
 
 pw_add_module_library(pw_tokenizer.decoder

diff --git a/pw_tokenizer/base64.cc b/pw_tokenizer/base64.cc
index 3404916..7aab4af 100644
--- a/pw_tokenizer/base64.cc
+++ b/pw_tokenizer/base64.cc

@@ -14,10 +14,6 @@
 
 #include "pw_tokenizer/base64.h"
 
-#include <span>
-
-#include "pw_base64/base64.h"
-
 namespace pw::tokenizer {
 
 extern "C" size_t pw_tokenizer_PrefixedBase64Encode(
@@ -25,19 +21,22 @@
     size_t binary_size_bytes,
     void* output_buffer,
     size_t output_buffer_size_bytes) {
-  const size_t encoded_size = base64::EncodedSize(binary_size_bytes) + 1;
+  char* output = static_cast<char*>(output_buffer);
+  const size_t encoded_size = Base64EncodedSize(binary_size_bytes);
 
-  if (output_buffer_size_bytes < encoded_size) {
+  if (output_buffer_size_bytes < encoded_size + sizeof('\0')) {
+    if (output_buffer_size_bytes > 0u) {
+      output[0] = '\0';
+    }
+
     return 0;
   }
 
-  char* output = static_cast<char*>(output_buffer);
   output[0] = kBase64Prefix;
-
   base64::Encode(std::span(static_cast<const std::byte*>(binary_message),
                            binary_size_bytes),
                  &output[1]);
-
+  output[encoded_size] = '\0';
   return encoded_size;
 }
 

diff --git a/pw_tokenizer/base64_test.cc b/pw_tokenizer/base64_test.cc
index d6ff56e..e751b81 100644
--- a/pw_tokenizer/base64_test.cc
+++ b/pw_tokenizer/base64_test.cc

@@ -27,7 +27,12 @@
 
 class PrefixedBase64 : public ::testing::Test {
  protected:
-  PrefixedBase64() : binary_{}, base64_{} {}
+  static constexpr char kUnset = '#';
+
+  PrefixedBase64() {
+    std::memset(binary_, kUnset, sizeof(binary_));
+    std::memset(base64_, kUnset, sizeof(base64_));
+  }
 
   byte binary_[32];
   char base64_[32];
@@ -62,6 +67,7 @@
   for (auto& [binary, base64] : kTestData) {
     EXPECT_EQ(base64.size(), PrefixedBase64Encode(binary, base64_));
     ASSERT_EQ(base64, base64_);
+    EXPECT_EQ('\0', base64_[base64.size()]);
   }
 }
 
@@ -73,7 +79,47 @@
 TEST_F(PrefixedBase64, Encode_EmptyOutput_WritesNothing) {
   EXPECT_EQ(0u,
             PrefixedBase64Encode(kTestData[5].binary, std::span(base64_, 0)));
+  EXPECT_EQ(kUnset, base64_[0]);
+}
+
+TEST_F(PrefixedBase64, Encode_SingleByteOutput_OnlyNullTerminates) {
+  EXPECT_EQ(0u,
+            PrefixedBase64Encode(kTestData[5].binary, std::span(base64_, 1)));
   EXPECT_EQ('\0', base64_[0]);
+  EXPECT_EQ(kUnset, base64_[1]);
+}
+
+TEST_F(PrefixedBase64, Encode_NoRoomForNullAfterMessage_OnlyNullTerminates) {
+  EXPECT_EQ(
+      0u,
+      PrefixedBase64Encode(kTestData[5].binary,
+                           std::span(base64_, kTestData[5].base64.size())));
+  EXPECT_EQ('\0', base64_[0]);
+  EXPECT_EQ(kUnset, base64_[1]);
+}
+
+TEST_F(PrefixedBase64, EncodeToVector_EmptyInput_WritesPrefix) {
+  auto buffer = PrefixedBase64Encode(std::span<byte>());
+  ASSERT_EQ(1u, buffer.size());
+  EXPECT_EQ('$', buffer[0]);
+  EXPECT_EQ('\0', buffer[1]);
+}
+
+TEST_F(PrefixedBase64, EncodeToVector_Successful) {
+  auto buffer = PrefixedBase64Encode(kTestData[5].binary);
+  ASSERT_EQ(buffer.size(), kTestData[5].base64.size());
+  EXPECT_EQ(
+      0, std::memcmp(buffer.data(), kTestData[5].base64.data(), buffer.size()));
+  EXPECT_EQ('\0', buffer[buffer.size()]);
+}
+
+TEST_F(PrefixedBase64, EncodeToVector_VectorTooSmall_OnlyNullTerminates) {
+  constexpr byte big[Base64EncodedSize(
+      PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES + 1)] = {};
+
+  auto buffer = PrefixedBase64Encode(big);
+  ASSERT_EQ(0u, buffer.size());
+  EXPECT_EQ('\0', buffer[0]);
 }
 
 TEST_F(PrefixedBase64, Decode) {
@@ -85,18 +131,18 @@
 
 TEST_F(PrefixedBase64, Decode_EmptyInput_WritesNothing) {
   EXPECT_EQ(0u, PrefixedBase64Decode({}, binary_));
-  EXPECT_EQ(byte{0}, binary_[0]);
+  EXPECT_EQ(byte{kUnset}, binary_[0]);
 }
 
 TEST_F(PrefixedBase64, Decode_OnlyPrefix_WritesNothing) {
   EXPECT_EQ(0u, PrefixedBase64Decode("$", binary_));
-  EXPECT_EQ(byte{0}, binary_[0]);
+  EXPECT_EQ(byte{kUnset}, binary_[0]);
 }
 
 TEST_F(PrefixedBase64, Decode_EmptyOutput_WritesNothing) {
   EXPECT_EQ(0u,
             PrefixedBase64Decode(kTestData[5].base64, std::span(binary_, 0)));
-  EXPECT_EQ(byte{0}, binary_[0]);
+  EXPECT_EQ(byte{kUnset}, binary_[0]);
 }
 
 TEST_F(PrefixedBase64, Decode_OutputTooSmall_WritesNothing) {
@@ -104,7 +150,7 @@
   EXPECT_EQ(0u,
             PrefixedBase64Decode(item.base64,
                                  std::span(binary_, item.binary.size() - 1)));
-  EXPECT_EQ(byte{0}, binary_[0]);
+  EXPECT_EQ(byte{kUnset}, binary_[0]);
 }
 
 TEST(PrefixedBase64, DecodeInPlace) {

diff --git a/pw_tokenizer/public/pw_tokenizer/base64.h b/pw_tokenizer/public/pw_tokenizer/base64.h
index e91e66d..42d1f5e 100644
--- a/pw_tokenizer/public/pw_tokenizer/base64.h
+++ b/pw_tokenizer/public/pw_tokenizer/base64.h

@@ -38,15 +38,17 @@
 
 PW_EXTERN_C_START
 
-// Encodes a binary tokenized message as prefixed Base64. Returns the size of
-// the number of characters written to output_buffer. Returns 0 if the buffer is
-// too small.
+// Encodes a binary tokenized message as prefixed Base64 with a null terminator.
+// Returns the encoded string length (excluding the null terminator). Returns 0
+// if the buffer is too small. Always null terminates if the output buffer is
+// not empty.
 //
 // Equivalent to pw::tokenizer::PrefixedBase64Encode.
 size_t pw_tokenizer_PrefixedBase64Encode(const void* binary_message,
                                          size_t binary_size_bytes,
                                          void* output_buffer,
                                          size_t output_buffer_size_bytes);
+
 // Decodes a prefixed Base64 tokenized message to binary. Returns the size of
 // the decoded binary data. The resulting data is ready to be passed to
 // pw::tokenizer::Detokenizer::Detokenize. Returns 0 if the buffer is too small,
@@ -65,13 +67,30 @@
 #include <span>
 #include <string_view>
 
+#include "pw_base64/base64.h"
+#include "pw_containers/vector.h"
+#include "pw_tokenizer/config.h"
+
 namespace pw::tokenizer {
 
 inline constexpr char kBase64Prefix = PW_TOKENIZER_BASE64_PREFIX;
 
-// Encodes a binary tokenized message as prefixed Base64. Returns the size of
-// the number of characters written to output_buffer. Returns 0 if the buffer is
-// too small or does not start with kBase64Prefix.
+// Returns the size of a Base64-encoded tokenized message. Includes the prefix
+// character ($) and the encoded data, but excludes the null terminator.
+constexpr size_t Base64EncodedSize(size_t data) {
+  return sizeof(kBase64Prefix) + base64::EncodedSize(data);
+}
+
+// The minimum buffer size that can hold a tokenized message that is
+// PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES long encoded as prefixed Base64.
+inline constexpr size_t kBase64EncodedBufferSize =
+    Base64EncodedSize(PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES) +
+    sizeof('\0');
+
+// Encodes a binary tokenized message as prefixed Base64 with a null terminator.
+// Returns the encoded string length (excluding the null terminator). Returns 0
+// if the buffer is too small. Always null terminates if the output buffer is
+// not empty.
 inline size_t PrefixedBase64Encode(std::span<const std::byte> binary_message,
                                    std::span<char> output_buffer) {
   return pw_tokenizer_PrefixedBase64Encode(binary_message.data(),
@@ -86,6 +105,38 @@
   return PrefixedBase64Encode(std::as_bytes(binary_message), output_buffer);
 }
 
+// Encodes to a pw::Vector, which defaults to fit a message
+// PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES long encoded as prefixed Base64.
+// The returned vector always contains a null-terminated Base64 string. If
+// size() is zero, the binary message did not fit.
+template <size_t buffer_size = kBase64EncodedBufferSize>
+Vector<char, buffer_size> PrefixedBase64Encode(
+    std::span<const std::byte> binary_message) {
+  static_assert(buffer_size >= Base64EncodedSize(sizeof(uint32_t)));
+
+  Vector<char, buffer_size> output;
+  const size_t encoded_size = Base64EncodedSize(binary_message.size());
+
+  // Make sure the encoded data and a null terminator can fit.
+  if (encoded_size + sizeof('\0') > buffer_size) {
+    output[0] = '\0';
+    return output;
+  }
+
+  output.resize(encoded_size);
+  output[0] = kBase64Prefix;
+  base64::Encode(binary_message, &output[1]);
+  output[encoded_size] = '\0';
+  return output;
+}
+
+// Encode to a pw::Vector from std::span<const uint8_t>.
+template <size_t buffer_size = kBase64EncodedBufferSize>
+Vector<char, buffer_size> PrefixedBase64Encode(
+    std::span<const uint8_t> binary_message) {
+  return PrefixedBase64Encode(std::as_bytes(binary_message));
+}
+
 // Decodes a prefixed Base64 tokenized message to binary. Returns the size of
 // the decoded binary data. The resulting data is ready to be passed to
 // pw::tokenizer::Detokenizer::Detokenize.

diff --git a/pw_tokenizer/public/pw_tokenizer/config.h b/pw_tokenizer/public/pw_tokenizer/config.h
index fede273..c4f34c8 100644
--- a/pw_tokenizer/public/pw_tokenizer/config.h
+++ b/pw_tokenizer/public/pw_tokenizer/config.h

@@ -58,9 +58,9 @@
 
 // The size of the stack-allocated argument encoding buffer to use. This only
 // affects tokenization macros that stack-allocate the encoding buffer
-// (PW_TOKENIZE_TO_CALLBACK and PW_TOKENIZE_TO_GLOBAL_HANDLER). This buffer size
-// is only allocated for argument encoding and does not include the 4-byte
-// token.
+// (PW_TOKENIZE_TO_CALLBACK and PW_TOKENIZE_TO_GLOBAL_HANDLER). A buffer of this
+// size is allocated and used for the 4-byte token and for encoding all
+// arguments. It must be at least large enough for the token (4 bytes).
 //
 // This buffer does not need to be large to accommodate a good number of
 // tokenized string arguments. Integer arguments are usually encoded smaller
@@ -68,5 +68,5 @@
 // point types are encoded as four bytes. Null-terminated strings are encoded
 // 1:1 in size.
 #ifndef PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES
-#define PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES 48
+#define PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES 52
 #endif  // PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES

diff --git a/pw_tokenizer/pw_tokenizer_private/encode_args.h b/pw_tokenizer/pw_tokenizer_private/encode_args.h
index 7a85963..d16c3bf 100644
--- a/pw_tokenizer/pw_tokenizer_private/encode_args.h
+++ b/pw_tokenizer/pw_tokenizer_private/encode_args.h

@@ -28,10 +28,19 @@
 // Buffer for encoding a tokenized string and arguments.
 struct EncodedMessage {
   pw_tokenizer_Token token;
-  std::array<uint8_t, PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES> args;
+  std::array<uint8_t,
+             PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES - sizeof(token)>
+      args;
 };
 
-static_assert(offsetof(EncodedMessage, args) == sizeof(EncodedMessage::token),
+static_assert(PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES >=
+                  sizeof(pw_tokenizer_Token),
+              "PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES must be at least "
+              "large enough for a token (4 bytes)");
+
+static_assert(offsetof(EncodedMessage, args) == sizeof(EncodedMessage::token) &&
+                  PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES ==
+                      sizeof(EncodedMessage),
               "EncodedMessage should not have padding bytes between members");
 
 // Encodes a tokenized string's arguments to a buffer. The
commit	cdafbb4d57162c3fd16df577615ee4716a6586de	[log] [tgz]
author	Wyatt Hepler <hepler@google.com>	Mon Oct 05 11:51:05 2020 -0700
committer	CQ Bot Account <pigweed-scoped@luci-project-accounts.iam.gserviceaccount.com>	Tue Oct 06 21:04:17 2020 +0000
tree	0b17c3093b0cf0ce30bb06e1a1127ec36106524c
parent	2cbb313f931a2b91837dc0fca4eaf4dd42fc5ffc [diff]