pw_tokenizer: Support tokenizing __func__
- In C++, take an array reference for PW_TOKENIZE_STRING, so that
literals or arrays may be tokenized, but not const char*.
- Use std::to_array to copy the original string to the tokenized section
so that character arrays may be used.
- Add tests for tokenizing __func__ and __PRETTY_FUNCTION__ in C++.
Change-Id: I7a9e997d862a2eccad464e9113ce0cf5fc96697b
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/16962
Reviewed-by: Keir Mierle <keir@google.com>
Reviewed-by: Paul Mathieu <paulmathieu@google.com>
Commit-Queue: Wyatt Hepler <hepler@google.com>
diff --git a/pw_tokenizer/CMakeLists.txt b/pw_tokenizer/CMakeLists.txt
index 00fbe75..34044c4 100644
--- a/pw_tokenizer/CMakeLists.txt
+++ b/pw_tokenizer/CMakeLists.txt
@@ -17,6 +17,7 @@
encode_args.cc
tokenize.cc
PUBLIC_DEPS
+ pw_polyfill.overrides
pw_preprocessor
pw_span
PRIVATE_DEPS
diff --git a/pw_tokenizer/docs.rst b/pw_tokenizer/docs.rst
index 72a8040..f9f8536 100644
--- a/pw_tokenizer/docs.rst
+++ b/pw_tokenizer/docs.rst
@@ -294,6 +294,30 @@
additional tokens, but it may not be desirable to fill a token database with
duplicate log lines.
+Tokenizing function names
+-------------------------
+The string literal tokenization functions support tokenizing string literals or
+constexpr character arrays (``constexpr const char[]``). In GCC and Clang, the
+special ``__func__`` variable and ``__PRETTY_FUNCTION__`` extension are declared
+as ``static constexpr char[]`` in C++ instead of the standard ``static const
+char[]``. This means that ``__func__`` and ``__PRETTY_FUNCTION__`` can be
+tokenized while compiling C++ with GCC or Clang.
+
+.. code-block:: cpp
+
+ // Tokenize the special function name variables.
+ constexpr uint32_t function = PW_TOKENIZE_STRING(__func__);
+ constexpr uint32_t pretty_function = PW_TOKENIZE_STRING(__PRETTY_FUNCTION__);
+
+ // Tokenize the function name variables to a handler function.
+ PW_TOKENIZE_TO_GLOBAL_HANDLER(__func__);
+ PW_TOKENIZE_TO_GLOBAL_HANDLER(__PRETTY_FUNCTION__);
+
+Note that ``__func__`` and ``__PRETTY_FUNCTION__`` are not string literals.
+They are defined as static character arrays, so they cannot be implicitly
+concatenated with string literals. For example, ``printf(__func__ ": %d",
+123);`` will not compile.
+
Tokenization in Python
----------------------
The Python ``pw_tokenizer.encode`` module has limited support for encoding
diff --git a/pw_tokenizer/public/pw_tokenizer/internal/tokenize_string.h b/pw_tokenizer/public/pw_tokenizer/internal/tokenize_string.h
index b30223a..27434bd 100644
--- a/pw_tokenizer/public/pw_tokenizer/internal/tokenize_string.h
+++ b/pw_tokenizer/public/pw_tokenizer/internal/tokenize_string.h
@@ -33,9 +33,8 @@
#include "pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h"
#define PW_TOKENIZER_STRING_TOKEN(format) \
- pw::tokenizer::PwTokenizer65599FixedLengthHash( \
- std::string_view((format), sizeof(format "") - 1), \
- PW_TOKENIZER_CFG_HASH_LENGTH)
+ ::pw::tokenizer::PwTokenizer65599FixedLengthHashArray( \
+ format, PW_TOKENIZER_CFG_HASH_LENGTH)
#else // In C or older C++ code, use the hashing macro.
diff --git a/pw_tokenizer/public/pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h b/pw_tokenizer/public/pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h
index 2b6039e..2d2983e 100644
--- a/pw_tokenizer/public/pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h
+++ b/pw_tokenizer/public/pw_tokenizer/pw_tokenizer_65599_fixed_length_hash.h
@@ -61,4 +61,14 @@
return hash;
}
+// Take the string as an array to support either literals or character arrays,
+// but not const char*.
+template <size_t length>
+constexpr uint32_t PwTokenizer65599FixedLengthHashArray(
+ const char (&string)[length], size_t hash_length) {
+ static_assert(length > 0);
+ return PwTokenizer65599FixedLengthHash(std::string_view(string, length - 1),
+ hash_length);
+}
+
} // namespace pw::tokenizer
diff --git a/pw_tokenizer/public/pw_tokenizer/tokenize.h b/pw_tokenizer/public/pw_tokenizer/tokenize.h
index 98eb303..c988fe5 100644
--- a/pw_tokenizer/public/pw_tokenizer/tokenize.h
+++ b/pw_tokenizer/public/pw_tokenizer/tokenize.h
@@ -13,10 +13,20 @@
// the License.
#pragma once
+#ifdef __cplusplus
+
+#include <array>
+#include <cstddef>
+#include <cstdint>
+
+#else
+
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
+#endif // __cplusplus
+
#include "pw_preprocessor/compiler.h"
#include "pw_preprocessor/concat.h"
#include "pw_preprocessor/macro_arg_count.h"
@@ -32,9 +42,12 @@
// If no domain is specified, this default is used.
#define PW_TOKENIZER_DEFAULT_DOMAIN "default"
-// Tokenizes a string literal and converts it to a pw_TokenizerStringToken. This
-// expression can be assigned to a local or global variable, but cannot be used
-// in another expression. For example:
+// Tokenizes a string and converts it to a pw_TokenizerStringToken. In C++, the
+// string may be a literal or a constexpr char array. In C, the argument must be
+// a string literal.
+//
+// This expression can be assigned to a local or global variable, but cannot be
+// used in another expression. For example:
//
// constexpr uint32_t global = PW_TOKENIZE_STRING("Wow!"); // This works.
//
@@ -48,16 +61,10 @@
PW_TOKENIZE_STRING_DOMAIN(PW_TOKENIZER_DEFAULT_DOMAIN, string_literal)
// Same as PW_TOKENIZE_STRING, but tokenizes to the specified domain.
-#define PW_TOKENIZE_STRING_DOMAIN(domain, string_literal) \
- /* assign to a variable */ PW_TOKENIZER_STRING_TOKEN(string_literal); \
- \
- /* Declare the format string as an array in the special tokenized string */ \
- /* section, which should be excluded from the final binary. Use __LINE__ */ \
- /* to create unique names for the section and variable, which avoids */ \
- /* compiler warnings. */ \
- static _PW_TOKENIZER_CONST char PW_CONCAT( \
- _pw_tokenizer_string_literal_DO_NOT_USE_, \
- __COUNTER__)[] _PW_TOKENIZER_SECTION(domain) = string_literal
+#define PW_TOKENIZE_STRING_DOMAIN(domain, string_literal) \
+ /* assign to a variable */ PW_TOKENIZER_STRING_TOKEN(string_literal); \
+ \
+ _PW_TOKENIZER_RECORD_ORIGINAL_STRING(domain, string_literal)
// Encodes a tokenized string and arguments to the provided buffer. The size of
// the buffer is passed via a pointer to a size_t. After encoding is complete,
@@ -253,3 +260,25 @@
#define _PW_TOKENIZER_SECTION(domain) \
PW_KEEP_IN_SECTION(".pw_tokenized." domain "." PW_STRINGIFY(__LINE__))
#endif // __APPLE__
+
+// Declare the format string as an array in the special tokenized string
+// section, which should be excluded from the final binary. Use __COUNTER__
+// to create a unique variable name, which avoids compiler warnings. The section
+// name is produced separately by _PW_TOKENIZER_SECTION.
+#ifdef __cplusplus
+
+// In C++, use std::to_array to support tokenizing string literals or constexpr
+// char arrays.
+#define _PW_TOKENIZER_RECORD_ORIGINAL_STRING(domain, string) \
+ static constexpr std::array<char, sizeof(string)> PW_CONCAT( \
+ _pw_tokenizer_string_literal_DO_NOT_USE_, __COUNTER__) \
+ _PW_TOKENIZER_SECTION(domain) = std::to_array<const char>(string)
+
+#else // In C, only string literals may be tokenized.
+
+#define _PW_TOKENIZER_RECORD_ORIGINAL_STRING(domain, string_literal) \
+ static const char PW_CONCAT(_pw_tokenizer_string_literal_DO_NOT_USE_, \
+ __COUNTER__)[] _PW_TOKENIZER_SECTION(domain) = \
+ string_literal
+
+#endif // __cplusplus
diff --git a/pw_tokenizer/tokenize_test.cc b/pw_tokenizer/tokenize_test.cc
index bc552bc..cd96681 100644
--- a/pw_tokenizer/tokenize_test.cc
+++ b/pw_tokenizer/tokenize_test.cc
@@ -32,7 +32,7 @@
// configuration.
template <size_t kSize>
constexpr uint32_t TestHash(const char (&string)[kSize]) {
- constexpr unsigned kTestHashLength = 48;
+ constexpr unsigned kTestHashLength = 64;
static_assert(kTestHashLength <= PW_TOKENIZER_CFG_HASH_LENGTH);
static_assert(kSize <= kTestHashLength + 1);
return PwTokenizer65599FixedLengthHash(std::string_view(string, kSize - 1),
@@ -51,22 +51,60 @@
kData...};
}
-TEST(TokenizeStringLiteral, EmptyString_IsZero) {
+TEST(TokenizeString, EmptyString_IsZero) {
constexpr pw_TokenizerStringToken token = PW_TOKENIZE_STRING("");
EXPECT_EQ(0u, token);
}
-TEST(TokenizeStringLiteral, String_MatchesHash) {
+TEST(TokenizeString, String_MatchesHash) {
constexpr uint32_t token = PW_TOKENIZE_STRING("[:-)");
EXPECT_EQ(TestHash("[:-)"), token);
}
constexpr uint32_t kGlobalToken = PW_TOKENIZE_STRING(">:-[]");
-TEST(TokenizeStringLiteral, GlobalVariable_MatchesHash) {
+TEST(TokenizeString, GlobalVariable_MatchesHash) {
EXPECT_EQ(TestHash(">:-[]"), kGlobalToken);
}
+struct TokenizedWithinClass {
+ static constexpr uint32_t kThisToken = PW_TOKENIZE_STRING("???");
+};
+
+static_assert(TestHash("???") == TokenizedWithinClass::kThisToken);
+
+TEST(TokenizeString, ClassMember_MatchesHash) {
+ EXPECT_EQ(TestHash("???"), TokenizedWithinClass().kThisToken);
+}
+
+// Use a function with a shorter name to test tokenizing __func__ and
+// __PRETTY_FUNCTION__.
+//
+// WARNING: This function might cause errors for compilers other than GCC and
+// clang. It relies on two GCC/clang extensions:
+//
+// 1 - The __PRETTY_FUNCTION__ C++ function name variable.
+// 2 - __func__ as a static constexpr array instead of static const. See
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66639 for background.
+//
+void TestName() {
+ constexpr uint32_t function_hash = PW_TOKENIZE_STRING(__func__);
+ EXPECT_EQ(pw::tokenizer::TestHash(__func__), function_hash);
+
+ // Check the non-standard __PRETTY_FUNCTION__ name.
+ constexpr uint32_t pretty_function = PW_TOKENIZE_STRING(__PRETTY_FUNCTION__);
+ EXPECT_EQ(pw::tokenizer::TestHash(__PRETTY_FUNCTION__), pretty_function);
+}
+
+TEST(TokenizeString, FunctionName) { TestName(); }
+
+TEST(TokenizeString, Array) {
+ constexpr char array[] = "won-won-won-wonderful";
+
+ const uint32_t array_hash = PW_TOKENIZE_STRING(array);
+ EXPECT_EQ(TestHash(array), array_hash);
+}
+
// Verify that we can tokenize multiple strings from one source line.
#define THREE_FOR_ONE(first, second, third) \
[[maybe_unused]] constexpr uint32_t token_1 = \
@@ -76,7 +114,7 @@
[[maybe_unused]] constexpr uint32_t token_3 = \
PW_TOKENIZE_STRING_DOMAIN("ignored", third);
-TEST(TokenizeStringLiteral, MultipleTokenizationsInOneMacroExpansion) {
+TEST(TokenizeString, MultipleTokenizationsInOneMacroExpansion) {
// This verifies that we can safely tokenize multiple times in a single macro
// expansion. This can be useful when for example a name and description are
// both tokenized after being passed into a macro.
@@ -240,6 +278,16 @@
EXPECT_EQ(std::memcmp(empty.data(), buffer_, empty.size()), 0);
}
+TEST_F(TokenizeToBuffer, Array) {
+ static constexpr char array[] = "1234";
+ size_t message_size = 4;
+ PW_TOKENIZE_TO_BUFFER(buffer_, &message_size, array);
+
+ constexpr std::array<uint8_t, 4> result = ExpectedData<>("1234");
+ ASSERT_EQ(result.size(), message_size);
+ EXPECT_EQ(std::memcmp(result.data(), buffer_, result.size()), 0);
+}
+
TEST_F(TokenizeToBuffer, NullptrString_EncodesNull) {
char* string = nullptr;
size_t message_size = 9;