blob: b7ab5057eb100bf0c8478f96e8172316de1360e0 [file] [log] [blame]
Wyatt Hepler80c6ee52020-01-03 09:54:58 -08001// Copyright 2020 The Pigweed Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4// use this file except in compliance with the License. You may obtain a copy of
5// the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations under
13// the License.
14
15// This file defines the functions that encode tokenized logs at runtime. These
16// are the only pw_tokenizer functions present in a binary that tokenizes
17// strings. All other tokenizing code is resolved at compile time.
18
19#include "pw_tokenizer/tokenize.h"
20
21#include <algorithm>
22#include <array>
23#include <cstdarg>
24#include <cstddef>
25#include <cstring>
26
Wyatt Heplera6d5cc62020-01-17 14:15:40 -080027#include "pw_polyfill/language_features.h" // static_assert
Wyatt Hepler80c6ee52020-01-03 09:54:58 -080028#include "pw_varint/varint.h"
29
Wyatt Heplera6d5cc62020-01-17 14:15:40 -080030namespace pw {
31namespace tokenizer {
Wyatt Hepler80c6ee52020-01-03 09:54:58 -080032namespace {
33
34// Store metadata about this compilation's string tokenization in the ELF.
35//
36// The tokenizer metadata will not go into the on-device executable binary code.
37// This metadata will be present in the ELF file's .tokenizer_info section, from
38// which the host-side tooling (Python, Java, etc.) can understand how to decode
39// tokenized strings for the given binary. Only attributes that affect the
40// decoding process are recorded.
41//
42// Tokenizer metadata is stored in an array of key-value pairs. Each Metadata
43// object is 32 bytes: a 24-byte string and an 8-byte value. Metadata structs
// may be parsed in Python with the struct format '<24sQ'.
45PW_PACKED(struct) Metadata {
46 char name[24]; // name of the metadata field
47 uint64_t value; // value of the field
48};
49
50static_assert(sizeof(Metadata) == 32);
51
52// Store tokenization metadata in its own section.
53constexpr Metadata metadata[] PW_KEEP_IN_SECTION(".tokenzier_info") = {
54 {"hash_length_bytes", PW_TOKENIZER_CFG_HASH_LENGTH},
55 {"sizeof_long", sizeof(long)}, // %l conversion specifier
56 {"sizeof_intmax_t", sizeof(intmax_t)}, // %j conversion specifier
57 {"sizeof_size_t", sizeof(size_t)}, // %z conversion specifier
58 {"sizeof_ptrdiff_t", sizeof(ptrdiff_t)}, // %t conversion specifier
59};
60
61// Declare the types as an enum for convenience.
62enum class ArgType : uint8_t {
63 kInt = PW_TOKENIZER_ARG_TYPE_INT,
64 kInt64 = PW_TOKENIZER_ARG_TYPE_INT64,
65 kDouble = PW_TOKENIZER_ARG_TYPE_DOUBLE,
66 kString = PW_TOKENIZER_ARG_TYPE_STRING,
67};
68
69// Just to be safe, make sure these values are what we expect them to be.
70static_assert(0b00u == static_cast<uint8_t>(ArgType::kInt));
71static_assert(0b01u == static_cast<uint8_t>(ArgType::kInt64));
72static_assert(0b10u == static_cast<uint8_t>(ArgType::kDouble));
73static_assert(0b11u == static_cast<uint8_t>(ArgType::kString));
74
75// Buffer for encoding a tokenized string and arguments.
76struct EncodedMessage {
77 pw_TokenizerStringToken token;
78 std::array<uint8_t, PW_TOKENIZER_CFG_ENCODING_BUFFER_SIZE_BYTES> args;
79};
80
81static_assert(offsetof(EncodedMessage, args) == sizeof(EncodedMessage::token),
82 "EncodedMessage should not have padding bytes between members");
83
84size_t EncodeInt(int value, const span<uint8_t>& output) {
85 return varint::Encode(value, pw::as_writable_bytes(output));
86}
87
88size_t EncodeInt64(int64_t value, const span<uint8_t>& output) {
89 return varint::Encode(value, pw::as_writable_bytes(output));
90}
91
92size_t EncodeFloat(float value, const span<uint8_t>& output) {
93 if (output.size() < sizeof(value)) {
94 return 0;
95 }
96 std::memcpy(output.data(), &value, sizeof(value));
97 return sizeof(value);
98}
99
100size_t EncodeString(const char* string, const span<uint8_t>& output) {
101 // The top bit of the status byte indicates if the string was truncated.
102 static constexpr size_t kMaxStringLength = 0x7Fu;
103
104 if (output.empty()) { // At least one byte is needed for the status/size.
105 return 0;
106 }
107
108 if (string == nullptr) {
109 string = "NULL";
110 }
111
112 // Subtract 1 to save room for the status byte.
113 const size_t max_bytes = std::min(output.size(), kMaxStringLength) - 1;
114
115 // Scan the string to find out how many bytes to copy.
116 size_t bytes_to_copy = 0;
117 uint8_t overflow_bit = 0;
118
119 while (string[bytes_to_copy] != '\0') {
120 if (bytes_to_copy == max_bytes) {
121 overflow_bit = '\x80';
122 break;
123 }
124 bytes_to_copy += 1;
125 }
126
127 output[0] = bytes_to_copy | overflow_bit;
128 std::memcpy(output.data() + 1, string, bytes_to_copy);
129
130 return bytes_to_copy + 1; // include the status byte in the total
131}
132
133size_t EncodeArgs(pw_TokenizerArgTypes types,
134 va_list args,
135 span<uint8_t> output) {
136 size_t arg_count = types & PW_TOKENIZER_TYPE_COUNT_MASK;
137 types >>= PW_TOKENIZER_TYPE_COUNT_SIZE_BITS;
138
139 size_t encoded_bytes = 0;
140 while (arg_count != 0u) {
141 // How many bytes were encoded; 0 indicates that there wasn't enough space.
142 size_t argument_bytes = 0;
143
144 switch (static_cast<ArgType>(types & 0b11u)) {
145 case ArgType::kInt:
146 argument_bytes = EncodeInt(va_arg(args, int), output);
147 break;
148 case ArgType::kInt64:
149 argument_bytes = EncodeInt64(va_arg(args, int64_t), output);
150 break;
151 case ArgType::kDouble:
152 argument_bytes =
153 EncodeFloat(static_cast<float>(va_arg(args, double)), output);
154 break;
155 case ArgType::kString:
156 argument_bytes = EncodeString(va_arg(args, const char*), output);
157 break;
158 }
159
160 // If zero bytes were encoded, the encoding buffer is full.
161 if (argument_bytes == 0u) {
162 break;
163 }
164
165 output = output.subspan(argument_bytes);
166 encoded_bytes += argument_bytes;
167
168 arg_count -= 1;
169 types >>= 2; // each argument type is encoded in two bits
170 }
171
172 return encoded_bytes;
173}
174
175} // namespace
176
177extern "C" {
178
179void pw_TokenizeToBuffer(void* buffer,
180 size_t* buffer_size_bytes,
181 pw_TokenizerStringToken token,
182 pw_TokenizerArgTypes types,
183 ...) {
184 if (*buffer_size_bytes < sizeof(token)) {
185 *buffer_size_bytes = 0;
186 return;
187 }
188
189 std::memcpy(buffer, &token, sizeof(token));
190
191 va_list args;
192 va_start(args, types);
193 const size_t encoded_bytes =
194 EncodeArgs(types,
195 args,
Wyatt Heplera6d5cc62020-01-17 14:15:40 -0800196 span<uint8_t>(static_cast<uint8_t*>(buffer) + sizeof(token),
197 *buffer_size_bytes - sizeof(token)));
Wyatt Hepler80c6ee52020-01-03 09:54:58 -0800198 va_end(args);
199
200 *buffer_size_bytes = sizeof(token) + encoded_bytes;
201}
202
203void pw_TokenizeToCallback(void (*callback)(const uint8_t* encoded_message,
204 size_t size_bytes),
205 pw_TokenizerStringToken token,
206 pw_TokenizerArgTypes types,
207 ...) {
208 EncodedMessage encoded;
209 encoded.token = token;
210
211 va_list args;
212 va_start(args, types);
213 const size_t encoded_bytes = EncodeArgs(types, args, encoded.args);
214 va_end(args);
215
216 callback(reinterpret_cast<const uint8_t*>(&encoded),
217 sizeof(encoded.token) + encoded_bytes);
218}
219
220#if PW_TOKENIZER_CFG_ENABLE_TOKENIZE_TO_GLOBAL_HANDLER
221
222void pw_TokenizeToGlobalHandler(pw_TokenizerStringToken token,
223 pw_TokenizerArgTypes types,
224 ...) {
225 EncodedMessage encoded;
226 encoded.token = token;
227
228 va_list args;
229 va_start(args, types);
230 const size_t encoded_bytes = EncodeArgs(types, args, encoded.args);
231 va_end(args);
232
233 pw_TokenizerHandleEncodedMessage(reinterpret_cast<const uint8_t*>(&encoded),
234 sizeof(encoded.token) + encoded_bytes);
235}
236
237#endif // PW_TOKENIZER_CFG_ENABLE_TOKENIZE_TO_GLOBAL_HANDLER
238
239#if PW_TOKENIZER_CFG_ENABLE_TOKENIZE_TO_GLOBAL_HANDLER_WITH_PAYLOAD
240
241void pw_TokenizeToGlobalHandlerWithPayload(const pw_TokenizerPayload payload,
242 pw_TokenizerStringToken token,
243 pw_TokenizerArgTypes types,
244 ...) {
245 EncodedMessage encoded;
246 encoded.token = token;
247
248 va_list args;
249 va_start(args, types);
250 const size_t encoded_bytes = EncodeArgs(types, args, encoded.args);
251 va_end(args);
252
253 pw_TokenizerHandleEncodedMessageWithPayload(
254 payload,
255 reinterpret_cast<const uint8_t*>(&encoded),
256 sizeof(encoded.token) + encoded_bytes);
257}
258
259#endif // PW_TOKENIZER_CFG_ENABLE_TOKENIZE_TO_GLOBAL_HANDLER_WITH_PAYLOAD
260
261} // extern "C"
262
Wyatt Heplera6d5cc62020-01-17 14:15:40 -0800263} // namespace tokenizer
264} // namespace pw