Blame - pw_tokenizer/detokenize.cc - platform/external/pigweed

blob: ad7bb78dd5c6139cb7da683661133a655c953fb6 [file] [log] [blame]

Wyatt Hepler	80c6ee5	2020-01-03 09:54:58 -0800	[diff] [blame]	1	// Copyright 2020 The Pigweed Authors
				2	//
				3	// Licensed under the Apache License, Version 2.0 (the "License"); you may not
				4	// use this file except in compliance with the License. You may obtain a copy of
				5	// the License at
				6	//
				7	// https://www.apache.org/licenses/LICENSE-2.0
				8	//
				9	// Unless required by applicable law or agreed to in writing, software
				10	// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
				11	// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
				12	// License for the specific language governing permissions and limitations under
				13	// the License.
				14
				15	#include "pw_tokenizer/detokenize.h"
				16
				17	#include <algorithm>
				18
				19	#include "pw_tokenizer/internal/decode.h"
				20
				21	namespace pw::tokenizer {
				22	namespace {
				23
				24	std::string UnknownTokenMessage(uint32_t value) {
				25	std::string output(PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX "unknown token ");
				26
				27	// Output a hexadecimal version of the token.
				28	for (int shift = 28; shift >= 0; shift -= 4) {
				29	output.push_back("0123456789abcdef"[(value >> shift) & 0xF]);
				30	}
				31
				32	output.append(PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX);
				33	return output;
				34	}
				35
				36	// Pairs a decoded string with its entry's date_removed value, for sorting.
				37	using DecodingResult = std::pair<DecodedFormatString, uint32_t>;
				38
				39	// Determines if one result is better than the other if collisions occurred.
				40	// Returns true if lhs is preferred over rhs. This logic should match the
				41	// collision resolution logic in detokenize.py.
				42	bool IsBetterResult(const DecodingResult& lhs, const DecodingResult& rhs) {
				43	// Favor the result for which decoding succeeded.
				44	if (lhs.first.ok() != rhs.first.ok()) {
				45	return lhs.first.ok();
				46	}
				47
				48	// Favor the result for which all bytes were decoded.
				49	if ((lhs.first.remaining_bytes() == 0u) !=
				50	(rhs.first.remaining_bytes() == 0u)) {
				51	return lhs.first.remaining_bytes() == 0u;
				52	}
				53
				54	// Favor the result with fewer decoding errors.
				55	if (lhs.first.decoding_errors() != rhs.first.decoding_errors()) {
				56	return lhs.first.decoding_errors() < rhs.first.decoding_errors();
				57	}
				58
				59	// Favor the result that successfully decoded the most arguments.
				60	if (lhs.first.argument_count() != rhs.first.argument_count()) {
				61	return lhs.first.argument_count() > rhs.first.argument_count();
				62	}
				63
				64	// Favor the result that was removed from the database most recently.
				65	return lhs.second > rhs.second;
				66	}
				67
				68	} // namespace
				69
				70	DetokenizedString::DetokenizedString(
				71	uint32_t token,
Wyatt Hepler	e2cbadf	2020-06-22 11:21:45 -0700	[diff] [blame]	72	const std::span<const TokenizedStringEntry>& entries,
				73	const std::span<const uint8_t>& arguments)
Wyatt Hepler	80c6ee5	2020-01-03 09:54:58 -0800	[diff] [blame]	74	: token_(token), has_token_(true) {
				75	std::vector<DecodingResult> results;
				76
				77	for (const auto& [format, date_removed] : entries) {
				78	results.push_back(DecodingResult{format.Format(arguments), date_removed});
				79	}
				80
				81	std::sort(results.begin(), results.end(), IsBetterResult);
				82
				83	for (auto& result : results) {
				84	matches_.push_back(std::move(result.first));
				85	}
				86	}
				87
				88	std::string DetokenizedString::BestString() const {
				89	return matches_.empty() ? std::string() : matches_[0].value();
				90	}
				91
				92	std::string DetokenizedString::BestStringWithErrors() const {
				93	if (matches_.empty()) {
				94	return has_token_ ? UnknownTokenMessage(token_)
				95	: PW_TOKENIZER_ARG_DECODING_ERROR("missing token");
				96	}
				97	return matches_[0].value_with_errors();
				98	}
				99
				100	Detokenizer::Detokenizer(const TokenDatabase& database) {
				101	for (const auto& entry : database) {
				102	database_[entry.token].emplace_back(entry.string, entry.date_removed);
				103	}
				104	}
				105
				106	DetokenizedString Detokenizer::Detokenize(
Wyatt Hepler	e2cbadf	2020-06-22 11:21:45 -0700	[diff] [blame]	107	const std::span<const uint8_t>& encoded) const {
Wyatt Hepler	80c6ee5	2020-01-03 09:54:58 -0800	[diff] [blame]	108	// The token is missing from the encoded data; there is nothing to do.
				109	if (encoded.size() < sizeof(uint32_t)) {
				110	return DetokenizedString();
				111	}
				112
				113	const uint32_t token =
				114	encoded[3] << 24 \| encoded[2] << 16 \| encoded[1] << 8 \| encoded[0];
				115
				116	const auto result = database_.find(token);
				117
				118	return DetokenizedString(token,
				119	result == database_.end()
Wyatt Hepler	e2cbadf	2020-06-22 11:21:45 -0700	[diff] [blame]	120	? std::span<TokenizedStringEntry>()
				121	: std::span(result->second),
Wyatt Hepler	80c6ee5	2020-01-03 09:54:58 -0800	[diff] [blame]	122	encoded.subspan(sizeof(token)));
				123	}
				124
				125	} // namespace pw::tokenizer