blob: ad7bb78dd5c6139cb7da683661133a655c953fb6 [file] [log] [blame]
Wyatt Hepler80c6ee52020-01-03 09:54:58 -08001// Copyright 2020 The Pigweed Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4// use this file except in compliance with the License. You may obtain a copy of
5// the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations under
13// the License.
14
15#include "pw_tokenizer/detokenize.h"
16
17#include <algorithm>
18
19#include "pw_tokenizer/internal/decode.h"
20
21namespace pw::tokenizer {
22namespace {
23
24std::string UnknownTokenMessage(uint32_t value) {
25 std::string output(PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX "unknown token ");
26
27 // Output a hexadecimal version of the token.
28 for (int shift = 28; shift >= 0; shift -= 4) {
29 output.push_back("0123456789abcdef"[(value >> shift) & 0xF]);
30 }
31
32 output.append(PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX);
33 return output;
34}
35
36// Decoding result with the date removed, for sorting.
37using DecodingResult = std::pair<DecodedFormatString, uint32_t>;
38
39// Determines if one result is better than the other if collisions occurred.
40// Returns true if lhs is preferred over rhs. This logic should match the
41// collision resolution logic in detokenize.py.
42bool IsBetterResult(const DecodingResult& lhs, const DecodingResult& rhs) {
43 // Favor the result for which decoding succeeded.
44 if (lhs.first.ok() != rhs.first.ok()) {
45 return lhs.first.ok();
46 }
47
48 // Favor the result for which all bytes were decoded.
49 if ((lhs.first.remaining_bytes() == 0u) !=
50 (rhs.first.remaining_bytes() == 0u)) {
51 return lhs.first.remaining_bytes() == 0u;
52 }
53
54 // Favor the result with fewer decoding errors.
55 if (lhs.first.decoding_errors() != rhs.first.decoding_errors()) {
56 return lhs.first.decoding_errors() < rhs.first.decoding_errors();
57 }
58
59 // Favor the result that successfully decoded the most arguments.
60 if (lhs.first.argument_count() != rhs.first.argument_count()) {
61 return lhs.first.argument_count() > rhs.first.argument_count();
62 }
63
64 // Favor the result that was removed from the database most recently.
65 return lhs.second > rhs.second;
66}
67
68} // namespace
69
70DetokenizedString::DetokenizedString(
71 uint32_t token,
Wyatt Heplere2cbadf2020-06-22 11:21:45 -070072 const std::span<const TokenizedStringEntry>& entries,
73 const std::span<const uint8_t>& arguments)
Wyatt Hepler80c6ee52020-01-03 09:54:58 -080074 : token_(token), has_token_(true) {
75 std::vector<DecodingResult> results;
76
77 for (const auto& [format, date_removed] : entries) {
78 results.push_back(DecodingResult{format.Format(arguments), date_removed});
79 }
80
81 std::sort(results.begin(), results.end(), IsBetterResult);
82
83 for (auto& result : results) {
84 matches_.push_back(std::move(result.first));
85 }
86}
87
88std::string DetokenizedString::BestString() const {
89 return matches_.empty() ? std::string() : matches_[0].value();
90}
91
92std::string DetokenizedString::BestStringWithErrors() const {
93 if (matches_.empty()) {
94 return has_token_ ? UnknownTokenMessage(token_)
95 : PW_TOKENIZER_ARG_DECODING_ERROR("missing token");
96 }
97 return matches_[0].value_with_errors();
98}
99
100Detokenizer::Detokenizer(const TokenDatabase& database) {
101 for (const auto& entry : database) {
102 database_[entry.token].emplace_back(entry.string, entry.date_removed);
103 }
104}
105
106DetokenizedString Detokenizer::Detokenize(
Wyatt Heplere2cbadf2020-06-22 11:21:45 -0700107 const std::span<const uint8_t>& encoded) const {
Wyatt Hepler80c6ee52020-01-03 09:54:58 -0800108 // The token is missing from the encoded data; there is nothing to do.
109 if (encoded.size() < sizeof(uint32_t)) {
110 return DetokenizedString();
111 }
112
113 const uint32_t token =
114 encoded[3] << 24 | encoded[2] << 16 | encoded[1] << 8 | encoded[0];
115
116 const auto result = database_.find(token);
117
118 return DetokenizedString(token,
119 result == database_.end()
Wyatt Heplere2cbadf2020-06-22 11:21:45 -0700120 ? std::span<TokenizedStringEntry>()
121 : std::span(result->second),
Wyatt Hepler80c6ee52020-01-03 09:54:58 -0800122 encoded.subspan(sizeof(token)));
123}
124
125} // namespace pw::tokenizer