blob: 9391f870206fe99fefb4fcaa9ffae25343736d0d [file] [log] [blame]
karthik bharadwaj2ee244b2020-04-16 14:08:22 -07001// Copyright 2020 The Pigweed Authors
2//
3// Licensed under the Apache License, Version 2.0 (the "License"); you may not
4// use this file except in compliance with the License. You may obtain a copy of
5// the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12// License for the specific language governing permissions and limitations under
13// the License.
14
// This file implements a basic fuzz test for the TokenDatabase class.
// A database is created from fuzz data, and a random entry count (also
// derived from the fuzz data) is set. We then iterate over the database
// and run a 'find' operation on it.
19
20#include <cstring>
Wyatt Heplere2cbadf2020-06-22 11:21:45 -070021#include <span>
karthik bharadwaj2ee244b2020-04-16 14:08:22 -070022
23#include "pw_fuzzer/asan_interface.h"
24#include "pw_fuzzer/fuzzed_data_provider.h"
25#include "pw_preprocessor/util.h"
karthik bharadwaj2ee244b2020-04-16 14:08:22 -070026#include "pw_tokenizer/token_database.h"
27
28namespace pw::tokenizer {
29namespace {
30
// Selects how the token header is filled in for a single fuzz run.
// kMaxValue aliases the last enumerator so the full range of the enum is
// known when a value is drawn from the fuzz data.
enum FuzzTestType : uint8_t {
  kValidHeader = 0,
  kRandomHeader = 1,
  kMaxValue = kRandomHeader,
};
36
// Number of bytes occupied by the token header at the start of the buffer.
constexpr size_t kTokenHeaderSize{16};

// Default upper bound, in bytes, on the fuzz data handed to the test. Keep
// this in sync with the '-max_len' argument if the fuzzer executable is
// run with one.
constexpr size_t kFuzzDataSizeMax{4096};
43
// Byte offset and width of the 'EntryCount' field within the token header.
constexpr size_t kEntryCountOffset{8};
constexpr size_t kEntryCountSize{4};

// Overwrites the 'EntryCount' field of the header stored in `buffer`.
//
// buffer: start of the header; must have at least
//   kEntryCountOffset + kEntryCountSize writable bytes.
// count: value to store; its bytes are copied as-is (host byte order).
void SetTokenEntryCountInBuffer(uint8_t* buffer, uint32_t count) {
  uint8_t* const entry_count_field = buffer + kEntryCountOffset;
  std::memcpy(entry_count_field, &count, kEntryCountSize);
}
51
52void IterateOverDatabase(TokenDatabase* const database) {
53 for (TokenDatabase::Entry entry : *database) {
54 // Since we don't "use" the contents of the entry, we exercise
55 // the entry by extracting its contents into volatile variables
56 // to prevent it from being optimized out during compilation.
Prashanth Swaminathanc9968632021-02-03 13:13:35 -080057 [[maybe_unused]] volatile const char* entry_string = entry.string;
58 [[maybe_unused]] volatile uint32_t entry_token = entry.token;
karthik bharadwaj2ee244b2020-04-16 14:08:22 -070059 }
60}
karthik bharadwaj8f535362020-05-04 13:40:46 -070061
karthik bharadwaj2ee244b2020-04-16 14:08:22 -070062} // namespace
63
64extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
65 constexpr size_t kBufferSizeMax = kFuzzDataSizeMax + kTokenHeaderSize;
66 constexpr char kDefaultHeader[] = "TOKENS\0\0\0\0\0\0\0\0\0";
67 static uint8_t buffer[kBufferSizeMax];
68
69 if (size > kFuzzDataSizeMax) {
70 return 0;
71 }
72
73 FuzzedDataProvider provider(data, size);
74
75 // Initialize the token header with either a valid or invalid header
76 // based on a random enum consumed from the fuzz data.
77 switch (provider.ConsumeEnum<FuzzTestType>()) {
78 case kValidHeader:
79 memcpy(buffer, kDefaultHeader, kTokenHeaderSize);
80 break;
81
82 case kRandomHeader: {
83 std::vector<uint8_t> random_header =
84 provider.ConsumeBytes<uint8_t>(kTokenHeaderSize);
85 random_header.resize(kTokenHeaderSize);
86 memcpy(buffer, &random_header[0], kTokenHeaderSize);
87 break;
88 }
89 }
90
91 // Consume a 'test token' integer to look up later in the database.
92 uint32_t random_token = provider.ConsumeIntegral<uint32_t>();
93
94 // Consume a 'token count' integer to set as our database entry count.
95 uint32_t random_token_count =
96 provider.ConsumeIntegralInRange<uint32_t>(0, kFuzzDataSizeMax);
97
98 // Consume the remaining data. Note that the data corresponding to the
99 // string entries in the database are not explicitly null-terminated.
karthik bharadwaj8f535362020-05-04 13:40:46 -0700100 // TODO(karthikmb): Once OSS-Fuzz updates to Clang11.0, switch to
101 // provider.ConsumeData() to avoid extra memory and the memcpy call.
102 auto consumed_bytes =
103 provider.ConsumeBytes<uint8_t>(provider.remaining_bytes());
104 memcpy(buffer + kTokenHeaderSize, &consumed_bytes[0], consumed_bytes.size());
karthik bharadwaj2ee244b2020-04-16 14:08:22 -0700105
106 SetTokenEntryCountInBuffer(buffer, random_token_count);
107
108 // Poison the unused buffer space for this run of the fuzzer to
109 // prevent the token database creator from reading too far in.
karthik bharadwaj8f535362020-05-04 13:40:46 -0700110 size_t data_size = kTokenHeaderSize + consumed_bytes.size();
karthik bharadwaj2ee244b2020-04-16 14:08:22 -0700111 size_t poisoned_length = kBufferSizeMax - data_size;
112 void* poisoned = &buffer[data_size];
113
114 ASAN_POISON_MEMORY_REGION(poisoned, poisoned_length);
115
Wyatt Heplere2cbadf2020-06-22 11:21:45 -0700116 // We create a database from a std::span of the buffer since the string
karthik bharadwaj2ee244b2020-04-16 14:08:22 -0700117 // entries might not be null terminated, and the creation of a database
118 // from a raw buffer has an explicit null terminated string requirement
119 // specified in the API.
Wyatt Heplere2cbadf2020-06-22 11:21:45 -0700120 std::span<uint8_t> data_span(buffer, data_size);
121 auto token_database = TokenDatabase::Create<std::span<uint8_t>>(data_span);
Prashanth Swaminathanc9968632021-02-03 13:13:35 -0800122 [[maybe_unused]] volatile auto match = token_database.Find(random_token);
karthik bharadwaj2ee244b2020-04-16 14:08:22 -0700123
124 IterateOverDatabase(&token_database);
125
126 // Un-poison for the next iteration.
127 ASAN_UNPOISON_MEMORY_REGION(poisoned, poisoned_length);
128
129 return 0;
130}
131
132} // namespace pw::tokenizer