Blame - darwin-x86/lib64/clang/14.0.0/include/fuzzer/FuzzedDataProvider.h - platform/prebuilts/clang-tools

blob: 71cb427ec4a9764da328e3d7da0cd177ccb579e9 [file] [log] [blame]

Sasha Smundak	0fc590b	2020-10-07 08:11:59 -0700	[diff] [blame]	1	//===- FuzzedDataProvider.h - Utility header for fuzz targets ---- C++ - ===//
				2	//
				3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
				4	// See https://llvm.org/LICENSE.txt for license information.
				5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
				6	//
				7	//===----------------------------------------------------------------------===//
				8	// A single header library providing a utility class to break up an array of
				9	// bytes. Whenever run on the same input, provides the same output, as long as
				10	// its methods are called in the same order, with the same arguments.
				11	//===----------------------------------------------------------------------===//
				12
				13	#ifndef LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
				14	#define LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_
				15
				16	#include <algorithm>
Pirama Arumuga Nainar	986b880	2021-06-03 16:00:34 -0700	[diff] [blame]	17	#include <array>
Sasha Smundak	0fc590b	2020-10-07 08:11:59 -0700	[diff] [blame]	18	#include <climits>
				19	#include <cstddef>
				20	#include <cstdint>
				21	#include <cstring>
				22	#include <initializer_list>
Pirama Arumuga Nainar	7e1f839	2021-08-16 17:30:48 -0700	[diff] [blame]	23	#include <limits>
Sasha Smundak	0fc590b	2020-10-07 08:11:59 -0700	[diff] [blame]	24	#include <string>
				25	#include <type_traits>
				26	#include <utility>
				27	#include <vector>
				28
				29	// In addition to the comments below, the API is also briefly documented at
				30	// https://github.com/google/fuzzing/blob/master/docs/split-inputs.md#fuzzed-data-provider
				31	class FuzzedDataProvider {
				32	public:
				33	// \|data\| is an array of length \|size\| that the FuzzedDataProvider wraps to
				34	// provide more granular access. \|data\| must outlive the FuzzedDataProvider.
				35	FuzzedDataProvider(const uint8_t *data, size_t size)
				36	: data_ptr_(data), remaining_bytes_(size) {}
				37	~FuzzedDataProvider() = default;
				38
				39	// See the implementation below (after the class definition) for more verbose
				40	// comments for each of the methods.
				41
				42	// Methods returning std::vector of bytes. These are the most popular choice
				43	// when splitting fuzzing input into pieces, as every piece is put into a
				44	// separate buffer (i.e. ASan would catch any under-/overflow) and the memory
				45	// will be released automatically.
				46	template <typename T> std::vector<T> ConsumeBytes(size_t num_bytes);
				47	template <typename T>
				48	std::vector<T> ConsumeBytesWithTerminator(size_t num_bytes, T terminator = 0);
				49	template <typename T> std::vector<T> ConsumeRemainingBytes();
				50
				51	// Methods returning strings. Use only when you need a std::string or a null
				52	// terminated C-string. Otherwise, prefer the methods returning std::vector.
				53	std::string ConsumeBytesAsString(size_t num_bytes);
				54	std::string ConsumeRandomLengthString(size_t max_length);
				55	std::string ConsumeRandomLengthString();
				56	std::string ConsumeRemainingBytesAsString();
				57
				58	// Methods returning integer values.
				59	template <typename T> T ConsumeIntegral();
				60	template <typename T> T ConsumeIntegralInRange(T min, T max);
				61
				62	// Methods returning floating point values.
				63	template <typename T> T ConsumeFloatingPoint();
				64	template <typename T> T ConsumeFloatingPointInRange(T min, T max);
				65
				66	// 0 <= return value <= 1.
				67	template <typename T> T ConsumeProbability();
				68
				69	bool ConsumeBool();
				70
				71	// Returns a value chosen from the given enum.
				72	template <typename T> T ConsumeEnum();
				73
				74	// Returns a value from the given array.
				75	template <typename T, size_t size> T PickValueInArray(const T (&array)[size]);
Pirama Arumuga Nainar	986b880	2021-06-03 16:00:34 -0700	[diff] [blame]	76	template <typename T, size_t size>
				77	T PickValueInArray(const std::array<T, size> &array);
Sasha Smundak	0fc590b	2020-10-07 08:11:59 -0700	[diff] [blame]	78	template <typename T> T PickValueInArray(std::initializer_list<const T> list);
				79
				80	// Writes data to the given destination and returns number of bytes written.
				81	size_t ConsumeData(void *destination, size_t num_bytes);
				82
				83	// Reports the remaining bytes available for fuzzed input.
				84	size_t remaining_bytes() { return remaining_bytes_; }
				85
				86	private:
				87	FuzzedDataProvider(const FuzzedDataProvider &) = delete;
				88	FuzzedDataProvider &operator=(const FuzzedDataProvider &) = delete;
				89
				90	void CopyAndAdvance(void *destination, size_t num_bytes);
				91
				92	void Advance(size_t num_bytes);
				93
				94	template <typename T>
				95	std::vector<T> ConsumeBytes(size_t size, size_t num_bytes);
				96
				97	template <typename TS, typename TU> TS ConvertUnsignedToSigned(TU value);
				98
				99	const uint8_t *data_ptr_;
				100	size_t remaining_bytes_;
				101	};
				102
				103	// Returns a std::vector containing \|num_bytes\| of input data. If fewer than
				104	// \|num_bytes\| of data remain, returns a shorter std::vector containing all
				105	// of the data that's left. Can be used with any byte sized type, such as
				106	// char, unsigned char, uint8_t, etc.
				107	template <typename T>
				108	std::vector<T> FuzzedDataProvider::ConsumeBytes(size_t num_bytes) {
				109	num_bytes = std::min(num_bytes, remaining_bytes_);
				110	return ConsumeBytes<T>(num_bytes, num_bytes);
				111	}
				112
				113	// Similar to \|ConsumeBytes\|, but also appends the terminator value at the end
				114	// of the resulting vector. Useful, when a mutable null-terminated C-string is
				115	// needed, for example. But that is a rare case. Better avoid it, if possible,
				116	// and prefer using \|ConsumeBytes\| or \|ConsumeBytesAsString\| methods.
				117	template <typename T>
				118	std::vector<T> FuzzedDataProvider::ConsumeBytesWithTerminator(size_t num_bytes,
				119	T terminator) {
				120	num_bytes = std::min(num_bytes, remaining_bytes_);
				121	std::vector<T> result = ConsumeBytes<T>(num_bytes + 1, num_bytes);
				122	result.back() = terminator;
				123	return result;
				124	}
				125
				126	// Returns a std::vector containing all remaining bytes of the input data.
				127	template <typename T>
				128	std::vector<T> FuzzedDataProvider::ConsumeRemainingBytes() {
				129	return ConsumeBytes<T>(remaining_bytes_);
				130	}
				131
				132	// Returns a std::string containing \|num_bytes\| of input data. Using this and
				133	// \|.c_str()\| on the resulting string is the best way to get an immutable
				134	// null-terminated C string. If fewer than \|num_bytes\| of data remain, returns
				135	// a shorter std::string containing all of the data that's left.
				136	inline std::string FuzzedDataProvider::ConsumeBytesAsString(size_t num_bytes) {
				137	static_assert(sizeof(std::string::value_type) == sizeof(uint8_t),
				138	"ConsumeBytesAsString cannot convert the data to a string.");
				139
				140	num_bytes = std::min(num_bytes, remaining_bytes_);
				141	std::string result(
				142	reinterpret_cast<const std::string::value_type *>(data_ptr_), num_bytes);
				143	Advance(num_bytes);
				144	return result;
				145	}
				146
				147	// Returns a std::string of length from 0 to \|max_length\|. When it runs out of
				148	// input data, returns what remains of the input. Designed to be more stable
				149	// with respect to a fuzzer inserting characters than just picking a random
				150	// length and then consuming that many bytes with \|ConsumeBytes\|.
				151	inline std::string
				152	FuzzedDataProvider::ConsumeRandomLengthString(size_t max_length) {
				153	// Reads bytes from the start of \|data_ptr_\|. Maps "\\" to "\", and maps "\"
				154	// followed by anything else to the end of the string. As a result of this
				155	// logic, a fuzzer can insert characters into the string, and the string
				156	// will be lengthened to include those new characters, resulting in a more
				157	// stable fuzzer than picking the length of a string independently from
				158	// picking its contents.
				159	std::string result;
				160
				161	// Reserve the anticipated capaticity to prevent several reallocations.
				162	result.reserve(std::min(max_length, remaining_bytes_));
				163	for (size_t i = 0; i < max_length && remaining_bytes_ != 0; ++i) {
				164	char next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
				165	Advance(1);
				166	if (next == '\\' && remaining_bytes_ != 0) {
				167	next = ConvertUnsignedToSigned<char>(data_ptr_[0]);
				168	Advance(1);
				169	if (next != '\\')
				170	break;
				171	}
				172	result += next;
				173	}
				174
				175	result.shrink_to_fit();
				176	return result;
				177	}
				178
				179	// Returns a std::string of length from 0 to \|remaining_bytes_\|.
				180	inline std::string FuzzedDataProvider::ConsumeRandomLengthString() {
				181	return ConsumeRandomLengthString(remaining_bytes_);
				182	}
				183
				184	// Returns a std::string containing all remaining bytes of the input data.
				185	// Prefer using \|ConsumeRemainingBytes\| unless you actually need a std::string
				186	// object.
				187	inline std::string FuzzedDataProvider::ConsumeRemainingBytesAsString() {
				188	return ConsumeBytesAsString(remaining_bytes_);
				189	}
				190
				191	// Returns a number in the range [Type's min, Type's max]. The value might
				192	// not be uniformly distributed in the given range. If there's no input data
				193	// left, always returns \|min\|.
				194	template <typename T> T FuzzedDataProvider::ConsumeIntegral() {
				195	return ConsumeIntegralInRange(std::numeric_limits<T>::min(),
				196	std::numeric_limits<T>::max());
				197	}
				198
				199	// Returns a number in the range [min, max] by consuming bytes from the
				200	// input data. The value might not be uniformly distributed in the given
				201	// range. If there's no input data left, always returns \|min\|. \|min\| must
				202	// be less than or equal to \|max\|.
				203	template <typename T>
				204	T FuzzedDataProvider::ConsumeIntegralInRange(T min, T max) {
				205	static_assert(std::is_integral<T>::value, "An integral type is required.");
				206	static_assert(sizeof(T) <= sizeof(uint64_t), "Unsupported integral type.");
				207
				208	if (min > max)
				209	abort();
				210
				211	// Use the biggest type possible to hold the range and the result.
				212	uint64_t range = static_cast<uint64_t>(max) - min;
				213	uint64_t result = 0;
				214	size_t offset = 0;
				215
				216	while (offset < sizeof(T) * CHAR_BIT && (range >> offset) > 0 &&
				217	remaining_bytes_ != 0) {
				218	// Pull bytes off the end of the seed data. Experimentally, this seems to
				219	// allow the fuzzer to more easily explore the input space. This makes
				220	// sense, since it works by modifying inputs that caused new code to run,
				221	// and this data is often used to encode length of data read by
				222	// \|ConsumeBytes\|. Separating out read lengths makes it easier modify the
				223	// contents of the data that is actually read.
				224	--remaining_bytes_;
				225	result = (result << CHAR_BIT) \| data_ptr_[remaining_bytes_];
				226	offset += CHAR_BIT;
				227	}
				228
				229	// Avoid division by 0, in case \|range + 1\| results in overflow.
				230	if (range != std::numeric_limits<decltype(range)>::max())
				231	result = result % (range + 1);
				232
				233	return static_cast<T>(min + result);
				234	}
				235
				236	// Returns a floating point value in the range [Type's lowest, Type's max] by
				237	// consuming bytes from the input data. If there's no input data left, always
				238	// returns approximately 0.
				239	template <typename T> T FuzzedDataProvider::ConsumeFloatingPoint() {
				240	return ConsumeFloatingPointInRange<T>(std::numeric_limits<T>::lowest(),
				241	std::numeric_limits<T>::max());
				242	}
				243
				244	// Returns a floating point value in the given range by consuming bytes from
				245	// the input data. If there's no input data left, returns \|min\|. Note that
				246	// \|min\| must be less than or equal to \|max\|.
				247	template <typename T>
				248	T FuzzedDataProvider::ConsumeFloatingPointInRange(T min, T max) {
				249	if (min > max)
				250	abort();
				251
				252	T range = .0;
				253	T result = min;
				254	constexpr T zero(.0);
				255	if (max > zero && min < zero && max > min + std::numeric_limits<T>::max()) {
				256	// The diff \|max - min\| would overflow the given floating point type. Use
				257	// the half of the diff as the range and consume a bool to decide whether
				258	// the result is in the first of the second part of the diff.
				259	range = (max / 2.0) - (min / 2.0);
				260	if (ConsumeBool()) {
				261	result += range;
				262	}
				263	} else {
				264	range = max - min;
				265	}
				266
				267	return result + range * ConsumeProbability<T>();
				268	}
				269
				270	// Returns a floating point number in the range [0.0, 1.0]. If there's no
				271	// input data left, always returns 0.
				272	template <typename T> T FuzzedDataProvider::ConsumeProbability() {
				273	static_assert(std::is_floating_point<T>::value,
				274	"A floating point type is required.");
				275
				276	// Use different integral types for different floating point types in order
				277	// to provide better density of the resulting values.
				278	using IntegralType =
				279	typename std::conditional<(sizeof(T) <= sizeof(uint32_t)), uint32_t,
				280	uint64_t>::type;
				281
				282	T result = static_cast<T>(ConsumeIntegral<IntegralType>());
				283	result /= static_cast<T>(std::numeric_limits<IntegralType>::max());
				284	return result;
				285	}
				286
				287	// Reads one byte and returns a bool, or false when no data remains.
				288	inline bool FuzzedDataProvider::ConsumeBool() {
				289	return 1 & ConsumeIntegral<uint8_t>();
				290	}
				291
				292	// Returns an enum value. The enum must start at 0 and be contiguous. It must
				293	// also contain \|kMaxValue\| aliased to its largest (inclusive) value. Such as:
				294	// enum class Foo { SomeValue, OtherValue, kMaxValue = OtherValue };
				295	template <typename T> T FuzzedDataProvider::ConsumeEnum() {
				296	static_assert(std::is_enum<T>::value, "\|T\| must be an enum type.");
				297	return static_cast<T>(
				298	ConsumeIntegralInRange<uint32_t>(0, static_cast<uint32_t>(T::kMaxValue)));
				299	}
				300
				301	// Returns a copy of the value selected from the given fixed-size \|array\|.
				302	template <typename T, size_t size>
				303	T FuzzedDataProvider::PickValueInArray(const T (&array)[size]) {
				304	static_assert(size > 0, "The array must be non empty.");
				305	return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
				306	}
				307
Pirama Arumuga Nainar	986b880	2021-06-03 16:00:34 -0700	[diff] [blame]	308	template <typename T, size_t size>
				309	T FuzzedDataProvider::PickValueInArray(const std::array<T, size> &array) {
				310	static_assert(size > 0, "The array must be non empty.");
				311	return array[ConsumeIntegralInRange<size_t>(0, size - 1)];
				312	}
				313
Sasha Smundak	0fc590b	2020-10-07 08:11:59 -0700	[diff] [blame]	314	template <typename T>
				315	T FuzzedDataProvider::PickValueInArray(std::initializer_list<const T> list) {
				316	// TODO(Dor1s): switch to static_assert once C++14 is allowed.
				317	if (!list.size())
				318	abort();
				319
				320	return *(list.begin() + ConsumeIntegralInRange<size_t>(0, list.size() - 1));
				321	}
				322
				323	// Writes \|num_bytes\| of input data to the given destination pointer. If there
				324	// is not enough data left, writes all remaining bytes. Return value is the
				325	// number of bytes written.
				326	// In general, it's better to avoid using this function, but it may be useful
				327	// in cases when it's necessary to fill a certain buffer or object with
				328	// fuzzing data.
				329	inline size_t FuzzedDataProvider::ConsumeData(void *destination,
				330	size_t num_bytes) {
				331	num_bytes = std::min(num_bytes, remaining_bytes_);
				332	CopyAndAdvance(destination, num_bytes);
				333	return num_bytes;
				334	}
				335
				336	// Private methods.
				337	inline void FuzzedDataProvider::CopyAndAdvance(void *destination,
				338	size_t num_bytes) {
				339	std::memcpy(destination, data_ptr_, num_bytes);
				340	Advance(num_bytes);
				341	}
				342
				343	inline void FuzzedDataProvider::Advance(size_t num_bytes) {
				344	if (num_bytes > remaining_bytes_)
				345	abort();
				346
				347	data_ptr_ += num_bytes;
				348	remaining_bytes_ -= num_bytes;
				349	}
				350
				351	template <typename T>
				352	std::vector<T> FuzzedDataProvider::ConsumeBytes(size_t size, size_t num_bytes) {
				353	static_assert(sizeof(T) == sizeof(uint8_t), "Incompatible data type.");
				354
				355	// The point of using the size-based constructor below is to increase the
				356	// odds of having a vector object with capacity being equal to the length.
				357	// That part is always implementation specific, but at least both libc++ and
				358	// libstdc++ allocate the requested number of bytes in that constructor,
				359	// which seems to be a natural choice for other implementations as well.
				360	// To increase the odds even more, we also call \|shrink_to_fit\| below.
				361	std::vector<T> result(size);
				362	if (size == 0) {
				363	if (num_bytes != 0)
				364	abort();
				365	return result;
				366	}
				367
				368	CopyAndAdvance(result.data(), num_bytes);
				369
				370	// Even though \|shrink_to_fit\| is also implementation specific, we expect it
				371	// to provide an additional assurance in case vector's constructor allocated
				372	// a buffer which is larger than the actual amount of data we put inside it.
				373	result.shrink_to_fit();
				374	return result;
				375	}
				376
				377	template <typename TS, typename TU>
				378	TS FuzzedDataProvider::ConvertUnsignedToSigned(TU value) {
				379	static_assert(sizeof(TS) == sizeof(TU), "Incompatible data types.");
				380	static_assert(!std::numeric_limits<TU>::is_signed,
				381	"Source type must be unsigned.");
				382
				383	// TODO(Dor1s): change to `if constexpr` once C++17 becomes mainstream.
				384	if (std::numeric_limits<TS>::is_modulo)
				385	return static_cast<TS>(value);
				386
				387	// Avoid using implementation-defined unsigned to signed conversions.
				388	// To learn more, see https://stackoverflow.com/questions/13150449.
				389	if (value <= std::numeric_limits<TS>::max()) {
				390	return static_cast<TS>(value);
				391	} else {
				392	constexpr auto TS_min = std::numeric_limits<TS>::min();
Pirama Arumuga Nainar	7e1f839	2021-08-16 17:30:48 -0700	[diff] [blame]	393	return TS_min + static_cast<TS>(value - TS_min);
Sasha Smundak	0fc590b	2020-10-07 08:11:59 -0700	[diff] [blame]	394	}
				395	}
				396
				397	#endif // LLVM_FUZZER_FUZZED_DATA_PROVIDER_H_