blob: ea4f0c706c7bba9efa657f58d393a839d4439a42 [file] [log] [blame]
Kostya Serebryany6f5a8042016-09-21 01:50:50 +00001//===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9// fuzzer::InputCorpus
10//===----------------------------------------------------------------------===//
11
12#ifndef LLVM_FUZZER_CORPUS
13#define LLVM_FUZZER_CORPUS
14
Kostya Serebryany20801e12016-09-21 21:41:48 +000015#include <random>
Kostya Serebryany29bb6642016-09-21 22:42:17 +000016#include <unordered_set>
Kostya Serebryany20801e12016-09-21 21:41:48 +000017
Kostya Serebryany6f5a8042016-09-21 01:50:50 +000018#include "FuzzerDefs.h"
Kostya Serebryany20801e12016-09-21 21:41:48 +000019#include "FuzzerRandom.h"
Kostya Serebryany2c556132016-09-30 01:19:56 +000020#include "FuzzerTracePC.h"
Kostya Serebryany6f5a8042016-09-21 01:50:50 +000021
22namespace fuzzer {
23
24struct InputInfo {
25 Unit U; // The actual input data.
Kostya Serebryany20801e12016-09-21 21:41:48 +000026 uint8_t Sha1[kSHA1NumBytes]; // Checksum.
Kostya Serebryany2c556132016-09-30 01:19:56 +000027 // Number of features that this input has and no smaller input has.
28 size_t NumFeatures = 0;
29 size_t Tmp = 0; // Used by ValidateFeatureSet.
Kostya Serebryany29bb6642016-09-21 22:42:17 +000030 // Stats.
Kostya Serebryany2c556132016-09-30 01:19:56 +000031 size_t NumExecutedMutations = 0;
32 size_t NumSuccessfullMutations = 0;
Kostya Serebryany6f5a8042016-09-21 01:50:50 +000033};
34
35class InputCorpus {
36 public:
37 InputCorpus() {
Kostya Serebryany20801e12016-09-21 21:41:48 +000038 Inputs.reserve(1 << 14); // Avoid too many resizes.
Kostya Serebryany2c556132016-09-30 01:19:56 +000039 memset(FeatureSet, 0, sizeof(FeatureSet));
Kostya Serebryany6f5a8042016-09-21 01:50:50 +000040 }
Kostya Serebryany20801e12016-09-21 21:41:48 +000041 size_t size() const { return Inputs.size(); }
Kostya Serebryany2455f0d2016-10-05 00:25:17 +000042 size_t SizeInBytes() const {
43 size_t Res = 0;
44 for (auto &II : Inputs)
45 Res += II.U.size();
46 return Res;
47 }
48 size_t NumActiveUnits() const {
49 size_t Res = 0;
50 for (auto &II : Inputs)
51 Res += !II.U.empty();
52 return Res;
53 }
Kostya Serebryany20801e12016-09-21 21:41:48 +000054 bool empty() const { return Inputs.empty(); }
55 const Unit &operator[] (size_t Idx) const { return Inputs[Idx].U; }
Kostya Serebryany16a145f2016-09-23 01:58:51 +000056 void AddToCorpus(const Unit &U) {
Kostya Serebryany2455f0d2016-10-05 00:25:17 +000057 assert(!U.empty());
Kostya Serebryany624f59f2016-09-22 01:34:58 +000058 uint8_t Hash[kSHA1NumBytes];
59 ComputeSHA1(U.data(), U.size(), Hash);
60 if (!Hashes.insert(Sha1ToString(Hash)).second) return;
61 Inputs.push_back(InputInfo());
62 InputInfo &II = Inputs.back();
Kostya Serebryany6f5a8042016-09-21 01:50:50 +000063 II.U = U;
Kostya Serebryany624f59f2016-09-22 01:34:58 +000064 memcpy(II.Sha1, Hash, kSHA1NumBytes);
Kostya Serebryany2c556132016-09-30 01:19:56 +000065 UpdateFeatureSet(Inputs.size() - 1);
Kostya Serebryany20801e12016-09-21 21:41:48 +000066 UpdateCorpusDistribution();
Kostya Serebryany6f5a8042016-09-21 01:50:50 +000067 }
68
69 typedef const std::vector<InputInfo>::const_iterator ConstIter;
Kostya Serebryany20801e12016-09-21 21:41:48 +000070 ConstIter begin() const { return Inputs.begin(); }
71 ConstIter end() const { return Inputs.end(); }
Kostya Serebryany6f5a8042016-09-21 01:50:50 +000072
73 bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); }
Kostya Serebryanyd2169222016-10-01 01:04:29 +000074 bool HasUnit(const std::string &H) { return Hashes.count(H); }
Kostya Serebryany29bb6642016-09-21 22:42:17 +000075 InputInfo &ChooseUnitToMutate(Random &Rand) {
Kostya Serebryany2455f0d2016-10-05 00:25:17 +000076 InputInfo &II = Inputs[ChooseUnitIdxToMutate(Rand)];
77 assert(!II.U.empty());
78 return II;
Kostya Serebryany20801e12016-09-21 21:41:48 +000079 };
Kostya Serebryany6f5a8042016-09-21 01:50:50 +000080
Kostya Serebryany20801e12016-09-21 21:41:48 +000081 // Returns an index of random unit from the corpus to mutate.
82 // Hypothesis: units added to the corpus last are more likely to be
83 // interesting. This function gives more weight to the more recent units.
84 size_t ChooseUnitIdxToMutate(Random &Rand) {
Kostya Serebryany29bb6642016-09-21 22:42:17 +000085 size_t Idx = static_cast<size_t>(CorpusDistribution(Rand.Get_mt19937()));
Kostya Serebryany20801e12016-09-21 21:41:48 +000086 assert(Idx < Inputs.size());
87 return Idx;
88 }
89
Kostya Serebryany29bb6642016-09-21 22:42:17 +000090 void PrintStats() {
91 for (size_t i = 0; i < Inputs.size(); i++) {
92 const auto &II = Inputs[i];
Kostya Serebryany16a145f2016-09-23 01:58:51 +000093 Printf(" [%zd %s]\tsz: %zd\truns: %zd\tsucc: %zd\n", i,
Kostya Serebryany29bb6642016-09-21 22:42:17 +000094 Sha1ToString(II.Sha1).c_str(), II.U.size(),
Kostya Serebryany16a145f2016-09-23 01:58:51 +000095 II.NumExecutedMutations, II.NumSuccessfullMutations);
Kostya Serebryany29bb6642016-09-21 22:42:17 +000096 }
97 }
98
Kostya Serebryany2c556132016-09-30 01:19:56 +000099 void PrintFeatureSet() {
Kostya Serebryanyd2169222016-10-01 01:04:29 +0000100 Printf("Features [id: idx sz] ");
Kostya Serebryany2c556132016-09-30 01:19:56 +0000101 for (size_t i = 0; i < kFeatureSetSize; i++) {
102 auto &Fe = FeatureSet[i];
103 if (!Fe.Count) continue;
104 Printf("[%zd: %zd %zd] ", i, Fe.SmallestElementIdx,
105 Fe.SmallestElementSize);
106 }
107 Printf("\n\t");
108 for (size_t i = 0; i < Inputs.size(); i++)
109 if (size_t N = Inputs[i].NumFeatures)
110 Printf(" %zd=>%zd ", i, N);
111 Printf("\n");
112 }
113
Kostya Serebryany20801e12016-09-21 21:41:48 +0000114private:
115
Kostya Serebryany2c556132016-09-30 01:19:56 +0000116 static const bool FeatureDebug = false;
117 static const size_t kFeatureSetSize = TracePC::kFeatureSetSize;
118
119 void ValidateFeatureSet() {
120 for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++) {
121 Feature &Fe = FeatureSet[Idx];
122 if(Fe.Count && Fe.SmallestElementSize)
123 Inputs[Fe.SmallestElementIdx].Tmp++;
124 }
125 for (auto &II: Inputs) {
126 assert(II.Tmp == II.NumFeatures);
127 II.Tmp = 0;
128 }
129 }
130
131 void UpdateFeatureSet(size_t CurrentElementIdx) {
132 auto &II = Inputs[CurrentElementIdx];
133 size_t Size = II.U.size();
134 if (!Size)
135 return;
136 bool Updated = false;
137 for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++) {
138 if (!TPC.HasFeature(Idx))
139 continue;
140 Feature &Fe = FeatureSet[Idx];
141 Fe.Count++;
142 if (!Fe.SmallestElementSize ||
143 Fe.SmallestElementSize > Size) {
144 II.NumFeatures++;
Kostya Serebryany5a52a112016-10-04 01:51:44 +0000145 CountingFeatures = true;
Kostya Serebryany2c556132016-09-30 01:19:56 +0000146 if (Fe.SmallestElementSize > Size) {
147 auto &OlderII = Inputs[Fe.SmallestElementIdx];
148 assert(OlderII.NumFeatures > 0);
149 OlderII.NumFeatures--;
Kostya Serebryany2455f0d2016-10-05 00:25:17 +0000150 if (!OlderII.NumFeatures) {
151 OlderII.U.clear(); // Will be never used again.
152 if (FeatureDebug)
153 Printf("EVICTED %zd\n", Fe.SmallestElementIdx);
154 }
Kostya Serebryany2c556132016-09-30 01:19:56 +0000155 }
156 Fe.SmallestElementIdx = CurrentElementIdx;
157 Fe.SmallestElementSize = Size;
158 Updated = true;
159 }
160 }
161 if (Updated && FeatureDebug) PrintFeatureSet();
162 ValidateFeatureSet();
163 }
164
Kostya Serebryany20801e12016-09-21 21:41:48 +0000165 // Updates the probability distribution for the units in the corpus.
166 // Must be called whenever the corpus or unit weights are changed.
167 void UpdateCorpusDistribution() {
168 size_t N = Inputs.size();
Kostya Serebryany5a52a112016-10-04 01:51:44 +0000169 Intervals.resize(N + 1);
170 Weights.resize(N);
Kostya Serebryany20801e12016-09-21 21:41:48 +0000171 std::iota(Intervals.begin(), Intervals.end(), 0);
Kostya Serebryany5a52a112016-10-04 01:51:44 +0000172 if (CountingFeatures)
173 for (size_t i = 0; i < N; i++)
174 Weights[i] = Inputs[i].NumFeatures * (i + 1);
175 else
176 std::iota(Weights.begin(), Weights.end(), 1);
Kostya Serebryany20801e12016-09-21 21:41:48 +0000177 CorpusDistribution = std::piecewise_constant_distribution<double>(
178 Intervals.begin(), Intervals.end(), Weights.begin());
179 }
180 std::piecewise_constant_distribution<double> CorpusDistribution;
181
Kostya Serebryany5a52a112016-10-04 01:51:44 +0000182 std::vector<double> Intervals;
183 std::vector<double> Weights;
184
Kostya Serebryany6f5a8042016-09-21 01:50:50 +0000185 std::unordered_set<std::string> Hashes;
Kostya Serebryany20801e12016-09-21 21:41:48 +0000186 std::vector<InputInfo> Inputs;
Kostya Serebryany2c556132016-09-30 01:19:56 +0000187
188 struct Feature {
189 size_t Count;
190 size_t SmallestElementIdx;
191 size_t SmallestElementSize;
192 };
Kostya Serebryany5a52a112016-10-04 01:51:44 +0000193 bool CountingFeatures = false;
Kostya Serebryany2c556132016-09-30 01:19:56 +0000194 Feature FeatureSet[kFeatureSetSize];
Kostya Serebryany6f5a8042016-09-21 01:50:50 +0000195};
196
197} // namespace fuzzer
198
199#endif // LLVM_FUZZER_CORPUS