|  | //===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===// | 
|  | // | 
|  | //                     The LLVM Compiler Infrastructure | 
|  | // | 
|  | // This file is distributed under the University of Illinois Open Source | 
|  | // License. See LICENSE.TXT for details. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // fuzzer::InputCorpus | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #ifndef LLVM_FUZZER_CORPUS | 
|  | #define LLVM_FUZZER_CORPUS | 
|  |  | 
|  | #include <random> | 
|  | #include <unordered_set> | 
|  |  | 
|  | #include "FuzzerDefs.h" | 
|  | #include "FuzzerRandom.h" | 
|  | #include "FuzzerTracePC.h" | 
|  |  | 
|  | namespace fuzzer { | 
|  |  | 
|  | struct InputInfo { | 
|  | Unit U;  // The actual input data. | 
|  | uint8_t Sha1[kSHA1NumBytes];  // Checksum. | 
|  | // Number of features that this input has and no smaller input has. | 
|  | size_t NumFeatures = 0; | 
|  | size_t Tmp = 0; // Used by ValidateFeatureSet. | 
|  | // Stats. | 
|  | size_t NumExecutedMutations = 0; | 
|  | size_t NumSuccessfullMutations = 0; | 
|  | }; | 
|  |  | 
|  | class InputCorpus { | 
|  | public: | 
|  | static const size_t kFeatureSetSize = 1 << 16; | 
|  | InputCorpus() { | 
|  | Inputs.reserve(1 << 14);  // Avoid too many resizes. | 
|  | memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); | 
|  | memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); | 
|  | } | 
|  | size_t size() const { return Inputs.size(); } | 
|  | size_t SizeInBytes() const { | 
|  | size_t Res = 0; | 
|  | for (auto &II : Inputs) | 
|  | Res += II.U.size(); | 
|  | return Res; | 
|  | } | 
|  | size_t NumActiveUnits() const { | 
|  | size_t Res = 0; | 
|  | for (auto &II : Inputs) | 
|  | Res += !II.U.empty(); | 
|  | return Res; | 
|  | } | 
|  | bool empty() const { return Inputs.empty(); } | 
|  | const Unit &operator[] (size_t Idx) const { return Inputs[Idx].U; } | 
|  | void AddToCorpus(const Unit &U, size_t NumFeatures) { | 
|  | assert(!U.empty()); | 
|  | uint8_t Hash[kSHA1NumBytes]; | 
|  | if (FeatureDebug) | 
|  | Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures); | 
|  | ComputeSHA1(U.data(), U.size(), Hash); | 
|  | Hashes.insert(Sha1ToString(Hash)); | 
|  | Inputs.push_back(InputInfo()); | 
|  | InputInfo &II = Inputs.back(); | 
|  | II.U = U; | 
|  | II.NumFeatures = NumFeatures; | 
|  | memcpy(II.Sha1, Hash, kSHA1NumBytes); | 
|  | UpdateCorpusDistribution(); | 
|  | ValidateFeatureSet(); | 
|  | } | 
|  |  | 
|  | typedef const std::vector<InputInfo>::const_iterator ConstIter; | 
|  | ConstIter begin() const { return Inputs.begin(); } | 
|  | ConstIter end() const { return Inputs.end(); } | 
|  |  | 
|  | bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } | 
|  | bool HasUnit(const std::string &H) { return Hashes.count(H); } | 
|  | InputInfo &ChooseUnitToMutate(Random &Rand) { | 
|  | InputInfo &II = Inputs[ChooseUnitIdxToMutate(Rand)]; | 
|  | assert(!II.U.empty()); | 
|  | return II; | 
|  | }; | 
|  |  | 
|  | // Returns an index of random unit from the corpus to mutate. | 
|  | // Hypothesis: units added to the corpus last are more likely to be | 
|  | // interesting. This function gives more weight to the more recent units. | 
|  | size_t ChooseUnitIdxToMutate(Random &Rand) { | 
|  | size_t Idx = static_cast<size_t>(CorpusDistribution(Rand.Get_mt19937())); | 
|  | assert(Idx < Inputs.size()); | 
|  | return Idx; | 
|  | } | 
|  |  | 
|  | void PrintStats() { | 
|  | for (size_t i = 0; i < Inputs.size(); i++) { | 
|  | const auto &II = Inputs[i]; | 
|  | Printf("  [%zd %s]\tsz: %zd\truns: %zd\tsucc: %zd\n", i, | 
|  | Sha1ToString(II.Sha1).c_str(), II.U.size(), | 
|  | II.NumExecutedMutations, II.NumSuccessfullMutations); | 
|  | } | 
|  | } | 
|  |  | 
|  | void PrintFeatureSet() { | 
|  | for (size_t i = 0; i < kFeatureSetSize; i++) { | 
|  | if(size_t Sz = GetFeature(i)) | 
|  | Printf("[%zd: id %zd sz%zd] ", i, SmallestElementPerFeature[i], Sz); | 
|  | } | 
|  | Printf("\n\t"); | 
|  | for (size_t i = 0; i < Inputs.size(); i++) | 
|  | if (size_t N = Inputs[i].NumFeatures) | 
|  | Printf(" %zd=>%zd ", i, N); | 
|  | Printf("\n"); | 
|  | } | 
|  |  | 
|  | bool AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) { | 
|  | assert(NewSize); | 
|  | Idx = Idx % kFeatureSetSize; | 
|  | uint32_t OldSize = GetFeature(Idx); | 
|  | if (OldSize == 0 || (Shrink && OldSize > NewSize)) { | 
|  | if (OldSize > 0) { | 
|  | InputInfo &II = Inputs[SmallestElementPerFeature[Idx]]; | 
|  | assert(II.NumFeatures > 0); | 
|  | II.NumFeatures--; | 
|  | if (II.NumFeatures == 0) { | 
|  | II.U.clear(); | 
|  | if (FeatureDebug) | 
|  | Printf("EVICTED %zd\n", SmallestElementPerFeature[Idx]); | 
|  | } | 
|  | } | 
|  | if (FeatureDebug) | 
|  | Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize); | 
|  | SmallestElementPerFeature[Idx] = Inputs.size(); | 
|  | InputSizesPerFeature[Idx] = NewSize; | 
|  | CountingFeatures = true; | 
|  | return true; | 
|  | } | 
|  | return false; | 
|  | } | 
|  |  | 
|  | size_t NumFeatures() const { | 
|  | size_t Res = 0; | 
|  | for (size_t i = 0; i < kFeatureSetSize; i++) | 
|  | Res += GetFeature(i) != 0; | 
|  | return Res; | 
|  | } | 
|  |  | 
|  | private: | 
|  |  | 
|  | static const bool FeatureDebug = false; | 
|  |  | 
|  | size_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; } | 
|  |  | 
|  | void ValidateFeatureSet() { | 
|  | if (!CountingFeatures) return; | 
|  | if (FeatureDebug) | 
|  | PrintFeatureSet(); | 
|  | for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++) | 
|  | if (GetFeature(Idx)) | 
|  | Inputs[SmallestElementPerFeature[Idx]].Tmp++; | 
|  | for (auto &II: Inputs) { | 
|  | if (II.Tmp != II.NumFeatures) | 
|  | Printf("ZZZ %zd %zd\n", II.Tmp, II.NumFeatures); | 
|  | assert(II.Tmp == II.NumFeatures); | 
|  | II.Tmp = 0; | 
|  | } | 
|  | } | 
|  |  | 
|  | // Updates the probability distribution for the units in the corpus. | 
|  | // Must be called whenever the corpus or unit weights are changed. | 
|  | void UpdateCorpusDistribution() { | 
|  | size_t N = Inputs.size(); | 
|  | Intervals.resize(N + 1); | 
|  | Weights.resize(N); | 
|  | std::iota(Intervals.begin(), Intervals.end(), 0); | 
|  | if (CountingFeatures) | 
|  | for (size_t i = 0; i < N; i++) | 
|  | Weights[i] = Inputs[i].NumFeatures * (i + 1); | 
|  | else | 
|  | std::iota(Weights.begin(), Weights.end(), 1); | 
|  | CorpusDistribution = std::piecewise_constant_distribution<double>( | 
|  | Intervals.begin(), Intervals.end(), Weights.begin()); | 
|  | } | 
|  | std::piecewise_constant_distribution<double> CorpusDistribution; | 
|  |  | 
|  | std::vector<double> Intervals; | 
|  | std::vector<double> Weights; | 
|  |  | 
|  | std::unordered_set<std::string> Hashes; | 
|  | std::vector<InputInfo> Inputs; | 
|  |  | 
|  | bool CountingFeatures = false; | 
|  | uint32_t InputSizesPerFeature[kFeatureSetSize]; | 
|  | uint32_t SmallestElementPerFeature[kFeatureSetSize]; | 
|  | }; | 
|  |  | 
|  | }  // namespace fuzzer | 
|  |  | 
|  | #endif  // LLVM_FUZZER_CORPUS |