|  | //===- FuzzerInternal.h - Internal header for the Fuzzer --------*- C++ -* ===// | 
|  | // | 
|  | //                     The LLVM Compiler Infrastructure | 
|  | // | 
|  | // This file is distributed under the University of Illinois Open Source | 
|  | // License. See LICENSE.TXT for details. | 
|  | // | 
|  | //===----------------------------------------------------------------------===// | 
|  | // Define the main class fuzzer::Fuzzer and most functions. | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | #ifndef LLVM_FUZZER_INTERNAL_H | 
|  | #define LLVM_FUZZER_INTERNAL_H | 
|  |  | 
#include <algorithm>
#include <atomic>
#include <cassert>
#include <chrono>
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <limits>
#include <random>
#include <string.h>
#include <string>
#include <unordered_set>
#include <vector>
|  |  | 
|  | #include "FuzzerExtFunctions.h" | 
|  | #include "FuzzerInterface.h" | 
|  | #include "FuzzerTracePC.h" | 
|  |  | 
// Platform detection.
// Exactly one of LIBFUZZER_LINUX / LIBFUZZER_APPLE is defined to 1 and the
// other to 0, so both can be tested with a plain `#if`. Any other platform is
// rejected at compile time.
#if defined(__linux__)
#define LIBFUZZER_LINUX 1
#define LIBFUZZER_APPLE 0
// Test with defined() instead of evaluating __APPLE__ directly: an identifier
// that is not a macro evaluates to 0 in #elif and triggers -Wundef warnings.
#elif defined(__APPLE__)
#define LIBFUZZER_LINUX 0
#define LIBFUZZER_APPLE 1
#else
#error "Support for your platform has not been implemented"
#endif
|  |  | 
|  | namespace fuzzer { | 
|  |  | 
// Pointer to the user-supplied target function: it receives one input as a
// (pointer, length) pair and returns an int.
using UserCallback = int (*)(const uint8_t *Data, size_t Size);

// Driver entry point (defined elsewhere). Takes pointers to argc/argv and the
// target callback.
int FuzzerDriver(int *argc, char ***argv, UserCallback Callback);

// The declarations below use std::chrono names (system_clock, seconds,
// duration_cast) unqualified.
using namespace std::chrono;

// Byte buffer used for a single corpus element, and a sequence of them.
using Unit = std::vector<uint8_t>;
using UnitVector = std::vector<Unit>;
|  |  | 
// A fixed-capacity byte string: up to kMaxSize bytes are stored inline next to
// a one-byte length, so the object never allocates.
template <size_t kMaxSize> class FixedWord {
public:
  // The length lives in a uint8_t, so a larger capacity could never be
  // represented; reject such instantiations at compile time.
  static_assert(kMaxSize <= std::numeric_limits<uint8_t>::max(),
                "FixedWord capacity must fit in the uint8_t Size field");

  // Creates an empty word (size() == 0). The payload bytes are not
  // initialized; only the first size() bytes are ever read.
  FixedWord() {}
  // Creates a word holding a copy of the S bytes starting at B.
  FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); }

  // Replaces the contents with a copy of the S bytes starting at B.
  // S must not exceed kMaxSize (checked with assert).
  void Set(const uint8_t *B, uint8_t S) {
    assert(S <= kMaxSize);
    // memcpy with a null source is undefined even for a zero length, so an
    // empty word (e.g. Set(nullptr, 0)) skips the copy entirely.
    if (S != 0)
      memcpy(Data, B, S);
    Size = S;
  }

  // Two words are equal when they have the same length and the same bytes.
  bool operator==(const FixedWord<kMaxSize> &w) const {
    return Size == w.Size && 0 == memcmp(Data, w.Data, Size);
  }

  // Orders by length first; words of equal length are ordered bytewise.
  bool operator<(const FixedWord<kMaxSize> &w) const {
    if (Size != w.Size)
      return Size < w.Size;
    return memcmp(Data, w.Data, Size) < 0;
  }

  static size_t GetMaxSize() { return kMaxSize; }
  const uint8_t *data() const { return Data; }
  uint8_t size() const { return Size; }

private:
  uint8_t Size = 0;
  uint8_t Data[kMaxSize];
};

typedef FixedWord<27> Word; // 1 size byte + 27 payload bytes = 28 bytes.
|  |  | 
|  | bool IsFile(const std::string &Path); | 
|  | std::string FileToString(const std::string &Path); | 
|  | Unit FileToVector(const std::string &Path, size_t MaxSize = 0); | 
|  | void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V, | 
|  | long *Epoch, size_t MaxSize); | 
|  | void WriteToFile(const Unit &U, const std::string &Path); | 
|  | void CopyFileToErr(const std::string &Path); | 
|  | // Returns "Dir/FileName" or equivalent for the current OS. | 
|  | std::string DirPlusFile(const std::string &DirPath, | 
|  | const std::string &FileName); | 
|  |  | 
|  | void DupAndCloseStderr(); | 
|  | void CloseStdout(); | 
|  | void Printf(const char *Fmt, ...); | 
|  | void PrintHexArray(const Unit &U, const char *PrintAfter = ""); | 
|  | void PrintHexArray(const uint8_t *Data, size_t Size, | 
|  | const char *PrintAfter = ""); | 
|  | void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter = ""); | 
|  | void PrintASCII(const Unit &U, const char *PrintAfter = ""); | 
|  | void PrintASCII(const Word &W, const char *PrintAfter = ""); | 
|  | std::string Hash(const Unit &U); | 
|  | void SetTimer(int Seconds); | 
|  | void SetSigSegvHandler(); | 
|  | void SetSigBusHandler(); | 
|  | void SetSigAbrtHandler(); | 
|  | void SetSigIllHandler(); | 
|  | void SetSigFpeHandler(); | 
|  | void SetSigIntHandler(); | 
|  | void SetSigTermHandler(); | 
|  | std::string Base64(const Unit &U); | 
|  | int ExecuteCommand(const std::string &Command); | 
|  | size_t GetPeakRSSMb(); | 
|  |  | 
// Private copy of SHA1 implementation.
// Number of bytes in one digest.
static constexpr int kSHA1NumBytes = 20;
// Hashes the 'Len' bytes at 'Data' with SHA1 and stores the kSHA1NumBytes-byte
// digest in 'Out'.
void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out);
|  |  | 
|  | // Changes U to contain only ASCII (isprint+isspace) characters. | 
|  | // Returns true iff U has been changed. | 
|  | bool ToASCII(uint8_t *Data, size_t Size); | 
|  | bool IsASCII(const Unit &U); | 
|  | bool IsASCII(const uint8_t *Data, size_t Size); | 
|  |  | 
// Small OS wrappers (implemented elsewhere).
int GetPid();
int NumberOfCpuCores();
void SleepSeconds(int Seconds);
|  |  | 
// Thin wrapper around a seeded std::mt19937 engine.
class Random {
public:
  Random(unsigned int seed) : Engine(seed) {}

  // Next raw value produced by the engine.
  size_t Rand() { return Engine(); }

  // Lowest bit of the next raw value: 0 or 1.
  size_t RandBool() { return Rand() & 1; }

  // Next raw value reduced modulo n. For n == 0 returns 0 without drawing
  // from the engine.
  size_t operator()(size_t n) {
    if (n == 0)
      return 0;
    return Rand() % n;
  }

  // Direct access to the underlying engine.
  std::mt19937 &Get_mt19937() { return Engine; }

private:
  std::mt19937 Engine;
};
|  |  | 
|  | // Dictionary. | 
|  |  | 
|  | // Parses one dictionary entry. | 
|  | // If successfull, write the enty to Unit and returns true, | 
|  | // otherwise returns false. | 
|  | bool ParseOneDictionaryEntry(const std::string &Str, Unit *U); | 
|  | // Parses the dictionary file, fills Units, returns true iff all lines | 
|  | // were parsed succesfully. | 
|  | bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units); | 
|  |  | 
|  | class DictionaryEntry { | 
|  | public: | 
|  | DictionaryEntry() {} | 
|  | DictionaryEntry(Word W) : W(W) {} | 
|  | DictionaryEntry(Word W, size_t PositionHint) : W(W), PositionHint(PositionHint) {} | 
|  | const Word &GetW() const { return W; } | 
|  |  | 
|  | bool HasPositionHint() const { return PositionHint != std::numeric_limits<size_t>::max(); } | 
|  | size_t GetPositionHint() const { | 
|  | assert(HasPositionHint()); | 
|  | return PositionHint; | 
|  | } | 
|  | void IncUseCount() { UseCount++; } | 
|  | void IncSuccessCount() { SuccessCount++; } | 
|  | size_t GetUseCount() const { return UseCount; } | 
|  | size_t GetSuccessCount() const {return SuccessCount; } | 
|  |  | 
|  | private: | 
|  | Word W; | 
|  | size_t PositionHint = std::numeric_limits<size_t>::max(); | 
|  | size_t UseCount = 0; | 
|  | size_t SuccessCount = 0; | 
|  | }; | 
|  |  | 
|  | class Dictionary { | 
|  | public: | 
|  | static const size_t kMaxDictSize = 1 << 14; | 
|  |  | 
|  | bool ContainsWord(const Word &W) const { | 
|  | return std::any_of(begin(), end(), [&](const DictionaryEntry &DE) { | 
|  | return DE.GetW() == W; | 
|  | }); | 
|  | } | 
|  | const DictionaryEntry *begin() const { return &DE[0]; } | 
|  | const DictionaryEntry *end() const { return begin() + Size; } | 
|  | DictionaryEntry & operator[] (size_t Idx) { | 
|  | assert(Idx < Size); | 
|  | return DE[Idx]; | 
|  | } | 
|  | void push_back(DictionaryEntry DE) { | 
|  | if (Size < kMaxDictSize) | 
|  | this->DE[Size++] = DE; | 
|  | } | 
|  | void clear() { Size = 0; } | 
|  | bool empty() const { return Size == 0; } | 
|  | size_t size() const { return Size; } | 
|  |  | 
|  | private: | 
|  | DictionaryEntry DE[kMaxDictSize]; | 
|  | size_t Size = 0; | 
|  | }; | 
|  |  | 
|  | class MutationDispatcher { | 
|  | public: | 
|  | MutationDispatcher(Random &Rand); | 
|  | ~MutationDispatcher() {} | 
|  | /// Indicate that we are about to start a new sequence of mutations. | 
|  | void StartMutationSequence(); | 
|  | /// Print the current sequence of mutations. | 
|  | void PrintMutationSequence(); | 
|  | /// Indicate that the current sequence of mutations was successfull. | 
|  | void RecordSuccessfulMutationSequence(); | 
|  | /// Mutates data by invoking user-provided mutator. | 
|  | size_t Mutate_Custom(uint8_t *Data, size_t Size, size_t MaxSize); | 
|  | /// Mutates data by shuffling bytes. | 
|  | size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize); | 
|  | /// Mutates data by erasing a byte. | 
|  | size_t Mutate_EraseByte(uint8_t *Data, size_t Size, size_t MaxSize); | 
|  | /// Mutates data by inserting a byte. | 
|  | size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize); | 
|  | /// Mutates data by chanding one byte. | 
|  | size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize); | 
|  | /// Mutates data by chanding one bit. | 
|  | size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize); | 
|  |  | 
|  | /// Mutates data by adding a word from the manual dictionary. | 
|  | size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size, | 
|  | size_t MaxSize); | 
|  |  | 
|  | /// Mutates data by adding a word from the temporary automatic dictionary. | 
|  | size_t Mutate_AddWordFromTemporaryAutoDictionary(uint8_t *Data, size_t Size, | 
|  | size_t MaxSize); | 
|  |  | 
|  | /// Mutates data by adding a word from the persistent automatic dictionary. | 
|  | size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size, | 
|  | size_t MaxSize); | 
|  |  | 
|  | /// Tries to find an ASCII integer in Data, changes it to another ASCII int. | 
|  | size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize); | 
|  |  | 
|  | /// CrossOver Data with some other element of the corpus. | 
|  | size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize); | 
|  |  | 
|  | /// Applies one of the configured mutations. | 
|  | /// Returns the new size of data which could be up to MaxSize. | 
|  | size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize); | 
|  | /// Applies one of the default mutations. Provided as a service | 
|  | /// to mutation authors. | 
|  | size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize); | 
|  |  | 
|  | /// Creates a cross-over of two pieces of Data, returns its size. | 
|  | size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2, | 
|  | size_t Size2, uint8_t *Out, size_t MaxOutSize); | 
|  |  | 
|  | void AddWordToManualDictionary(const Word &W); | 
|  |  | 
|  | void AddWordToAutoDictionary(const Word &W, size_t PositionHint); | 
|  | void ClearAutoDictionary(); | 
|  | void PrintRecommendedDictionary(); | 
|  |  | 
|  | void SetCorpus(const std::vector<Unit> *Corpus) { this->Corpus = Corpus; } | 
|  |  | 
|  | Random &GetRand() { return Rand; } | 
|  |  | 
|  | private: | 
|  |  | 
|  | struct Mutator { | 
|  | size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max); | 
|  | const char *Name; | 
|  | }; | 
|  |  | 
|  | size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size, | 
|  | size_t MaxSize); | 
|  | size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize, | 
|  | const std::vector<Mutator> &Mutators); | 
|  |  | 
|  | // Interface to functions that may or may not be available. | 
|  | const ExternalFunctions EF; | 
|  |  | 
|  | Random &Rand; | 
|  | // Dictionary provided by the user via -dict=DICT_FILE. | 
|  | Dictionary ManualDictionary; | 
|  | // Temporary dictionary modified by the fuzzer itself, | 
|  | // recreated periodically. | 
|  | Dictionary TempAutoDictionary; | 
|  | // Persistent dictionary modified by the fuzzer, consists of | 
|  | // entries that led to successfull discoveries in the past mutations. | 
|  | Dictionary PersistentAutoDictionary; | 
|  | std::vector<Mutator> CurrentMutatorSequence; | 
|  | std::vector<DictionaryEntry *> CurrentDictionaryEntrySequence; | 
|  | const std::vector<Unit> *Corpus = nullptr; | 
|  | std::vector<uint8_t> MutateInPlaceHere; | 
|  |  | 
|  | std::vector<Mutator> Mutators; | 
|  | std::vector<Mutator> DefaultMutators; | 
|  | }; | 
|  |  | 
|  | class Fuzzer { | 
|  | public: | 
|  | struct FuzzingOptions { | 
|  | int Verbosity = 1; | 
|  | size_t MaxLen = 0; | 
|  | int UnitTimeoutSec = 300; | 
|  | int TimeoutExitCode = 77; | 
|  | int ErrorExitCode = 77; | 
|  | int MaxTotalTimeSec = 0; | 
|  | int RssLimitMb = 0; | 
|  | bool DoCrossOver = true; | 
|  | int MutateDepth = 5; | 
|  | bool UseCounters = false; | 
|  | bool UseIndirCalls = true; | 
|  | bool UseTraces = false; | 
|  | bool UseMemcmp = true; | 
|  | bool UseFullCoverageSet = false; | 
|  | bool Reload = true; | 
|  | bool ShuffleAtStartUp = true; | 
|  | bool PreferSmall = true; | 
|  | size_t MaxNumberOfRuns = ULONG_MAX; | 
|  | int ReportSlowUnits = 10; | 
|  | bool OnlyASCII = false; | 
|  | std::string OutputCorpus; | 
|  | std::string ArtifactPrefix = "./"; | 
|  | std::string ExactArtifactPath; | 
|  | bool SaveArtifacts = true; | 
|  | bool PrintNEW = true; // Print a status line when new units are found; | 
|  | bool OutputCSV = false; | 
|  | bool PrintNewCovPcs = false; | 
|  | bool PrintFinalStats = false; | 
|  | bool DetectLeaks = true; | 
|  | bool TruncateUnits = false; | 
|  | }; | 
|  |  | 
|  | // Aggregates all available coverage measurements. | 
|  | struct Coverage { | 
|  | Coverage() { Reset(); } | 
|  |  | 
|  | void Reset() { | 
|  | BlockCoverage = 0; | 
|  | CallerCalleeCoverage = 0; | 
|  | PcMapBits = 0; | 
|  | CounterBitmapBits = 0; | 
|  | PcBufferLen = 0; | 
|  | CounterBitmap.clear(); | 
|  | PCMap.Reset(); | 
|  | } | 
|  |  | 
|  | std::string DebugString() const; | 
|  |  | 
|  | size_t BlockCoverage; | 
|  | size_t CallerCalleeCoverage; | 
|  |  | 
|  | size_t PcBufferLen; | 
|  | // Precalculated number of bits in CounterBitmap. | 
|  | size_t CounterBitmapBits; | 
|  | std::vector<uint8_t> CounterBitmap; | 
|  | // Precalculated number of bits in PCMap. | 
|  | size_t PcMapBits; | 
|  | PcCoverageMap PCMap; | 
|  | }; | 
|  |  | 
|  | Fuzzer(UserCallback CB, MutationDispatcher &MD, FuzzingOptions Options); | 
|  | void AddToCorpus(const Unit &U) { | 
|  | Corpus.push_back(U); | 
|  | UpdateCorpusDistribution(); | 
|  | } | 
|  | size_t ChooseUnitIdxToMutate(); | 
|  | const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; }; | 
|  | void TruncateUnits(std::vector<Unit> *NewCorpus); | 
|  | void Loop(); | 
|  | void Drill(); | 
|  | void ShuffleAndMinimize(); | 
|  | void InitializeTraceState(); | 
|  | void AssignTaintLabels(uint8_t *Data, size_t Size); | 
|  | size_t CorpusSize() const { return Corpus.size(); } | 
|  | size_t MaxUnitSizeInCorpus() const; | 
|  | void ReadDir(const std::string &Path, long *Epoch, size_t MaxSize) { | 
|  | Printf("Loading corpus: %s\n", Path.c_str()); | 
|  | ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch, MaxSize); | 
|  | } | 
|  | void RereadOutputCorpus(size_t MaxSize); | 
|  | // Save the current corpus to OutputCorpus. | 
|  | void SaveCorpus(); | 
|  |  | 
|  | size_t secondsSinceProcessStartUp() { | 
|  | return duration_cast<seconds>(system_clock::now() - ProcessStartTime) | 
|  | .count(); | 
|  | } | 
|  | size_t execPerSec() { | 
|  | size_t Seconds = secondsSinceProcessStartUp(); | 
|  | return Seconds ? TotalNumberOfRuns / Seconds : 0; | 
|  | } | 
|  |  | 
|  | size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; } | 
|  |  | 
|  | static void StaticAlarmCallback(); | 
|  | static void StaticCrashSignalCallback(); | 
|  | static void StaticInterruptCallback(); | 
|  |  | 
|  | void ExecuteCallback(const uint8_t *Data, size_t Size); | 
|  | bool RunOne(const uint8_t *Data, size_t Size); | 
|  |  | 
|  | // Merge Corpora[1:] into Corpora[0]. | 
|  | void Merge(const std::vector<std::string> &Corpora); | 
|  | // Returns a subset of 'Extra' that adds coverage to 'Initial'. | 
|  | UnitVector FindExtraUnits(const UnitVector &Initial, const UnitVector &Extra); | 
|  | MutationDispatcher &GetMD() { return MD; } | 
|  | void PrintFinalStats(); | 
|  | void SetMaxLen(size_t MaxLen); | 
|  | void RssLimitCallback(); | 
|  |  | 
|  | // Public for tests. | 
|  | void ResetCoverage(); | 
|  |  | 
|  | bool InFuzzingThread() const { return IsMyThread; } | 
|  | size_t GetCurrentUnitInFuzzingThead(const uint8_t **Data) const; | 
|  |  | 
|  | private: | 
|  | void AlarmCallback(); | 
|  | void CrashCallback(); | 
|  | void InterruptCallback(); | 
|  | void MutateAndTestOne(); | 
|  | void ReportNewCoverage(const Unit &U); | 
|  | bool RunOne(const Unit &U) { return RunOne(U.data(), U.size()); } | 
|  | void RunOneAndUpdateCorpus(const uint8_t *Data, size_t Size); | 
|  | void WriteToOutputCorpus(const Unit &U); | 
|  | void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix); | 
|  | void PrintStats(const char *Where, const char *End = "\n"); | 
|  | void PrintStatusForNewUnit(const Unit &U); | 
|  | void ShuffleCorpus(UnitVector *V); | 
|  | void TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size, | 
|  | bool DuringInitialCorpusExecution); | 
|  |  | 
|  | // Updates the probability distribution for the units in the corpus. | 
|  | // Must be called whenever the corpus or unit weights are changed. | 
|  | void UpdateCorpusDistribution(); | 
|  |  | 
|  | bool UpdateMaxCoverage(); | 
|  |  | 
|  | // Trace-based fuzzing: we run a unit with some kind of tracing | 
|  | // enabled and record potentially useful mutations. Then | 
|  | // We apply these mutations one by one to the unit and run it again. | 
|  |  | 
|  | // Start tracing; forget all previously proposed mutations. | 
|  | void StartTraceRecording(); | 
|  | // Stop tracing. | 
|  | void StopTraceRecording(); | 
|  |  | 
|  | void SetDeathCallback(); | 
|  | static void StaticDeathCallback(); | 
|  | void DumpCurrentUnit(const char *Prefix); | 
|  | void DeathCallback(); | 
|  |  | 
|  | void LazyAllocateCurrentUnitData(); | 
|  | uint8_t *CurrentUnitData = nullptr; | 
|  | std::atomic<size_t> CurrentUnitSize; | 
|  |  | 
|  | size_t TotalNumberOfRuns = 0; | 
|  | size_t NumberOfNewUnitsAdded = 0; | 
|  |  | 
|  | bool HasMoreMallocsThanFrees = false; | 
|  | size_t NumberOfLeakDetectionAttempts = 0; | 
|  |  | 
|  | std::vector<Unit> Corpus; | 
|  | std::unordered_set<std::string> UnitHashesAddedToCorpus; | 
|  |  | 
|  | std::piecewise_constant_distribution<double> CorpusDistribution; | 
|  | UserCallback CB; | 
|  | MutationDispatcher &MD; | 
|  | FuzzingOptions Options; | 
|  | system_clock::time_point ProcessStartTime = system_clock::now(); | 
|  | system_clock::time_point UnitStartTime; | 
|  | long TimeOfLongestUnitInSeconds = 0; | 
|  | long EpochOfLastReadOfOutputCorpus = 0; | 
|  |  | 
|  | // Maximum recorded coverage. | 
|  | Coverage MaxCoverage; | 
|  |  | 
|  | // Need to know our own thread. | 
|  | static thread_local bool IsMyThread; | 
|  |  | 
|  | // Interface to functions that may or may not be available. | 
|  | // For future use, currently not used. | 
|  | const ExternalFunctions EF; | 
|  | }; | 
|  |  | 
|  | }; // namespace fuzzer | 
|  |  | 
|  | #endif // LLVM_FUZZER_INTERNAL_H |