blob: d3f1ab185255f86e79909801eaafc45e60e90d67 [file] [log] [blame]
Kostya Serebryany111e1d62016-12-09 01:17:24 +00001//===- FuzzerMerge.cpp - merging corpora ----------------------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9// Merging corpora.
10//===----------------------------------------------------------------------===//
11
12#include "FuzzerInternal.h"
13#include "FuzzerIO.h"
14#include "FuzzerMerge.h"
15#include "FuzzerTracePC.h"
16#include "FuzzerUtil.h"
17
18#include <fstream>
19#include <sstream>
20
21namespace fuzzer {
22
23bool Merger::Parse(const std::string &Str, bool ParseCoverage) {
24 std::istringstream SS(Str);
25 return Parse(SS, ParseCoverage);
26}
27
28void Merger::ParseOrExit(std::istream &IS, bool ParseCoverage) {
29 if (!Parse(IS, ParseCoverage)) {
30 Printf("MERGE: failed to parse the control file (unexpected error)\n");
31 exit(1);
32 }
33}
34
35// The control file example:
36//
37// 3 # The number of inputs
38// 1 # The number of inputs in the first corpus, <= the previous number
39// file0
40// file1
41// file2 # One file name per line.
42// STARTED 0 123 # FileID, file size
43// DONE 0 1 4 6 8 # FileID COV1 COV2 ...
44// STARTED 1 456 # If DONE is missing, the input crashed while processing.
45// STARTED 2 567
46// DONE 2 8 9
47bool Merger::Parse(std::istream &IS, bool ParseCoverage) {
48 LastFailure.clear();
49 std::string Line;
50
51 // Parse NumFiles.
52 if (!std::getline(IS, Line, '\n')) return false;
53 std::istringstream L1(Line);
54 size_t NumFiles = 0;
55 L1 >> NumFiles;
56 if (NumFiles == 0 || NumFiles > 10000000) return false;
57
58 // Parse NumFilesInFirstCorpus.
59 if (!std::getline(IS, Line, '\n')) return false;
60 std::istringstream L2(Line);
61 NumFilesInFirstCorpus = NumFiles + 1;
62 L2 >> NumFilesInFirstCorpus;
63 if (NumFilesInFirstCorpus > NumFiles) return false;
64
65 // Parse file names.
66 Files.resize(NumFiles);
67 for (size_t i = 0; i < NumFiles; i++)
68 if (!std::getline(IS, Files[i].Name, '\n'))
69 return false;
70
71 // Parse STARTED and DONE lines.
72 size_t ExpectedStartMarker = 0;
73 const size_t kInvalidStartMarker = -1;
74 size_t LastSeenStartMarker = kInvalidStartMarker;
75 while (std::getline(IS, Line, '\n')) {
76 std::istringstream ISS1(Line);
77 std::string Marker;
78 size_t N;
79 ISS1 >> Marker;
80 ISS1 >> N;
81 if (Marker == "STARTED") {
82 // STARTED FILE_ID FILE_SIZE
83 if (ExpectedStartMarker != N)
84 return false;
85 ISS1 >> Files[ExpectedStartMarker].Size;
86 LastSeenStartMarker = ExpectedStartMarker;
87 assert(ExpectedStartMarker < Files.size());
88 ExpectedStartMarker++;
89 } else if (Marker == "DONE") {
90 // DONE FILE_SIZE COV1 COV2 COV3 ...
91 size_t CurrentFileIdx = N;
92 if (CurrentFileIdx != LastSeenStartMarker)
93 return false;
94 LastSeenStartMarker = kInvalidStartMarker;
95 if (ParseCoverage) {
96 while (!ISS1.rdstate()) {
97 ISS1 >> std::hex >> N;
98 Files[CurrentFileIdx].Features.insert(N);
99 }
100 }
101 } else {
102 return false;
103 }
104 }
105 if (LastSeenStartMarker != kInvalidStartMarker)
106 LastFailure = Files[LastSeenStartMarker].Name;
107
108 FirstNotProcessedFile = ExpectedStartMarker;
109 return true;
110}
111
112// Decides which files need to be merged (add thost to NewFiles).
113// Returns the number of new features added.
114size_t Merger::Merge(std::vector<std::string> *NewFiles) {
115 NewFiles->clear();
116 assert(NumFilesInFirstCorpus <= Files.size());
117 std::set<size_t> AllFeatures;
118
119 // What features are in the initial corpus?
120 for (size_t i = 0; i < NumFilesInFirstCorpus; i++) {
121 auto &Cur = Files[i].Features;
122 AllFeatures.insert(Cur.begin(), Cur.end());
123 }
124 size_t InitialNumFeatures = AllFeatures.size();
125
126 // Remove all features that we already know from all other inputs.
127 for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) {
128 auto &Cur = Files[i].Features;
129 std::set<size_t> Tmp;
130 std::set_difference(Cur.begin(), Cur.end(), AllFeatures.begin(),
131 AllFeatures.end(), std::inserter(Tmp, Tmp.begin()));
132 Cur.swap(Tmp);
133 }
134
135 // Sort. Give preference to
136 // * smaller files
137 // * files with more features.
138 std::sort(Files.begin() + NumFilesInFirstCorpus, Files.end(),
139 [&](const MergeFileInfo &a, const MergeFileInfo &b) -> bool {
140 if (a.Size != b.Size)
141 return a.Size < b.Size;
142 return a.Features.size() > b.Features.size();
143 });
144
145 // One greedy pass: add the file's features to AllFeatures.
146 // If new features were added, add this file to NewFiles.
147 for (size_t i = NumFilesInFirstCorpus; i < Files.size(); i++) {
148 auto &Cur = Files[i].Features;
149 // Printf("%s -> sz %zd ft %zd\n", Files[i].Name.c_str(),
150 // Files[i].Size, Cur.size());
151 size_t OldSize = AllFeatures.size();
152 AllFeatures.insert(Cur.begin(), Cur.end());
153 if (AllFeatures.size() > OldSize)
154 NewFiles->push_back(Files[i].Name);
155 }
156 return AllFeatures.size() - InitialNumFeatures;
157}
158
159// Inner process. May crash if the target crashes.
160void Fuzzer::CrashResistantMergeInternalStep(const std::string &CFPath) {
161 Printf("MERGE-INNER: using the control file '%s'\n", CFPath.c_str());
162 Merger M;
163 std::ifstream IF(CFPath);
164 M.ParseOrExit(IF, false);
165 IF.close();
166 if (!M.LastFailure.empty())
167 Printf("MERGE-INNER: '%s' caused a failure at the previous merge step\n",
168 M.LastFailure.c_str());
169
170 Printf("MERGE-INNER: %zd total files;"
171 " %zd processed earlier; will process %zd files now\n",
172 M.Files.size(), M.FirstNotProcessedFile,
173 M.Files.size() - M.FirstNotProcessedFile);
174
175 std::ofstream OF(CFPath, std::ofstream::out | std::ofstream::app);
176 for (size_t i = M.FirstNotProcessedFile; i < M.Files.size(); i++) {
177 auto U = FileToVector(M.Files[i].Name);
178 std::ostringstream StartedLine;
179 // Write the pre-run marker.
180 OF << "STARTED " << std::dec << i << " " << U.size() << "\n";
181 OF.flush(); // Flush is important since ExecuteCommand may crash.
182 // Run.
183 TPC.ResetMaps();
184 ExecuteCallback(U.data(), U.size());
185 // Collect coverage.
186 std::set<size_t> Features;
187 TPC.CollectFeatures([&](size_t Feature) -> bool {
188 Features.insert(Feature);
189 return true;
190 });
191 // Show stats.
192 TotalNumberOfRuns++;
193 if (!(TotalNumberOfRuns & (TotalNumberOfRuns - 1)))
194 PrintStats("pulse ");
195 // Write the post-run marker and the coverage.
196 OF << "DONE " << i;
197 for (size_t F : Features)
198 OF << " " << std::hex << F;
199 OF << "\n";
200 }
201}
202
203// Outer process. Does not call the target code and thus sohuld not fail.
204void Fuzzer::CrashResistantMerge(const std::vector<std::string> &Args,
205 const std::vector<std::string> &Corpora) {
206 if (Corpora.size() <= 1) {
207 Printf("Merge requires two or more corpus dirs\n");
208 return;
209 }
210 std::vector<std::string> AllFiles;
211 ListFilesInDirRecursive(Corpora[0], nullptr, &AllFiles, /*TopDir*/true);
212 size_t NumFilesInFirstCorpus = AllFiles.size();
213 for (size_t i = 1; i < Corpora.size(); i++)
214 ListFilesInDirRecursive(Corpora[i], nullptr, &AllFiles, /*TopDir*/true);
215 Printf("MERGE-OUTER: %zd files, %zd in the initial corpus\n",
216 AllFiles.size(), NumFilesInFirstCorpus);
217 std::string CFPath =
218 "libFuzzerTemp." + std::to_string(GetPid()) + ".txt";
219 // Write the control file.
220 DeleteFile(CFPath);
221 std::ofstream ControlFile(CFPath);
222 ControlFile << AllFiles.size() << "\n";
223 ControlFile << NumFilesInFirstCorpus << "\n";
224 for (auto &Path: AllFiles)
225 ControlFile << Path << "\n";
226 ControlFile.close();
227
228 // Execute the inner process untill it passes.
229 // Every inner process should execute at least one input.
230 std::string BaseCmd = CloneArgsWithoutX(Args, "keep-all-flags");
231 for (size_t i = 1; i <= AllFiles.size(); i++) {
232 Printf("MERGE-OUTER: attempt %zd\n", i);
233 auto ExitCode =
234 ExecuteCommand(BaseCmd + " -merge_control_file=" + CFPath);
235 if (!ExitCode) {
236 Printf("MERGE-OUTER: succesfull in %zd attempt(s)\n", i);
237 break;
238 }
239 }
240 // Read the control file and do the merge.
241 Merger M;
242 std::ifstream IF(CFPath);
243 M.ParseOrExit(IF, true);
244 IF.close();
245 std::vector<std::string> NewFiles;
246 size_t NumNewFeatures = M.Merge(&NewFiles);
247 Printf("MERGE-OUTER: %zd new files with %zd new features added\n",
248 NewFiles.size(), NumNewFeatures);
249 for (auto &F: NewFiles)
250 WriteToOutputCorpus(FileToVector(F));
251 // We are done, delete the control file.
252 DeleteFile(CFPath);
253}
254
255} // namespace fuzzer