[libFuzzer] experimental option -cleanse_crash: tries to replace all bytes in a crash reproducer with garbage, while still preserving the crash

llvm-svn: 300498
diff --git a/llvm/lib/Fuzzer/FuzzerDriver.cpp b/llvm/lib/Fuzzer/FuzzerDriver.cpp
index 0fb83ca..e085e65 100644
--- a/llvm/lib/Fuzzer/FuzzerDriver.cpp
+++ b/llvm/lib/Fuzzer/FuzzerDriver.cpp
@@ -289,6 +289,66 @@
   return S.substr(Beg, End - Beg);
 }
 
+int CleanseCrashInput(const std::vector<std::string> &Args,  // -cleanse_crash driver: re-runs the command on edited copies of the input
+                       const FuzzingOptions &Options) {  // and keeps each byte replacement that still yields a nonzero exit code.
+  if (Inputs->size() != 1 || !Flags.exact_artifact_path) {  // Exactly one input plus an output path are required.
+    Printf("ERROR: -cleanse_crash should be given one input file and"
+          " -exact_artifact_path\n");
+    exit(1);  // Usage error terminates the process.
+  }
+  std::string InputFilePath = Inputs->at(0);
+  std::string OutputFilePath = Flags.exact_artifact_path;  // Destination for the cleansed bytes.
+  std::string BaseCmd =
+      CloneArgsWithoutX(Args, "cleanse_crash", "cleanse_crash");  // Presumably Args minus the cleanse_crash flag; helper not shown here.
+
+  auto InputPos = BaseCmd.find(" " + InputFilePath + " ");  // Locate the space-delimited input path.
+  assert(InputPos != std::string::npos);  // The path is expected to be present in BaseCmd.
+  BaseCmd.erase(InputPos, InputFilePath.size() + 1);  // Removes the leading space and the path; the trailing space remains.
+
+  auto LogFilePath = DirPlusFile(
+      TmpDir(), "libFuzzerTemp." + std::to_string(GetPid()) + ".txt");  // Scratch log; the name embeds GetPid().
+  auto TmpFilePath = DirPlusFile(
+      TmpDir(), "libFuzzerTemp." + std::to_string(GetPid()) + ".repro");  // Scratch copy of the candidate bytes for each run.
+  auto LogFileRedirect = " > " + LogFilePath + " 2>&1 ";  // Sends stdout and stderr to the log, assuming ExecuteCommand uses a shell.
+
+  auto Cmd = BaseCmd + " " + TmpFilePath + LogFileRedirect;  // Same command line, but reading the scratch input.
+
+  std::string CurrentFilePath = InputFilePath;  // Only read by the next line.
+  auto U = FileToVector(CurrentFilePath);  // Input contents; edited in place below.
+  size_t Size = U.size();
+
+  const std::vector<uint8_t> ReplacementBytes = {' ', 0xff};  // Filler values, tried in this order.
+  for (int NumAttempts = 0; NumAttempts < 5; NumAttempts++) {  // At most 5 passes over the whole input.
+    bool Changed = false;  // Set once any byte is replaced in this pass.
+    for (size_t Idx = 0; Idx < Size; Idx++) {
+      Printf("CLEANSE[%d]: Trying to replace byte %zd of %zd\n", NumAttempts,
+             Idx, Size);
+      uint8_t OriginalByte = U[Idx];  // Kept so a rejected attempt can be undone.
+      if (ReplacementBytes.end() != std::find(ReplacementBytes.begin(),
+                                              ReplacementBytes.end(),
+                                              OriginalByte))
+        continue;  // Byte already equals a filler value; skip it.
+      for (auto NewByte : ReplacementBytes) {
+        U[Idx] = NewByte;
+        WriteToFile(U, TmpFilePath);
+        auto ExitCode = ExecuteCommand(Cmd);
+        RemoveFile(TmpFilePath);  // Scratch input is deleted after every run.
+        if (!ExitCode) {  // Exit code 0 is treated as "failure not reproduced".
+          U[Idx] = OriginalByte;  // Roll back this attempt.
+        } else {
+          Changed = true;
+          Printf("CLEANSE: Replaced byte %zd with 0x%x\n", Idx, NewByte);
+          WriteToFile(U, OutputFilePath);  // NOTE(review): the only place the output file is written.
+          break;  // Do not try further fillers for this byte.
+        }
+      }
+    }
+    if (!Changed) break;  // Stop when a full pass replaced nothing.
+  }
+  RemoveFile(LogFilePath);  // The log is deleted without being read here.
+  return 0;
+}
+
 int MinimizeCrashInput(const std::vector<std::string> &Args,
                        const FuzzingOptions &Options) {
   if (Inputs->size() != 1) {
@@ -583,6 +643,9 @@
   if (Flags.minimize_crash_internal_step)
     return MinimizeCrashInputInternalStep(F, Corpus);
 
+  if (Flags.cleanse_crash)
+    return CleanseCrashInput(Args, Options);
+
   if (auto Name = Flags.run_equivalence_server) {
     SMR.Destroy(Name);
     if (!SMR.Create(Name)) {
diff --git a/llvm/lib/Fuzzer/FuzzerFlags.def b/llvm/lib/Fuzzer/FuzzerFlags.def
index 28bf0ca..f5a9b7d 100644
--- a/llvm/lib/Fuzzer/FuzzerFlags.def
+++ b/llvm/lib/Fuzzer/FuzzerFlags.def
@@ -49,6 +49,8 @@
 FUZZER_FLAG_INT(minimize_crash, 0, "If 1, minimizes the provided"
   " crash input. Use with -runs=N or -max_total_time=N to limit "
   "the number attempts")
+FUZZER_FLAG_INT(cleanse_crash, 0, "If 1, tries to cleanse the provided"  // Handled by CleanseCrashInput() in FuzzerDriver.cpp;
+  " crash input to make it contain fewer original bytes.")  // that handler requires one input file and -exact_artifact_path.
 FUZZER_FLAG_INT(minimize_crash_internal_step, 0, "internal flag")
 FUZZER_FLAG_INT(use_counters, 1, "Use coverage counters")
 FUZZER_FLAG_INT(use_indir_calls, 1, "Use indirect caller-callee counters")
diff --git a/llvm/lib/Fuzzer/test/CMakeLists.txt b/llvm/lib/Fuzzer/test/CMakeLists.txt
index f72bc39..cd049d3 100644
--- a/llvm/lib/Fuzzer/test/CMakeLists.txt
+++ b/llvm/lib/Fuzzer/test/CMakeLists.txt
@@ -80,6 +80,7 @@
   BogusInitializeTest
   BufferOverflowOnInput
   CallerCalleeTest
+  CleanseTest
   CounterTest
   CustomCrossOverAndMutateTest
   CustomCrossOverTest
diff --git a/llvm/lib/Fuzzer/test/CleanseTest.cpp b/llvm/lib/Fuzzer/test/CleanseTest.cpp
new file mode 100644
index 0000000..faea8dc
--- /dev/null
+++ b/llvm/lib/Fuzzer/test/CleanseTest.cpp
@@ -0,0 +1,16 @@
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+
+// Test that the fuzzer is able to 'cleanse' the reproducer
+// by replacing all irrelevant bytes with garbage.
+#include <cstdint>
+#include <cstdlib>
+#include <cstddef>
+
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {  // Fuzz target: aborts only when four specific bytes match.
+  if (Size >= 20 && Data[1] == '1' && Data[5] == '5' && Data[10] == 'A' &&  // The size check comes first, so Data[19] is only read when in range.
+      Data[19] == 'Z')
+    abort();  // Deliberate crash; bytes at all other offsets are never inspected.
+  return 0;  // Any other input is accepted.
+}
+
diff --git a/llvm/lib/Fuzzer/test/cleanse.test b/llvm/lib/Fuzzer/test/cleanse.test
new file mode 100644
index 0000000..ad08591
--- /dev/null
+++ b/llvm/lib/Fuzzer/test/cleanse.test
@@ -0,0 +1,3 @@
+RUN: echo -n 0123456789ABCDEFGHIZ > %t-in
+RUN: LLVMFuzzer-CleanseTest -cleanse_crash=1 %t-in -exact_artifact_path=%t-out
+RUN: echo -n ' 1   5    A        Z' | diff - %t-out