[libFuzzer] new experimental feature: value profiling. Profiles values that affect control flow and treats new values as new coverage.

llvm-svn: 278839
diff --git a/llvm/lib/Fuzzer/FuzzerTraceState.cpp b/llvm/lib/Fuzzer/FuzzerTraceState.cpp
index 6f87fca..9eae068 100644
--- a/llvm/lib/Fuzzer/FuzzerTraceState.cpp
+++ b/llvm/lib/Fuzzer/FuzzerTraceState.cpp
@@ -173,6 +173,7 @@
 static bool RecordingTraces = false;
 static bool RecordingMemcmp = false;
 static bool RecordingMemmem = false;
+static bool RecordingValueProfile = false;
 static bool DoingMyOwnMemmem = false;
 
 struct ScopedDoingMyOwnMemmem {
@@ -529,11 +530,60 @@
   return Len;
 }
 
+// Value profile.
+// We keep track of various values that affect control flow.
+// These values are inserted into a bit-set-based hash map (ValueBitMap VP).
+// Every new bit in the map is treated as a new coverage.
+//
+// For memcmp/strcmp/etc the interesting value is the length of the common
+// prefix of the parameters.
+// For cmp instructions the interesting value is a XOR of the parameters.
+// The interesting value is mixed up with the PC and is then added to the map.
+static ValueBitMap VP;
+
+void EnableValueProfile() { RecordingValueProfile = true; }
+
+size_t VPMapMergeFromCurrent(ValueBitMap &M) {
+  if (!RecordingValueProfile) return 0;
+  return M.MergeFrom(VP);
+}
+
+static void AddValueForMemcmp(void *caller_pc, const void *s1, const void *s2,
+                              size_t n) {
+  if (!n) return;
+  size_t Len = std::min(n, (size_t)32);
+  const char *A1 = reinterpret_cast<const char *>(s1);
+  const char *A2 = reinterpret_cast<const char *>(s2);
+  size_t LastSameByte = 0;
+  for (; LastSameByte < Len; LastSameByte++)
+    if (A1[LastSameByte] != A2[LastSameByte])
+      break;
+  size_t PC = reinterpret_cast<size_t>(caller_pc);
+  VP.AddValue((PC & 4095) | (LastSameByte << 12));
+}
+
+static void AddValueForStrcmp(void *caller_pc, const char *s1, const char *s2,
+                              size_t n) {
+  if (!n) return;
+  size_t Len = std::min(n, (size_t)32);
+  size_t LastSameByte = 0;
+  for (; LastSameByte < Len; LastSameByte++)
+    if (s1[LastSameByte] != s2[LastSameByte] || s1[LastSameByte] == 0)
+      break;
+  size_t PC = reinterpret_cast<size_t>(caller_pc);
+  VP.AddValue((PC & 4095) | (LastSameByte << 12));
+}
+
+static void AddValueForCmp(uintptr_t PC, uint64_t Arg1, uint64_t Arg2) {
+  VP.AddValue((PC & 4095) | (__builtin_popcountl(Arg1 ^ Arg2) << 12));
+}
+
 }  // namespace fuzzer
 
 using fuzzer::TS;
 using fuzzer::RecordingTraces;
 using fuzzer::RecordingMemcmp;
+using fuzzer::RecordingValueProfile;
 
 extern "C" {
 void __dfsw___sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
@@ -597,6 +647,8 @@
 #if LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
 void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1,
                                   const void *s2, size_t n, int result) {
+  if (RecordingValueProfile)
+    fuzzer::AddValueForMemcmp(caller_pc, s1, s2, n);
   if (!RecordingMemcmp) return;
   if (result == 0) return;  // No reason to mutate.
   if (n <= 1) return;  // Not interesting.
@@ -606,6 +658,8 @@
 
 void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1,
                                    const char *s2, size_t n, int result) {
+  if (RecordingValueProfile)
+    fuzzer::AddValueForStrcmp(caller_pc, s1, s2, n);
   if (!RecordingMemcmp) return;
   if (result == 0) return;  // No reason to mutate.
   size_t Len1 = fuzzer::InternalStrnlen(s1, n);
@@ -619,6 +673,8 @@
 
 void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1,
                                    const char *s2, int result) {
+  if (RecordingValueProfile)
+    fuzzer::AddValueForStrcmp(caller_pc, s1, s2, 64);
   if (!RecordingMemcmp) return;
   if (result == 0) return;  // No reason to mutate.
   size_t Len1 = strlen(s1);
@@ -656,11 +712,15 @@
 __attribute__((visibility("default")))
 void __sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
                                uint64_t Arg2) {
-  if (!RecordingTraces) return;
-  uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
-  uint64_t CmpSize = (SizeAndType >> 32) / 8;
-  uint64_t Type = (SizeAndType << 32) >> 32;
-  TS->TraceCmpCallback(PC, CmpSize, Type, Arg1, Arg2);
+  if (RecordingTraces) {
+    uintptr_t PC = reinterpret_cast<uintptr_t>(__builtin_return_address(0));
+    uint64_t CmpSize = (SizeAndType >> 32) / 8;
+    uint64_t Type = (SizeAndType << 32) >> 32;
+    TS->TraceCmpCallback(PC, CmpSize, Type, Arg1, Arg2);
+  }
+  if (RecordingValueProfile)
+    fuzzer::AddValueForCmp(
+        reinterpret_cast<uintptr_t>(__builtin_return_address(0)), Arg1, Arg2);
 }
 
 __attribute__((visibility("default")))