[libclang] Store unsaved file hashes when recording parsing invocations

Storing the contents of unsaved files is too expensive.
Instead, a hash of each unsaved file is stored with the recorded invocation.
When a reproducer is generated, Clang will compare the stored hashes to the new
hashes to determine if the contents of a file have changed. This way we'll know
when a reproducer was generated for a different source from the one that
triggered the original crash.

rdar://35322543

llvm-svn: 319729
diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp
index b642014..b2edd42 100644
--- a/clang/tools/libclang/CIndex.cpp
+++ b/clang/tools/libclang/CIndex.cpp
@@ -3438,10 +3438,9 @@
   unsigned PrecompilePreambleAfterNParses =
       !PrecompilePreamble ? 0 : 2 - CreatePreambleOnFirstParse;
 
-  // FIXME: Record the hash of the unsaved files.
   LibclangInvocationReporter InvocationReporter(
       *CXXIdx, LibclangInvocationReporter::OperationKind::ParseOperation,
-      options, llvm::makeArrayRef(*Args));
+      options, llvm::makeArrayRef(*Args), unsaved_files);
   std::unique_ptr<ASTUnit> Unit(ASTUnit::LoadFromCommandLine(
       Args->data(), Args->data() + Args->size(),
       CXXIdx->getPCHContainerOperations(), Diags,
diff --git a/clang/tools/libclang/CIndexer.cpp b/clang/tools/libclang/CIndexer.cpp
index 13774bd..b705016 100644
--- a/clang/tools/libclang/CIndexer.cpp
+++ b/clang/tools/libclang/CIndexer.cpp
@@ -12,11 +12,13 @@
 //===----------------------------------------------------------------------===//
 
 #include "CIndexer.h"
+#include "CXString.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/Version.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Config/llvm-config.h"
+#include "llvm/Support/MD5.h"
 #include "llvm/Support/MutexGuard.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
@@ -90,7 +92,8 @@
 
 LibclangInvocationReporter::LibclangInvocationReporter(
     CIndexer &Idx, OperationKind Op, unsigned ParseOptions,
-    llvm::ArrayRef<const char *> Args) {
+    llvm::ArrayRef<const char *> Args,
+    llvm::ArrayRef<CXUnsavedFile> UnsavedFiles) {
   StringRef Path = Idx.getInvocationEmissionPath();
   if (Path.empty())
     return;
@@ -124,6 +127,23 @@
       OS << ',';
     OS << '"' << I.value() << '"';
   }
+  if (!UnsavedFiles.empty()) {
+    OS << R"(],"unsaved_file_hashes":[)";
+    for (const auto &UF : llvm::enumerate(UnsavedFiles)) {
+      if (UF.index())
+        OS << ',';
+      OS << '{';
+      WriteStringKey("name", UF.value().Filename);
+      OS << ',';
+      llvm::MD5 Hash;
+      Hash.update(getContents(UF.value()));
+      llvm::MD5::MD5Result Result;
+      Hash.final(Result);
+      SmallString<32> Digest = Result.digest();
+      WriteStringKey("md5", Digest);
+      OS << '}';
+    }
+  }
   OS << "]}";
 }
 
diff --git a/clang/tools/libclang/CIndexer.h b/clang/tools/libclang/CIndexer.h
index b3346cd..dafbb08 100644
--- a/clang/tools/libclang/CIndexer.h
+++ b/clang/tools/libclang/CIndexer.h
@@ -94,7 +94,8 @@
 
   LibclangInvocationReporter(CIndexer &Idx, OperationKind Op,
                              unsigned ParseOptions,
-                             llvm::ArrayRef<const char *> Args);
+                             llvm::ArrayRef<const char *> Args,
+                             llvm::ArrayRef<CXUnsavedFile> UnsavedFiles);
   ~LibclangInvocationReporter();
 
 private: