[Reproducers] Add file provider

This patch adds the file provider which is responsible for capturing
files used by LLDB.

When capturing a reproducer, we use a file collector that is very
similar to the one used in clang. For every file that we touch, we add
an entry with a mapping from its virtual to its real path. When we
decide to generate a reproducer we copy over the files and their
permissions into the reproducer folder.

When replaying a reproducer, we load the VFS mapping and instantiate a
RedirectingFileSystem. The latter will transparently use the files
available in the reproducer.

I've tested this on two macOS machines with an artificial example.
Still, it is very likely that I missed some places where we (still) use
native file system calls. I'm hoping to flush those out while testing
with more advanced examples. However, I will fix those things in
separate patches.

Differential revision: https://reviews.llvm.org/D54617

llvm-svn: 352538
diff --git a/lldb/include/lldb/Host/FileSystem.h b/lldb/include/lldb/Host/FileSystem.h
index c023400..c8da1d1 100644
--- a/lldb/include/lldb/Host/FileSystem.h
+++ b/lldb/include/lldb/Host/FileSystem.h
@@ -11,6 +11,7 @@
 
 #include "lldb/Host/File.h"
 #include "lldb/Utility/DataBufferLLVM.h"
+#include "lldb/Utility/FileCollector.h"
 #include "lldb/Utility/FileSpec.h"
 #include "lldb/Utility/Status.h"
 
@@ -30,8 +31,15 @@
   static const char *DEV_NULL;
   static const char *PATH_CONVERSION_ERROR;
 
-  FileSystem() : m_fs(llvm::vfs::getRealFileSystem()) {}
-  FileSystem(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs) : m_fs(fs) {}
+  FileSystem()
+      : m_fs(llvm::vfs::getRealFileSystem()), m_collector(nullptr),
+        m_mapped(false) {}
+  FileSystem(FileCollector &collector)
+      : m_fs(llvm::vfs::getRealFileSystem()), m_collector(&collector),
+        m_mapped(false) {}
+  FileSystem(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs,
+             bool mapped = false)
+      : m_fs(fs), m_collector(nullptr), m_mapped(mapped) {} // no collector
 
   FileSystem(const FileSystem &fs) = delete;
   FileSystem &operator=(const FileSystem &fs) = delete;
@@ -39,6 +47,8 @@
   static FileSystem &Instance();
 
   static void Initialize();
+  static void Initialize(FileCollector &collector);
+  static llvm::Error Initialize(const FileSpec &mapping);
   static void Initialize(llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> fs);
   static void Terminate();
 
@@ -167,9 +177,14 @@
   std::error_code GetRealPath(const llvm::Twine &path,
                               llvm::SmallVectorImpl<char> &output) const;
 
+  llvm::ErrorOr<std::string> GetExternalPath(const llvm::Twine &path);
+  llvm::ErrorOr<std::string> GetExternalPath(const FileSpec &file_spec);
+
 private:
   static llvm::Optional<FileSystem> &InstanceImpl();
   llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> m_fs;
+  FileCollector *m_collector;
+  bool m_mapped;
 };
 } // namespace lldb_private
 
diff --git a/lldb/include/lldb/Utility/FileCollector.h b/lldb/include/lldb/Utility/FileCollector.h
new file mode 100644
index 0000000..d7bdcf4
--- /dev/null
+++ b/lldb/include/lldb/Utility/FileCollector.h
@@ -0,0 +1,74 @@
+//===-- FileCollector.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLDB_UTILITY_FILE_COLLECTOR_H
+#define LLDB_UTILITY_FILE_COLLECTOR_H
+
+#include "lldb/Utility/FileSpec.h"
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/VirtualFileSystem.h"
+
+#include <mutex>
+
+namespace lldb_private {
+
+/// Collects files into a directory and generates a mapping that can be used by
+/// the VFS.
+class FileCollector {
+public:
+  FileCollector(const FileSpec &root);
+
+  void AddFile(const llvm::Twine &file);
+  void AddFile(const FileSpec &file) { return AddFile(file.GetPath()); }
+
+  /// Write the yaml mapping (for the VFS) to the given file.
+  std::error_code WriteMapping(const FileSpec &mapping_file);
+
+  /// Copy the files into the root directory.
+  ///
+  /// When stop_on_error is true (the default) we abort as soon as one file
+  /// cannot be copied. This is relatively common, for example when a file was
+  /// removed after it was added to the mapping.
+  std::error_code CopyFiles(bool stop_on_error = true);
+
+protected:
+  void AddFileImpl(llvm::StringRef src_path);
+
+  bool MarkAsSeen(llvm::StringRef path) { return m_seen.insert(path).second; }
+
+  bool GetRealPath(llvm::StringRef src_path,
+                   llvm::SmallVectorImpl<char> &result);
+
+  void AddFileToMapping(llvm::StringRef virtual_path,
+                        llvm::StringRef real_path) {
+    m_vfs_writer.addFileMapping(virtual_path, real_path);
+  }
+
+  /// Synchronizes adding files.
+  std::mutex m_mutex;
+
+  /// The root directory where files are copied.
+  FileSpec m_root;
+
+  /// Tracks already seen files so they can be skipped.
+  llvm::StringSet<> m_seen;
+
+  /// The yaml mapping writer.
+  llvm::vfs::YAMLVFSWriter m_vfs_writer;
+
+  /// Caches real_path calls when resolving symlinks.
+  llvm::StringMap<std::string> m_symlink_map;
+};
+
+} // namespace lldb_private
+
+#endif // LLDB_UTILITY_FILE_COLLECTOR_H
diff --git a/lldb/include/lldb/Utility/Reproducer.h b/lldb/include/lldb/Utility/Reproducer.h
index 2ad2546..83f7d01 100644
--- a/lldb/include/lldb/Utility/Reproducer.h
+++ b/lldb/include/lldb/Utility/Reproducer.h
@@ -9,6 +9,7 @@
 #ifndef LLDB_UTILITY_REPRODUCER_H
 #define LLDB_UTILITY_REPRODUCER_H
 
+#include "lldb/Utility/FileCollector.h"
 #include "lldb/Utility/FileSpec.h"
 
 #include "llvm/ADT/DenseMap.h"
@@ -81,6 +82,35 @@
   using ProviderBase::ProviderBase; // Inherit constructor.
 };
 
+struct FileInfo {
+  static const char *name;
+  static const char *file;
+};
+
+class FileProvider : public Provider<FileProvider> {
+public:
+  typedef FileInfo info;
+
+  FileProvider(const FileSpec &directory)
+      : Provider(directory),
+        m_collector(directory.CopyByAppendingPathComponent("root")) {}
+
+  FileCollector &GetFileCollector() { return m_collector; }
+
+  void Keep() override {
+    auto mapping = GetRoot().CopyByAppendingPathComponent(info::file);
+    // Temporary files that are removed during execution can cause copy errors.
+    if (auto ec = m_collector.CopyFiles(/*stop_on_error=*/false))
+      return;
+    m_collector.WriteMapping(mapping);
+  }
+
+  static char ID;
+
+private:
+  FileCollector m_collector;
+};
+
 /// The generator is responsible for the logic needed to generate a
 /// reproducer. For doing so it relies on providers, who serialize data that
 /// is necessary for reproducing  a failure.
diff --git a/lldb/lit/Reproducer/Inputs/FileCapture.in b/lldb/lit/Reproducer/Inputs/FileCapture.in
new file mode 100644
index 0000000..bf6f852
--- /dev/null
+++ b/lldb/lit/Reproducer/Inputs/FileCapture.in
@@ -0,0 +1,3 @@
+run
+reproducer status
+reproducer generate
diff --git a/lldb/lit/Reproducer/Inputs/FileReplay.in b/lldb/lit/Reproducer/Inputs/FileReplay.in
new file mode 100644
index 0000000..2ed2de1
--- /dev/null
+++ b/lldb/lit/Reproducer/Inputs/FileReplay.in
@@ -0,0 +1,2 @@
+reproducer status
+run
diff --git a/lldb/lit/Reproducer/TestFileRepro.test b/lldb/lit/Reproducer/TestFileRepro.test
new file mode 100644
index 0000000..b33f39e
--- /dev/null
+++ b/lldb/lit/Reproducer/TestFileRepro.test
@@ -0,0 +1,20 @@
+# REQUIRES: system-darwin
+
+# This tests that files used by LLDB are captured and replayed.
+#
+# We issue the same commands and ensure the output is identical to the original
+# process. To ensure we're not actually running the original binary we check
+# that the string "testing" is not printed.
+
+# RUN: %clang %S/Inputs/simple.c -g -o %t.out
+# RUN: %lldb -x -b -s %S/Inputs/FileCapture.in --capture %t.repro -- %t.out | FileCheck %s --check-prefix CHECK --check-prefix CAPTURE
+# RUN: rm %t.out
+# RUN: %lldb -x -b -s %S/Inputs/FileReplay.in --replay %t.repro -- %t.out | FileCheck %s --check-prefix CHECK --check-prefix REPLAY
+
+# CAPTURE: testing
+# REPLAY-NOT: testing
+
+# CHECK: Process {{.*}} exited
+
+# CAPTURE: Reproducer is in capture mode.
+# CAPTURE: Reproducer written
diff --git a/lldb/lit/Reproducer/TestGDBRemoteRepro.test b/lldb/lit/Reproducer/TestGDBRemoteRepro.test
index 6a6bdd7..e254705 100644
--- a/lldb/lit/Reproducer/TestGDBRemoteRepro.test
+++ b/lldb/lit/Reproducer/TestGDBRemoteRepro.test
@@ -7,8 +7,8 @@
 # that the string "testing" is not printed.
 
 # RUN: %clang %S/Inputs/simple.c -g -o %t.out
-# RUN: %lldb -x -b -s %S/Inputs/GDBRemoteCapture.in --capture %T/reproducer -- %t.out | FileCheck %s --check-prefix CHECK --check-prefix CAPTURE
-# RUN: %lldb -x -b -s %S/Inputs/GDBRemoteReplay.in --replay %T/reproducer -- %t.out | FileCheck %s --check-prefix CHECK --check-prefix REPLAY
+# RUN: %lldb -x -b -s %S/Inputs/GDBRemoteCapture.in --capture %t.repro -- %t.out | FileCheck %s --check-prefix CHECK --check-prefix CAPTURE
+# RUN: %lldb -x -b -s %S/Inputs/GDBRemoteReplay.in --replay %t.repro -- %t.out | FileCheck %s --check-prefix CHECK --check-prefix REPLAY
 
 # CHECK: Breakpoint 1
 # CHECK: Process {{.*}} stopped
diff --git a/lldb/source/Host/common/FileSystem.cpp b/lldb/source/Host/common/FileSystem.cpp
index dc7050b..6fbd7bc 100644
--- a/lldb/source/Host/common/FileSystem.cpp
+++ b/lldb/source/Host/common/FileSystem.cpp
@@ -11,7 +11,9 @@
 #include "lldb/Utility/LLDBAssert.h"
 #include "lldb/Utility/TildeExpressionResolver.h"
 
+#include "llvm/Support/Errc.h"
 #include "llvm/Support/Errno.h"
+#include "llvm/Support/Error.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/Program.h"
@@ -47,6 +49,26 @@
   InstanceImpl().emplace();
 }
 
+void FileSystem::Initialize(FileCollector &collector) {
+  lldbassert(!InstanceImpl() && "Already initialized.");
+  InstanceImpl().emplace(collector);
+}
+
+llvm::Error FileSystem::Initialize(const FileSpec &mapping) {
+  lldbassert(!InstanceImpl() && "Already initialized.");
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> buffer =
+      llvm::vfs::getRealFileSystem()->getBufferForFile(mapping.GetPath());
+  if (!buffer)
+    return llvm::errorCodeToError(buffer.getError());
+  // getVFSFromYAML yields null for an unparsable mapping; never install that.
+  auto fs = llvm::vfs::getVFSFromYAML(std::move(buffer.get()), nullptr, "");
+  if (!fs)
+    return llvm::errorCodeToError(
+        make_error_code(llvm::errc::invalid_argument));
+  InstanceImpl().emplace(std::move(fs), true);
+  return llvm::Error::success();
+}
+
 void FileSystem::Initialize(IntrusiveRefCntPtr<vfs::FileSystem> fs) {
   lldbassert(!InstanceImpl() && "Already initialized.");
   InstanceImpl().emplace(fs);
@@ -249,18 +271,25 @@
 std::shared_ptr<DataBufferLLVM>
 FileSystem::CreateDataBuffer(const llvm::Twine &path, uint64_t size,
                              uint64_t offset) {
+  if (m_collector)
+    m_collector->AddFile(path);
+
   const bool is_volatile = !IsLocal(path);
+  const ErrorOr<std::string> external_path = GetExternalPath(path);
+
+  if (!external_path)
+    return nullptr;
 
   std::unique_ptr<llvm::WritableMemoryBuffer> buffer;
   if (size == 0) {
     auto buffer_or_error =
-        llvm::WritableMemoryBuffer::getFile(path, -1, is_volatile);
+        llvm::WritableMemoryBuffer::getFile(*external_path, -1, is_volatile);
     if (!buffer_or_error)
       return nullptr;
     buffer = std::move(*buffer_or_error);
   } else {
     auto buffer_or_error = llvm::WritableMemoryBuffer::getFileSlice(
-        path, size, offset, is_volatile);
+        *external_path, size, offset, is_volatile);
     if (!buffer_or_error)
       return nullptr;
     buffer = std::move(*buffer_or_error);
@@ -380,16 +409,22 @@
 
 Status FileSystem::Open(File &File, const FileSpec &file_spec, uint32_t options,
                         uint32_t permissions) {
+  if (m_collector)
+    m_collector->AddFile(file_spec);
+
   if (File.IsValid())
     File.Close();
 
   const int open_flags = GetOpenFlags(options);
   const mode_t open_mode =
       (open_flags & O_CREAT) ? GetOpenMode(permissions) : 0;
-  const std::string path = file_spec.GetPath();
+
+  auto path = GetExternalPath(file_spec);
+  if (!path)
+    return Status(path.getError());
 
   int descriptor = llvm::sys::RetryAfterSignal(
-      -1, OpenWithFS, *this, path.c_str(), open_flags, open_mode);
+      -1, OpenWithFS, *this, path->c_str(), open_flags, open_mode);
 
   Status error;
   if (!File::DescriptorIsValid(descriptor)) {
@@ -401,3 +436,28 @@
   }
   return error;
 }
+
+ErrorOr<std::string> FileSystem::GetExternalPath(const llvm::Twine &path) {
+  if (!m_mapped)
+    return path.str();
+
+  // If VFS mapped we know the underlying FS is a RedirectingFileSystem.
+  ErrorOr<vfs::RedirectingFileSystem::Entry *> E =
+      static_cast<vfs::RedirectingFileSystem &>(*m_fs).lookupPath(path);
+  if (!E) {
+    if (E.getError() == llvm::errc::no_such_file_or_directory) {
+      return path.str();
+    }
+    return E.getError();
+  }
+
+  auto *F = dyn_cast<vfs::RedirectingFileSystem::RedirectingFileEntry>(*E);
+  if (!F)
+    return make_error_code(llvm::errc::not_supported);
+
+  return F->getExternalContentsPath().str();
+}
+
+ErrorOr<std::string> FileSystem::GetExternalPath(const FileSpec &file_spec) {
+  return GetExternalPath(file_spec.GetPath());
+}
diff --git a/lldb/source/Host/macosx/objcxx/Host.mm b/lldb/source/Host/macosx/objcxx/Host.mm
index 7b07f6c..3ebfc0c 100644
--- a/lldb/source/Host/macosx/objcxx/Host.mm
+++ b/lldb/source/Host/macosx/objcxx/Host.mm
@@ -1299,12 +1299,15 @@
 
   lldb::pid_t pid = LLDB_INVALID_PROCESS_ID;
 
-  if (ShouldLaunchUsingXPC(launch_info)) {
-    error = LaunchProcessXPC(exe_spec.GetPath().c_str(), launch_info, pid);
-  } else {
-    error =
-        LaunchProcessPosixSpawn(exe_spec.GetPath().c_str(), launch_info, pid);
-  }
+  // From now on we'll deal with the external (devirtualized) path.
+  auto exe_path = fs.GetExternalPath(exe_spec);
+  if (!exe_path)
+    return Status(exe_path.getError());
+
+  if (ShouldLaunchUsingXPC(launch_info))
+    error = LaunchProcessXPC(exe_path->c_str(), launch_info, pid);
+  else
+    error = LaunchProcessPosixSpawn(exe_path->c_str(), launch_info, pid);
 
   if (pid != LLDB_INVALID_PROCESS_ID) {
     // If all went well, then set the process ID into the launch info
diff --git a/lldb/source/Initialization/SystemInitializerCommon.cpp b/lldb/source/Initialization/SystemInitializerCommon.cpp
index 2cc3aa8..173d483 100644
--- a/lldb/source/Initialization/SystemInitializerCommon.cpp
+++ b/lldb/source/Initialization/SystemInitializerCommon.cpp
@@ -65,6 +65,7 @@
   }
 #endif
 
+  // Initialize the reproducer.
   ReproducerMode mode = ReproducerMode::Off;
   if (options.reproducer_capture)
     mode = ReproducerMode::Capture;
@@ -74,7 +75,23 @@
   if (auto e = Reproducer::Initialize(mode, FileSpec(options.reproducer_path)))
     return e;
 
-  FileSystem::Initialize();
+  // Initialize the file system.
+  auto &r = repro::Reproducer::Instance();
+  if (repro::Loader *loader = r.GetLoader()) {
+    FileSpec vfs_mapping = loader->GetFile<FileInfo>();
+    if (vfs_mapping) {
+      if (llvm::Error e = FileSystem::Initialize(vfs_mapping))
+        return e;
+    } else {
+      FileSystem::Initialize();
+    }
+  } else if (repro::Generator *g = r.GetGenerator()) {
+    repro::FileProvider &fp = g->GetOrCreate<repro::FileProvider>();
+    FileSystem::Initialize(fp.GetFileCollector());
+  } else {
+    FileSystem::Initialize();
+  }
+
   Log::Initialize();
   HostInfo::Initialize();
   static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
diff --git a/lldb/source/Utility/CMakeLists.txt b/lldb/source/Utility/CMakeLists.txt
index 008054a..4c981c6 100644
--- a/lldb/source/Utility/CMakeLists.txt
+++ b/lldb/source/Utility/CMakeLists.txt
@@ -54,6 +54,7 @@
   DataEncoder.cpp
   DataExtractor.cpp
   Environment.cpp
+  FileCollector.cpp
   Event.cpp
   FileSpec.cpp
   IOObject.cpp
diff --git a/lldb/source/Utility/FileCollector.cpp b/lldb/source/Utility/FileCollector.cpp
new file mode 100644
index 0000000..1758ad8c
--- /dev/null
+++ b/lldb/source/Utility/FileCollector.cpp
@@ -0,0 +1,148 @@
+//===-- FileCollector.cpp ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Utility/FileCollector.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+
+using namespace lldb_private;
+using namespace llvm;
+
+static bool IsCaseSensitivePath(StringRef path) {
+  SmallString<256> tmp_dest = path, upper_dest, real_dest;
+
+  // Remove component traversals, links, etc. sys::fs::real_path returns a
+  // std::error_code, which converts to true on *failure*: if the path cannot
+  // be resolved, fall back to the YAMLVFSWriter default (case sensitive).
+  if (sys::fs::real_path(path, tmp_dest))
+    return true;
+  path = tmp_dest;
+
+  // Resolve the upper-cased path. If that succeeds and yields the same real
+  // path, the file system does not distinguish case.
+  upper_dest = path.upper();
+  if (!sys::fs::real_path(upper_dest, real_dest) && path.equals(real_dest))
+    return false;
+  return true;
+}
+
+FileCollector::FileCollector(const FileSpec &root) : m_root(root) {
+  sys::fs::create_directories(m_root.GetPath(), true);
+}
+
+bool FileCollector::GetRealPath(StringRef src_path,
+                                SmallVectorImpl<char> &result) {
+  SmallString<256> real_path;
+  StringRef FileName = sys::path::filename(src_path);
+  std::string directory = sys::path::parent_path(src_path).str();
+  auto dir_with_symlink = m_symlink_map.find(directory);
+
+  // Use real_path to fix any symbolic link component present in a path.
+  // Computing the real path is expensive, cache the search through the
+  // parent path directory.
+  if (dir_with_symlink == m_symlink_map.end()) {
+    auto ec = sys::fs::real_path(directory, real_path);
+    if (ec)
+      return false;
+    m_symlink_map[directory] = real_path.str();
+  } else {
+    real_path = dir_with_symlink->second;
+  }
+
+  sys::path::append(real_path, FileName);
+  result.swap(real_path);
+  return true;
+}
+
+void FileCollector::AddFile(const Twine &file) {
+  std::lock_guard<std::mutex> lock(m_mutex);
+  std::string file_str = file.str();
+  if (MarkAsSeen(file_str))
+    AddFileImpl(file_str);
+}
+
+void FileCollector::AddFileImpl(StringRef src_path) {
+  std::string root = m_root.GetPath();
+
+  // We need an absolute src path to append to the root.
+  SmallString<256> absolute_src = src_path;
+  sys::fs::make_absolute(absolute_src);
+
+  // Canonicalize src to a native path to avoid mixed separator styles.
+  sys::path::native(absolute_src);
+
+  // Remove redundant leading "./" pieces and consecutive separators.
+  absolute_src = sys::path::remove_leading_dotslash(absolute_src);
+
+  // Canonicalize the source path by removing "..", "." components.
+  SmallString<256> virtual_path = absolute_src;
+  sys::path::remove_dots(virtual_path, /*remove_dot_dot=*/true);
+
+  // If a ".." component is present after a symlink component, remove_dots may
+  // lead to the wrong real destination path. Let the source be canonicalized
+  // like that but make sure we always use the real path for the destination.
+  SmallString<256> copy_from;
+  if (!GetRealPath(absolute_src, copy_from))
+    copy_from = virtual_path;
+
+  SmallString<256> dst_path = StringRef(root);
+  sys::path::append(dst_path, sys::path::relative_path(copy_from));
+
+  // Always map a canonical src path to its real path into the YAML, by doing
+  // this we map different virtual src paths to the same entry in the VFS
+  // overlay, which is a way to emulate symlink inside the VFS; this is also
+  // needed for correctness, not doing that can lead to module redefinition
+  // errors.
+  AddFileToMapping(virtual_path, dst_path);
+}
+
+std::error_code FileCollector::CopyFiles(bool stop_on_error) {
+  // AddFile mutates the mapping list under m_mutex and WriteMapping uses it
+  // under the same lock; take it here too so the iteration cannot race.
+  std::lock_guard<std::mutex> lock(m_mutex);
+
+  for (auto &entry : m_vfs_writer.getMappings()) {
+    // Create directory tree.
+    std::error_code ec = sys::fs::create_directories(
+        sys::path::parent_path(entry.RPath), /*IgnoreExisting=*/true);
+    if (ec && stop_on_error)
+      return ec;
+
+    // Copy file over.
+    ec = sys::fs::copy_file(entry.VPath, entry.RPath);
+    if (ec && stop_on_error)
+      return ec;
+
+    // Copy over permissions.
+    if (auto perms = sys::fs::getPermissions(entry.VPath)) {
+      ec = sys::fs::setPermissions(entry.RPath, *perms);
+      if (ec && stop_on_error)
+        return ec;
+    }
+  }
+  return {};
+}
+
+std::error_code FileCollector::WriteMapping(const FileSpec &mapping_file) {
+  std::lock_guard<std::mutex> lock(m_mutex);
+
+  const std::string root = m_root.GetPath();
+  m_vfs_writer.setCaseSensitivity(IsCaseSensitivePath(root));
+  m_vfs_writer.setUseExternalNames(false);
+
+  std::error_code ec;
+  raw_fd_ostream os(mapping_file.GetPath(), ec, sys::fs::F_Text);
+  if (ec)
+    return ec;
+
+  m_vfs_writer.write(os);
+
+  return {};
+}
diff --git a/lldb/source/Utility/Reproducer.cpp b/lldb/source/Utility/Reproducer.cpp
index 059702f..3fb0624 100644
--- a/lldb/source/Utility/Reproducer.cpp
+++ b/lldb/source/Utility/Reproducer.cpp
@@ -220,3 +220,6 @@
 
 void ProviderBase::anchor() {}
 char ProviderBase::ID = 0;
+char FileProvider::ID = 0;
+const char *FileInfo::name = "files";
+const char *FileInfo::file = "files.yaml";
diff --git a/lldb/unittests/Utility/CMakeLists.txt b/lldb/unittests/Utility/CMakeLists.txt
index b3970fe..1e7baef 100644
--- a/lldb/unittests/Utility/CMakeLists.txt
+++ b/lldb/unittests/Utility/CMakeLists.txt
@@ -10,6 +10,7 @@
   DataExtractorTest.cpp
   EnvironmentTest.cpp
   EventTest.cpp
+  FileCollectorTest.cpp
   FileSpecTest.cpp
   FlagsTest.cpp
   JSONTest.cpp
diff --git a/lldb/unittests/Utility/FileCollectorTest.cpp b/lldb/unittests/Utility/FileCollectorTest.cpp
new file mode 100644
index 0000000..bf49f59
--- /dev/null
+++ b/lldb/unittests/Utility/FileCollectorTest.cpp
@@ -0,0 +1,214 @@
+//===-- FileCollectorTest.cpp -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include "lldb/Utility/FileCollector.h"
+#include "lldb/Utility/FileSpec.h"
+
+#include "llvm/Support/FileSystem.h"
+
+using namespace llvm;
+using namespace lldb_private;
+
+namespace llvm {
+namespace vfs {
+inline bool operator==(const llvm::vfs::YAMLVFSEntry &LHS,
+                       const llvm::vfs::YAMLVFSEntry &RHS) {
+  return LHS.VPath == RHS.VPath && LHS.RPath == RHS.RPath;
+}
+} // namespace vfs
+} // namespace llvm
+
+namespace {
+class TestingFileCollector : public FileCollector {
+public:
+  using FileCollector::FileCollector;
+  using FileCollector::m_root;
+  using FileCollector::m_seen;
+  using FileCollector::m_symlink_map;
+  using FileCollector::m_vfs_writer;
+
+  bool HasSeen(FileSpec fs) {
+    return m_seen.find(fs.GetPath()) != m_seen.end();
+  }
+};
+
+struct ScopedDir {
+  SmallString<128> Path;
+  ScopedDir(const Twine &Name, bool Unique = false) {
+    std::error_code EC;
+    if (Unique) {
+      EC = llvm::sys::fs::createUniqueDirectory(Name, Path);
+    } else {
+      Path = Name.str();
+      EC = llvm::sys::fs::create_directory(Twine(Path));
+    }
+    if (EC)
+      Path = "";
+    EXPECT_FALSE(EC);
+    // Ensure the path is the real path so tests can use it to compare against
+    // realpath output.
+    SmallString<128> RealPath;
+    if (!llvm::sys::fs::real_path(Path, RealPath))
+      Path.swap(RealPath);
+  }
+  ~ScopedDir() {
+    if (Path != "") {
+      EXPECT_FALSE(llvm::sys::fs::remove_directories(Path.str()));
+    }
+  }
+  operator StringRef() { return Path.str(); }
+};
+
+struct ScopedLink {
+  SmallString<128> Path;
+  ScopedLink(const Twine &To, const Twine &From) {
+    Path = From.str();
+    std::error_code EC = sys::fs::create_link(To, From);
+    if (EC)
+      Path = "";
+    EXPECT_FALSE(EC);
+  }
+  ~ScopedLink() {
+    if (Path != "") {
+      EXPECT_FALSE(llvm::sys::fs::remove(Path.str()));
+    }
+  }
+  operator StringRef() { return Path.str(); }
+};
+
+struct ScopedFile {
+  SmallString<128> Path;
+  ScopedFile(const Twine &Name) {
+    std::error_code EC;
+    EC = llvm::sys::fs::createUniqueFile(Name, Path);
+    if (EC)
+      Path = "";
+    EXPECT_FALSE(EC);
+  }
+  ~ScopedFile() {
+    if (Path != "") {
+      EXPECT_FALSE(llvm::sys::fs::remove(Path.str()));
+    }
+  }
+  operator StringRef() { return Path.str(); }
+};
+} // end anonymous namespace
+
+TEST(FileCollectorTest, AddFile) {
+  ScopedDir root("add_file_root", true);
+  FileSpec root_fs(root.Path);
+  TestingFileCollector file_collector(root_fs);
+
+  file_collector.AddFile(FileSpec("/path/to/a"));
+  file_collector.AddFile(FileSpec("/path/to/b"));
+  file_collector.AddFile(FileSpec("/path/to/c"));
+
+  // Make sure the root is correct.
+  EXPECT_EQ(file_collector.m_root, root_fs);
+
+  // Make sure we've seen all the added files.
+  EXPECT_TRUE(file_collector.HasSeen(FileSpec("/path/to/a")));
+  EXPECT_TRUE(file_collector.HasSeen(FileSpec("/path/to/b")));
+  EXPECT_TRUE(file_collector.HasSeen(FileSpec("/path/to/c")));
+
+  // Make sure we've only seen the added files.
+  EXPECT_FALSE(file_collector.HasSeen(FileSpec("/path/to/d")));
+}
+
+TEST(FileCollectorTest, CopyFiles) {
+  ScopedDir file_root("file_root", true);
+  ScopedFile a(file_root + "/aaa");
+  ScopedFile b(file_root + "/bbb");
+  ScopedFile c(file_root + "/ccc");
+
+  // Create file collector and add files.
+  ScopedDir root("copy_files_root", true);
+  FileSpec root_fs(root.Path);
+  TestingFileCollector file_collector(root_fs);
+  file_collector.AddFile(a.Path);
+  file_collector.AddFile(b.Path);
+  file_collector.AddFile(c.Path);
+
+  // Make sure we can copy the files.
+  std::error_code ec = file_collector.CopyFiles(true);
+  EXPECT_FALSE(ec);
+
+  // Now add a bogus file and make sure we error out.
+  file_collector.AddFile("/some/bogus/file");
+  ec = file_collector.CopyFiles(true);
+  EXPECT_TRUE(ec);
+
+  // However, if stop_on_error is false the copy should still succeed.
+  ec = file_collector.CopyFiles(false);
+  EXPECT_FALSE(ec);
+}
+
+#ifndef _WIN32
+TEST(FileCollectorTest, Symlinks) {
+  // Root where the original files live.
+  ScopedDir file_root("file_root", true);
+
+  // Create some files in the file root.
+  ScopedFile a(file_root + "/aaa");
+  ScopedFile b(file_root + "/bbb");
+  ScopedFile c(file_root + "/ccc");
+
+  // Create a directory foo with file ddd.
+  ScopedDir foo(file_root + "/foo");
+  ScopedFile d(foo + "/ddd");
+
+  // Create a file eee in the foo's parent directory.
+  ScopedFile e(foo + "/../eee");
+
+  // Create a symlink bar pointing to foo.
+  ScopedLink symlink(file_root + "/foo", file_root + "/bar");
+
+  // Root where files are copied to.
+  ScopedDir reproducer_root("reproducer_root", true);
+  FileSpec root_fs(reproducer_root.Path);
+  TestingFileCollector file_collector(root_fs);
+
+  // Add all the files to the collector.
+  file_collector.AddFile(a.Path);
+  file_collector.AddFile(b.Path);
+  file_collector.AddFile(c.Path);
+  file_collector.AddFile(d.Path);
+  file_collector.AddFile(e.Path);
+  file_collector.AddFile(file_root + "/bar/ddd");
+
+  auto mapping = file_collector.m_vfs_writer.getMappings();
+
+  {
+    // Make sure the common case works.
+    std::string vpath = (file_root + "/aaa").str();
+    std::string rpath = (reproducer_root.Path + file_root.Path + "/aaa").str();
+    printf("%s -> %s\n", vpath.c_str(), rpath.c_str());
+    EXPECT_THAT(mapping, testing::Contains(vfs::YAMLVFSEntry(vpath, rpath)));
+  }
+
+  {
+    // Make sure the virtual path points to the real source path.
+    std::string vpath = (file_root + "/bar/ddd").str();
+    std::string rpath =
+        (reproducer_root.Path + file_root.Path + "/foo/ddd").str();
+    printf("%s -> %s\n", vpath.c_str(), rpath.c_str());
+    EXPECT_THAT(mapping, testing::Contains(vfs::YAMLVFSEntry(vpath, rpath)));
+  }
+
+  {
+    // Make sure that .. is removed from the source path.
+    std::string vpath = (file_root + "/eee").str();
+    std::string rpath = (reproducer_root.Path + file_root.Path + "/eee").str();
+    printf("%s -> %s\n", vpath.c_str(), rpath.c_str());
+    EXPECT_THAT(mapping, testing::Contains(vfs::YAMLVFSEntry(vpath, rpath)));
+  }
+}
+#endif