[Reproducers] Add file provider
This patch adds the file provider which is responsible for capturing
files used by LLDB.
When capturing a reproducer, we use a file collector that is very
similar to the one used in clang. For every file that we touch, we add
an entry with a mapping from its virtual to its real path. When we
decide to generate a reproducer we copy over the files and their
permissions into the reproducer folder.
When replaying a reproducer, we load the VFS mapping and instantiate a
RedirectingFileSystem. The latter will transparently use the files
available in the reproducer.
I've tested this on two macOS machines with an artificial example.
Still, it is very likely that I missed some places where we (still) use
native file system calls. I'm hoping to flush those out while testing
with more advanced examples. However, I will fix those things in
separate patches.
Differential revision: https://reviews.llvm.org/D54617
llvm-svn: 352538
diff --git a/lldb/source/Host/common/FileSystem.cpp b/lldb/source/Host/common/FileSystem.cpp
index dc7050b..6fbd7bc 100644
--- a/lldb/source/Host/common/FileSystem.cpp
+++ b/lldb/source/Host/common/FileSystem.cpp
@@ -11,7 +11,9 @@
#include "lldb/Utility/LLDBAssert.h"
#include "lldb/Utility/TildeExpressionResolver.h"
+#include "llvm/Support/Errc.h"
#include "llvm/Support/Errno.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/Program.h"
@@ -47,6 +49,26 @@
InstanceImpl().emplace();
}
+void FileSystem::Initialize(FileCollector &collector) { // Creates the instance with a file collector attached.
+ lldbassert(!InstanceImpl() && "Already initialized."); // Asserts that no instance has been created yet.
+ InstanceImpl().emplace(collector); // Constructs the instance in place from the collector.
+}
+
+llvm::Error FileSystem::Initialize(const FileSpec &mapping) {
+  lldbassert(!InstanceImpl() && "Already initialized.");
+  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> buffer =
+      llvm::vfs::getRealFileSystem()->getBufferForFile(mapping.GetPath());
+  if (!buffer)
+    return llvm::errorCodeToError(buffer.getError());
+  // A malformed mapping yields a null VFS; fail here rather than crash later.
+  auto mapped_fs = llvm::vfs::getVFSFromYAML(std::move(buffer.get()), nullptr, "");
+  if (!mapped_fs)
+    return llvm::errorCodeToError(
+        llvm::make_error_code(llvm::errc::invalid_argument));
+  InstanceImpl().emplace(std::move(mapped_fs), true);
+  return llvm::Error::success();
+}
+
void FileSystem::Initialize(IntrusiveRefCntPtr<vfs::FileSystem> fs) {
lldbassert(!InstanceImpl() && "Already initialized.");
InstanceImpl().emplace(fs);
@@ -249,18 +271,25 @@
std::shared_ptr<DataBufferLLVM>
FileSystem::CreateDataBuffer(const llvm::Twine &path, uint64_t size,
uint64_t offset) {
+ if (m_collector)
+ m_collector->AddFile(path);
+
const bool is_volatile = !IsLocal(path);
+ const ErrorOr<std::string> external_path = GetExternalPath(path);
+
+ if (!external_path)
+ return nullptr;
std::unique_ptr<llvm::WritableMemoryBuffer> buffer;
if (size == 0) {
auto buffer_or_error =
- llvm::WritableMemoryBuffer::getFile(path, -1, is_volatile);
+ llvm::WritableMemoryBuffer::getFile(*external_path, -1, is_volatile);
if (!buffer_or_error)
return nullptr;
buffer = std::move(*buffer_or_error);
} else {
auto buffer_or_error = llvm::WritableMemoryBuffer::getFileSlice(
- path, size, offset, is_volatile);
+ *external_path, size, offset, is_volatile);
if (!buffer_or_error)
return nullptr;
buffer = std::move(*buffer_or_error);
@@ -380,16 +409,22 @@
Status FileSystem::Open(File &File, const FileSpec &file_spec, uint32_t options,
uint32_t permissions) {
+ if (m_collector)
+ m_collector->AddFile(file_spec);
+
if (File.IsValid())
File.Close();
const int open_flags = GetOpenFlags(options);
const mode_t open_mode =
(open_flags & O_CREAT) ? GetOpenMode(permissions) : 0;
- const std::string path = file_spec.GetPath();
+
+ auto path = GetExternalPath(file_spec);
+ if (!path)
+ return Status(path.getError());
int descriptor = llvm::sys::RetryAfterSignal(
- -1, OpenWithFS, *this, path.c_str(), open_flags, open_mode);
+ -1, OpenWithFS, *this, path->c_str(), open_flags, open_mode);
Status error;
if (!File::DescriptorIsValid(descriptor)) {
@@ -401,3 +436,28 @@
}
return error;
}
+
+ErrorOr<std::string> FileSystem::GetExternalPath(const llvm::Twine &path) {
+  // Without a VFS mapping the path is returned unchanged.
+  if (!m_mapped)
+    return path.str();
+
+  // A mapped instance is known to wrap a RedirectingFileSystem.
+  auto &redirecting_fs = static_cast<vfs::RedirectingFileSystem &>(*m_fs);
+  auto entry = redirecting_fs.lookupPath(path);
+  if (!entry) {
+    // Paths absent from the mapping pass through; other errors propagate.
+    const std::error_code ec = entry.getError();
+    if (ec == llvm::errc::no_such_file_or_directory)
+      return path.str();
+    return ec;
+  }
+  using FileEntry = vfs::RedirectingFileSystem::RedirectingFileEntry;
+  if (auto *file_entry = dyn_cast<FileEntry>(*entry))
+    return file_entry->getExternalContentsPath().str();
+  return make_error_code(llvm::errc::not_supported); // Not a file entry.
+}
+
+ErrorOr<std::string> FileSystem::GetExternalPath(const FileSpec &file_spec) { // Convenience overload taking a FileSpec.
+ return GetExternalPath(file_spec.GetPath()); // Forwards the spec's path string to the path-based overload.
+}
diff --git a/lldb/source/Host/macosx/objcxx/Host.mm b/lldb/source/Host/macosx/objcxx/Host.mm
index 7b07f6c..3ebfc0c 100644
--- a/lldb/source/Host/macosx/objcxx/Host.mm
+++ b/lldb/source/Host/macosx/objcxx/Host.mm
@@ -1299,12 +1299,15 @@
lldb::pid_t pid = LLDB_INVALID_PROCESS_ID;
- if (ShouldLaunchUsingXPC(launch_info)) {
- error = LaunchProcessXPC(exe_spec.GetPath().c_str(), launch_info, pid);
- } else {
- error =
- LaunchProcessPosixSpawn(exe_spec.GetPath().c_str(), launch_info, pid);
- }
+ // From now on we'll deal with the external (devirtualized) path.
+ auto exe_path = fs.GetExternalPath(exe_spec);
+ if (!exe_path)
+ return Status(exe_path.getError());
+
+ if (ShouldLaunchUsingXPC(launch_info))
+ error = LaunchProcessXPC(exe_path->c_str(), launch_info, pid);
+ else
+ error = LaunchProcessPosixSpawn(exe_path->c_str(), launch_info, pid);
if (pid != LLDB_INVALID_PROCESS_ID) {
// If all went well, then set the process ID into the launch info
diff --git a/lldb/source/Initialization/SystemInitializerCommon.cpp b/lldb/source/Initialization/SystemInitializerCommon.cpp
index 2cc3aa8..173d483 100644
--- a/lldb/source/Initialization/SystemInitializerCommon.cpp
+++ b/lldb/source/Initialization/SystemInitializerCommon.cpp
@@ -65,6 +65,7 @@
}
#endif
+ // Initialize the reproducer.
ReproducerMode mode = ReproducerMode::Off;
if (options.reproducer_capture)
mode = ReproducerMode::Capture;
@@ -74,7 +75,23 @@
if (auto e = Reproducer::Initialize(mode, FileSpec(options.reproducer_path)))
return e;
- FileSystem::Initialize();
+ // Initialize the file system.
+ auto &r = repro::Reproducer::Instance();
+ if (repro::Loader *loader = r.GetLoader()) {
+ FileSpec vfs_mapping = loader->GetFile<FileInfo>();
+ if (vfs_mapping) {
+ if (llvm::Error e = FileSystem::Initialize(vfs_mapping))
+ return e;
+ } else {
+ FileSystem::Initialize();
+ }
+ } else if (repro::Generator *g = r.GetGenerator()) {
+ repro::FileProvider &fp = g->GetOrCreate<repro::FileProvider>();
+ FileSystem::Initialize(fp.GetFileCollector());
+ } else {
+ FileSystem::Initialize();
+ }
+
Log::Initialize();
HostInfo::Initialize();
static Timer::Category func_cat(LLVM_PRETTY_FUNCTION);
diff --git a/lldb/source/Utility/CMakeLists.txt b/lldb/source/Utility/CMakeLists.txt
index 008054a..4c981c6 100644
--- a/lldb/source/Utility/CMakeLists.txt
+++ b/lldb/source/Utility/CMakeLists.txt
@@ -54,6 +54,7 @@
DataEncoder.cpp
DataExtractor.cpp
Environment.cpp
+ FileCollector.cpp
Event.cpp
FileSpec.cpp
IOObject.cpp
diff --git a/lldb/source/Utility/FileCollector.cpp b/lldb/source/Utility/FileCollector.cpp
new file mode 100644
index 0000000..1758ad8c
--- /dev/null
+++ b/lldb/source/Utility/FileCollector.cpp
@@ -0,0 +1,148 @@
+//===-- FileCollector.cpp ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "lldb/Utility/FileCollector.h"
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Path.h"
+
+using namespace lldb_private;
+using namespace llvm;
+
+// Returns whether the file system containing `path` is case sensitive.
+static bool IsCaseSensitivePath(StringRef path) {
+  SmallString<256> tmp_dest = path, upper_dest, real_dest;
+  // Remove component traversals, links, etc. real_path returns an error code
+  // that converts to true on failure, so bail out only when it is set.
+  if (sys::fs::real_path(path, tmp_dest))
+    return true; // Current default value in vfs.yaml
+  path = tmp_dest;
+
+  // Change path to all upper case and ask for its real path; if that succeeds
+  // and equals path, it's not case sensitive. Otherwise default to case
+  // sensitive, since this is the YAMLVFSWriter default.
+  upper_dest = path.upper();
+  if (!sys::fs::real_path(upper_dest, real_dest) && path.equals(real_dest))
+    return false;
+  return true;
+}
+
+FileCollector::FileCollector(const FileSpec &root) : m_root(root) { // Stores the root and creates it on disk.
+ sys::fs::create_directories(m_root.GetPath(), true); // IgnoreExisting=true; the returned error code is discarded.
+}
+
+bool FileCollector::GetRealPath(StringRef src_path,
+                                SmallVectorImpl<char> &result) {
+  // Only the directory part is resolved, and the answer is cached, because
+  // computing a real path is expensive.
+  StringRef base_name = sys::path::filename(src_path);
+  std::string parent_dir = sys::path::parent_path(src_path).str();
+
+  SmallString<256> resolved;
+  auto cached = m_symlink_map.find(parent_dir);
+  if (cached != m_symlink_map.end()) {
+    resolved = cached->second;
+  } else {
+    // First lookup of this directory: resolve symlinks and remember it.
+    if (sys::fs::real_path(parent_dir, resolved))
+      return false;
+    m_symlink_map[parent_dir] = resolved.str();
+  }
+
+  // Re-attach the file name to the resolved directory.
+  sys::path::append(resolved, base_name);
+  result.swap(resolved);
+  return true;
+}
+
+void FileCollector::AddFile(const Twine &file) { // Registers a file path with the collector.
+ std::lock_guard<std::mutex> lock(m_mutex); // Held until the function returns.
+ std::string file_str = file.str(); // Materialize the Twine once.
+ if (MarkAsSeen(file_str)) // Proceed only when MarkAsSeen returns true (presumably a first sighting; confirm in the header).
+ AddFileImpl(file_str); // Computes and records the mapping for this path.
+}
+
+void FileCollector::AddFileImpl(StringRef src_path) {
+  // Start from an absolute path so that it can later be appended to the root.
+  SmallString<256> source = src_path;
+  sys::fs::make_absolute(source);
+
+  // Use native separators throughout to avoid mixing separator styles.
+  sys::path::native(source);
+
+  // Drop any redundant leading "./" pieces.
+  source = sys::path::remove_leading_dotslash(source);
+
+  // The virtual path is the source with "." and ".." components removed.
+  SmallString<256> mapped_from = source;
+  sys::path::remove_dots(mapped_from, /*remove_dot_dot=*/true);
+
+  // remove_dots can produce the wrong location when a ".." follows a symlink
+  // component, so the copy source is taken from the real path of the
+  // un-collapsed source whenever that can be computed; the collapsed path is
+  // only the fallback.
+  SmallString<256> real_source;
+  if (!GetRealPath(source, real_source))
+    real_source = mapped_from;
+
+  // The destination is the real path re-rooted under the collector's root.
+  const std::string root_dir = m_root.GetPath();
+  SmallString<256> destination = StringRef(root_dir);
+  sys::path::append(destination, sys::path::relative_path(real_source));
+
+  // Record the canonical virtual path together with its destination.
+  // Different virtual spellings of one file thereby share a single
+  // destination in the overlay, which is a way to emulate symlinks inside
+  // the VFS.
+  AddFileToMapping(mapped_from, destination);
+}
+
+// Copies each mapped file and its permissions to its destination. A failure
+// is returned immediately if stop_on_error is set and ignored otherwise.
+std::error_code FileCollector::CopyFiles(bool stop_on_error) {
+  // Hold m_mutex (as WriteMapping does) so the mappings cannot change meanwhile.
+  std::lock_guard<std::mutex> lock(m_mutex);
+  for (const auto &entry : m_vfs_writer.getMappings()) {
+    // Create directory tree.
+    std::error_code ec = sys::fs::create_directories(
+        sys::path::parent_path(entry.RPath), /*IgnoreExisting=*/true);
+    if (ec && stop_on_error)
+      return ec;
+
+    // Copy file over.
+    ec = sys::fs::copy_file(entry.VPath, entry.RPath);
+    if (ec && stop_on_error)
+      return ec;
+    // Copy over permissions; unreadable permissions are silently skipped.
+    if (auto perms = sys::fs::getPermissions(entry.VPath)) {
+      ec = sys::fs::setPermissions(entry.RPath, *perms);
+      if (ec && stop_on_error)
+        return ec;
+    }
+  }
+  return {};
+}
+
+
+std::error_code FileCollector::WriteMapping(const FileSpec &mapping_file) {
+  std::lock_guard<std::mutex> guard(m_mutex);
+
+  // Case sensitivity is probed on the root; external names are not used.
+  m_vfs_writer.setCaseSensitivity(IsCaseSensitivePath(m_root.GetPath()));
+  m_vfs_writer.setUseExternalNames(false);
+
+  // Open the mapping file in text mode; report a failure to the caller.
+  std::error_code open_error;
+  raw_fd_ostream out(mapping_file.GetPath(), open_error, sys::fs::F_Text);
+  if (open_error)
+    return open_error;
+
+  m_vfs_writer.write(out);
+  return {};
+}
diff --git a/lldb/source/Utility/Reproducer.cpp b/lldb/source/Utility/Reproducer.cpp
index 059702f..3fb0624 100644
--- a/lldb/source/Utility/Reproducer.cpp
+++ b/lldb/source/Utility/Reproducer.cpp
@@ -220,3 +220,6 @@
void ProviderBase::anchor() {}
char ProviderBase::ID = 0;
+char FileProvider::ID = 0;
+const char *FileInfo::name = "files";
+const char *FileInfo::file = "files.yaml";