Adding HardLink Support to VirtualFileSystem.

Summary:
Added support for creating a hardlink from one file to another file.
After a hardlink is added between two files, both files will have the same:
  1. UniqueID (inode)
  2. Size
  3. Buffer

This will bring replay of compilation closer to the actual compilation. There are instances where clang checks the UniqueID of the file/header to be loaded, which leads to different behavior during replay because all files have different UniqueIDs.

Patch by Utkarsh Saxena!

Reviewers: ilya-biryukov

Reviewed By: ilya-biryukov

Subscribers: cfe-commits

Differential Revision: https://reviews.llvm.org/D51359

llvm-svn: 341366
diff --git a/clang/lib/Basic/VirtualFileSystem.cpp b/clang/lib/Basic/VirtualFileSystem.cpp
index 1af4a11..35716fd7 100644
--- a/clang/lib/Basic/VirtualFileSystem.cpp
+++ b/clang/lib/Basic/VirtualFileSystem.cpp
@@ -16,6 +16,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallString.h"
@@ -25,9 +26,9 @@
 #include "llvm/ADT/Twine.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Config/llvm-config.h"
-#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Chrono.h"
+#include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -466,53 +467,44 @@
 
 namespace detail {
 
-enum InMemoryNodeKind { IME_File, IME_Directory };
+enum InMemoryNodeKind { IME_File, IME_Directory, IME_HardLink };
 
 /// The in memory file system is a tree of Nodes. Every node can either be a
-/// file or a directory.
+/// file, hardlink or a directory.
 class InMemoryNode {
-  Status Stat;
   InMemoryNodeKind Kind;
-
-protected:
-  /// Return Stat.  This should only be used for internal/debugging use.  When
-  /// clients wants the Status of this node, they should use
-  /// \p getStatus(StringRef).
-  const Status &getStatus() const { return Stat; }
+  std::string FileName;
 
 public:
-  InMemoryNode(Status Stat, InMemoryNodeKind Kind)
-      : Stat(std::move(Stat)), Kind(Kind) {}
+  InMemoryNode(llvm::StringRef FileName, InMemoryNodeKind Kind)
+      : Kind(Kind), FileName(llvm::sys::path::filename(FileName)) {}
   virtual ~InMemoryNode() = default;
 
+  /// Get the filename of this node (the name without the directory part).
+  StringRef getFileName() const { return FileName; }
+  InMemoryNodeKind getKind() const { return Kind; }
+  virtual std::string toString(unsigned Indent) const = 0;
+};
+
+class InMemoryFile : public InMemoryNode {
+  Status Stat;
+  std::unique_ptr<llvm::MemoryBuffer> Buffer;
+
+public:
+  InMemoryFile(Status Stat, std::unique_ptr<llvm::MemoryBuffer> Buffer)
+      : InMemoryNode(Stat.getName(), IME_File), Stat(std::move(Stat)),
+        Buffer(std::move(Buffer)) {}
+
   /// Return the \p Status for this node. \p RequestedName should be the name
   /// through which the caller referred to this node. It will override
   /// \p Status::Name in the return value, to mimic the behavior of \p RealFile.
   Status getStatus(StringRef RequestedName) const {
     return Status::copyWithNewName(Stat, RequestedName);
   }
-
-  /// Get the filename of this node (the name without the directory part).
-  StringRef getFileName() const {
-    return llvm::sys::path::filename(Stat.getName());
-  }
-  InMemoryNodeKind getKind() const { return Kind; }
-  virtual std::string toString(unsigned Indent) const = 0;
-};
-
-namespace {
-
-class InMemoryFile : public InMemoryNode {
-  std::unique_ptr<llvm::MemoryBuffer> Buffer;
-
-public:
-  InMemoryFile(Status Stat, std::unique_ptr<llvm::MemoryBuffer> Buffer)
-      : InMemoryNode(std::move(Stat), IME_File), Buffer(std::move(Buffer)) {}
-
-  llvm::MemoryBuffer *getBuffer() { return Buffer.get(); }
+  llvm::MemoryBuffer *getBuffer() const { return Buffer.get(); }
 
   std::string toString(unsigned Indent) const override {
-    return (std::string(Indent, ' ') + getStatus().getName() + "\n").str();
+    return (std::string(Indent, ' ') + Stat.getName() + "\n").str();
   }
 
   static bool classof(const InMemoryNode *N) {
@@ -520,16 +512,37 @@
   }
 };
 
+namespace {
+
+class InMemoryHardLink : public InMemoryNode {
+  const InMemoryFile &ResolvedFile;
+
+public:
+  InMemoryHardLink(StringRef Path, const InMemoryFile &ResolvedFile)
+      : InMemoryNode(Path, IME_HardLink), ResolvedFile(ResolvedFile) {}
+  const InMemoryFile &getResolvedFile() const { return ResolvedFile; }
+
+  std::string toString(unsigned Indent) const override {
+    return std::string(Indent, ' ') + "HardLink to -> " +
+           ResolvedFile.toString(0);
+  }
+
+  static bool classof(const InMemoryNode *N) {
+    return N->getKind() == IME_HardLink;
+  }
+};
+
 /// Adapt a InMemoryFile for VFS' File interface.  The goal is to make
 /// \p InMemoryFileAdaptor mimic as much as possible the behavior of
 /// \p RealFile.
 class InMemoryFileAdaptor : public File {
-  InMemoryFile &Node;
+  const InMemoryFile &Node;
   /// The name to use when returning a Status for this file.
   std::string RequestedName;
 
 public:
-  explicit InMemoryFileAdaptor(InMemoryFile &Node, std::string RequestedName)
+  explicit InMemoryFileAdaptor(const InMemoryFile &Node,
+                               std::string RequestedName)
       : Node(Node), RequestedName(std::move(RequestedName)) {}
 
   llvm::ErrorOr<Status> status() override {
@@ -546,16 +559,22 @@
 
   std::error_code close() override { return {}; }
 };
-
 } // namespace
 
 class InMemoryDirectory : public InMemoryNode {
+  Status Stat;
   std::map<std::string, std::unique_ptr<InMemoryNode>> Entries;
 
 public:
   InMemoryDirectory(Status Stat)
-      : InMemoryNode(std::move(Stat), IME_Directory) {}
+      : InMemoryNode(Stat.getName(), IME_Directory), Stat(std::move(Stat)) {}
 
+  /// Return the \p Status for this node. \p RequestedName should be the name
+  /// through which the caller referred to this node. It will override
+  /// \p Status::Name in the return value, to mimic the behavior of \p RealFile.
+  Status getStatus(StringRef RequestedName) const {
+    return Status::copyWithNewName(Stat, RequestedName);
+  }
   InMemoryNode *getChild(StringRef Name) {
     auto I = Entries.find(Name);
     if (I != Entries.end())
@@ -575,7 +594,7 @@
 
   std::string toString(unsigned Indent) const override {
     std::string Result =
-        (std::string(Indent, ' ') + getStatus().getName() + "\n").str();
+        (std::string(Indent, ' ') + Stat.getName() + "\n").str();
     for (const auto &Entry : Entries)
       Result += Entry.second->toString(Indent + 2);
     return Result;
@@ -586,6 +605,17 @@
   }
 };
 
+namespace {
+Status getNodeStatus(const InMemoryNode *Node, StringRef RequestedName) {
+  if (auto Dir = dyn_cast<detail::InMemoryDirectory>(Node))
+    return Dir->getStatus(RequestedName);
+  if (auto File = dyn_cast<detail::InMemoryFile>(Node))
+    return File->getStatus(RequestedName);
+  if (auto Link = dyn_cast<detail::InMemoryHardLink>(Node))
+    return Link->getResolvedFile().getStatus(RequestedName);
+  llvm_unreachable("Unknown node type");
+}
+} // namespace
 } // namespace detail
 
 InMemoryFileSystem::InMemoryFileSystem(bool UseNormalizedPaths)
@@ -606,7 +636,8 @@
                                  Optional<uint32_t> User,
                                  Optional<uint32_t> Group,
                                  Optional<llvm::sys::fs::file_type> Type,
-                                 Optional<llvm::sys::fs::perms> Perms) {
+                                 Optional<llvm::sys::fs::perms> Perms,
+                                 const detail::InMemoryFile *HardLinkTarget) {
   SmallString<128> Path;
   P.toVector(Path);
 
@@ -627,6 +658,7 @@
   const auto ResolvedGroup = Group.getValueOr(0);
   const auto ResolvedType = Type.getValueOr(sys::fs::file_type::regular_file);
   const auto ResolvedPerms = Perms.getValueOr(sys::fs::all_all);
+  assert(!(HardLinkTarget && Buffer) && "HardLink cannot have a buffer");
   // Any intermediate directories we create should be accessible by
   // the owner, even if Perms says otherwise for the final path.
   const auto NewDirectoryPerms = ResolvedPerms | sys::fs::owner_all;
@@ -636,17 +668,22 @@
     ++I;
     if (!Node) {
       if (I == E) {
-        // End of the path, create a new file or directory.
-        Status Stat(P.str(), getNextVirtualUniqueID(),
-                    llvm::sys::toTimePoint(ModificationTime), ResolvedUser,
-                    ResolvedGroup, Buffer->getBufferSize(), ResolvedType,
-                    ResolvedPerms);
+        // End of the path.
         std::unique_ptr<detail::InMemoryNode> Child;
-        if (ResolvedType == sys::fs::file_type::directory_file) {
-          Child.reset(new detail::InMemoryDirectory(std::move(Stat)));
-        } else {
-          Child.reset(new detail::InMemoryFile(std::move(Stat),
-                                               std::move(Buffer)));
+        if (HardLinkTarget)
+          Child.reset(new detail::InMemoryHardLink(P.str(), *HardLinkTarget));
+        else {
+          // Create a new file or directory.
+          Status Stat(P.str(), getNextVirtualUniqueID(),
+                      llvm::sys::toTimePoint(ModificationTime), ResolvedUser,
+                      ResolvedGroup, Buffer->getBufferSize(), ResolvedType,
+                      ResolvedPerms);
+          if (ResolvedType == sys::fs::file_type::directory_file) {
+            Child.reset(new detail::InMemoryDirectory(std::move(Stat)));
+          } else {
+            Child.reset(
+                new detail::InMemoryFile(std::move(Stat), std::move(Buffer)));
+          }
         }
         Dir->addChild(Name, std::move(Child));
         return true;
@@ -656,8 +693,8 @@
       Status Stat(
           StringRef(Path.str().begin(), Name.end() - Path.str().begin()),
           getNextVirtualUniqueID(), llvm::sys::toTimePoint(ModificationTime),
-          ResolvedUser, ResolvedGroup, Buffer->getBufferSize(),
-          sys::fs::file_type::directory_file, NewDirectoryPerms);
+          ResolvedUser, ResolvedGroup, 0, sys::fs::file_type::directory_file,
+          NewDirectoryPerms);
       Dir = cast<detail::InMemoryDirectory>(Dir->addChild(
           Name, llvm::make_unique<detail::InMemoryDirectory>(std::move(Stat))));
       continue;
@@ -666,20 +703,35 @@
     if (auto *NewDir = dyn_cast<detail::InMemoryDirectory>(Node)) {
       Dir = NewDir;
     } else {
-      assert(isa<detail::InMemoryFile>(Node) &&
-             "Must be either file or directory!");
+      assert((isa<detail::InMemoryFile>(Node) ||
+              isa<detail::InMemoryHardLink>(Node)) &&
+             "Must be either file, hardlink or directory!");
 
       // Trying to insert a directory in place of a file.
       if (I != E)
         return false;
 
       // Return false only if the new file is different from the existing one.
+      if (auto Link = dyn_cast<detail::InMemoryHardLink>(Node)) {
+        return Link->getResolvedFile().getBuffer()->getBuffer() ==
+               Buffer->getBuffer();
+      }
       return cast<detail::InMemoryFile>(Node)->getBuffer()->getBuffer() ==
              Buffer->getBuffer();
     }
   }
 }
 
+bool InMemoryFileSystem::addFile(const Twine &P, time_t ModificationTime,
+                                 std::unique_ptr<llvm::MemoryBuffer> Buffer,
+                                 Optional<uint32_t> User,
+                                 Optional<uint32_t> Group,
+                                 Optional<llvm::sys::fs::file_type> Type,
+                                 Optional<llvm::sys::fs::perms> Perms) {
+  return addFile(P, ModificationTime, std::move(Buffer), User, Group, Type,
+                 Perms, /*HardLinkTarget=*/nullptr);
+}
+
 bool InMemoryFileSystem::addFileNoOwn(const Twine &P, time_t ModificationTime,
                                       llvm::MemoryBuffer *Buffer,
                                       Optional<uint32_t> User,
@@ -693,7 +745,7 @@
                  std::move(Perms));
 }
 
-static ErrorOr<detail::InMemoryNode *>
+static ErrorOr<const detail::InMemoryNode *>
 lookupInMemoryNode(const InMemoryFileSystem &FS, detail::InMemoryDirectory *Dir,
                    const Twine &P) {
   SmallString<128> Path;
@@ -724,6 +776,12 @@
       return errc::no_such_file_or_directory;
     }
 
+    // If Node is HardLink then return the resolved file.
+    if (auto File = dyn_cast<detail::InMemoryHardLink>(Node)) {
+      if (I == E)
+        return &File->getResolvedFile();
+      return errc::no_such_file_or_directory;
+    }
     // Traverse directories.
     Dir = cast<detail::InMemoryDirectory>(Node);
     if (I == E)
@@ -731,10 +789,22 @@
   }
 }
 
+bool InMemoryFileSystem::addHardLink(const Twine &FromPath,
+                                     const Twine &ToPath) {
+  auto FromNode = lookupInMemoryNode(*this, Root.get(), FromPath);
+  auto ToNode = lookupInMemoryNode(*this, Root.get(), ToPath);
+  // FromPath must not have been added before. ToPath must have been added
+  // before. Resolved ToPath must be a File.
+  if (!ToNode || FromNode || !isa<detail::InMemoryFile>(*ToNode))
+    return false;
+  return this->addFile(FromPath, 0, nullptr, None, None, None, None,
+                       cast<detail::InMemoryFile>(*ToNode));
+}
+
 llvm::ErrorOr<Status> InMemoryFileSystem::status(const Twine &Path) {
   auto Node = lookupInMemoryNode(*this, Root.get(), Path);
   if (Node)
-    return (*Node)->getStatus(Path.str());
+    return detail::getNodeStatus(*Node, Path.str());
   return Node.getError();
 }
 
@@ -766,7 +836,7 @@
     if (I != E) {
       SmallString<256> Path(RequestedDirName);
       llvm::sys::path::append(Path, I->second->getFileName());
-      CurrentEntry = I->second->getStatus(Path);
+      CurrentEntry = detail::getNodeStatus(I->second.get(), Path);
     } else {
       // When we're at the end, make CurrentEntry invalid and DirIterImpl will
       // do the rest.
@@ -777,7 +847,7 @@
 public:
   InMemoryDirIterator() = default;
 
-  explicit InMemoryDirIterator(detail::InMemoryDirectory &Dir,
+  explicit InMemoryDirIterator(const detail::InMemoryDirectory &Dir,
                                std::string RequestedDirName)
       : I(Dir.begin()), E(Dir.end()),
         RequestedDirName(std::move(RequestedDirName)) {