Reapply [2]: [VFS] Add support for handling path traversals

This was applied twice r261551 and 263617 and later reverted because:

(1) Windows bot failing on unittests. Change the current behavior to do
not handle path traversals on windows.

(2) Windows bot failed to include llvm/Config/config.h in order to use
HAVE_REALPATH. Use LLVM_ON_UNIX instead, as done in lib/Basic/FileManager.cpp.

Handle ".", ".." and "./" with trailing slashes while collecting files
to be dumped into the vfs overlay directory.

Include the support for symlinks into components. Given the path:

/install-dir/bin/../lib/clang/3.8.0/include/altivec.h, if "bin"
component is a symlink, it's not safe to use `path::remove_dots` here,
and `realpath` is used to get the right answer. Since `realpath`
is expensive, we only do it at collecting time (which only happens
during the crash reproducer) and cache the base directory for fast lookups.

Overall, this makes the input to the VFS YAML file to be canonicalized
to never contain traversal components.

Differential Revision: http://reviews.llvm.org/D17104

rdar://problem/24499339

llvm-svn: 263686
diff --git a/clang/lib/Basic/VirtualFileSystem.cpp b/clang/lib/Basic/VirtualFileSystem.cpp
index ba846c8..e1c4d71 100644
--- a/clang/lib/Basic/VirtualFileSystem.cpp
+++ b/clang/lib/Basic/VirtualFileSystem.cpp
@@ -112,6 +112,20 @@
   return Status && Status->exists();
 }
 
+#ifndef NDEBUG
+static bool isTraversalComponent(StringRef Component) {
+  return Component.equals("..") || Component.equals(".");
+}
+
+static bool pathHasTraversal(StringRef Path) {
+  using namespace llvm::sys;
+  for (StringRef Comp : llvm::make_range(path::begin(Path), path::end(Path)))
+    if (isTraversalComponent(Comp))
+      return true;
+  return false;
+}
+#endif
+
 //===-----------------------------------------------------------------------===/
 // RealFileSystem implementation
 //===-----------------------------------------------------------------------===/
@@ -819,6 +833,16 @@
   bool UseExternalNames;
   /// @}
 
+  /// Virtual file paths and external files could be canonicalized without "..",
+  /// "." and "./" in their paths. FIXME: some unittests currently fail on
+  /// win32 when using remove_dots and remove_leading_dotslash on paths.
+  bool UseCanonicalizedPaths =
+#ifdef LLVM_ON_WIN32
+      false;
+#else
+      true;
+#endif
+
   friend class RedirectingFileSystemParser;
 
 private:
@@ -954,7 +978,7 @@
     return true;
   }
 
-  std::unique_ptr<Entry> parseEntry(yaml::Node *N) {
+  std::unique_ptr<Entry> parseEntry(yaml::Node *N, RedirectingFileSystem *FS) {
     yaml::MappingNode *M = dyn_cast<yaml::MappingNode>(N);
     if (!M) {
       error(N, "expected mapping node for file or directory entry");
@@ -994,7 +1018,17 @@
       if (Key == "name") {
         if (!parseScalarString(I->getValue(), Value, Buffer))
           return nullptr;
-        Name = Value;
+
+        if (FS->UseCanonicalizedPaths) {
+          SmallString<256> Path(Value);
+          // Guarantee that old YAML files containing paths with ".." and "."
+          // are properly canonicalized before read into the VFS.
+          Path = sys::path::remove_leading_dotslash(Path);
+          sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+          Name = Path.str();
+        } else {
+          Name = Value;
+        }
       } else if (Key == "type") {
         if (!parseScalarString(I->getValue(), Value, Buffer))
           return nullptr;
@@ -1024,7 +1058,7 @@
         for (yaml::SequenceNode::iterator I = Contents->begin(),
                                           E = Contents->end();
              I != E; ++I) {
-          if (std::unique_ptr<Entry> E = parseEntry(&*I))
+          if (std::unique_ptr<Entry> E = parseEntry(&*I, FS))
             EntryArrayContents.push_back(std::move(E));
           else
             return nullptr;
@@ -1038,7 +1072,16 @@
         HasContents = true;
         if (!parseScalarString(I->getValue(), Value, Buffer))
           return nullptr;
-        ExternalContentsPath = Value;
+        if (FS->UseCanonicalizedPaths) {
+          SmallString<256> Path(Value);
+          // Guarantee that old YAML files containing paths with ".." and "."
+          // are properly canonicalized before read into the VFS.
+          Path = sys::path::remove_leading_dotslash(Path);
+          sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+          ExternalContentsPath = Path.str();
+        } else {
+          ExternalContentsPath = Value;
+        }
       } else if (Key == "use-external-name") {
         bool Val;
         if (!parseScalarBool(I->getValue(), Val))
@@ -1149,7 +1192,7 @@
 
         for (yaml::SequenceNode::iterator I = Roots->begin(), E = Roots->end();
              I != E; ++I) {
-          if (std::unique_ptr<Entry> E = parseEntry(&*I))
+          if (std::unique_ptr<Entry> E = parseEntry(&*I, FS))
             FS->Roots.push_back(std::move(E));
           else
             return false;
@@ -1228,6 +1271,14 @@
   if (std::error_code EC = makeAbsolute(Path))
     return EC;
 
+  // Canonicalize path by removing ".", "..", "./", etc components. This is
+  // a VFS request, do bot bother about symlinks in the path components
+  // but canonicalize in order to perform the correct entry search.
+  if (UseCanonicalizedPaths) {
+    Path = sys::path::remove_leading_dotslash(Path);
+    sys::path::remove_dots(Path, /*remove_dot_dot=*/true);
+  }
+
   if (Path.empty())
     return make_error_code(llvm::errc::invalid_argument);
 
@@ -1244,10 +1295,17 @@
 ErrorOr<Entry *>
 RedirectingFileSystem::lookupPath(sys::path::const_iterator Start,
                                   sys::path::const_iterator End, Entry *From) {
+#ifndef LLVM_ON_WIN32
+  assert(!isTraversalComponent(*Start) &&
+         !isTraversalComponent(From->getName()) &&
+         "Paths should not contain traversal components");
+#else
+  // FIXME: this is here to support windows, remove it once canonicalized
+  // paths become globally default.
   if (Start->equals("."))
     ++Start;
+#endif
 
-  // FIXME: handle ..
   if (CaseSensitive ? !Start->equals(From->getName())
                     : !Start->equals_lower(From->getName()))
     // failure to match
@@ -1366,16 +1424,6 @@
   return UniqueID(std::numeric_limits<uint64_t>::max(), ID);
 }
 
-#ifndef NDEBUG
-static bool pathHasTraversal(StringRef Path) {
-  using namespace llvm::sys;
-  for (StringRef Comp : llvm::make_range(path::begin(Path), path::end(Path)))
-    if (Comp == "." || Comp == "..")
-      return true;
-  return false;
-}
-#endif
-
 void YAMLVFSWriter::addFileMapping(StringRef VirtualPath, StringRef RealPath) {
   assert(sys::path::is_absolute(VirtualPath) && "virtual path not absolute");
   assert(sys::path::is_absolute(RealPath) && "real path not absolute");