Modules: Cache PCMs in memory and avoid a use-after-free

Clang's internal build system for implicit modules uses lock files to
ensure that after a process writes a PCM it will read the same one back
in (without contention from other -cc1 commands).  Since PCMs are read
from disk repeatedly while invalidating, building, and importing, the
lock is not released quickly.  Furthermore, the LockFileManager is not
robust in every environment.  Other -cc1 commands can stall until
timeout (after about eight minutes).

This commit changes the lock file from being necessary for correctness
to a (possibly dubious) performance hack.  The remaining benefit is to
reduce duplicate work in competing -cc1 commands which depend on the
same module.  Follow-up commits will change the internal build system to
continue after a timeout, and reduce the timeout.  Perhaps we should
reconsider blocking at all.

This also fixes a use-after-free, when one part of a compilation
validates a PCM and starts using it, and another tries to swap out the
PCM for something new.

The PCMCache is a new type called MemoryBufferCache, which saves memory
buffers based on their filename.  Its ownership is shared by the
CompilerInstance and ModuleManager.

  - The ModuleManager stores PCMs there that it loads from disk, never
    touching the disk if the cache is hot.

  - When modules fail to validate, they're removed from the cache.

  - When a CompilerInstance is spawned to build a new module, each
    already-loaded PCM is assumed to be valid, and is frozen to avoid
    the use-after-free.

  - Any newly-built module is written directly to the cache to avoid the
    round-trip to the filesystem, making lock files unnecessary for
    correctness.

Original patch by Manman Ren; most testcases by Adrian Prantl!

llvm-svn: 298165
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index e4eddae..eac0854 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -37,6 +37,7 @@
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/FileSystemOptions.h"
 #include "clang/Basic/LangOptions.h"
+#include "clang/Basic/MemoryBufferCache.h"
 #include "clang/Basic/ObjCRuntime.h"
 #include "clang/Basic/OperatorKinds.h"
 #include "clang/Basic/Sanitizers.h"
@@ -463,6 +464,30 @@
   return checkDiagnosticGroupMappings(StoredDiags, Diags, Complain);
 }
 
+/// Return the top import module if it is implicit, nullptr otherwise.
+static Module *getTopImportImplicitModule(ModuleManager &ModuleMgr,
+                                          Preprocessor &PP) {
+  // If the original import came from a file explicitly generated by the user,
+  // don't check the diagnostic mappings.
+  // FIXME: currently this is approximated by checking whether this is not a
+  // module import of an implicitly-loaded module file.
+  // Note: ModuleMgr.rbegin() may not be the current module, but it must be in
+  // the transitive closure of its imports, since unrelated modules cannot be
+  // imported until after this module finishes validation.
+  ModuleFile *TopImport = &*ModuleMgr.rbegin();
+  while (!TopImport->ImportedBy.empty())
+    TopImport = TopImport->ImportedBy[0];
+  if (TopImport->Kind != MK_ImplicitModule)
+    return nullptr;
+
+  StringRef ModuleName = TopImport->ModuleName;
+  assert(!ModuleName.empty() && "diagnostic options read before module name");
+
+  Module *M = PP.getHeaderSearchInfo().lookupModule(ModuleName);
+  assert(M && "missing module");
+  return M;
+}
+
 bool PCHValidator::ReadDiagnosticOptions(
     IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts, bool Complain) {
   DiagnosticsEngine &ExistingDiags = PP.getDiagnostics();
@@ -476,28 +501,14 @@
   ModuleManager &ModuleMgr = Reader.getModuleManager();
   assert(ModuleMgr.size() >= 1 && "what ASTFile is this then");
 
-  // If the original import came from a file explicitly generated by the user,
-  // don't check the diagnostic mappings.
-  // FIXME: currently this is approximated by checking whether this is not a
-  // module import of an implicitly-loaded module file.
-  // Note: ModuleMgr.rbegin() may not be the current module, but it must be in
-  // the transitive closure of its imports, since unrelated modules cannot be
-  // imported until after this module finishes validation.
-  ModuleFile *TopImport = &*ModuleMgr.rbegin();
-  while (!TopImport->ImportedBy.empty())
-    TopImport = TopImport->ImportedBy[0];
-  if (TopImport->Kind != MK_ImplicitModule)
+  Module *TopM = getTopImportImplicitModule(ModuleMgr, PP);
+  if (!TopM)
     return false;
 
-  StringRef ModuleName = TopImport->ModuleName;
-  assert(!ModuleName.empty() && "diagnostic options read before module name");
-
-  Module *M = PP.getHeaderSearchInfo().lookupModule(ModuleName);
-  assert(M && "missing module");
-
   // FIXME: if the diagnostics are incompatible, save a DiagnosticOptions that
   // contains the union of their flags.
-  return checkDiagnosticMappings(*Diags, ExistingDiags, M->IsSystem, Complain);
+  return checkDiagnosticMappings(*Diags, ExistingDiags, TopM->IsSystem,
+                                 Complain);
 }
 
 /// \brief Collect the macro definitions provided by the given preprocessor
@@ -4064,12 +4075,41 @@
       Listener.get(),
       WasImportedBy ? false : HSOpts.ModulesValidateDiagnosticOptions);
 
+  // If F was directly imported by another module, it's implicitly validated by
+  // the importing module.
   if (DisableValidation || WasImportedBy ||
       (AllowConfigurationMismatch && Result == ConfigurationMismatch))
     return Success;
 
-  if (Result == Failure)
+  if (Result == Failure) {
     Error("malformed block record in AST file");
+    return Failure;
+  }
+
+  if (Result == OutOfDate && F.Kind == MK_ImplicitModule) {
+    // If this module has already been finalized in the PCMCache, we're stuck
+    // with it; we can only load a single version of each module.
+    //
+    // This can happen when a module is imported in two contexts: in one, as a
+    // user module; in another, as a system module (due to an import from
+    // another module marked with the [system] flag).  It usually indicates a
+    // bug in the module map: this module should also be marked with [system].
+    //
+    // If -Wno-system-headers (the default), and the first import is as a
+    // system module, then validation will fail during the as-user import,
+    // since -Werror flags won't have been validated.  However, it's reasonable
+    // to treat this consistently as a system module.
+    //
+    // If -Wsystem-headers, the PCM on disk was built with
+    // -Wno-system-headers, and the first import is as a user module, then
+    // validation will fail during the as-system import since the PCM on disk
+    // doesn't guarantee that -Werror was respected.  However, the -Werror
+    // flags were checked during the initial as-user import.
+    if (PCMCache.isBufferFinal(F.FileName)) {
+      Diag(diag::warn_module_system_bit_conflict) << F.FileName;
+      return Success;
+    }
+  }
 
   return Result;
 }
@@ -4122,7 +4162,7 @@
       if (Listener && ValidateDiagnosticOptions &&
           !AllowCompatibleConfigurationMismatch &&
           ParseDiagnosticOptions(Record, Complain, *Listener))
-        return OutOfDate;
+        Result = OutOfDate; // Don't return early.  Read the signature.
       break;
     }
     case DIAG_PRAGMA_MAPPINGS:
@@ -7301,7 +7341,7 @@
 /// by heap-backed versus mmap'ed memory.
 void ASTReader::getMemoryBufferSizes(MemoryBufferSizes &sizes) const {
   for (ModuleFile &I : ModuleMgr) {
-    if (llvm::MemoryBuffer *buf = I.Buffer.get()) {
+    if (llvm::MemoryBuffer *buf = I.Buffer) {
       size_t bytes = buf->getBufferSize();
       switch (buf->getBufferKind()) {
         case llvm::MemoryBuffer::MemoryBuffer_Malloc:
@@ -9628,8 +9668,10 @@
                    : cast<ASTReaderListener>(new PCHValidator(PP, *this))),
       SourceMgr(PP.getSourceManager()), FileMgr(PP.getFileManager()),
       PCHContainerRdr(PCHContainerRdr), Diags(PP.getDiagnostics()), PP(PP),
-      Context(Context), ModuleMgr(PP.getFileManager(), PCHContainerRdr),
-      DummyIdResolver(PP), ReadTimer(std::move(ReadTimer)), isysroot(isysroot),
+      Context(Context),
+      ModuleMgr(PP.getFileManager(), PP.getPCMCache(), PCHContainerRdr),
+      PCMCache(PP.getPCMCache()), DummyIdResolver(PP),
+      ReadTimer(std::move(ReadTimer)), isysroot(isysroot),
       DisableValidation(DisableValidation),
       AllowASTWithCompilerErrors(AllowASTWithCompilerErrors),
       AllowConfigurationMismatch(AllowConfigurationMismatch),
diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index e9e64c2..524bc2d 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -35,6 +35,7 @@
 #include "clang/Basic/FileSystemOptions.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/LangOptions.h"
+#include "clang/Basic/MemoryBufferCache.h"
 #include "clang/Basic/Module.h"
 #include "clang/Basic/ObjCRuntime.h"
 #include "clang/Basic/SourceManager.h"
@@ -4304,10 +4305,11 @@
 }
 
 ASTWriter::ASTWriter(llvm::BitstreamWriter &Stream,
-                     SmallVectorImpl<char> &Buffer,
+                     SmallVectorImpl<char> &Buffer, MemoryBufferCache &PCMCache,
                      ArrayRef<std::shared_ptr<ModuleFileExtension>> Extensions,
                      bool IncludeTimestamps)
-    : Stream(Stream), Buffer(Buffer), IncludeTimestamps(IncludeTimestamps) {
+    : Stream(Stream), Buffer(Buffer), PCMCache(PCMCache),
+      IncludeTimestamps(IncludeTimestamps) {
   for (const auto &Ext : Extensions) {
     if (auto Writer = Ext->createExtensionWriter(*this))
       ModuleFileExtensionWriters.push_back(std::move(Writer));
@@ -4354,6 +4356,12 @@
   this->BaseDirectory.clear();
 
   WritingAST = false;
+  if (SemaRef.Context.getLangOpts().ImplicitModules && WritingModule) {
+    // Construct MemoryBuffer and update buffer manager.
+    PCMCache.addBuffer(OutputFile,
+                       llvm::MemoryBuffer::getMemBufferCopy(
+                           StringRef(Buffer.begin(), Buffer.size())));
+  }
   return Signature;
 }
 
diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp
index 141a559..429ae8e 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -28,7 +28,8 @@
     bool AllowASTWithErrors, bool IncludeTimestamps)
     : PP(PP), OutputFile(OutputFile), isysroot(isysroot.str()),
       SemaPtr(nullptr), Buffer(Buffer), Stream(Buffer->Data),
-      Writer(Stream, Buffer->Data, Extensions, IncludeTimestamps),
+      Writer(Stream, Buffer->Data, PP.getPCMCache(), Extensions,
+             IncludeTimestamps),
       AllowASTWithErrors(AllowASTWithErrors) {
   Buffer->IsComplete = false;
 }
diff --git a/clang/lib/Serialization/ModuleManager.cpp b/clang/lib/Serialization/ModuleManager.cpp
index f0654fc..d9c9fb7 100644
--- a/clang/lib/Serialization/ModuleManager.cpp
+++ b/clang/lib/Serialization/ModuleManager.cpp
@@ -12,6 +12,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "clang/Serialization/ModuleManager.h"
+#include "clang/Basic/MemoryBufferCache.h"
 #include "clang/Frontend/PCHContainerOperations.h"
 #include "clang/Lex/HeaderSearch.h"
 #include "clang/Lex/ModuleMap.h"
@@ -137,7 +138,9 @@
   // Load the contents of the module
   if (std::unique_ptr<llvm::MemoryBuffer> Buffer = lookupBuffer(FileName)) {
     // The buffer was already provided for us.
-    NewModule->Buffer = std::move(Buffer);
+    NewModule->Buffer = &PCMCache->addBuffer(FileName, std::move(Buffer));
+  } else if (llvm::MemoryBuffer *Buffer = PCMCache->lookupBuffer(FileName)) {
+    NewModule->Buffer = Buffer;
   } else {
     // Open the AST file.
     llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buf((std::error_code()));
@@ -158,7 +161,7 @@
       return Missing;
     }
 
-    NewModule->Buffer = std::move(*Buf);
+    NewModule->Buffer = &PCMCache->addBuffer(FileName, std::move(*Buf));
   }
 
   // Initialize the stream.
@@ -167,8 +170,13 @@
   // Read the signature eagerly now so that we can check it.  Avoid calling
   // ReadSignature unless there's something to check though.
   if (ExpectedSignature && checkSignature(ReadSignature(NewModule->Data),
-                                          ExpectedSignature, ErrorStr))
+                                          ExpectedSignature, ErrorStr)) {
+    // Try to remove the buffer.  If it can't be removed, then it was already
+    // validated by this process.
+    if (!PCMCache->tryToRemoveBuffer(NewModule->FileName))
+      FileMgr.invalidateCache(NewModule->File);
     return OutOfDate;
+  }
 
   // We're keeping this module.  Store it everywhere.
   Module = Modules[Entry] = NewModule.get();
@@ -235,7 +243,12 @@
     // Files that didn't make it through ReadASTCore successfully will be
     // rebuilt (or there was an error). Invalidate them so that we can load the
     // new files that will be renamed over the old ones.
-    if (LoadedSuccessfully.count(&*victim) == 0)
+    //
+    // The PCMCache tracks whether the module was succesfully loaded in another
+    // thread/context; in that case, it won't need to be rebuilt (and we can't
+    // safely invalidate it anyway).
+    if (LoadedSuccessfully.count(&*victim) == 0 &&
+        !PCMCache->tryToRemoveBuffer(victim->FileName))
       FileMgr.invalidateCache(victim->File);
   }
 
@@ -292,10 +305,10 @@
   ModulesInCommonWithGlobalIndex.push_back(MF);
 }
 
-ModuleManager::ModuleManager(FileManager &FileMgr,
+ModuleManager::ModuleManager(FileManager &FileMgr, MemoryBufferCache &PCMCache,
                              const PCHContainerReader &PCHContainerRdr)
-    : FileMgr(FileMgr), PCHContainerRdr(PCHContainerRdr), GlobalIndex(),
-      FirstVisitState(nullptr) {}
+    : FileMgr(FileMgr), PCMCache(&PCMCache), PCHContainerRdr(PCHContainerRdr),
+      GlobalIndex(), FirstVisitState(nullptr) {}
 
 ModuleManager::~ModuleManager() { delete FirstVisitState; }