Make the loading of information attached to an IdentifierInfo from an
AST file more lazy, so that we don't eagerly load that information for
all known identifiers each time a new AST file is loaded. The eager
reloading made some sense in the context of precompiled headers, since
very few identifiers were defined before PCH load time. With modules,
however, a huge amount of code can get parsed before we see an
@import, so laziness becomes important here.

The approach taken to make this information lazy is fairly simple:
when we load a new AST file, we mark all of the existing identifiers
as being out-of-date. Whenever we want to access information that may
come from an AST file (e.g., whether the identifier has a macro
definition, or which top-level declarations have that name), we check
the out-of-date bit and, if it's set, ask the AST reader to update the
IdentifierInfo from the AST files. The update is a merge, and we now
take care to merge declarations that appear before or after an import
with the declarations coming from multiple imports.

The results of this optimization are fairly dramatic. On a small
application that brings in 14 non-trivial modules, this takes modules
from being > 3x slower than a "perfect" PCH file down to 30% slower
for a full rebuild. A partial rebuild (where the PCH file or modules
can be re-used) is down to 7% slower. Making the PCH file just a
little imperfect (e.g., adding two smallish modules used by a bunch of
.m files that aren't in the PCH file) tips the scales in favor of the
modules approach, with 24% faster partial rebuilds.

This is just a first step; the lazy scheme could possibly be improved
by adding versioning, so that we don't search again in modules we have
already searched. Moreover, we'll need similar lazy schemes for all of the
other lookup data structures, such as DeclContexts.

llvm-svn: 143100
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp
index d47bb37..5898ff0 100644
--- a/clang/lib/Serialization/ASTReader.cpp
+++ b/clang/lib/Serialization/ASTReader.cpp
@@ -514,6 +514,7 @@
       II = &Reader.getIdentifierTable().getOwn(StringRef(k.first, k.second));
     Reader.SetIdentifierInfo(ID, II);
     II->setIsFromAST();
+    II->setOutOfDate(false);
     return II;
   }
 
@@ -539,7 +540,8 @@
   IdentifierInfo *II = KnownII;
   if (!II)
     II = &Reader.getIdentifierTable().getOwn(StringRef(k.first, k.second));
-  Reader.SetIdentifierInfo(ID, II);
+  II->setOutOfDate(false);
+  II->setIsFromAST();
 
   // Set or check the various bits in the IdentifierInfo structure.
   // Token IDs are read-only.
@@ -564,6 +566,8 @@
     DataLen -= 4;
   }
 
+  Reader.SetIdentifierInfo(ID, II);
+
   // Read all of the declarations visible at global scope with this
   // name.
   if (DataLen > 0) {
@@ -573,7 +577,6 @@
     Reader.SetGloballyVisibleDecls(II, DeclIDs);
   }
 
-  II->setIsFromAST();
   return II;
 }
 
@@ -1276,6 +1279,7 @@
         Error("macro must have a name in AST file");
         return;
       }
+      
       SourceLocation Loc = ReadSourceLocation(F, Record[1]);
       bool isUsed = Record[2];
 
@@ -1500,6 +1504,46 @@
   LoadMacroDefinition(Pos);
 }
 
+namespace {
+  /// \brief Visitor class used to look up identifiers in an AST file.
+  class IdentifierLookupVisitor {
+    StringRef Name;
+    IdentifierInfo *Found;
+  public:
+    explicit IdentifierLookupVisitor(StringRef Name) : Name(Name), Found() { }
+    
+    static bool visit(Module &M, void *UserData) {
+      IdentifierLookupVisitor *This
+        = static_cast<IdentifierLookupVisitor *>(UserData);
+      
+      ASTIdentifierLookupTable *IdTable
+        = (ASTIdentifierLookupTable *)M.IdentifierLookupTable;
+      if (!IdTable)
+        return false;
+      
+      std::pair<const char*, unsigned> Key(This->Name.begin(), 
+                                           This->Name.size());
+      ASTIdentifierLookupTable::iterator Pos = IdTable->find(Key);
+      if (Pos == IdTable->end())
+        return false;
+      
+      // Dereferencing the iterator has the effect of building the
+      // IdentifierInfo node and populating it with the various
+      // declarations it needs.
+      This->Found = *Pos;
+      return true;
+    }
+    
+    /// \brief Retrieve the identifier info found within the module
+    /// files.
+    IdentifierInfo *getIdentifierInfo() const { return Found; }
+  };
+}
+
+void ASTReader::updateOutOfDateIdentifier(IdentifierInfo &II) {
+  get(II.getName());
+}
+
 const FileEntry *ASTReader::getFileEntry(StringRef filenameStrRef) {
   std::string Filename = filenameStrRef;
   MaybeAddSystemRootToFilename(Filename);
@@ -2329,43 +2373,6 @@
   return Success;
 }
 
-namespace {
-  /// \brief Visitor class used to look up identifirs in an AST file.
-  class IdentifierLookupVisitor {
-    StringRef Name;
-    IdentifierInfo *Found;
-  public:
-    explicit IdentifierLookupVisitor(StringRef Name) : Name(Name), Found() { }
-    
-    static bool visit(Module &M, void *UserData) {
-      IdentifierLookupVisitor *This
-      = static_cast<IdentifierLookupVisitor *>(UserData);
-      
-      ASTIdentifierLookupTable *IdTable
-        = (ASTIdentifierLookupTable *)M.IdentifierLookupTable;
-      if (!IdTable)
-        return false;
-      
-      std::pair<const char*, unsigned> Key(This->Name.begin(), 
-                                           This->Name.size());
-      ASTIdentifierLookupTable::iterator Pos = IdTable->find(Key);
-      if (Pos == IdTable->end())
-        return false;
-      
-      // Dereferencing the iterator has the effect of building the
-      // IdentifierInfo node and populating it with the various
-      // declarations it needs.
-      This->Found = *Pos;
-      return true;
-    }
-    
-    // \brief Retrieve the identifier info found within the module
-    // files.
-    IdentifierInfo *getIdentifierInfo() const { return Found; }
-  };
-}
-
-
 ASTReader::ASTReadResult ASTReader::ReadAST(const std::string &FileName,
                                             ModuleKind Type) {
   switch(ReadASTCore(FileName, Type, /*ImportedBy=*/0)) {
@@ -2384,33 +2391,13 @@
       CheckPredefinesBuffers())
     return IgnorePCH;
 
-  // Initialization of keywords and pragmas occurs before the
-  // AST file is read, so there may be some identifiers that were
-  // loaded into the IdentifierTable before we intercepted the
-  // creation of identifiers. Iterate through the list of known
-  // identifiers and determine whether we have to establish
-  // preprocessor definitions or top-level identifier declaration
-  // chains for those identifiers.
-  //
-  // We copy the IdentifierInfo pointers to a small vector first,
-  // since de-serializing declarations or macro definitions can add
-  // new entries into the identifier table, invalidating the
-  // iterators.
-  //
-  // FIXME: We need a lazier way to load this information, e.g., by marking
-  // the identifier data as 'dirty', so that it will be looked up in the
-  // AST file(s) if it is uttered in the source. This could save us some
-  // module load time.
-  SmallVector<IdentifierInfo *, 128> Identifiers;
+  // Mark all of the identifiers in the identifier table as being out of date,
+  // so that various accessors know to check the loaded modules when the
+  // identifier is used.
   for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(),
                               IdEnd = PP.getIdentifierTable().end();
        Id != IdEnd; ++Id)
-    Identifiers.push_back(Id->second);
-  
-  for (unsigned I = 0, N = Identifiers.size(); I != N; ++I) {
-    IdentifierLookupVisitor Visitor(Identifiers[I]->getName());
-    ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor);
-  }
+    Id->second->setOutOfDate(true);
 
   InitializeContext();
 
@@ -4412,10 +4399,8 @@
   // Makes sure any declarations that were deserialized "too early"
   // still get added to the identifier's declaration chains.
   for (unsigned I = 0, N = PreloadedDecls.size(); I != N; ++I) {
-    if (SemaObj->TUScope)
-      SemaObj->TUScope->AddDecl(PreloadedDecls[I]);
-
-    SemaObj->IdResolver.AddDecl(PreloadedDecls[I]);
+    SemaObj->pushExternalDeclIntoScope(PreloadedDecls[I], 
+                                       PreloadedDecls[I]->getDeclName());
   }
   PreloadedDecls.clear();
 
@@ -4446,7 +4431,10 @@
 IdentifierInfo* ASTReader::get(const char *NameStart, const char *NameEnd) {
   IdentifierLookupVisitor Visitor(StringRef(NameStart, NameEnd - NameStart));
   ModuleMgr.visit(IdentifierLookupVisitor::visit, &Visitor);
-  return Visitor.getIdentifierInfo();
+  IdentifierInfo *II = Visitor.getIdentifierInfo();
+  if (II)
+    II->setOutOfDate(false);
+  return II;
 }
 
 namespace clang {
@@ -4775,13 +4763,10 @@
   for (unsigned I = 0, N = DeclIDs.size(); I != N; ++I) {
     NamedDecl *D = cast<NamedDecl>(GetDecl(DeclIDs[I]));
     if (SemaObj) {
-      if (SemaObj->TUScope) {
-        // Introduce this declaration into the translation-unit scope
-        // and add it to the declaration chain for this identifier, so
-        // that (unqualified) name lookup will find it.
-        SemaObj->TUScope->AddDecl(D);
-      }
-      SemaObj->IdResolver.AddDeclToIdentifierChain(II, D);
+      // Introduce this declaration into the translation-unit scope
+      // and add it to the declaration chain for this identifier, so
+      // that (unqualified) name lookup will find it.
+      SemaObj->pushExternalDeclIntoScope(D, II);
     } else {
       // Queue this declaration so that it will be added to the
       // translation unit scope and identifier's declaration chain