Lazy deserialization of the declaration chains associated with
identifiers from a precompiled header.

This patch changes the primary name lookup method for entities within
a precompiled header. Previously, we would load all of the names of
declarations at translation unit scope into a large DenseMap (inside
the TranslationUnitDecl's DeclContext), and then perform a special
"last resort" lookup into this DeclContext when we knew there was a
PCH file (see Sema::LookupName). Now, when we see an identifier named
for the first time, we load all of the declarations with that name
that are visible from the translation unit into the IdentifierInfo's
chain of declarations. Thus, the explicit "look into the translation
unit's DeclContext" code is gone, and Sema effectively uses the same
IdentifierInfo-based name lookup mechanism whether we are using a PCH
file or not. 

This approach should help PCH scale with the size of the input program
rather than the size of the PCH file. The "Hello, World!" application
with Carbon.h as a PCH file now loads 20% of the identifiers in the
PCH file rather than 85% of the identifiers. 

Of the 20% of identifiers that are still loaded, 90% are loaded when
we deserialize the preprocessor state. The next step is to make the
preprocessor load macros lazily, which should drastically reduce the
number of types, declarations, and identifiers loaded for "Hello,
World!".



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@69737 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Frontend/PCHWriter.cpp b/lib/Frontend/PCHWriter.cpp
index 16eaf98..993ba92 100644
--- a/lib/Frontend/PCHWriter.cpp
+++ b/lib/Frontend/PCHWriter.cpp
@@ -1421,7 +1421,7 @@
 ///
 void PCHWriter::WritePreprocessor(const Preprocessor &PP) {
   // Enter the preprocessor block.
-  Stream.EnterSubblock(pch::PREPROCESSOR_BLOCK_ID, 3);
+  Stream.EnterSubblock(pch::PREPROCESSOR_BLOCK_ID, 2);
   
   // If the PCH file contains __DATE__ or __TIME__ emit a warning about this.
   // FIXME: use diagnostics subsystem for localization etc.
@@ -1732,13 +1732,13 @@
                       pch::IdentID ID) {
     unsigned KeyLen = strlen(II->getName()) + 1;
     clang::io::Emit16(Out, KeyLen);
-    unsigned DataLen = 4 + 4 + 2; // 4 bytes for token ID, builtin, flags
-                                  // 4 bytes for the persistent ID
-                                  // 2 bytes for the length of the decl chain
+    unsigned DataLen = 4 + 4; // 4 bytes for token ID, builtin, flags
+                              // 4 bytes for the persistent ID
     for (IdentifierResolver::iterator D = IdentifierResolver::begin(II),
                                    DEnd = IdentifierResolver::end();
          D != DEnd; ++D)
       DataLen += sizeof(pch::DeclID);
+    clang::io::Emit16(Out, DataLen);
     return std::make_pair(KeyLen, DataLen);
   }
   
@@ -1762,15 +1762,18 @@
     clang::io::Emit32(Out, Bits);
     clang::io::Emit32(Out, ID);
 
-    llvm::SmallVector<pch::DeclID, 8> Decls;
-    for (IdentifierResolver::iterator D = IdentifierResolver::begin(II),
-                                   DEnd = IdentifierResolver::end();
+    // Emit the declaration IDs in reverse order, because the
+    // IdentifierResolver provides the declarations as they would be
+    // visible (e.g., the function "stat" would come before the struct
+    // "stat"), but IdentifierResolver::AddDeclToIdentifierChain()
+    // adds declarations to the end of the list (so we need to see the
+    // struct "stat" before the function "stat").
+    llvm::SmallVector<Decl *, 16> Decls(IdentifierResolver::begin(II), 
+                                        IdentifierResolver::end());
+    for (llvm::SmallVector<Decl *, 16>::reverse_iterator D = Decls.rbegin(),
+                                                      DEnd = Decls.rend();
          D != DEnd; ++D)
-      Decls.push_back(Writer.getDeclID(*D));
-
-    clang::io::Emit16(Out, Decls.size());
-    for (unsigned I = 0; I < Decls.size(); ++I)
-      clang::io::Emit32(Out, Decls[I]);
+      clang::io::Emit32(Out, Writer.getDeclID(*D));
   }
 };
 } // end anonymous namespace
@@ -1799,21 +1802,24 @@
 
     // Create the on-disk hash table in a buffer.
     llvm::SmallVector<char, 4096> IdentifierTable; 
+    uint32_t BucketOffset;
     {
       PCHIdentifierTableTrait Trait(*this);
       llvm::raw_svector_ostream Out(IdentifierTable);
-      Generator.Emit(Out, Trait);
+      BucketOffset = Generator.Emit(Out, Trait);
     }
 
     // Create a blob abbreviation
     BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
     Abbrev->Add(BitCodeAbbrevOp(pch::IDENTIFIER_TABLE));
+    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
     unsigned IDTableAbbrev = Stream.EmitAbbrev(Abbrev);
 
     // Write the identifier table
     RecordData Record;
     Record.push_back(pch::IDENTIFIER_TABLE);
+    Record.push_back(BucketOffset);
     Stream.EmitRecordWithBlob(IDTableAbbrev, Record, 
                               &IdentifierTable.front(), 
                               IdentifierTable.size());