Lazy deserialization of the declaration chains associated with identifiers from a precompiled header.

This patch changes the primary name lookup method for entities within a precompiled header. Previously, we would load all of the names of declarations at translation unit scope into a large DenseMap (inside the TranslationUnitDecl's DeclContext), and then perform a special "last resort" lookup into this DeclContext when we knew there was a PCH file (see Sema::LookupName). Now, when we see an identifier named for the first time, we load all of the declarations with that name that are visible from the translation unit into the IdentifierInfo's chain of declarations. Thus, the explicit "look into the translation unit's DeclContext" code is gone, and Sema effectively uses the same IdentifierInfo-based name lookup mechanism whether we are using a PCH file or not.

This approach should help PCH scale with the size of the input program rather than the size of the PCH file. The "Hello, World!" application with Carbon.h as a PCH file now loads 20% of the identifiers in the PCH file rather than 85% of the identifiers. 90% of the 20% of identifiers loaded are actually loaded when we deserialize the preprocessor state. The next step is to make the preprocessor load macros lazily, which should drastically reduce the number of types, declarations, and identifiers loaded for "Hello, World!".

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@69737 91177308-0d34-0410-b5e6-96231b3b80d8

commit: c713da9e66dd3c47ee7b0011172e03b59ff02420 [log] [tgz]
author: Douglas Gregor <doug.gregor@gmail.com> Tue Apr 21 22:25:48 2009 +0000
committer: Douglas Gregor <doug.gregor@gmail.com> Tue Apr 21 22:25:48 2009 +0000
tree: bd72a7861c9d2ba6b5f1b37cd41aa12910c20a8b
parent: 4d107bccf19ce4a94f3efba4c853f4f85e48c9a9 [diff] [blame]
diff --git a/lib/Frontend/PCHWriter.cpp b/lib/Frontend/PCHWriter.cpp
index 16eaf98..993ba92 100644
--- a/lib/Frontend/PCHWriter.cpp
+++ b/lib/Frontend/PCHWriter.cpp

@@ -1421,7 +1421,7 @@
 ///
 void PCHWriter::WritePreprocessor(const Preprocessor &PP) {
   // Enter the preprocessor block.
-  Stream.EnterSubblock(pch::PREPROCESSOR_BLOCK_ID, 3);
+  Stream.EnterSubblock(pch::PREPROCESSOR_BLOCK_ID, 2);
   
   // If the PCH file contains __DATE__ or __TIME__ emit a warning about this.
   // FIXME: use diagnostics subsystem for localization etc.
@@ -1732,13 +1732,13 @@
                       pch::IdentID ID) {
     unsigned KeyLen = strlen(II->getName()) + 1;
     clang::io::Emit16(Out, KeyLen);
-    unsigned DataLen = 4 + 4 + 2; // 4 bytes for token ID, builtin, flags
-                                  // 4 bytes for the persistent ID
-                                  // 2 bytes for the length of the decl chain
+    unsigned DataLen = 4 + 4; // 4 bytes for token ID, builtin, flags
+                              // 4 bytes for the persistent ID
     for (IdentifierResolver::iterator D = IdentifierResolver::begin(II),
                                    DEnd = IdentifierResolver::end();
          D != DEnd; ++D)
       DataLen += sizeof(pch::DeclID);
+    clang::io::Emit16(Out, DataLen);
     return std::make_pair(KeyLen, DataLen);
   }
   
@@ -1762,15 +1762,18 @@
     clang::io::Emit32(Out, Bits);
     clang::io::Emit32(Out, ID);
 
-    llvm::SmallVector<pch::DeclID, 8> Decls;
-    for (IdentifierResolver::iterator D = IdentifierResolver::begin(II),
-                                   DEnd = IdentifierResolver::end();
+    // Emit the declaration IDs in reverse order, because the
+    // IdentifierResolver provides the declarations as they would be
+    // visible (e.g., the function "stat" would come before the struct
+    // "stat"), but IdentifierResolver::AddDeclToIdentifierChain()
+    // adds declarations to the end of the list (so we need to see the
+    // struct "stat" before the function "stat").
+    llvm::SmallVector<Decl *, 16> Decls(IdentifierResolver::begin(II), 
+                                        IdentifierResolver::end());
+    for (llvm::SmallVector<Decl *, 16>::reverse_iterator D = Decls.rbegin(),
+                                                      DEnd = Decls.rend();
          D != DEnd; ++D)
-      Decls.push_back(Writer.getDeclID(*D));
-
-    clang::io::Emit16(Out, Decls.size());
-    for (unsigned I = 0; I < Decls.size(); ++I)
-      clang::io::Emit32(Out, Decls[I]);
+      clang::io::Emit32(Out, Writer.getDeclID(*D));
   }
 };
 } // end anonymous namespace
@@ -1799,21 +1802,24 @@
 
     // Create the on-disk hash table in a buffer.
     llvm::SmallVector<char, 4096> IdentifierTable; 
+    uint32_t BucketOffset;
     {
       PCHIdentifierTableTrait Trait(*this);
       llvm::raw_svector_ostream Out(IdentifierTable);
-      Generator.Emit(Out, Trait);
+      BucketOffset = Generator.Emit(Out, Trait);
     }
 
     // Create a blob abbreviation
     BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
     Abbrev->Add(BitCodeAbbrevOp(pch::IDENTIFIER_TABLE));
+    Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
     Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
     unsigned IDTableAbbrev = Stream.EmitAbbrev(Abbrev);
 
     // Write the identifier table
     RecordData Record;
     Record.push_back(pch::IDENTIFIER_TABLE);
+    Record.push_back(BucketOffset);
     Stream.EmitRecordWithBlob(IDTableAbbrev, Record, 
                               &IdentifierTable.front(), 
                               IdentifierTable.size());
commit	c713da9e66dd3c47ee7b0011172e03b59ff02420	[log] [tgz]
author	Douglas Gregor <doug.gregor@gmail.com>	Tue Apr 21 22:25:48 2009 +0000
committer	Douglas Gregor <doug.gregor@gmail.com>	Tue Apr 21 22:25:48 2009 +0000
tree	bd72a7861c9d2ba6b5f1b37cd41aa12910c20a8b
parent	4d107bccf19ce4a94f3efba4c853f4f85e48c9a9 [diff] [blame]