Implement caching of stat() calls for precompiled headers, which is
essentially the same thing we do with pretokenized headers. stat()
caching improves performance of the Cocoa-prefixed "Hello, World" by
45%.



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@70223 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Frontend/PCHReader.cpp b/lib/Frontend/PCHReader.cpp
index f26cd84..f87252a 100644
--- a/lib/Frontend/PCHReader.cpp
+++ b/lib/Frontend/PCHReader.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Support/MemoryBuffer.h"
 #include <algorithm>
 #include <cstdio>
+#include <sys/stat.h>
 using namespace clang;
 
 //===----------------------------------------------------------------------===//
@@ -43,7 +44,8 @@
     IdentifierOffsets(0),
     MethodPoolLookupTable(0), MethodPoolLookupTableData(0),
     TotalSelectorsInMethodPool(0), SelectorOffsets(0),
-    TotalNumSelectors(0), NumSLocEntriesRead(0), NumStatementsRead(0), 
+    TotalNumSelectors(0), NumStatHits(0), NumStatMisses(0), 
+    NumSLocEntriesRead(0), NumStatementsRead(0), 
     NumMacrosRead(0), NumMethodPoolSelectorsRead(0), NumMethodPoolMisses(0),
     NumLexicalDeclContextsRead(0), NumVisibleDeclContextsRead(0) { }
 
@@ -377,6 +379,10 @@
   return true;
 }
 
+//===----------------------------------------------------------------------===//
+// Source Manager Deserialization
+//===----------------------------------------------------------------------===//
+
 /// \brief Read the line table in the source manager block.
 /// \returns true if ther was an error.
 static bool ParseLineTable(SourceManager &SourceMgr, 
@@ -420,6 +426,115 @@
   return false;
 }
 
+namespace {
+
+class VISIBILITY_HIDDEN PCHStatData { // One stat() entry read from a PCH.
+public:
+  const bool hasStat;  // true only when the fields below were supplied
+  const ino_t ino;     // copied into st_ino by PCHStatCache::stat
+  const dev_t dev;     // copied into st_dev
+  const mode_t mode;   // copied into st_mode
+  const time_t mtime;  // copied into st_mtime
+  const off_t size;    // copied into st_size
+  // Entry that carries stat data (hasStat is true).
+  PCHStatData(ino_t i, dev_t d, mode_t mo, time_t m, off_t s)
+  : hasStat(true), ino(i), dev(d), mode(mo), mtime(m), size(s) {}  
+  // Entry without stat data (hasStat is false); every field is zeroed.
+  PCHStatData()
+    : hasStat(false), ino(0), dev(0), mode(0), mtime(0), size(0) {}
+};
+
+class VISIBILITY_HIDDEN PCHStatLookupTrait { // Trait for PCHStatCache's table.
+ public:
+  typedef const char *external_key_type;
+  typedef const char *internal_key_type;
+
+  typedef PCHStatData data_type;
+  // Same hash as PCHStatCacheTrait::ComputeHash in PCHWriter.cpp.
+  static unsigned ComputeHash(const char *path) {
+    return BernsteinHash(path);
+  }
+
+  static internal_key_type GetInternalKey(const char *path) { return path; }
+  // Keys are C strings; two keys match when strcmp reports equality.
+  static bool EqualKey(internal_key_type a, internal_key_type b) {
+    return strcmp(a, b) == 0;
+  }
+  // Reads a 16-bit key length followed by a one-byte data length.
+  static std::pair<unsigned, unsigned>
+  ReadKeyDataLength(const unsigned char*& d) {
+    unsigned KeyLen = (unsigned) clang::io::ReadUnalignedLE16(d);
+    unsigned DataLen = (unsigned) *d++;
+    return std::make_pair(KeyLen + 1, DataLen); // +1: NUL after the path
+  }
+  // The key is used in place from the buffer; the length is ignored.
+  static internal_key_type ReadKey(const unsigned char *d, unsigned) {
+    return (const char *)d;
+  }
+  // Decodes one entry; the key and data-length arguments are unused.
+  static data_type ReadData(const internal_key_type, const unsigned char *d,
+                            unsigned /*DataLen*/) {
+    using namespace clang::io;
+    // A leading byte of 1 marks an entry that has no stat data.
+    if (*d++ == 1)
+      return data_type();
+    // Same order and widths as PCHStatCacheTrait::EmitData writes.
+    ino_t ino = (ino_t) ReadUnalignedLE32(d);
+    dev_t dev = (dev_t) ReadUnalignedLE32(d);
+    mode_t mode = (mode_t) ReadUnalignedLE16(d);
+    time_t mtime = (time_t) ReadUnalignedLE64(d);    
+    off_t size = (off_t) ReadUnalignedLE64(d);
+    return data_type(ino, dev, mode, mtime, size);
+  }
+};
+
+/// \brief stat() cache for precompiled headers.
+///
+/// This cache is very similar to the stat cache used by pretokenized
+/// headers.
+class VISIBILITY_HIDDEN PCHStatCache : public StatSysCallCache {
+  typedef OnDiskChainedHashTable<PCHStatLookupTrait> CacheTy;
+  CacheTy *Cache; // deleted by the destructor
+
+  unsigned &NumStatHits, &NumStatMisses; // incremented by stat()
+public:  
+  PCHStatCache(const unsigned char *Buckets,
+               const unsigned char *Base,
+               unsigned &NumStatHits,
+               unsigned &NumStatMisses) 
+    : Cache(0), NumStatHits(NumStatHits), NumStatMisses(NumStatMisses) {
+    Cache = CacheTy::Create(Buckets, Base);
+  }
+
+  ~PCHStatCache() { delete Cache; }
+  // Returns 0 or 1 for a cache hit; on a miss, whatever ::stat returns.
+  int stat(const char *path, struct stat *buf) {
+    // Do the lookup for the file's data in the PCH file.
+    CacheTy::iterator I = Cache->find(path);
+
+    // If we don't get a hit in the PCH file just forward to 'stat'.
+    if (I == Cache->end()) {
+      ++NumStatMisses;
+      return ::stat(path, buf);
+    }
+    
+    ++NumStatHits;
+    PCHStatData Data = *I;
+    // An entry without stat data is reported as a failure (nonzero).
+    if (!Data.hasStat)
+      return 1;
+    // Only these five fields of buf are filled in.
+    buf->st_ino = Data.ino;
+    buf->st_dev = Data.dev;
+    buf->st_mtime = Data.mtime;
+    buf->st_mode = Data.mode;
+    buf->st_size = Data.size;
+    return 0;
+  }
+};
+} // end anonymous namespace
+
+
 /// \brief Read the source manager block
 PCHReader::PCHReadResult PCHReader::ReadSourceManagerBlock() {
   using namespace SrcMgr;
@@ -916,6 +1031,13 @@
           return Result;
       }
       break;
+
+    case pch::STAT_CACHE:
+      PP.getFileManager().setStatCache(
+                  new PCHStatCache((const unsigned char *)BlobStart + Record[0],
+                                   (const unsigned char *)BlobStart,
+                                   NumStatHits, NumStatMisses));
+      break;
     }
   }
   Error("Premature end of bitstream");
@@ -1505,6 +1627,8 @@
                                           SelectorsLoaded.end(),
                                           Selector());
 
+  std::fprintf(stderr, "  %u stat cache hits\n", NumStatHits);
+  std::fprintf(stderr, "  %u stat cache misses\n", NumStatMisses);
   if (TotalNumSLocEntries)
     std::fprintf(stderr, "  %u/%u source location entries read (%f%%)\n",
                  NumSLocEntriesRead, TotalNumSLocEntries,
diff --git a/lib/Frontend/PCHWriter.cpp b/lib/Frontend/PCHWriter.cpp
index 391a1f9..f0dd43a 100644
--- a/lib/Frontend/PCHWriter.cpp
+++ b/lib/Frontend/PCHWriter.cpp
@@ -49,7 +49,7 @@
     pch::TypeCode Code;
 
     PCHTypeWriter(PCHWriter &Writer, PCHWriter::RecordData &Record) 
-      : Writer(Writer), Record(Record) { }
+      : Writer(Writer), Record(Record), Code(pch::TYPE_EXT_QUAL) { }
 
     void VisitArrayType(const ArrayType *T);
     void VisitFunctionType(const FunctionType *T);
@@ -354,6 +354,7 @@
   RECORD(PP_COUNTER_VALUE);
   RECORD(SOURCE_LOCATION_OFFSETS);
   RECORD(SOURCE_LOCATION_PRELOADS);
+  RECORD(STAT_CACHE);
 
   // SourceManager Block.
   BLOCK(SOURCE_MANAGER_BLOCK);
@@ -514,6 +515,101 @@
 }
 
 //===----------------------------------------------------------------------===//
+// stat cache Serialization
+//===----------------------------------------------------------------------===//
+
+namespace {
+// Trait used for the on-disk hash table of stat cache results.
+class VISIBILITY_HIDDEN PCHStatCacheTrait {
+public:
+  typedef const char * key_type;
+  typedef key_type key_type_ref;
+  // first: stat() result value; second: the struct stat that is serialized.
+  typedef std::pair<int, struct stat> data_type;
+  typedef const data_type& data_type_ref;
+  // Same hash as PCHStatLookupTrait::ComputeHash in PCHReader.cpp.
+  static unsigned ComputeHash(const char *path) {
+    return BernsteinHash(path);
+  }
+  // Writes a 16-bit path length and an 8-bit data length.
+  std::pair<unsigned,unsigned> 
+    EmitKeyDataLength(llvm::raw_ostream& Out, const char *path,
+                      data_type_ref Data) {
+    unsigned StrLen = strlen(path);
+    clang::io::Emit16(Out, StrLen);
+    unsigned DataLen = 1; // result value
+    if (Data.first == 0)
+      DataLen += 4 + 4 + 2 + 8 + 8; // ino, dev, mode, mtime, size
+    clang::io::Emit8(Out, DataLen);
+    return std::make_pair(StrLen + 1, DataLen); // +1 for the NUL
+  }
+  // Writes the path bytes; KeyLen includes the terminating NUL.
+  void EmitKey(llvm::raw_ostream& Out, const char *path, unsigned KeyLen) {
+    Out.write(path, KeyLen);
+  }
+  // Writes the result byte and, for a zero result, the five stat fields.
+  void EmitData(llvm::raw_ostream& Out, key_type_ref,
+                data_type_ref Data, unsigned DataLen) {
+    using namespace clang::io;
+    uint64_t Start = Out.tell(); (void)Start; // only read by the assert
+    
+    // Result of stat()
+    Emit8(Out, Data.first? 1 : 0);
+    
+    if (Data.first == 0) {
+      Emit32(Out, (uint32_t) Data.second.st_ino);
+      Emit32(Out, (uint32_t) Data.second.st_dev);
+      Emit16(Out, (uint16_t) Data.second.st_mode);
+      Emit64(Out, (uint64_t) Data.second.st_mtime);
+      Emit64(Out, (uint64_t) Data.second.st_size);
+    }
+    // DataLen is expected to equal the number of bytes written above.
+    assert(Out.tell() - Start == DataLen && "Wrong data length");
+  }
+};
+} // end anonymous namespace
+
+/// \brief Write the stat() system call cache to the PCH file.
+void PCHWriter::WriteStatCache(MemorizeStatCalls &StatCalls) {
+  // Build the on-disk hash table containing information about every
+  // stat() call.
+  OnDiskChainedHashTableGenerator<PCHStatCacheTrait> Generator;
+  unsigned NumStatEntries = 0; // counted by the loop below
+  for (MemorizeStatCalls::iterator Stat = StatCalls.begin(), 
+                                StatEnd = StatCalls.end();
+       Stat != StatEnd; ++Stat, ++NumStatEntries)
+    Generator.insert(Stat->first(), Stat->second);
+  
+  // Create the on-disk hash table in a buffer.
+  llvm::SmallVector<char, 4096> StatCacheData; 
+  uint32_t BucketOffset; // assigned from Generator.Emit below
+  {
+    llvm::raw_svector_ostream Out(StatCacheData);
+    // Make sure that no bucket is at offset 0
+    clang::io::Emit32(Out, 0);
+    BucketOffset = Generator.Emit(Out);
+  }
+
+  // Create a blob abbreviation
+  using namespace llvm;
+  BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
+  Abbrev->Add(BitCodeAbbrevOp(pch::STAT_CACHE));
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // bucket offset
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // entry count
+  Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // table bytes
+  unsigned StatCacheAbbrev = Stream.EmitAbbrev(Abbrev);
+
+  // Write the stat cache
+  RecordData Record;
+  Record.push_back(pch::STAT_CACHE);
+  Record.push_back(BucketOffset);
+  Record.push_back(NumStatEntries);
+  Stream.EmitRecordWithBlob(StatCacheAbbrev, Record, 
+                            &StatCacheData.front(), 
+                            StatCacheData.size());
+}
+
+//===----------------------------------------------------------------------===//
 // Source Manager Serialization
 //===----------------------------------------------------------------------===//
 
@@ -747,6 +843,10 @@
   Stream.EmitRecord(pch::SOURCE_LOCATION_PRELOADS, PreloadSLocs);
 }
 
+//===----------------------------------------------------------------------===//
+// Preprocessor Serialization
+//===----------------------------------------------------------------------===//
+
 /// \brief Writes the block containing the serialized form of the
 /// preprocessor.
 ///
@@ -830,6 +930,9 @@
   Stream.ExitBlock();
 }
 
+//===----------------------------------------------------------------------===//
+// Type Serialization
+//===----------------------------------------------------------------------===//
 
 /// \brief Write the representation of a type to the PCH stream.
 void PCHWriter::WriteType(const Type *T) {
@@ -891,6 +994,10 @@
   Stream.ExitBlock();
 }
 
+//===----------------------------------------------------------------------===//
+// Declaration Serialization
+//===----------------------------------------------------------------------===//
+
 /// \brief Write the block containing all of the declaration IDs
 /// lexically declared within the given DeclContext.
 ///
@@ -961,6 +1068,10 @@
   return Offset;
 }
 
+//===----------------------------------------------------------------------===//
+// Global Method Pool and Selector Serialization
+//===----------------------------------------------------------------------===//
+
 namespace {
 // Trait used for the on-disk hash table used in the method pool.
 class VISIBILITY_HIDDEN PCHMethodPoolTrait {
@@ -1162,6 +1273,10 @@
   }
 }
 
+//===----------------------------------------------------------------------===//
+// Identifier Table Serialization
+//===----------------------------------------------------------------------===//
+
 namespace {
 class VISIBILITY_HIDDEN PCHIdentifierTableTrait {
   PCHWriter &Writer;
@@ -1339,6 +1454,10 @@
                             IdentifierOffsets.size() * sizeof(uint32_t));
 }
 
+//===----------------------------------------------------------------------===//
+// General Serialization Routines
+//===----------------------------------------------------------------------===//
+
 /// \brief Write a record containing the given attributes.
 void PCHWriter::WriteAttributeRecord(const Attr *Attr) {
   RecordData Record;
@@ -1487,7 +1606,7 @@
     NumStatements(0), NumMacros(0), NumLexicalDeclContexts(0),
     NumVisibleDeclContexts(0) { }
 
-void PCHWriter::WritePCH(Sema &SemaRef) {
+void PCHWriter::WritePCH(Sema &SemaRef, MemorizeStatCalls *StatCalls) {
   using namespace llvm;
 
   ASTContext &Context = SemaRef.Context;
@@ -1540,6 +1659,8 @@
   Stream.EnterSubblock(pch::PCH_BLOCK_ID, 4);
   WriteTargetTriple(Context.Target);
   WriteLanguageOptions(Context.getLangOptions());
+  if (StatCalls)
+    WriteStatCache(*StatCalls);
   WriteSourceManagerBlock(Context.getSourceManager(), PP);
   WritePreprocessor(PP);