Implement caching of stat() calls for precompiled headers, which is
essentially the same thing we do with pretokenized headers. stat()
caching improves performance of the Cocoa-prefixed "Hello, World" by
45%.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@70223 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Frontend/PCHReader.cpp b/lib/Frontend/PCHReader.cpp
index f26cd84..f87252a 100644
--- a/lib/Frontend/PCHReader.cpp
+++ b/lib/Frontend/PCHReader.cpp
@@ -31,6 +31,7 @@
#include "llvm/Support/MemoryBuffer.h"
#include <algorithm>
#include <cstdio>
+#include <sys/stat.h>
using namespace clang;
//===----------------------------------------------------------------------===//
@@ -43,7 +44,8 @@
IdentifierOffsets(0),
MethodPoolLookupTable(0), MethodPoolLookupTableData(0),
TotalSelectorsInMethodPool(0), SelectorOffsets(0),
- TotalNumSelectors(0), NumSLocEntriesRead(0), NumStatementsRead(0),
+ TotalNumSelectors(0), NumStatHits(0), NumStatMisses(0),
+ NumSLocEntriesRead(0), NumStatementsRead(0),
NumMacrosRead(0), NumMethodPoolSelectorsRead(0), NumMethodPoolMisses(0),
NumLexicalDeclContextsRead(0), NumVisibleDeclContextsRead(0) { }
@@ -377,6 +379,10 @@
return true;
}
+//===----------------------------------------------------------------------===//
+// Source Manager Deserialization
+//===----------------------------------------------------------------------===//
+
/// \brief Read the line table in the source manager block.
/// \returns true if ther was an error.
static bool ParseLineTable(SourceManager &SourceMgr,
@@ -420,6 +426,115 @@
return false;
}
+namespace {
+
+class VISIBILITY_HIDDEN PCHStatData {
+public:
+ const bool hasStat;
+ const ino_t ino;
+ const dev_t dev;
+ const mode_t mode;
+ const time_t mtime;
+ const off_t size;
+
+ PCHStatData(ino_t i, dev_t d, mode_t mo, time_t m, off_t s)
+ : hasStat(true), ino(i), dev(d), mode(mo), mtime(m), size(s) {}
+
+ PCHStatData()
+ : hasStat(false), ino(0), dev(0), mode(0), mtime(0), size(0) {}
+};
+
+class VISIBILITY_HIDDEN PCHStatLookupTrait {
+ public:
+ typedef const char *external_key_type;
+ typedef const char *internal_key_type;
+
+ typedef PCHStatData data_type;
+
+ static unsigned ComputeHash(const char *path) {
+ return BernsteinHash(path);
+ }
+
+ static internal_key_type GetInternalKey(const char *path) { return path; }
+
+ static bool EqualKey(internal_key_type a, internal_key_type b) {
+ return strcmp(a, b) == 0;
+ }
+
+ static std::pair<unsigned, unsigned>
+ ReadKeyDataLength(const unsigned char*& d) {
+ unsigned KeyLen = (unsigned) clang::io::ReadUnalignedLE16(d);
+ unsigned DataLen = (unsigned) *d++;
+ return std::make_pair(KeyLen + 1, DataLen);
+ }
+
+ static internal_key_type ReadKey(const unsigned char *d, unsigned) {
+ return (const char *)d;
+ }
+
+ static data_type ReadData(const internal_key_type, const unsigned char *d,
+ unsigned /*DataLen*/) {
+ using namespace clang::io;
+
+ if (*d++ == 1)
+ return data_type();
+
+ ino_t ino = (ino_t) ReadUnalignedLE32(d);
+ dev_t dev = (dev_t) ReadUnalignedLE32(d);
+ mode_t mode = (mode_t) ReadUnalignedLE16(d);
+ time_t mtime = (time_t) ReadUnalignedLE64(d);
+ off_t size = (off_t) ReadUnalignedLE64(d);
+ return data_type(ino, dev, mode, mtime, size);
+ }
+};
+
+/// \brief stat() cache for precompiled headers.
+///
+/// This cache is very similar to the stat cache used by pretokenized
+/// headers.
+class VISIBILITY_HIDDEN PCHStatCache : public StatSysCallCache {
+ typedef OnDiskChainedHashTable<PCHStatLookupTrait> CacheTy;
+ CacheTy *Cache;
+
+ unsigned &NumStatHits, &NumStatMisses;
+public:
+ PCHStatCache(const unsigned char *Buckets,
+ const unsigned char *Base,
+ unsigned &NumStatHits,
+ unsigned &NumStatMisses)
+ : Cache(0), NumStatHits(NumStatHits), NumStatMisses(NumStatMisses) {
+ Cache = CacheTy::Create(Buckets, Base);
+ }
+
+ ~PCHStatCache() { delete Cache; }
+
+ int stat(const char *path, struct stat *buf) {
+ // Do the lookup for the file's data in the PCH file.
+ CacheTy::iterator I = Cache->find(path);
+
+ // If we don't get a hit in the PCH file just forward to 'stat'.
+ if (I == Cache->end()) {
+ ++NumStatMisses;
+ return ::stat(path, buf);
+ }
+
+ ++NumStatHits;
+ PCHStatData Data = *I;
+
+ if (!Data.hasStat)
+ return 1;
+
+ buf->st_ino = Data.ino;
+ buf->st_dev = Data.dev;
+ buf->st_mtime = Data.mtime;
+ buf->st_mode = Data.mode;
+ buf->st_size = Data.size;
+ return 0;
+ }
+};
+} // end anonymous namespace
+
+
/// \brief Read the source manager block
PCHReader::PCHReadResult PCHReader::ReadSourceManagerBlock() {
using namespace SrcMgr;
@@ -916,6 +1031,13 @@
return Result;
}
break;
+
+ case pch::STAT_CACHE:
+ PP.getFileManager().setStatCache(
+ new PCHStatCache((const unsigned char *)BlobStart + Record[0],
+ (const unsigned char *)BlobStart,
+ NumStatHits, NumStatMisses));
+ break;
}
}
Error("Premature end of bitstream");
@@ -1505,6 +1627,8 @@
SelectorsLoaded.end(),
Selector());
+ std::fprintf(stderr, " %u stat cache hits\n", NumStatHits);
+ std::fprintf(stderr, " %u stat cache misses\n", NumStatMisses);
if (TotalNumSLocEntries)
std::fprintf(stderr, " %u/%u source location entries read (%f%%)\n",
NumSLocEntriesRead, TotalNumSLocEntries,
diff --git a/lib/Frontend/PCHWriter.cpp b/lib/Frontend/PCHWriter.cpp
index 391a1f9..f0dd43a 100644
--- a/lib/Frontend/PCHWriter.cpp
+++ b/lib/Frontend/PCHWriter.cpp
@@ -49,7 +49,7 @@
pch::TypeCode Code;
PCHTypeWriter(PCHWriter &Writer, PCHWriter::RecordData &Record)
- : Writer(Writer), Record(Record) { }
+ : Writer(Writer), Record(Record), Code(pch::TYPE_EXT_QUAL) { }
void VisitArrayType(const ArrayType *T);
void VisitFunctionType(const FunctionType *T);
@@ -354,6 +354,7 @@
RECORD(PP_COUNTER_VALUE);
RECORD(SOURCE_LOCATION_OFFSETS);
RECORD(SOURCE_LOCATION_PRELOADS);
+ RECORD(STAT_CACHE);
// SourceManager Block.
BLOCK(SOURCE_MANAGER_BLOCK);
@@ -514,6 +515,101 @@
}
//===----------------------------------------------------------------------===//
+// stat cache Serialization
+//===----------------------------------------------------------------------===//
+
+namespace {
+// Trait used for the on-disk hash table of stat cache results.
+class VISIBILITY_HIDDEN PCHStatCacheTrait {
+public:
+ typedef const char * key_type;
+ typedef key_type key_type_ref;
+
+ typedef std::pair<int, struct stat> data_type;
+ typedef const data_type& data_type_ref;
+
+ static unsigned ComputeHash(const char *path) {
+ return BernsteinHash(path);
+ }
+
+ std::pair<unsigned,unsigned>
+ EmitKeyDataLength(llvm::raw_ostream& Out, const char *path,
+ data_type_ref Data) {
+ unsigned StrLen = strlen(path);
+ clang::io::Emit16(Out, StrLen);
+ unsigned DataLen = 1; // result value
+ if (Data.first == 0)
+ DataLen += 4 + 4 + 2 + 8 + 8;
+ clang::io::Emit8(Out, DataLen);
+ return std::make_pair(StrLen + 1, DataLen);
+ }
+
+ void EmitKey(llvm::raw_ostream& Out, const char *path, unsigned KeyLen) {
+ Out.write(path, KeyLen);
+ }
+
+ void EmitData(llvm::raw_ostream& Out, key_type_ref,
+ data_type_ref Data, unsigned DataLen) {
+ using namespace clang::io;
+ uint64_t Start = Out.tell(); (void)Start;
+
+ // Result of stat()
+ Emit8(Out, Data.first? 1 : 0);
+
+ if (Data.first == 0) {
+ Emit32(Out, (uint32_t) Data.second.st_ino);
+ Emit32(Out, (uint32_t) Data.second.st_dev);
+ Emit16(Out, (uint16_t) Data.second.st_mode);
+ Emit64(Out, (uint64_t) Data.second.st_mtime);
+ Emit64(Out, (uint64_t) Data.second.st_size);
+ }
+
+ assert(Out.tell() - Start == DataLen && "Wrong data length");
+ }
+};
+} // end anonymous namespace
+
+/// \brief Write the stat() system call cache to the PCH file.
+void PCHWriter::WriteStatCache(MemorizeStatCalls &StatCalls) {
+ // Build the on-disk hash table containing information about every
+ // stat() call.
+ OnDiskChainedHashTableGenerator<PCHStatCacheTrait> Generator;
+ unsigned NumStatEntries = 0;
+ for (MemorizeStatCalls::iterator Stat = StatCalls.begin(),
+ StatEnd = StatCalls.end();
+ Stat != StatEnd; ++Stat, ++NumStatEntries)
+ Generator.insert(Stat->first(), Stat->second);
+
+ // Create the on-disk hash table in a buffer.
+ llvm::SmallVector<char, 4096> StatCacheData;
+ uint32_t BucketOffset;
+ {
+ llvm::raw_svector_ostream Out(StatCacheData);
+ // Make sure that no bucket is at offset 0
+ clang::io::Emit32(Out, 0);
+ BucketOffset = Generator.Emit(Out);
+ }
+
+ // Create a blob abbreviation
+ using namespace llvm;
+ BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
+ Abbrev->Add(BitCodeAbbrevOp(pch::STAT_CACHE));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+ Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
+ unsigned StatCacheAbbrev = Stream.EmitAbbrev(Abbrev);
+
+ // Write the stat cache
+ RecordData Record;
+ Record.push_back(pch::STAT_CACHE);
+ Record.push_back(BucketOffset);
+ Record.push_back(NumStatEntries);
+ Stream.EmitRecordWithBlob(StatCacheAbbrev, Record,
+ &StatCacheData.front(),
+ StatCacheData.size());
+}
+
+//===----------------------------------------------------------------------===//
// Source Manager Serialization
//===----------------------------------------------------------------------===//
@@ -747,6 +843,10 @@
Stream.EmitRecord(pch::SOURCE_LOCATION_PRELOADS, PreloadSLocs);
}
+//===----------------------------------------------------------------------===//
+// Preprocessor Serialization
+//===----------------------------------------------------------------------===//
+
/// \brief Writes the block containing the serialized form of the
/// preprocessor.
///
@@ -830,6 +930,9 @@
Stream.ExitBlock();
}
+//===----------------------------------------------------------------------===//
+// Type Serialization
+//===----------------------------------------------------------------------===//
/// \brief Write the representation of a type to the PCH stream.
void PCHWriter::WriteType(const Type *T) {
@@ -891,6 +994,10 @@
Stream.ExitBlock();
}
+//===----------------------------------------------------------------------===//
+// Declaration Serialization
+//===----------------------------------------------------------------------===//
+
/// \brief Write the block containing all of the declaration IDs
/// lexically declared within the given DeclContext.
///
@@ -961,6 +1068,10 @@
return Offset;
}
+//===----------------------------------------------------------------------===//
+// Global Method Pool and Selector Serialization
+//===----------------------------------------------------------------------===//
+
namespace {
// Trait used for the on-disk hash table used in the method pool.
class VISIBILITY_HIDDEN PCHMethodPoolTrait {
@@ -1162,6 +1273,10 @@
}
}
+//===----------------------------------------------------------------------===//
+// Identifier Table Serialization
+//===----------------------------------------------------------------------===//
+
namespace {
class VISIBILITY_HIDDEN PCHIdentifierTableTrait {
PCHWriter &Writer;
@@ -1339,6 +1454,10 @@
IdentifierOffsets.size() * sizeof(uint32_t));
}
+//===----------------------------------------------------------------------===//
+// General Serialization Routines
+//===----------------------------------------------------------------------===//
+
/// \brief Write a record containing the given attributes.
void PCHWriter::WriteAttributeRecord(const Attr *Attr) {
RecordData Record;
@@ -1487,7 +1606,7 @@
NumStatements(0), NumMacros(0), NumLexicalDeclContexts(0),
NumVisibleDeclContexts(0) { }
-void PCHWriter::WritePCH(Sema &SemaRef) {
+void PCHWriter::WritePCH(Sema &SemaRef, MemorizeStatCalls *StatCalls) {
using namespace llvm;
ASTContext &Context = SemaRef.Context;
@@ -1540,6 +1659,8 @@
Stream.EnterSubblock(pch::PCH_BLOCK_ID, 4);
WriteTargetTriple(Context.Target);
WriteLanguageOptions(Context.getLangOptions());
+ if (StatCalls)
+ WriteStatCache(*StatCalls);
WriteSourceManagerBlock(Context.getSourceManager(), PP);
WritePreprocessor(PP);