[PGO] Improve Indexed Profile Reader efficiency
With the support of value profiling added, the Indexed prof
reader gets less efficient. The prof reader initialization
used to be just reading the file header, but with VP support
added, initialization needs to walk through all profile keys
of ondisk hash table resulting in very poor locality and large
memory increase (keys are stored together with the profile data
in the mapped profile buffer). Even worse, when the reader is
used by the compiler (not llvm-profdata too), the penalty becomes
very high as compilation of each single module requires touching
profile data buffer for the whole program.
In this patch, the icall target values (MD5hash) are no longer eargerly
converted back to name strings when the data is read into memory. New
interface is added to to profile reader so that InstrProfSymtab can be
lazily created for Indexed profile reader on-demand. Creating of the
symtab is intended to be used by llvm-profdata tool for symbolic dumping
of VP data. It can be used with compiler (for legacy out of tree uses)
too but not recommended due to compile time and memory reasons
mentioned above.
Some other cleanups are also included: Function Addr to md5 map is now
consolated into InstrProfSymtab. InstrProfStringtab is no longer used and
eliminated.
llvm-svn: 256114
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 9255208..f5acd23 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -240,18 +240,19 @@
return Result;
}
+
// Map indirect call target name hash to name string.
uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind,
- ValueMapType *HashKeys) {
- if (!HashKeys)
+ ValueMapType *ValueMap) {
+ if (!ValueMap)
return Value;
switch (ValueKind) {
case IPVK_IndirectCallTarget: {
auto Result =
- std::lower_bound(HashKeys->begin(), HashKeys->end(), Value,
- [](const std::pair<uint64_t, const char *> &LHS,
+ std::lower_bound(ValueMap->begin(), ValueMap->end(), Value,
+ [](const std::pair<uint64_t, uint64_t> &LHS,
uint64_t RHS) { return LHS.first < RHS; });
- if (Result != HashKeys->end())
+ if (Result != ValueMap->end())
Value = (uint64_t)Result->second;
break;
}
@@ -259,21 +260,11 @@
return Value;
}
-void InstrProfRecord::updateStrings(InstrProfStringTable *StrTab) {
- if (!StrTab)
- return;
-
- Name = StrTab->insertString(Name);
- for (auto &VSite : IndirectCallSites)
- for (auto &VData : VSite.ValueData)
- VData.Value = (uint64_t)StrTab->insertString((const char *)VData.Value);
-}
-
void InstrProfRecord::addValueData(uint32_t ValueKind, uint32_t Site,
InstrProfValueData *VData, uint32_t N,
- ValueMapType *HashKeys) {
+ ValueMapType *ValueMap) {
for (uint32_t I = 0; I < N; I++) {
- VData[I].Value = remapValue(VData[I].Value, ValueKind, HashKeys);
+ VData[I].Value = remapValue(VData[I].Value, ValueKind, ValueMap);
}
std::vector<InstrProfValueSiteRecord> &ValueSites =
getValueSitesForKind(ValueKind);
@@ -314,19 +305,8 @@
void getValueForSiteInstrProf(const void *R, InstrProfValueData *Dst,
uint32_t K, uint32_t S,
uint64_t (*Mapper)(uint32_t, uint64_t)) {
- return reinterpret_cast<const InstrProfRecord *>(R)
- ->getValueForSite(Dst, K, S, Mapper);
-}
-
-uint64_t stringToHash(uint32_t ValueKind, uint64_t Value) {
- switch (ValueKind) {
- case IPVK_IndirectCallTarget:
- return IndexedInstrProf::ComputeHash((const char *)Value);
- break;
- default:
- llvm_unreachable("value kind not handled !");
- }
- return Value;
+ return reinterpret_cast<const InstrProfRecord *>(R)->getValueForSite(
+ Dst, K, S, Mapper);
}
ValueProfData *allocValueProfDataInstrProf(size_t TotalSizeInBytes) {
@@ -342,7 +322,7 @@
getNumValueSitesInstrProf,
getNumValueDataInstrProf,
getNumValueDataForSiteInstrProf,
- stringToHash,
+ 0,
getValueForSiteInstrProf,
allocValueProfDataInstrProf};
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index aca4354..5e83456 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -109,6 +109,11 @@
[](char c) { return ::isprint(c) || ::isspace(c); });
}
+std::error_code TextInstrProfReader::readHeader() {
+ Symtab.reset(new InstrProfSymtab());
+ return success();
+}
+
std::error_code
TextInstrProfReader::readValueProfileData(InstrProfRecord &Record) {
@@ -126,6 +131,7 @@
if (Line.is_at_end())
return success();
+
uint32_t NumValueKinds;
if (Line->getAsInteger(10, NumValueKinds)) {
// No value profile data
@@ -152,12 +158,13 @@
CHECK_LINE_END(Line);
std::pair<StringRef, StringRef> VD = Line->split(':');
uint64_t TakenCount, Value;
- READ_NUM(VD.second, TakenCount);
- if (VK == IPVK_IndirectCallTarget)
- Value = (uint64_t)StringTable.insertString(VD.first);
- else {
+ if (VK == IPVK_IndirectCallTarget) {
+ Symtab->addFuncName(VD.first);
+ Value = IndexedInstrProf::ComputeHash(VD.first);
+ } else {
READ_NUM(VD.first, Value);
}
+ READ_NUM(VD.second, TakenCount);
CurrentValues.push_back({Value, TakenCount});
Line++;
}
@@ -176,11 +183,14 @@
while (!Line.is_at_end() && (Line->empty() || Line->startswith("#")))
++Line;
// If we hit EOF while looking for a name, we're done.
- if (Line.is_at_end())
+ if (Line.is_at_end()) {
+ Symtab->finalizeSymtab();
return error(instrprof_error::eof);
+ }
// Read the function name.
Record.Name = *Line++;
+ Symtab->addFuncName(Record.Name);
// Read the function hash.
if (Line.is_at_end())
@@ -213,6 +223,9 @@
if (std::error_code EC = readValueProfileData(Record))
return EC;
+ // This is needed to avoid two pass parsing because llvm-profdata
+ // does dumping while reading.
+ Symtab->finalizeSymtab();
return success();
}
@@ -266,8 +279,21 @@
}
template <class IntPtrT>
-std::error_code RawInstrProfReader<IntPtrT>::readHeader(
- const RawInstrProf::Header &Header) {
+void RawInstrProfReader<IntPtrT>::createSymtab(InstrProfSymtab &Symtab) {
+ for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
+ StringRef FunctionName(getName(I->NamePtr), swap(I->NameSize));
+ Symtab.addFuncName(FunctionName);
+ const IntPtrT FPtr = swap(I->FunctionPointer);
+ if (!FPtr)
+ continue;
+ Symtab.mapAddress(FPtr, IndexedInstrProf::ComputeHash(FunctionName));
+ }
+ Symtab.finalizeSymtab();
+}
+
+template <class IntPtrT>
+std::error_code
+RawInstrProfReader<IntPtrT>::readHeader(const RawInstrProf::Header &Header) {
if (swap(Header.Version) != RawInstrProf::Version)
return error(instrprof_error::unsupported_version);
@@ -297,23 +323,12 @@
DataEnd = Data + DataSize;
CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset);
NamesStart = Start + NamesOffset;
- ValueDataStart = reinterpret_cast<const uint8_t*>(Start + ValueDataOffset);
+ ValueDataStart = reinterpret_cast<const uint8_t *>(Start + ValueDataOffset);
ProfileEnd = Start + ProfileSize;
- FunctionPtrToNameMap.clear();
- for (const RawInstrProf::ProfileData<IntPtrT> *I = Data; I != DataEnd; ++I) {
- const IntPtrT FPtr = swap(I->FunctionPointer);
- if (!FPtr)
- continue;
- StringRef FunctionName(getName(I->NamePtr), swap(I->NameSize));
- const char* NameEntryPtr = StringTable.insertString(FunctionName);
- FunctionPtrToNameMap.push_back(std::pair<const IntPtrT, const char*>
- (FPtr, NameEntryPtr));
- }
- std::sort(FunctionPtrToNameMap.begin(), FunctionPtrToNameMap.end(), less_first());
- FunctionPtrToNameMap.erase(std::unique(FunctionPtrToNameMap.begin(),
- FunctionPtrToNameMap.end()),
- FunctionPtrToNameMap.end());
+ std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>();
+ createSymtab(*NewSymtab.get());
+ Symtab = std::move(NewSymtab);
return success();
}
@@ -383,7 +398,7 @@
if (VDataPtrOrErr.getError())
return VDataPtrOrErr.getError();
- VDataPtrOrErr.get()->deserializeTo(Record, &FunctionPtrToNameMap);
+ VDataPtrOrErr.get()->deserializeTo(Record, &Symtab->getAddrHashMap());
CurValueDataSize = VDataPtrOrErr.get()->getSize();
return success();
}
@@ -437,7 +452,7 @@
if (VDataPtrOrErr.getError())
return false;
- VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), &HashKeys);
+ VDataPtrOrErr.get()->deserializeTo(DataBuffer.back(), nullptr);
D += VDataPtrOrErr.get()->TotalSize;
return true;
@@ -525,16 +540,6 @@
HashTable.reset(HashTableImpl::Create(
Buckets, Payload, Base,
typename HashTableImpl::InfoType(HashType, Version)));
- // Form the map of hash values to const char* keys in profiling data.
- std::vector<std::pair<uint64_t, const char *>> HashKeys;
- for (auto Key : HashTable->keys()) {
- const char *KeyTableRef = StringTable.insertString(Key);
- HashKeys.push_back(std::make_pair(ComputeHash(HashType, Key), KeyTableRef));
- }
- std::sort(HashKeys.begin(), HashKeys.end(), less_first());
- HashKeys.erase(std::unique(HashKeys.begin(), HashKeys.end()), HashKeys.end());
- // Set the hash key map for the InstrLookupTrait
- HashTable->getInfoObj().setHashKeys(std::move(HashKeys));
RecordIterator = HashTable->data_begin();
}
@@ -590,6 +595,17 @@
return success();
}
+InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
+ if (Symtab.get())
+ return *Symtab.get();
+
+ std::unique_ptr<InstrProfSymtab> NewSymtab = make_unique<InstrProfSymtab>();
+ Index->populateSymtab(*NewSymtab.get());
+
+ Symtab = std::move(NewSymtab);
+ return *Symtab.get();
+}
+
ErrorOr<InstrProfRecord>
IndexedInstrProfReader::getInstrProfRecord(StringRef FuncName,
uint64_t FuncHash) {
diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp
index 2f4ef08..1e18c26 100644
--- a/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -94,13 +94,8 @@
ValueProfDataEndianness = Endianness;
}
-void InstrProfWriter::updateStringTableReferences(InstrProfRecord &I) {
- I.updateStrings(&StringTable);
-}
-
std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I,
uint64_t Weight) {
- updateStringTableReferences(I);
auto &ProfileDataMap = FunctionData[I.Name];
bool NewFunc;
@@ -188,6 +183,7 @@
};
void InstrProfWriter::writeRecordInText(const InstrProfRecord &Func,
+ InstrProfSymtab &Symtab,
raw_fd_ostream &OS) {
OS << Func.Name << "\n";
OS << "# Func Hash:\n" << Func.Hash << "\n";
@@ -215,8 +211,7 @@
std::unique_ptr<InstrProfValueData[]> VD = Func.getValueForSite(VK, S);
for (uint32_t I = 0; I < ND; I++) {
if (VK == IPVK_IndirectCallTarget)
- OS << reinterpret_cast<const char *>(VD[I].Value) << ":"
- << VD[I].Count << "\n";
+ OS << Symtab.getFuncName(VD[I].Value) << ":" << VD[I].Count << "\n";
else
OS << VD[I].Value << ":" << VD[I].Count << "\n";
}
@@ -227,9 +222,14 @@
}
void InstrProfWriter::writeText(raw_fd_ostream &OS) {
+ InstrProfSymtab Symtab;
+ for (const auto &I : FunctionData)
+ Symtab.addFuncName(I.getKey());
+ Symtab.finalizeSymtab();
+
for (const auto &I : FunctionData)
for (const auto &Func : I.getValue())
- writeRecordInText(Func.second, OS);
+ writeRecordInText(Func.second, Symtab, OS);
}
std::unique_ptr<MemoryBuffer> InstrProfWriter::writeBuffer() {