[ThinLTO] Implement index-based WPD
This patch adds support to the WholeProgramDevirt pass to perform
index-based WPD, which is invoked from ThinLTO during the thin link.
The ThinLTO backend (WPD import phase) behaves the same regardless of
whether the WPD decisions were made with the index-based or (the
existing) IR-based analysis.
Depends on D54815.
Reviewers: pcc
Subscribers: mehdi_amini, inglorion, eraman, steven_wu, dexonsmith, arphaman, dang, llvm-commits
Differential Revision: https://reviews.llvm.org/D55153
llvm-svn: 367679
diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 6bbcba9..ea3a055 100644
--- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -24,12 +24,14 @@
// returns 0, or a single vtable's function returns 1, replace each virtual
// call with a comparison of the vptr against that vtable's address.
//
-// This pass is intended to be used during the regular and thin LTO pipelines.
+// This pass is intended to be used during the regular and thin LTO pipelines:
+//
// During regular LTO, the pass determines the best optimization for each
// virtual call and applies the resolutions directly to virtual calls that are
// eligible for virtual call optimization (i.e. calls that use either of the
-// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics). During
-// ThinLTO, the pass operates in two phases:
+// llvm.assume(llvm.type.test) or llvm.type.checked.load intrinsics).
+//
+// During hybrid Regular/ThinLTO, the pass operates in two phases:
// - Export phase: this is run during the thin link over a single merged module
// that contains all vtables with !type metadata that participate in the link.
// The pass computes a resolution for each virtual call and stores it in the
@@ -38,6 +40,14 @@
// modules. The pass applies the resolutions previously computed during the
// import phase to each eligible virtual call.
//
+// During ThinLTO, the pass operates in two phases:
+// - Export phase: this is run during the thin link over the index which
+// contains a summary of all vtables with !type metadata that participate in
+// the link. It computes a resolution for each virtual call and stores it in
+// the type identifier summary. Only single implementation devirtualization
+// is supported.
+// - Import phase: (same as with hybrid case above).
+//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
@@ -117,6 +127,11 @@
cl::desc("Maximum number of call targets per "
"call site to enable branch funnels"));
+static cl::opt<bool>
+ PrintSummaryDevirt("wholeprogramdevirt-print-index-based", cl::Hidden,
+ cl::init(false), cl::ZeroOrMore,
+ cl::desc("Print index-based devirtualization messages"));
+
// Find the minimum offset that we may store a value of size Size bits at. If
// IsAfter is set, look for an offset before the object, otherwise look for an
// offset after the object.
@@ -265,6 +280,25 @@
}
};
+template <> struct DenseMapInfo<VTableSlotSummary> {
+ static VTableSlotSummary getEmptyKey() {
+ return {DenseMapInfo<StringRef>::getEmptyKey(),
+ DenseMapInfo<uint64_t>::getEmptyKey()};
+ }
+ static VTableSlotSummary getTombstoneKey() {
+ return {DenseMapInfo<StringRef>::getTombstoneKey(),
+ DenseMapInfo<uint64_t>::getTombstoneKey()};
+ }
+ static unsigned getHashValue(const VTableSlotSummary &I) {
+ return DenseMapInfo<StringRef>::getHashValue(I.TypeID) ^
+ DenseMapInfo<uint64_t>::getHashValue(I.ByteOffset);
+ }
+ static bool isEqual(const VTableSlotSummary &LHS,
+ const VTableSlotSummary &RHS) {
+ return LHS.TypeID == RHS.TypeID && LHS.ByteOffset == RHS.ByteOffset;
+ }
+};
+
} // end namespace llvm
namespace {
@@ -342,6 +376,7 @@
/// pass the vector is non-empty, we will need to add a use of llvm.type.test
/// to each of the function summaries in the vector.
std::vector<FunctionSummary *> SummaryTypeCheckedLoadUsers;
+ std::vector<FunctionSummary *> SummaryTypeTestAssumeUsers;
bool isExported() const {
return SummaryHasTypeTestAssumeUsers ||
@@ -358,6 +393,11 @@
AllCallSitesDevirted = false;
}
+ void addSummaryTypeTestAssumeUser(FunctionSummary *FS) {
+ SummaryTypeTestAssumeUsers.push_back(FS);
+ markSummaryHasTypeTestAssumeUsers();
+ }
+
void markDevirt() {
AllCallSitesDevirted = true;
@@ -542,6 +582,38 @@
function_ref<DominatorTree &(Function &)> LookupDomTree);
};
+struct DevirtIndex {
+ ModuleSummaryIndex &ExportSummary;
+ // The set in which to record GUIDs exported from their module by
+ // devirtualization, used by client to ensure they are not internalized.
+ std::set<GlobalValue::GUID> &ExportedGUIDs;
+ // A map in which to record the information necessary to locate the WPD
+ // resolution for local targets in case they are exported by cross module
+ // importing.
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap;
+
+ MapVector<VTableSlotSummary, VTableSlotInfo> CallSlots;
+
+ DevirtIndex(
+ ModuleSummaryIndex &ExportSummary,
+ std::set<GlobalValue::GUID> &ExportedGUIDs,
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap)
+ : ExportSummary(ExportSummary), ExportedGUIDs(ExportedGUIDs),
+ LocalWPDTargetsMap(LocalWPDTargetsMap) {}
+
+ bool tryFindVirtualCallTargets(std::vector<ValueInfo> &TargetsForSlot,
+ const TypeIdCompatibleVtableInfo TIdInfo,
+ uint64_t ByteOffset);
+
+ bool trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
+ VTableSlotSummary &SlotSummary,
+ VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res,
+ std::set<ValueInfo> &DevirtTargets);
+
+ void run();
+};
+
struct WholeProgramDevirt : public ModulePass {
static char ID;
@@ -632,6 +704,43 @@
return PreservedAnalyses::none();
}
+namespace llvm {
+void runWholeProgramDevirtOnIndex(
+ ModuleSummaryIndex &Summary, std::set<GlobalValue::GUID> &ExportedGUIDs,
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
+ DevirtIndex(Summary, ExportedGUIDs, LocalWPDTargetsMap).run();
+}
+
+void updateIndexWPDForExports(
+ ModuleSummaryIndex &Summary,
+ StringMap<FunctionImporter::ExportSetTy> &ExportLists,
+ std::map<ValueInfo, std::vector<VTableSlotSummary>> &LocalWPDTargetsMap) {
+ for (auto &T : LocalWPDTargetsMap) {
+ auto &VI = T.first;
+ // This was enforced earlier during trySingleImplDevirt.
+ assert(VI.getSummaryList().size() == 1 &&
+ "Devirt of local target has more than one copy");
+ auto &S = VI.getSummaryList()[0];
+ const auto &ExportList = ExportLists.find(S->modulePath());
+ if (ExportList == ExportLists.end() ||
+ !ExportList->second.count(VI.getGUID()))
+ continue;
+
+ // It's been exported by a cross module import.
+ for (auto &SlotSummary : T.second) {
+ auto *TIdSum = Summary.getTypeIdSummary(SlotSummary.TypeID);
+ assert(TIdSum);
+ auto WPDRes = TIdSum->WPDRes.find(SlotSummary.ByteOffset);
+ assert(WPDRes != TIdSum->WPDRes.end());
+ WPDRes->second.SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
+ WPDRes->second.SingleImplName,
+ Summary.getModuleHash(S->modulePath()));
+ }
+ }
+}
+
+} // end namespace llvm
+
bool DevirtModule::runForTesting(
Module &M, function_ref<AAResults &(Function &)> AARGetter,
function_ref<OptimizationRemarkEmitter &(Function *)> OREGetter,
@@ -766,6 +875,34 @@
return !TargetsForSlot.empty();
}
+bool DevirtIndex::tryFindVirtualCallTargets(
+ std::vector<ValueInfo> &TargetsForSlot, const TypeIdCompatibleVtableInfo TIdInfo,
+ uint64_t ByteOffset) {
+ for (const TypeIdOffsetVtableInfo P : TIdInfo) {
+ // VTable initializer should have only one summary, or all copies must be
+ // linkonce/weak ODR.
+ assert(P.VTableVI.getSummaryList().size() == 1 ||
+ llvm::all_of(
+ P.VTableVI.getSummaryList(),
+ [&](const std::unique_ptr<GlobalValueSummary> &Summary) {
+ return GlobalValue::isLinkOnceODRLinkage(Summary->linkage()) ||
+ GlobalValue::isWeakODRLinkage(Summary->linkage());
+ }));
+ const auto *VS = cast<GlobalVarSummary>(P.VTableVI.getSummaryList()[0].get());
+ if (!P.VTableVI.getSummaryList()[0]->isLive())
+ continue;
+ for (auto VTP : VS->vTableFuncs()) {
+ if (VTP.VTableOffset != P.AddressPointOffset + ByteOffset)
+ continue;
+
+ TargetsForSlot.push_back(VTP.FuncVI);
+ }
+ }
+
+ // Give up if we couldn't find any targets.
+ return !TargetsForSlot.empty();
+}
+
void DevirtModule::applySingleImplDevirt(VTableSlotInfo &SlotInfo,
Constant *TheFn, bool &IsExported) {
auto Apply = [&](CallSiteInfo &CSInfo) {
@@ -837,6 +974,83 @@
return true;
}
+bool DevirtIndex::trySingleImplDevirt(MutableArrayRef<ValueInfo> TargetsForSlot,
+ VTableSlotSummary &SlotSummary,
+ VTableSlotInfo &SlotInfo,
+ WholeProgramDevirtResolution *Res,
+ std::set<ValueInfo> &DevirtTargets) {
+ // See if the program contains a single implementation of this virtual
+ // function.
+ auto TheFn = TargetsForSlot[0];
+ for (auto &&Target : TargetsForSlot)
+ if (TheFn != Target)
+ return false;
+
+ // Don't devirtualize if we don't have target definition.
+ auto Size = TheFn.getSummaryList().size();
+ if (!Size)
+ return false;
+
+ // If the summary list contains multiple summaries where at least one is
+ // a local, give up, as we won't know which (possibly promoted) name to use.
+ for (auto &S : TheFn.getSummaryList())
+ if (GlobalValue::isLocalLinkage(S->linkage()) && Size > 1)
+ return false;
+
+ // Collect functions devirtualized at least for one call site for stats.
+ if (PrintSummaryDevirt)
+ DevirtTargets.insert(TheFn);
+
+ auto &S = TheFn.getSummaryList()[0];
+ bool IsExported = false;
+
+ // Insert calls into the summary index so that the devirtualized targets
+ // are eligible for import.
+ // FIXME: Annotate type tests with hotness. For now, mark these as hot
+ // to better ensure we have the opportunity to inline them.
+ CalleeInfo CI(CalleeInfo::HotnessType::Hot, /* RelBF = */ 0);
+ auto AddCalls = [&](CallSiteInfo &CSInfo) {
+ for (auto *FS : CSInfo.SummaryTypeCheckedLoadUsers) {
+ FS->addCall({TheFn, CI});
+ IsExported |= S->modulePath() != FS->modulePath();
+ }
+ for (auto *FS : CSInfo.SummaryTypeTestAssumeUsers) {
+ FS->addCall({TheFn, CI});
+ IsExported |= S->modulePath() != FS->modulePath();
+ }
+ };
+ AddCalls(SlotInfo.CSInfo);
+ for (auto &P : SlotInfo.ConstCSInfo)
+ AddCalls(P.second);
+
+ if (IsExported)
+ ExportedGUIDs.insert(TheFn.getGUID());
+
+ // Record in summary for use in devirtualization during the ThinLTO import
+ // step.
+ Res->TheKind = WholeProgramDevirtResolution::SingleImpl;
+ if (GlobalValue::isLocalLinkage(S->linkage())) {
+ if (IsExported)
+ // If target is a local function and we are exporting it by
+ // devirtualizing a call in another module, we need to record the
+ // promoted name.
+ Res->SingleImplName = ModuleSummaryIndex::getGlobalNameForLocal(
+ TheFn.name(), ExportSummary.getModuleHash(S->modulePath()));
+ else {
+ LocalWPDTargetsMap[TheFn].push_back(SlotSummary);
+ Res->SingleImplName = TheFn.name();
+ }
+ } else
+ Res->SingleImplName = TheFn.name();
+
+ // Name will be empty if this thin link driven off of serialized combined
+ // index (e.g. llvm-lto). However, WPD is not supported/invoked for the
+ // legacy LTO API anyway.
+ assert(!Res->SingleImplName.empty());
+
+ return true;
+}
+
void DevirtModule::tryICallBranchFunnel(
MutableArrayRef<VirtualCallTarget> TargetsForSlot, VTableSlotInfo &SlotInfo,
WholeProgramDevirtResolution *Res, VTableSlot Slot) {
@@ -1486,8 +1700,11 @@
}
void DevirtModule::importResolution(VTableSlot Slot, VTableSlotInfo &SlotInfo) {
+ auto *TypeId = dyn_cast<MDString>(Slot.TypeID);
+ if (!TypeId)
+ return;
const TypeIdSummary *TidSummary =
- ImportSummary->getTypeIdSummary(cast<MDString>(Slot.TypeID)->getString());
+ ImportSummary->getTypeIdSummary(TypeId->getString());
if (!TidSummary)
return;
auto ResI = TidSummary->WPDRes.find(Slot.ByteOffset);
@@ -1496,6 +1713,7 @@
const WholeProgramDevirtResolution &Res = ResI->second;
if (Res.TheKind == WholeProgramDevirtResolution::SingleImpl) {
+ assert(!Res.SingleImplName.empty());
// The type of the function in the declaration is irrelevant because every
// call site will cast it to the correct type.
Constant *SingleImpl =
@@ -1713,7 +1931,7 @@
using namespace ore;
OREGetter(F).emit(OptimizationRemark(DEBUG_TYPE, "Devirtualized", F)
<< "devirtualized "
- << NV("FunctionName", F->getName()));
+ << NV("FunctionName", DT.first));
}
}
@@ -1727,3 +1945,78 @@
return true;
}
+
+void DevirtIndex::run() {
+ if (ExportSummary.typeIdCompatibleVtableMap().empty())
+ return;
+
+ DenseMap<GlobalValue::GUID, std::vector<StringRef>> NameByGUID;
+ for (auto &P : ExportSummary.typeIdCompatibleVtableMap()) {
+ NameByGUID[GlobalValue::getGUID(P.first)].push_back(P.first);
+ }
+
+ // Collect information from summary about which calls to try to devirtualize.
+ for (auto &P : ExportSummary) {
+ for (auto &S : P.second.SummaryList) {
+ auto *FS = dyn_cast<FunctionSummary>(S.get());
+ if (!FS)
+ continue;
+ // FIXME: Only add live functions.
+ for (FunctionSummary::VFuncId VF : FS->type_test_assume_vcalls()) {
+ for (StringRef Name : NameByGUID[VF.GUID]) {
+ CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeTestAssumeUser(FS);
+ }
+ }
+ for (FunctionSummary::VFuncId VF : FS->type_checked_load_vcalls()) {
+ for (StringRef Name : NameByGUID[VF.GUID]) {
+ CallSlots[{Name, VF.Offset}].CSInfo.addSummaryTypeCheckedLoadUser(FS);
+ }
+ }
+ for (const FunctionSummary::ConstVCall &VC :
+ FS->type_test_assume_const_vcalls()) {
+ for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
+ CallSlots[{Name, VC.VFunc.Offset}]
+ .ConstCSInfo[VC.Args]
+ .addSummaryTypeTestAssumeUser(FS);
+ }
+ }
+ for (const FunctionSummary::ConstVCall &VC :
+ FS->type_checked_load_const_vcalls()) {
+ for (StringRef Name : NameByGUID[VC.VFunc.GUID]) {
+ CallSlots[{Name, VC.VFunc.Offset}]
+ .ConstCSInfo[VC.Args]
+ .addSummaryTypeCheckedLoadUser(FS);
+ }
+ }
+ }
+ }
+
+ std::set<ValueInfo> DevirtTargets;
+ // For each (type, offset) pair:
+ for (auto &S : CallSlots) {
+ // Search each of the members of the type identifier for the virtual
+ // function implementation at offset S.first.ByteOffset, and add to
+ // TargetsForSlot.
+ std::vector<ValueInfo> TargetsForSlot;
+ auto TidSummary = ExportSummary.getTypeIdCompatibleVtableSummary(S.first.TypeID);
+ assert(TidSummary);
+ if (tryFindVirtualCallTargets(TargetsForSlot, *TidSummary,
+ S.first.ByteOffset)) {
+ WholeProgramDevirtResolution *Res =
+ &ExportSummary.getOrInsertTypeIdSummary(S.first.TypeID)
+ .WPDRes[S.first.ByteOffset];
+
+ if (!trySingleImplDevirt(TargetsForSlot, S.first, S.second, Res,
+ DevirtTargets))
+ continue;
+ }
+ }
+
+ // Optionally have the thin link print message for each devirtualized
+ // function.
+ if (PrintSummaryDevirt)
+ for (const auto &DT : DevirtTargets)
+ errs() << "Devirtualized call to " << DT << "\n";
+
+ return;
+}