Include GUIDs from the same module when computing GUIDs that needs to be imported.
Summary: In the compile phase of SamplePGO+ThinLTO, ICP is not invoked. Instead, indirect call targets will be included as function metadata for ThinIndex to buidl the call graph. This should not only include functions defined in other modules, but also functions defined in the same module, otherwise ThinIndex may find the callee dead and eliminate it, while ICP in backend will revive the symbol, which leads to undefined symbol.
Reviewers: tejohnson
Reviewed By: tejohnson
Subscribers: sanjoy, llvm-commits, mehdi_amini
Differential Revision: https://reviews.llvm.org/D39480
llvm-svn: 317118
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 3aba123..48cb501 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -352,17 +352,15 @@
return Result;
}
- /// Recursively traverses all children, if the corresponding function is
- /// not defined in module \p M, and its total sample is no less than
- /// \p Threshold, add its corresponding GUID to \p S. Also traverse the
- /// BodySamples to add hot CallTarget's GUID to \p S.
- void findImportedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M,
- uint64_t Threshold) const {
+ /// Recursively traverses all children, if the total sample count of the
+ /// corresponding function is no less than \p Threshold, add its corresponding
+ /// GUID to \p S. Also traverse the BodySamples to add hot CallTarget's GUID
+ /// to \p S.
+ void findInlinedFunctions(DenseSet<GlobalValue::GUID> &S, const Module *M,
+ uint64_t Threshold) const {
if (TotalSamples <= Threshold)
return;
- Function *F = M->getFunction(Name);
- if (!F || !F->getSubprogram())
- S.insert(Function::getGUID(Name));
+ S.insert(Function::getGUID(Name));
// Import hot CallTargets, which may not be available in IR because full
// profile annotation cannot be done until backend compilation in ThinLTO.
for (const auto &BS : BodySamples)
@@ -374,7 +372,7 @@
}
for (const auto &CS : CallsiteSamples)
for (const auto &NameFS : CS.second)
- NameFS.second.findImportedFunctions(S, M, Threshold);
+ NameFS.second.findInlinedFunctions(S, M, Threshold);
}
/// Set the name of the function.
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index bea55b3..34414f9 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -200,7 +200,7 @@
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
bool inlineCallInstruction(Instruction *I);
bool inlineHotFunctions(Function &F,
- DenseSet<GlobalValue::GUID> &ImportGUIDs);
+ DenseSet<GlobalValue::GUID> &InlinedGUIDs);
void printEdgeWeight(raw_ostream &OS, Edge E);
void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
@@ -766,12 +766,12 @@
/// it to direct call. Each indirect call is limited with a single target.
///
/// \param F function to perform iterative inlining.
-/// \param ImportGUIDs a set to be updated to include all GUIDs that come
-/// from a different module but inlined in the profiled binary.
+/// \param InlinedGUIDs a set to be updated to include all GUIDs that are
+/// inlined in the profiled binary.
///
/// \returns True if there is any inline happened.
bool SampleProfileLoader::inlineHotFunctions(
- Function &F, DenseSet<GlobalValue::GUID> &ImportGUIDs) {
+ Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
DenseSet<Instruction *> PromotedInsns;
bool Changed = false;
while (true) {
@@ -804,9 +804,9 @@
uint64_t Sum;
for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
if (IsThinLTOPreLink) {
- FS->findImportedFunctions(ImportGUIDs, F.getParent(),
- Samples->getTotalSamples() *
- SampleProfileHotThreshold / 100);
+ FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
+ Samples->getTotalSamples() *
+ SampleProfileHotThreshold / 100);
continue;
}
auto CalleeFunctionName = FS->getName();
@@ -844,8 +844,8 @@
if (inlineCallInstruction(I))
LocalChanged = true;
} else if (IsThinLTOPreLink) {
- findCalleeFunctionSamples(*I)->findImportedFunctions(
- ImportGUIDs, F.getParent(),
+ findCalleeFunctionSamples(*I)->findInlinedFunctions(
+ InlinedGUIDs, F.getParent(),
Samples->getTotalSamples() * SampleProfileHotThreshold / 100);
}
}
@@ -1455,18 +1455,19 @@
DEBUG(dbgs() << "Line number for the first instruction in " << F.getName()
<< ": " << getFunctionLoc(F) << "\n");
- DenseSet<GlobalValue::GUID> ImportGUIDs;
- Changed |= inlineHotFunctions(F, ImportGUIDs);
+ DenseSet<GlobalValue::GUID> InlinedGUIDs;
+ Changed |= inlineHotFunctions(F, InlinedGUIDs);
// Compute basic block weights.
Changed |= computeBlockWeights(F);
if (Changed) {
// Add an entry count to the function using the samples gathered at the
- // function entry. Also sets the GUIDs that comes from a different
- // module but inlined in the profiled binary. This is aiming at making
- // the IR match the profiled binary before annotation.
- F.setEntryCount(Samples->getHeadSamples() + 1, &ImportGUIDs);
+ // function entry.
+ // Sets the GUIDs that are inlined in the profiled binary. This is used
+ // for ThinLink to make correct liveness analysis, and also make the IR
+ // match the profiled binary before annotation.
+ F.setEntryCount(Samples->getHeadSamples() + 1, &InlinedGUIDs);
// Compute dominance and loop info needed for propagation.
computeDominanceAndLoopInfo(F);
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/import.prof b/llvm/test/Transforms/SampleProfile/Inputs/function_metadata.prof
similarity index 65%
rename from llvm/test/Transforms/SampleProfile/Inputs/import.prof
rename to llvm/test/Transforms/SampleProfile/Inputs/function_metadata.prof
index e09ee6b..4a348d9 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/import.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/function_metadata.prof
@@ -6,3 +6,6 @@
1: 1000
4: foo2:1000
1: 1000 foo3:1000
+test_liveness:10000:0
+ 1: foo:1000
+ 1: foo_available:1000
diff --git a/llvm/test/Transforms/SampleProfile/function_metadata.ll b/llvm/test/Transforms/SampleProfile/function_metadata.ll
new file mode 100644
index 0000000..c866118
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/function_metadata.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file=%S/Inputs/function_metadata.prof -S | FileCheck %s
+
+; Tests whether the functions in the inline stack are added to the
+; function_entry_count metadata.
+
+declare void @foo()
+
+define void @foo_available() !dbg !11 {
+ ret void
+}
+
+; CHECK: define void @test({{.*}} !prof ![[ENTRY_TEST:[0-9]+]]
+define void @test(void ()*) !dbg !7 {
+ %2 = alloca void ()*
+ store void ()* %0, void ()** %2
+ %3 = load void ()*, void ()** %2
+ ; CHECK: call {{.*}}, !prof ![[PROF:[0-9]+]]
+ call void @foo(), !dbg !18
+ call void %3(), !dbg !19
+ ret void
+}
+
+; CHECK: define void @test_liveness({{.*}} !prof ![[ENTRY_TEST_LIVENESS:[0-9]+]]
+define void @test_liveness() !dbg !12 {
+ call void @foo(), !dbg !20
+ ret void
+}
+
+; GUIDs of foo, bar, foo1, foo2 and foo3 should be included in the metadata to
+; make sure hot inline stacks are imported.
+; CHECK: ![[ENTRY_TEST]] = !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905, i64 -7908226060800700466, i64 -2012135647395072713}
+
+; Check GUIDs for both foo and foo_available are included in the metadata to
+; make sure the liveness analysis can capture the dependency from test_liveness
+; to foo_available.
+; CHECK: ![[ENTRY_TEST_LIVENESS]] = !{!"function_entry_count", i64 1, i64 4005816710939881937, i64 6699318081062747564}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
+!1 = !DIFile(filename: "calls.cc", directory: ".")
+!2 = !{}
+!6 = !DISubroutineType(types: !2)
+!7 = distinct !DISubprogram(name: "test", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, variables: !2)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 1, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.5 "}
+!11 = distinct !DISubprogram(name: "foo_available", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, variables: !2)
+!12 = distinct !DISubprogram(name: "test_liveness", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, variables: !2)
+!15 = !DILexicalBlockFile(discriminator: 1, file: !1, scope: !7)
+!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
+!18 = !DILocation(line: 10, scope: !17)
+!19 = !DILocation(line: 11, scope: !17)
+!20 = !DILocation(line: 8, scope: !12)
diff --git a/llvm/test/Transforms/SampleProfile/import.ll b/llvm/test/Transforms/SampleProfile/import.ll
deleted file mode 100644
index 8cc2338..0000000
--- a/llvm/test/Transforms/SampleProfile/import.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=new-pm-pgo-sample-use-pipeline -profile-file=%S/Inputs/import.prof -S | FileCheck %s
-
-; Tests whether the functions in the inline stack are added to the
-; function_entry_count metadata.
-
-declare void @foo()
-
-define void @test(void ()*) !dbg !7 {
- %2 = alloca void ()*
- store void ()* %0, void ()** %2
- %3 = load void ()*, void ()** %2
- ; CHECK: call {{.*}}, !prof ![[PROF:[0-9]+]]
- call void @foo(), !dbg !18
- call void %3(), !dbg !19
- ret void
-}
-
-; GUIDs of foo, bar, foo1, foo2 and foo3 should be included in the metadata to
-; make sure hot inline stacks are imported.
-; CHECK: !{!"function_entry_count", i64 1, i64 2494702099028631698, i64 6699318081062747564, i64 7682762345278052905, i64 -7908226060800700466, i64 -2012135647395072713}
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!8, !9}
-!llvm.ident = !{!10}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
-!1 = !DIFile(filename: "calls.cc", directory: ".")
-!2 = !{}
-!6 = !DISubroutineType(types: !2)
-!7 = distinct !DISubprogram(name: "test", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !1, type: !6, variables: !2)
-!8 = !{i32 2, !"Dwarf Version", i32 4}
-!9 = !{i32 1, !"Debug Info Version", i32 3}
-!10 = !{!"clang version 3.5 "}
-!15 = !DILexicalBlockFile(discriminator: 1, file: !1, scope: !7)
-!17 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !7)
-!18 = !DILocation(line: 10, scope: !17)
-!19 = !DILocation(line: 11, scope: !17)