[GlobalOpt] Demote globals to locals more aggressively
Global to local demotion can speed up programs that use globals a lot. It is particularly useful with LTO, when the entire call graph is known and most functions have been internalized.
For a global to be demoted, it must be accessed by only one function, and that function:
1. Must never recurse directly or indirectly, else the GV would be clobbered.
2. Must never rely on the value in GV at the start of the function (apart from the initializer).
GlobalOpt can already do this, but it is hamstrung and only ever tries to demote globals inside "main", because C++ gives extra guarantees about how main is called - once and only once.
In LTO mode, we can often prove the first property (if the function is internal by this point, we know enough about the call graph to determine whether it could possibly recurse). FunctionAttrs now infers the "norecurse" attribute for this reason.
The second property can be proven for a subset of functions by proving that all loads from GV are dominated by a store to GV. This is conservative in the name of compile time - it only requires a DominatorTree, which is fairly cheap in the grand scheme of things. We could also use MemoryDependenceAnalysis to catch more cases, but this appears to catch most of the useful ones in my testing.
llvm-svn: 253168
diff --git a/llvm/test/Transforms/GlobalOpt/global-demotion.ll b/llvm/test/Transforms/GlobalOpt/global-demotion.ll
new file mode 100644
index 0000000..7965cb8
--- /dev/null
+++ b/llvm/test/Transforms/GlobalOpt/global-demotion.ll
@@ -0,0 +1,80 @@
+; RUN: opt -globalopt -S < %s | FileCheck %s
+
+@G1 = internal global i32 5
+@G2 = internal global i32 5
+@G3 = internal global i32 5
+@G4 = internal global i32 5
+@G5 = internal global i32 5
+
+; CHECK-LABEL: @test1
+define internal i32 @test1() norecurse {
+; CHECK-NOT: @G1
+ store i32 4, i32* @G1
+ %a = load i32, i32* @G1
+; CHECK: ret
+ ret i32 %a
+}
+
+; The load comes before the store, which makes @G2 live on entry to @test2, so we must not demote.
+; CHECK-LABEL: @test2
+define internal i32 @test2() norecurse {
+; CHECK-NOT: %G2
+ %a = load i32, i32* @G2
+ store i32 4, i32* @G2
+; CHECK: ret
+ ret i32 %a
+}
+
+; This global is indexed by a GEP, which makes the access a partial alias, so we bail out.
+; FIXME: We don't actually have to bail out in this case.
+
+; CHECK-LABEL: @test3
+define internal i32 @test3() norecurse {
+; CHECK-NOT: %G3
+ %x = getelementptr i32,i32* @G3, i32 0
+ %a = load i32, i32* %x
+ store i32 4, i32* @G3
+; CHECK: ret
+ ret i32 %a
+}
+
+; The global is cast to a larger type and then loaded. The store only partially
+; covers the load, so we must not demote.
+
+; CHECK-LABEL: @test4
+define internal i32 @test4() norecurse {
+; CHECK-NOT: %G4
+ store i32 4, i32* @G4
+ %x = bitcast i32* @G4 to i64*
+ %a = load i64, i64* %x
+ %b = trunc i64 %a to i32
+; CHECK: ret
+ ret i32 %b
+}
+
+; The global is cast to a smaller type and then loaded. This one is fine.
+
+; CHECK-LABEL: @test5
+define internal i32 @test5() norecurse {
+; CHECK-NOT: @G5
+ store i32 4, i32* @G5
+ %x = bitcast i32* @G5 to i16*
+ %a = load i16, i16* %x
+ %b = zext i16 %a to i32
+; CHECK: ret
+ ret i32 %b
+}
+
+define i32 @main() norecurse {
+ %a = call i32 @test1()
+ %b = call i32 @test2()
+ %c = call i32 @test3()
+ %d = call i32 @test4()
+ %e = call i32 @test5()
+
+ %x = or i32 %a, %b
+ %y = or i32 %x, %c
+ %z = or i32 %y, %d
+ %w = or i32 %z, %e
+ ret i32 %w
+}
diff --git a/llvm/test/Transforms/GlobalOpt/metadata.ll b/llvm/test/Transforms/GlobalOpt/metadata.ll
index fb60b66..152d58e 100644
--- a/llvm/test/Transforms/GlobalOpt/metadata.ll
+++ b/llvm/test/Transforms/GlobalOpt/metadata.ll
@@ -5,7 +5,7 @@
; to that containing %G should likewise drop to null.
@G = internal global i8** null
-define i32 @main(i32 %argc, i8** %argv) {
+define i32 @main(i32 %argc, i8** %argv) norecurse {
; CHECK-LABEL: @main(
; CHECK: %G = alloca
store i8** %argv, i8*** @G
diff --git a/llvm/test/Transforms/MergeFunc/crash2.ll b/llvm/test/Transforms/MergeFunc/crash2.ll
index 5d3e706..4b3a3f9 100644
--- a/llvm/test/Transforms/MergeFunc/crash2.ll
+++ b/llvm/test/Transforms/MergeFunc/crash2.ll
@@ -11,7 +11,7 @@
@G = internal global i8** null
@G2 = internal global i8** null
-define i32 @main(i32 %argc, i8** %argv) {
+define i32 @main(i32 %argc, i8** %argv) norecurse {
; CHECK: alloca
store i8** %argv, i8*** @G
ret i32 0