Teach LSR to sink the immediate portion of the common expression back into its uses when that immediate fits in the addressing modes of all of those uses.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@65215 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/test/CodeGen/X86/2007-10-05-3AddrConvert.ll b/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
index 2cc9124..e9fbe79 100644
--- a/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
+++ b/test/CodeGen/X86/2007-10-05-3AddrConvert.ll
@@ -4,29 +4,43 @@
%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
%struct.node = type { i16, double, [3 x double], i32, i32 }
-define fastcc void @old_main() {
+define i32 @main(i32 %argc, i8** nocapture %argv) nounwind {
entry:
- %tmp44 = malloc %struct.anon ; <%struct.anon*> [#uses=2]
- store double 4.000000e+00, double* null, align 4
- br label %bb41
+ %0 = malloc %struct.anon ; <%struct.anon*> [#uses=2]
+ %1 = getelementptr %struct.anon* %0, i32 0, i32 2 ; <%struct.node**> [#uses=1]
+ br label %bb14.i
-bb41: ; preds = %uniform_testdata.exit, %entry
- %i.0110 = phi i32 [ 0, %entry ], [ %tmp48, %uniform_testdata.exit ] ; <i32> [#uses=2]
- %tmp48 = add i32 %i.0110, 1 ; <i32> [#uses=1]
- br i1 false, label %uniform_testdata.exit, label %bb33.preheader.i
+bb14.i: ; preds = %bb14.i, %entry
+ %i8.0.reg2mem.0.i = phi i32 [ 0, %entry ], [ %2, %bb14.i ] ; <i32> [#uses=1]
+ %2 = add i32 %i8.0.reg2mem.0.i, 1 ; <i32> [#uses=2]
+ %exitcond74.i = icmp eq i32 %2, 32 ; <i1> [#uses=1]
+ br i1 %exitcond74.i, label %bb32.i, label %bb14.i ; empty counting loop: repeats until %2 reaches 32
-bb33.preheader.i: ; preds = %bb41
- ret void
+bb32.i: ; preds = %bb32.i, %bb14.i
+ %tmp.0.reg2mem.0.i = phi i32 [ %indvar.next63.i, %bb32.i ], [ 0, %bb14.i ] ; <i32> [#uses=1]
+ %indvar.next63.i = add i32 %tmp.0.reg2mem.0.i, 1 ; <i32> [#uses=2]
+ %exitcond64.i = icmp eq i32 %indvar.next63.i, 64 ; <i1> [#uses=1]
+ br i1 %exitcond64.i, label %bb47.loopexit.i, label %bb32.i ; second empty counting loop: repeats until the counter reaches 64
-uniform_testdata.exit: ; preds = %bb41
- %tmp57 = getelementptr %struct.anon* %tmp44, i32 0, i32 3, i32 %i.0110 ; <%struct.bnode**> [#uses=1]
- store %struct.bnode* null, %struct.bnode** %tmp57, align 4
- br i1 false, label %bb154, label %bb41
+bb.i.i: ; preds = %bb47.loopexit.i
+ unreachable
-bb154: ; preds = %bb154, %uniform_testdata.exit
- br i1 false, label %bb166, label %bb154
+stepsystem.exit.i: ; preds = %bb47.loopexit.i
+ store %struct.node* null, %struct.node** %1, align 4 ; writes null through %1, the field-2 address of the malloc'd %0 computed in entry
+ br label %bb.i6.i
-bb166: ; preds = %bb154
- %tmp169 = getelementptr %struct.anon* %tmp44, i32 0, i32 3, i32 0 ; <%struct.bnode**> [#uses=0]
- ret void
+bb.i6.i: ; preds = %bb.i6.i, %stepsystem.exit.i
+ %tmp.0.i.i = add i32 0, -1 ; <i32> [#uses=1]
+ %3 = icmp slt i32 %tmp.0.i.i, 0 ; <i1> [#uses=1]
+ br i1 %3, label %bb107.i.i, label %bb.i6.i ; %tmp.0.i.i is 0 + -1, so %3 is always true and %bb107.i.i is always taken
+
+bb107.i.i: ; preds = %bb107.i.i, %bb.i6.i
+ %q_addr.0.i.i.in = phi %struct.bnode** [ null, %bb107.i.i ], [ %4, %bb.i6.i ] ; <%struct.bnode**> [#uses=1]
+ %q_addr.0.i.i = load %struct.bnode** %q_addr.0.i.i.in ; <%struct.bnode*> [#uses=0]
+ br label %bb107.i.i ; unconditional branch back to this same block; no exit edge
+
+bb47.loopexit.i: ; preds = %bb32.i
+ %4 = getelementptr %struct.anon* %0, i32 0, i32 4, i32 0 ; <%struct.bnode**> [#uses=1]
+ %5 = icmp eq %struct.node* null, null ; <i1> [#uses=1]
+ br i1 %5, label %stepsystem.exit.i, label %bb.i.i ; %5 compares null with null, so %stepsystem.exit.i is always taken
}
diff --git a/test/CodeGen/X86/loop-strength-reduce-2.ll b/test/CodeGen/X86/loop-strength-reduce-2.ll
index b67e618..8ea5bdb 100644
--- a/test/CodeGen/X86/loop-strength-reduce-2.ll
+++ b/test/CodeGen/X86/loop-strength-reduce-2.ll
@@ -1,8 +1,10 @@
; RUN: llvm-as < %s | llc -march=x86 -relocation-model=pic | \
; RUN: grep {, 4} | count 1
+; RUN: llvm-as < %s | llc -march=x86 | not grep lea
;
; Make sure the common loop invariant A is hoisted up to preheader,
; since too many registers are needed to subsume it into the addressing modes.
+; In non-PIC mode it is safe to sink A back into the addressing modes.
@A = global [16 x [16 x i32]] zeroinitializer, align 32 ; <[16 x [16 x i32]]*> [#uses=2]
diff --git a/test/CodeGen/X86/loop-strength-reduce8.ll b/test/CodeGen/X86/loop-strength-reduce8.ll
new file mode 100644
index 0000000..1846c7d
--- /dev/null
+++ b/test/CodeGen/X86/loop-strength-reduce8.ll
@@ -0,0 +1,78 @@
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep leal | not grep 16
+
+ %struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 }
+ %struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] }
+ %struct.bitmap_head_def = type { %struct.bitmap_element*, %struct.bitmap_element*, i32 }
+ %struct.branch_path = type { %struct.rtx_def*, i32 }
+ %struct.c_lang_decl = type <{ i8, [3 x i8] }>
+ %struct.constant_descriptor = type { %struct.constant_descriptor*, i8*, %struct.rtx_def*, { x86_fp80 } }
+ %struct.eh_region = type { %struct.eh_region*, %struct.eh_region*, %struct.eh_region*, i32, %struct.bitmap_head_def*, i32, { { %struct.eh_region*, %struct.eh_region*, %struct.eh_region*, %struct.rtx_def* } }, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+ %struct.eh_status = type { %struct.eh_region*, %struct.eh_region**, %struct.eh_region*, %struct.eh_region*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, i32, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.varray_head_tag*, %struct.branch_path*, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+ %struct.emit_status = type { i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.sequence_stack*, i32, i32, i8*, i32, i8*, %struct.tree_node**, %struct.rtx_def** }
+ %struct.equiv_table = type { %struct.rtx_def*, %struct.rtx_def* }
+ %struct.expr_status = type { i32, i32, i32, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def* }
+ %struct.function = type { %struct.eh_status*, %struct.stmt_status*, %struct.expr_status*, %struct.emit_status*, %struct.varasm_status*, i8*, %struct.tree_node*, %struct.function*, i32, i32, i32, i32, %struct.rtx_def*, %struct.CUMULATIVE_ARGS, %struct.rtx_def*, %struct.rtx_def*, i8*, %struct.initial_value_struct*, i32, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, %struct.rtx_def*, i64, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, i32, %struct.rtx_def**, %struct.temp_slot*, i32, i32, i32, %struct.var_refs_queue*, i32, i32, i8*, %struct.tree_node*, %struct.rtx_def*, i32, i32, %struct.machine_function*, i32, i32, %struct.language_function*, %struct.rtx_def*, i8, i8, i8 }
+ %struct.goto_fixup = type { %struct.goto_fixup*, %struct.rtx_def*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, i32, %struct.rtx_def*, %struct.tree_node* }
+ %struct.initial_value_struct = type { i32, i32, %struct.equiv_table* }
+ %struct.label_chain = type { %struct.label_chain*, %struct.tree_node* }
+ %struct.lang_decl = type { %struct.c_lang_decl, %struct.tree_node* }
+ %struct.language_function = type { %struct.stmt_tree_s, %struct.tree_node* }
+ %struct.machine_function = type { [59 x [3 x %struct.rtx_def*]], i32, i32 }
+ %struct.nesting = type { %struct.nesting*, %struct.nesting*, i32, %struct.rtx_def*, { { i32, %struct.rtx_def*, %struct.rtx_def*, %struct.nesting*, %struct.tree_node*, %struct.tree_node*, %struct.label_chain*, i32, i32, i32, i32, %struct.rtx_def*, %struct.tree_node** } } }
+ %struct.pool_constant = type { %struct.constant_descriptor*, %struct.pool_constant*, %struct.pool_constant*, %struct.rtx_def*, i32, i32, i32, i64, i32 }
+ %struct.rtunion = type { i64 }
+ %struct.rtx_def = type { i16, i8, i8, [1 x %struct.rtunion] }
+ %struct.sequence_stack = type { %struct.rtx_def*, %struct.rtx_def*, %struct.tree_node*, %struct.sequence_stack* }
+ %struct.stmt_status = type { %struct.nesting*, %struct.nesting*, %struct.nesting*, %struct.nesting*, %struct.nesting*, %struct.nesting*, i32, i32, %struct.tree_node*, %struct.rtx_def*, i32, i8*, i32, %struct.goto_fixup* }
+ %struct.stmt_tree_s = type { %struct.tree_node*, %struct.tree_node*, i8*, i32 }
+ %struct.temp_slot = type { %struct.temp_slot*, %struct.rtx_def*, %struct.rtx_def*, i32, i64, %struct.tree_node*, %struct.tree_node*, i8, i8, i32, i32, i64, i64 }
+ %struct.tree_common = type { %struct.tree_node*, %struct.tree_node*, i8, i8, i8, i8 }
+ %struct.tree_decl = type { %struct.tree_common, i8*, i32, i32, %struct.tree_node*, i8, i8, i8, i8, i8, i8, %struct.rtunion, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, %struct.rtx_def*, %struct.rtx_def*, { %struct.function* }, %struct.tree_node*, %struct.tree_node*, %struct.tree_node*, i64, %struct.lang_decl* }
+ %struct.tree_exp = type { %struct.tree_common, i32, [1 x %struct.tree_node*] }
+ %struct.tree_node = type { %struct.tree_decl }
+ %struct.var_refs_queue = type { %struct.rtx_def*, i32, i32, %struct.var_refs_queue* }
+ %struct.varasm_status = type { %struct.constant_descriptor**, %struct.pool_constant**, %struct.pool_constant*, %struct.pool_constant*, i64, %struct.rtx_def* }
+ %struct.varray_data = type { [1 x i64] }
+ %struct.varray_head_tag = type { i32, i32, i32, i8*, %struct.varray_data }
+@lineno = internal global i32 0 ; <i32*> [#uses=1]
+@tree_code_length = internal global [256 x i32] zeroinitializer
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (%struct.tree_node* (i32, ...)* @build_stmt to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define %struct.tree_node* @build_stmt(i32 %code, ...) nounwind { ; copies %2 variadic pointer arguments into field 2 of the node returned by @make_node
+entry:
+ %p = alloca i8* ; <i8**> [#uses=3]
+ %p1 = bitcast i8** %p to i8* ; <i8*> [#uses=2]
+ call void @llvm.va_start(i8* %p1) ; initialise the va_list held in %p
+ %0 = call fastcc %struct.tree_node* @make_node(i32 %code) nounwind ; <%struct.tree_node*> [#uses=2]
+ %1 = getelementptr [256 x i32]* @tree_code_length, i32 0, i32 %code ; <i32*> [#uses=1]
+ %2 = load i32* %1, align 4 ; <i32> [#uses=2]
+ %3 = load i32* @lineno, align 4 ; <i32> [#uses=1]
+ %4 = bitcast %struct.tree_node* %0 to %struct.tree_exp* ; <%struct.tree_exp*> [#uses=2]
+ %5 = getelementptr %struct.tree_exp* %4, i32 0, i32 1 ; <i32*> [#uses=1]
+ store i32 %3, i32* %5, align 4 ; record the current @lineno value in field 1 of the tree_exp view of the node
+ %6 = icmp sgt i32 %2, 0 ; <i1> [#uses=1]
+ br i1 %6, label %bb, label %bb3 ; skip the copy loop when @tree_code_length[%code] is not positive
+
+bb: ; preds = %bb, %entry
+ %i.01 = phi i32 [ %indvar.next, %bb ], [ 0, %entry ] ; <i32> [#uses=2]
+ %7 = load i8** %p, align 4 ; <i8*> [#uses=2]
+ %8 = getelementptr i8* %7, i32 4 ; <i8*> [#uses=1]
+ store i8* %8, i8** %p, align 4 ; advance the va_list cursor by 4 bytes
+ %9 = bitcast i8* %7 to %struct.tree_node** ; <%struct.tree_node**> [#uses=1]
+ %10 = load %struct.tree_node** %9, align 4 ; <%struct.tree_node*> [#uses=1]
+ %11 = getelementptr %struct.tree_exp* %4, i32 0, i32 2, i32 %i.01 ; <%struct.tree_node**> [#uses=1]
+ store %struct.tree_node* %10, %struct.tree_node** %11, align 4 ; write the fetched argument into element %i.01 of field 2
+ %indvar.next = add i32 %i.01, 1 ; <i32> [#uses=2]
+ %exitcond = icmp eq i32 %indvar.next, %2 ; <i1> [#uses=1]
+ br i1 %exitcond, label %bb3, label %bb ; leave the loop once %indvar.next equals %2
+
+bb3: ; preds = %bb, %entry
+ call void @llvm.va_end(i8* %p1) ; finish with the va_list on both the loop and the skip path
+ ret %struct.tree_node* %0 ; return the node obtained from @make_node
+}
+
+declare void @llvm.va_start(i8*) nounwind
+
+declare void @llvm.va_end(i8*) nounwind
+
+declare fastcc %struct.tree_node* @make_node(i32) nounwind
diff --git a/test/CodeGen/X86/stride-nine-with-base-reg.ll b/test/CodeGen/X86/stride-nine-with-base-reg.ll
index 4bd9924..c0cfb85 100644
--- a/test/CodeGen/X86/stride-nine-with-base-reg.ll
+++ b/test/CodeGen/X86/stride-nine-with-base-reg.ll
@@ -1,14 +1,14 @@
-; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | grep lea | count 1
+; RUN: llvm-as < %s | llc -march=x86 -relocation-model=static | not grep lea
; RUN: llvm-as < %s | llc -march=x86-64 | not grep lea
-; For x86 there's an lea above the loop. In both cases, there shouldn't
-; be any lea instructions inside the loop.
+; _P should be sunk into the loop and folded into the address mode. There
+; shouldn't be any lea instructions inside the loop.
@B = external global [1000 x i8], align 32
@A = external global [1000 x i8], align 32
@P = external global [1000 x i8], align 32
-define void @foo(i32 %m, i32 %p) {
+define void @foo(i32 %m, i32 %p) nounwind {
entry:
%tmp1 = icmp sgt i32 %m, 0
br i1 %tmp1, label %bb, label %return