[PPC] Set SP after loading data from stack frame, if no red zone is present

Follow-up to r280705: Make sure that the SP is only restored after all data
is loaded from the stack frame, if there is no red zone.

This completes the fix for https://llvm.org/bugs/show_bug.cgi?id=26519.

Differential Revision: https://reviews.llvm.org/D24466

llvm-svn: 282174
diff --git a/llvm/test/CodeGen/PowerPC/ppc32-pic-large.ll b/llvm/test/CodeGen/PowerPC/ppc32-pic-large.ll
index dbfbc1c..1f24ea10 100644
--- a/llvm/test/CodeGen/PowerPC/ppc32-pic-large.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc32-pic-large.ll
@@ -25,7 +25,7 @@
 ; LARGE-BSS-DAG:     lwz [[VREG:[0-9]+]], [[VREF:\.LC[0-9]+]]-.LTOC(30)
 ; LARGE-BSS-DAG:     lwz {{[0-9]+}}, 0([[VREG]])
 ; LARGE-BSS-DAG:     stw {{[0-9]+}}, 8(1)
-; LARGE-BSS:         lwz 30, -8(1)
+; LARGE-BSS:         lwz 30, 24(1)
 ; LARGE-BSS:       [[VREF]]:
 ; LARGE-BSS-NEXT:     .p2align 2
 ; LARGE-BSS-NEXT:    .long bar
diff --git a/llvm/test/CodeGen/PowerPC/ppc32-pic.ll b/llvm/test/CodeGen/PowerPC/ppc32-pic.ll
index 5c33233..79c36b0 100644
--- a/llvm/test/CodeGen/PowerPC/ppc32-pic.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc32-pic.ll
@@ -21,4 +21,4 @@
 ; SMALL-BSS-DAG:     lwz [[VREG:[0-9]+]], bar@GOT(30)
 ; SMALL-BSS-DAG:     lwz {{[0-9]+}}, 0([[VREG]])
 ; SMALL-BSS:         bl call_foo@PLT
-; SMALL-BSS:         lwz 30, -8(1)
+; SMALL-BSS:         lwz 30, 24(1)
diff --git a/llvm/test/CodeGen/PowerPC/stack-no-redzone.ll b/llvm/test/CodeGen/PowerPC/stack-no-redzone.ll
new file mode 100644
index 0000000..66ef91b
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/stack-no-redzone.ll
@@ -0,0 +1,146 @@
+; Test that accesses of the stack remain within the range defined by R1,
+; i.e. that loads and stores only access the allocated stack. This does not
+; have to be the case when a red zone is present.
+
+; Make sure that there is no red zone, i.e. ppc32 and SVR4 ABI.
+; RUN: llc -mtriple=powerpc--freebsd-elf < %s | FileCheck %s
+
+; There are two ways that the stack pointer can be adjusted in the prologue:
+; - by adding an immediate value:
+;     stwu r1, -imm(r1)
+; - by adding another register:
+;     stwux r1, rx, r1
+;
+; The restoring of the stack pointer can be done:
+; - by adding an immediate value to it:
+;     addi r1, r1, imm
+; - by copying the value from another register:
+;     mr r1, rx
+
+
+; Nothing (no special features): a single call, no allocas. The directives
+; expect an immediate-form allocate (stwu) and an immediate-form restore (addi).
+; CHECK-LABEL: test_n:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwu 1, -[[SIZE:[0-9]+]](1)
+; CHECK: addi 1, 1, [[SIZE]]
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+define i32 @test_n() local_unnamed_addr #0 {
+entry:
+  %t0 = tail call i32 bitcast (i32 (...)* @bar0 to i32 ()*)() #0 ; call the varargs-declared bar0 with no arguments
+  ret i32 %t0 ; return bar0's result unchanged
+}
+
+; Aligned object on the stack: one i32 slot requested with 128-byte alignment.
+; The directives expect a register-form allocate (stwux) and a restore by mr.
+; CHECK-LABEL: test_a:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwux 1, 1, {{[0-9]+}}
+; CHECK: mr 1, {{[0-9]+}}
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+
+define i32 @test_a() local_unnamed_addr #0 {
+entry:
+  %t0 = alloca i32, align 128 ; over-aligned stack slot
+  %t1 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0 ; pass the slot's address to bar1
+  ret i32 %t1
+}
+
+; Dynamic allocation on the stack: the element count of the alloca is the
+; argument %p0. The directives expect stwu to allocate and mr to restore.
+; CHECK-LABEL: test_d:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwu 1, -[[SIZE:[0-9]+]](1)
+; CHECK: mr 1, {{[0-9]+}}
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+define i32 @test_d(i32 %p0) local_unnamed_addr #0 {
+  %t0 = alloca i32, i32 %p0, align 4 ; %p0 x i32, size known only at run time
+  %t1 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0 ; pass the buffer's address to bar1
+  ret i32 %t1
+}
+
+; Large stack (exceeds size of D-field): a 16384 x i32 (65536-byte) array.
+; CHECK-LABEL: test_s:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwux 1, 1, {{[0-9]+}}
+; CHECK: mr 1, {{[0-9]+}}
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+define i32 @test_s(i32 %p0) local_unnamed_addr #0 {
+entry:
+  %t0 = alloca [16384 x i32] ; large fixed-size array; %p0 is not used by this function
+  %t1 = getelementptr [16384 x i32], [16384 x i32]* %t0, i32 0, i32 0 ; address of element 0
+  %t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t1) #0 ; pass the array's first element to bar1
+  ret i32 %t2
+}
+
+; Combinations.
+
+; CHECK-LABEL: test_ad:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwux 1, 1, {{[0-9]+}}
+; CHECK: mr 1, {{[0-9]+}}
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+define i32 @test_ad(i32 %p0) local_unnamed_addr #0 {
+  %t0 = alloca i32, align 128 ; aligned object (as in test_a)
+  %t1 = alloca i32, i32 %p0, align 4 ; dynamic allocation (as in test_d)
+  %t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0 ; bar1 on the aligned slot
+  %t3 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t1) #0 ; bar1 on the dynamic buffer
+  %t4 = add i32 %t2, %t3 ; combine both results so both calls are used
+  ret i32 %t4
+}
+
+; CHECK-LABEL: test_as:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwux 1, 1, {{[0-9]+}}
+; CHECK: mr 1, {{[0-9]+}}
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+define i32 @test_as() local_unnamed_addr #0 {
+  %t0 = alloca i32, align 128 ; aligned object (as in test_a)
+  %t1 = alloca [16384 x i32] ; large array (as in test_s)
+  %t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0 ; bar1 on the aligned slot
+  %t3 = getelementptr [16384 x i32], [16384 x i32]* %t1, i32 0, i32 0 ; address of element 0 of the large array
+  %t4 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t3) #0 ; bar1 on the large array
+  %t5 = add i32 %t2, %t4 ; combine both results so both calls are used
+  ret i32 %t5
+}
+
+; CHECK-LABEL: test_ds:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwux 1, 1, {{[0-9]+}}
+; CHECK: mr 1, {{[0-9]+}}
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+define i32 @test_ds(i32 %p0) local_unnamed_addr #0 {
+  %t0 = alloca i32, i32 %p0, align 4 ; dynamic allocation (as in test_d)
+  %t1 = alloca [16384 x i32] ; large array (as in test_s)
+  %t2 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0 ; bar1 on the dynamic buffer
+  %t3 = getelementptr [16384 x i32], [16384 x i32]* %t1, i32 0, i32 0 ; address of element 0 of the large array
+  %t4 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t3) #0 ; bar1 on the large array
+  %t5 = add i32 %t2, %t4 ; combine both results so both calls are used
+  ret i32 %t5
+}
+
+; CHECK-LABEL: test_ads:
+; CHECK-NOT: stw {{[0-9]+}}, -{{[0-9]+}}(1)
+; CHECK: stwux 1, 1, {{[0-9]+}}
+; CHECK: mr 1, {{[0-9]+}}
+; CHECK-NOT: lwz {{[0-9]+}}, -{{[0-9]+}}(1)
+define i32 @test_ads(i32 %p0) local_unnamed_addr #0 {
+  %t0 = alloca i32, align 128 ; aligned object (as in test_a)
+  %t1 = alloca i32, i32 %p0, align 4 ; dynamic allocation (as in test_d)
+  %t2 = alloca [16384 x i32] ; large array (as in test_s)
+
+  %t3 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t0) #0 ; bar1 on the aligned slot
+  %t4 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t1) #0 ; bar1 on the dynamic buffer
+  %t5 = add i32 %t3, %t4 ; partial sum of the first two results
+
+  %t6 = getelementptr [16384 x i32], [16384 x i32]* %t2, i32 0, i32 0 ; address of element 0 of the large array
+  %t7 = tail call i32 bitcast (i32 (...)* @bar1 to i32 (i32*)*)(i32* %t6) #0 ; bar1 on the large array
+  %t8 = add i32 %t5, %t7 ; total of all three results
+  ret i32 %t8 ; return the total (previously returned %t7, leaving both adds dead)
+}
+
+
+declare i32 @bar0(...) local_unnamed_addr #0
+declare i32 @bar1(...) local_unnamed_addr #0
+
+attributes #0 = { nounwind }
diff --git a/llvm/test/CodeGen/PowerPC/stack-realign.ll b/llvm/test/CodeGen/PowerPC/stack-realign.ll
index 00cd61d..d92f93b 100644
--- a/llvm/test/CodeGen/PowerPC/stack-realign.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-realign.ll
@@ -83,18 +83,26 @@
 ; CHECK-32-DAG: subfic 0, [[REG]], -64
 ; CHECK-32: stwux 1, 1, 0
 ; CHECK-32: subf 0, 0, 1
-; CHECK-32: stw 30, -8(0)
-; CHECK-32: mr 30, 0
+; CHECK-32: addic 0, 0, -4
+; CHECK-32: stwx 31, 0, 0
+; CHECK-32: addic 0, 0, -4
+; CHECK-32: stwx 30, 0, 0
+; CHECK-32: addic 30, 0, 8
 
 ; CHECK-32-PIC-LABEL: @goo
 ; CHECK-32-PIC-DAG: mflr [[LR:[0-9]+]]
 ; CHECK-32-PIC-DAG: clrlwi [[REG:[0-9]+]], 1, 27
 ; CHECK-32-PIC-DAG: stw [[LR]], 4(1)
 ; CHECK-32-PIC-DAG: subfic 0, [[REG]], -64
-; CHECK-32-PIC: stwux 1, 1, 0
-; CHECK-32-PIC: subf 0, 0, 1
-; CHECK-32-PIC: stw 29, -12(0)
-; CHECK-32-PIC-DAG: mr 29, 0
+; CHECK-32-PIC:     stwux 1, 1, 0
+; CHECK-32-PIC:     subf 0, 0, 1
+; CHECK-32-PIC:     addic 0, 0, -4
+; CHECK-32-PIC:     stwx 31, 0, 0
+; CHECK-32-PIC:     addic 0, 0, -4
+; CHECK-32-PIC:     stwx 30, 0, 0
+; CHECK-32-PIC:     addic 0, 0, -4
+; CHECK-32-PIC:     stwx 29, 0, 0
+; CHECK-32-PIC:     addic 29, 0, 12
 
 ; The large-frame-size case.
 define void @hoo(%struct.s* byval nocapture readonly %a) {
@@ -138,9 +146,11 @@
 ; CHECK-32-DAG: subfc 0, [[REG3]], [[REG2]]
 ; CHECK-32:     stwux 1, 1, 0
 ; CHECK-32:     subf 0, 0, 1
-; CHECK-32-DAG: stw 31, -4(0)
-; CHECK-32-DAG: stw 30, -8(0)
-; CHECK-32: mr 30, 0
+; CHECK-32:     addic 0, 0, -4
+; CHECK-32:     stwx 31, 0, 0
+; CHECK-32:     addic 0, 0, -4
+; CHECK-32:     stwx 30, 0, 0
+; CHECK-32:     addic 30, 0, 8
 
 ; CHECK-32: blr
 
@@ -152,10 +162,13 @@
 ; CHECK-32-PIC-DAG: ori [[REG2:[0-9]+]], [[REG1]], 51904
 ; CHECK-32-PIC-DAG: stw 0, 4(1)
 ; CHECK-32-PIC-DAG: subfc 0, [[REG3]], [[REG2]]
-; CHECK-32-PIC: stwux 1, 1, 0
-; CHECK-32-PIC: stw 29, -12(0)
-; CHECK-32-PIC: subf 0, 0, 1
-; CHECK-32-PIC: mr 29, 0
+; CHECK-32-PIC:     stwux 1, 1, 0
+; CHECK-32-PIC:     subf 0, 0, 1
+; CHECK-32-PIC:     addic 0, 0, -4
+; CHECK-32-PIC:     stwx 31, 0, 0
+; CHECK-32-PIC:     addic 0, 0, -8
+; CHECK-32-PIC:     stwx 29, 0, 0
+; CHECK-32-PIC:     addic 29, 0, 12
 
 ; CHECK-32: blr