[SystemZ] Fold more spills
Add a mapping from register-based <INSN>R instructions to the corresponding
memory-based <INSN>. Use it to cut down on the number of spill loads.
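As a rough illustration, the kind of table involved could look like the
hypothetical C++ sketch below. The mnemonic pairs (AR/A, AGR/AG, SR/S,
MSR/MS) are real SystemZ instructions; the enum and function names are
made up for the sketch and are not the actual implementation, which
derives the mapping from the instruction definitions.

    #include <cstdio>

    enum Opcode { AR, AGR, SR, MSR, A, AG, S, MS, NoMemOpcode };

    // Map a register-based <INSN>R opcode to its memory-based <INSN>
    // form, or NoMemOpcode if no reg-mem equivalent exists (e.g. for
    // compares that are always fused with a branch).
    static Opcode getMemOpcode(Opcode RegOp) {
      switch (RegOp) {
      case AR:  return A;   // add (32-bit)
      case AGR: return AG;  // add (64-bit)
      case SR:  return S;   // subtract (32-bit)
      case MSR: return MS;  // multiply single (32-bit)
      default:  return NoMemOpcode;
      }
    }

    int main() {
      // If one operand of AR is a reload from a spill slot, the fold
      // rewrites  L %r0, <slot>; AR %r2, %r0  into  A %r2, <slot>.
      std::printf("AR -> %s\n", getMemOpcode(AR) == A ? "A" : "none");
    }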
Some instructions extend their operands from smaller fields, so this
required a new TSFlags field to say how big the unextended operand is.
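A hypothetical sketch of that flags field follows; the bit position,
field width and helper names are invented here and are not the patch's
actual layout.

    #include <cassert>
    #include <cstdint>

    // Suppose a few bits of each instruction's TSFlags word record the
    // size in bytes of the memory operand before any extension.
    constexpr unsigned AccessSizeShift = 0;
    constexpr uint64_t AccessSizeMask = 0x1f;

    constexpr uint64_t encodeAccessBytes(unsigned Bytes) {
      return uint64_t(Bytes) << AccessSizeShift;
    }

    constexpr unsigned decodeAccessBytes(uint64_t TSFlags) {
      return unsigned((TSFlags >> AccessSizeShift) & AccessSizeMask);
    }

    int main() {
      // AGF sign-extends a 4-byte memory operand to 64 bits, so a
      // spill folder may only substitute it for a 4-byte slot, even
      // though the result register is 8 bytes wide.
      assert(decodeAccessBytes(encodeAccessBytes(4)) == 4);
      return 0;
    }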
This optimisation doesn't trigger for C(G)R and CL(G)R because in practice
we always combine those instructions with a branch. Adding a test for every
other case may seem excessive, but it did catch a missed optimisation
for DSGF (fixed in r185435).
llvm-svn: 185529
diff --git a/llvm/test/CodeGen/SystemZ/int-add-02.ll b/llvm/test/CodeGen/SystemZ/int-add-02.ll
index 568ad1c..bc434a6 100644
--- a/llvm/test/CodeGen/SystemZ/int-add-02.ll
+++ b/llvm/test/CodeGen/SystemZ/int-add-02.ll
@@ -2,6 +2,8 @@
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+declare i32 @foo()
+
; Check AR.
define i32 @f1(i32 %a, i32 %b) {
; CHECK: f1:
@@ -127,3 +129,46 @@
%add = add i32 %a, %b
ret i32 %add
}
+
+; Check that additions of spilled values can use A rather than AR.
+define i32 @f12(i32 *%ptr0) {
+; CHECK: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: a %r2, 16{{[04]}}(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+ %val0 = load i32 *%ptr0
+ %val1 = load i32 *%ptr1
+ %val2 = load i32 *%ptr2
+ %val3 = load i32 *%ptr3
+ %val4 = load i32 *%ptr4
+ %val5 = load i32 *%ptr5
+ %val6 = load i32 *%ptr6
+ %val7 = load i32 *%ptr7
+ %val8 = load i32 *%ptr8
+ %val9 = load i32 *%ptr9
+
+ %ret = call i32 @foo()
+
+ %add0 = add i32 %ret, %val0
+ %add1 = add i32 %add0, %val1
+ %add2 = add i32 %add1, %val2
+ %add3 = add i32 %add2, %val3
+ %add4 = add i32 %add3, %val4
+ %add5 = add i32 %add4, %val5
+ %add6 = add i32 %add5, %val6
+ %add7 = add i32 %add6, %val7
+ %add8 = add i32 %add7, %val8
+ %add9 = add i32 %add8, %val9
+
+ ret i32 %add9
+}