[SystemZ] Fold more spills
Add a mapping from register-based <INSN>R instructions to the corresponding
memory-based <INSN>. Use it to cut down on the number of spill loads.
Some instructions extend their operands from smaller fields, so this
required a new TSFlags field to say how big the unextended operand is.
This optimisation doesn't trigger for C(G)R and CL(G)R because in practice
we always combine those instructions with a branch. Adding a test for every
other case may seem excessive, but it did catch a missed optimisation
for DSGF (fixed in r185435).
llvm-svn: 185529
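
For illustration, here is a minimal sketch of how a spill-folding hook can use
the new mapping and operand-size information. The helpers getMemOpcode() and
getAccessBytes() are hypothetical stand-ins for the generated <INSN>R-to-<INSN>
mapping and the new TSFlags field described above; this is a sketch of the
idea, not the exact code added by this commit.

  // Sketch only: getMemOpcode() and getAccessBytes() are illustrative
  // names, not the exact upstream interfaces.
  MachineInstr *
  SystemZInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                          MachineInstr *MI,
                                          const SmallVectorImpl<unsigned> &Ops,
                                          int FrameIndex) const {
    const MachineFrameInfo *MFI = MF.getFrameInfo();
    unsigned Size = MFI->getObjectSize(FrameIndex);

    // Only handle the case where the spilled value is the second (use)
    // operand of a two-address instruction, e.g. NR %r2, <reload>.
    if (Ops.size() != 1 || Ops[0] != 1)
      return 0;

    // Look up the memory form, e.g. NR -> N, AGFR -> AGF.
    int MemOpcode = getMemOpcode(MI->getOpcode());
    if (MemOpcode == -1)
      return 0;

    // Instructions such as AGF extend a narrower memory operand, so only
    // fold when the spill slot is exactly as wide as the unextended operand.
    if (Size != getAccessBytes(MemOpcode))
      return 0;

    // Replace "<INSN>R %reg, %spilled" with "<INSN> %reg, disp(frame-slot)".
    return BuildMI(MF, MI->getDebugLoc(), get(MemOpcode))
      .addOperand(MI->getOperand(0))
      .addFrameIndex(FrameIndex).addImm(0).addReg(0);
  }

The operand-size check is the important part: without the new TSFlags
information there is no way to tell from the opcode alone whether the memory
form reads the full register width or a narrower field that it then extends.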
diff --git a/llvm/test/CodeGen/SystemZ/and-01.ll b/llvm/test/CodeGen/SystemZ/and-01.ll
index 8dd106b..0da13f9 100644
--- a/llvm/test/CodeGen/SystemZ/and-01.ll
+++ b/llvm/test/CodeGen/SystemZ/and-01.ll
@@ -2,6 +2,8 @@
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

+declare i32 @foo()
+
; Check NR.
define i32 @f1(i32 %a, i32 %b) {
; CHECK: f1:
@@ -127,3 +129,46 @@
%and = and i32 %a, %b
ret i32 %and
}
+
+; Check that ANDs of spilled values can use N rather than NR.
+define i32 @f12(i32 *%ptr0) {
+; CHECK: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: n %r2, 16{{[04]}}(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr i32 *%ptr0, i64 2
+ %ptr2 = getelementptr i32 *%ptr0, i64 4
+ %ptr3 = getelementptr i32 *%ptr0, i64 6
+ %ptr4 = getelementptr i32 *%ptr0, i64 8
+ %ptr5 = getelementptr i32 *%ptr0, i64 10
+ %ptr6 = getelementptr i32 *%ptr0, i64 12
+ %ptr7 = getelementptr i32 *%ptr0, i64 14
+ %ptr8 = getelementptr i32 *%ptr0, i64 16
+ %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+ %val0 = load i32 *%ptr0
+ %val1 = load i32 *%ptr1
+ %val2 = load i32 *%ptr2
+ %val3 = load i32 *%ptr3
+ %val4 = load i32 *%ptr4
+ %val5 = load i32 *%ptr5
+ %val6 = load i32 *%ptr6
+ %val7 = load i32 *%ptr7
+ %val8 = load i32 *%ptr8
+ %val9 = load i32 *%ptr9
+
+ %ret = call i32 @foo()
+
+ %and0 = and i32 %ret, %val0
+ %and1 = and i32 %and0, %val1
+ %and2 = and i32 %and1, %val2
+ %and3 = and i32 %and2, %val3
+ %and4 = and i32 %and3, %val4
+ %and5 = and i32 %and4, %val5
+ %and6 = and i32 %and5, %val6
+ %and7 = and i32 %and6, %val7
+ %and8 = and i32 %and7, %val8
+ %and9 = and i32 %and8, %val9
+
+ ret i32 %and9
+}