[SystemZ] Fold more spills
Add a mapping from register-based <INSN>R instructions to the corresponding
memory-based <INSN>. Use it to cut down on the number of spill loads.
Some instructions extend their operands from smaller fields, so this
required a new TSFlags field to say how big the unextended operand is.
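
In outline, the fold hook looks the register opcode up in a table and
refuses the fold when the spill slot is wider than the field the memory
form actually reads. The C++ below is a minimal self-contained sketch of
that shape, using stand-in names (MemFoldEntry, getFoldedOpcode) rather
than the code from this patch; in the backend the access size comes from
the new TSFlags field rather than a table column.

// Sketch only: map each register-based opcode to its memory-based twin
// and record how many bytes the memory form reads, so that instructions
// like DSGF (which reads a sign-extended 32-bit field) never have a
// wider spill slot folded into them.
enum Opcode : unsigned { MEEBR, MEEB, AGR, AG, DSGFR, DSGF, INVALID };

struct MemFoldEntry {
  Opcode RegOp;         // <INSN>R, register form
  Opcode MemOp;         // <INSN>, memory form
  unsigned AccessBytes; // bytes the memory form actually reads
};

static const MemFoldEntry FoldTable[] = {
    {MEEBR, MEEB, 4}, // 32-bit FP multiply
    {AGR, AG, 8},     // 64-bit add
    {DSGFR, DSGF, 4}, // divide; memory operand is a sign-extended i32
};

// Return the memory opcode if folding a reload from a spill slot of
// SpillBytes is safe, otherwise INVALID.
static Opcode getFoldedOpcode(Opcode RegOp, unsigned SpillBytes) {
  for (const MemFoldEntry &E : FoldTable)
    if (E.RegOp == RegOp && E.AccessBytes == SpillBytes)
      return E.MemOp;
  return INVALID;
}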
This optimisation doesn't trigger for C(G)R and CL(G)R because in practice
we always combine those instructions with a branch. Adding a test for every
other case may seem excessive, but it did catch a missed optimisation
for DSGF (fixed in r185435).
llvm-svn: 185529
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-01.ll b/llvm/test/CodeGen/SystemZ/fp-mul-01.ll
index 68c78ee..3e6428a 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-01.ll
@@ -2,6 +2,8 @@
;
; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s

+declare float @foo()
+
; Check register multiplication.
define float @f1(float %f1, float %f2) {
; CHECK: f1:
@@ -69,3 +71,49 @@
%res = fmul float %f1, %f2
ret float %res
}
+
+; Check that multiplications of spilled values can use MEEB rather than MEEBR.
+define float @f7(float *%ptr0) {
+; CHECK: f7:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: meeb %f0, 16{{[04]}}(%r15)
+; CHECK: br %r14
+ %ptr1 = getelementptr float *%ptr0, i64 2
+ %ptr2 = getelementptr float *%ptr0, i64 4
+ %ptr3 = getelementptr float *%ptr0, i64 6
+ %ptr4 = getelementptr float *%ptr0, i64 8
+ %ptr5 = getelementptr float *%ptr0, i64 10
+ %ptr6 = getelementptr float *%ptr0, i64 12
+ %ptr7 = getelementptr float *%ptr0, i64 14
+ %ptr8 = getelementptr float *%ptr0, i64 16
+ %ptr9 = getelementptr float *%ptr0, i64 18
+ %ptr10 = getelementptr float *%ptr0, i64 20
+
+ %val0 = load float *%ptr0
+ %val1 = load float *%ptr1
+ %val2 = load float *%ptr2
+ %val3 = load float *%ptr3
+ %val4 = load float *%ptr4
+ %val5 = load float *%ptr5
+ %val6 = load float *%ptr6
+ %val7 = load float *%ptr7
+ %val8 = load float *%ptr8
+ %val9 = load float *%ptr9
+ %val10 = load float *%ptr10
+
+ %ret = call float @foo()
+
+ %mul0 = fmul float %ret, %val0
+ %mul1 = fmul float %mul0, %val1
+ %mul2 = fmul float %mul1, %val2
+ %mul3 = fmul float %mul2, %val3
+ %mul4 = fmul float %mul3, %val4
+ %mul5 = fmul float %mul4, %val5
+ %mul6 = fmul float %mul5, %val6
+ %mul7 = fmul float %mul6, %val7
+ %mul8 = fmul float %mul7, %val8
+ %mul9 = fmul float %mul8, %val9
+ %mul10 = fmul float %mul9, %val10
+
+ ret float %mul10
+}