[SystemZ] Fold more spills

Add a mapping from register-based <INSN>R instructions to the corresponding
memory-based <INSN>.  Use it to cut down on the number of spill loads.

Some instructions extend their operands from smaller fields, so this
required a new TSFlags field to say how big the unextended operand is.

This optimisation doesn't trigger for C(G)R and CL(G)R because in practice
we always combine those instructions with a branch.  Adding a test for every
other case probably seems excessive, but it did catch a missed optimisation
for DSGF (fixed in r185435).

llvm-svn: 185529
diff --git a/llvm/test/CodeGen/SystemZ/and-01.ll b/llvm/test/CodeGen/SystemZ/and-01.ll
index 8dd106b..0da13f9 100644
--- a/llvm/test/CodeGen/SystemZ/and-01.ll
+++ b/llvm/test/CodeGen/SystemZ/and-01.ll
@@ -2,6 +2,8 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
 
+declare i32 @foo()
+
 ; Check NR.
 define i32 @f1(i32 %a, i32 %b) {
 ; CHECK: f1:
@@ -127,3 +129,46 @@
   %and = and i32 %a, %b
   ret i32 %and
 }
+
+; Check that ANDs of spilled values can use N rather than NR.
+define i32 @f12(i32 *%ptr0) {
+; CHECK: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: n %r2, 16{{[04]}}(%r15)
+; CHECK: br %r14
+  %ptr1 = getelementptr i32 *%ptr0, i64 2
+  %ptr2 = getelementptr i32 *%ptr0, i64 4
+  %ptr3 = getelementptr i32 *%ptr0, i64 6
+  %ptr4 = getelementptr i32 *%ptr0, i64 8
+  %ptr5 = getelementptr i32 *%ptr0, i64 10
+  %ptr6 = getelementptr i32 *%ptr0, i64 12
+  %ptr7 = getelementptr i32 *%ptr0, i64 14
+  %ptr8 = getelementptr i32 *%ptr0, i64 16
+  %ptr9 = getelementptr i32 *%ptr0, i64 18
+
+  %val0 = load i32 *%ptr0
+  %val1 = load i32 *%ptr1
+  %val2 = load i32 *%ptr2
+  %val3 = load i32 *%ptr3
+  %val4 = load i32 *%ptr4
+  %val5 = load i32 *%ptr5
+  %val6 = load i32 *%ptr6
+  %val7 = load i32 *%ptr7
+  %val8 = load i32 *%ptr8
+  %val9 = load i32 *%ptr9
+
+  %ret = call i32 @foo()
+
+  %and0 = and i32 %ret, %val0
+  %and1 = and i32 %and0, %val1
+  %and2 = and i32 %and1, %val2
+  %and3 = and i32 %and2, %val3
+  %and4 = and i32 %and3, %val4
+  %and5 = and i32 %and4, %val5
+  %and6 = and i32 %and5, %val6
+  %and7 = and i32 %and6, %val7
+  %and8 = and i32 %and7, %val8
+  %and9 = and i32 %and8, %val9
+
+  ret i32 %and9
+}