[SystemZ] Handle SADDO et al. and ADD/SUBCARRY

This provides an optimized implementation of SADDO/SSUBO/UADDO/USUBO
as well as ADDCARRY/SUBCARRY on top of the new CC implementation.

In particular, multi-word arithmetic now uses UADDO/ADDCARRY instead
of the old ADDC/ADDE logic, which means we no longer need to use
"glue" links for those instructions.  This also allows making full
use of the memory-based instructions like ALSI, which couldn't be
recognized due to limitations in the DAG matcher previously.

Also, the llvm.sadd.with.overflow et al. intrinsics now expand to
directly using the ADD instructions and checking for a CC 3 result.

llvm-svn: 331203
diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-08.ll b/llvm/test/CodeGen/SystemZ/int-uadd-08.ll
new file mode 100644
index 0000000..5a069db
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/int-uadd-08.ll
@@ -0,0 +1,142 @@
+; Test 32-bit unsigned addition with overflow in which the second operand is
+; constant and in which three-operand forms are available.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+
+declare i32 @foo()
+
+; Check addition of 1, which should use ALHSIK; the overflow bit ends up in %r2.
+define zeroext i1 @f1(i32 %dummy, i32 %a, i32 *%res) {
+; CHECK-LABEL: f1:
+; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 1
+; CHECK-DAG: st [[REG1]], 0(%r4)
+; CHECK-DAG: ipm [[REG2:%r[0-5]]]
+; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35
+; CHECK: br %r14
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32 *%res
+  ret i1 %obit
+}
+
+; Check the high end of the ALHSIK range (32767).
+define zeroext i1 @f2(i32 %dummy, i32 %a, i32 *%res) {
+; CHECK-LABEL: f2:
+; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 32767
+; CHECK-DAG: st [[REG1]], 0(%r4)
+; CHECK-DAG: ipm [[REG2:%r[0-5]]]
+; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35
+; CHECK: br %r14
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 32767)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32 *%res
+  ret i1 %obit
+}
+
+; Check the next value up (32768), which must use ALFI on %r3 in place instead.
+define zeroext i1 @f3(i32 %dummy, i32 %a, i32 *%res) {
+; CHECK-LABEL: f3:
+; CHECK: alfi %r3, 32768
+; CHECK-DAG: st %r3, 0(%r4)
+; CHECK-DAG: ipm [[REG2:%r[0-5]]]
+; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35
+; CHECK: br %r14
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 32768)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32 *%res
+  ret i1 %obit
+}
+
+; Check the high end of the negative ALHSIK range (-1).
+define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%res) {
+; CHECK-LABEL: f4:
+; CHECK: alhsik [[REG1:%r[0-5]]], %r3, -1
+; CHECK-DAG: st [[REG1]], 0(%r4)
+; CHECK-DAG: ipm [[REG2:%r[0-5]]]
+; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35
+; CHECK: br %r14
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 -1)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32 *%res
+  ret i1 %obit
+}
+
+; Check the low end of the ALHSIK range (-32768).
+define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%res) {
+; CHECK-LABEL: f5:
+; CHECK: alhsik [[REG1:%r[0-5]]], %r3, -32768
+; CHECK-DAG: st [[REG1]], 0(%r4)
+; CHECK-DAG: ipm [[REG2:%r[0-5]]]
+; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35
+; CHECK: br %r14
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 -32768)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32 *%res
+  ret i1 %obit
+}
+
+; Check the next value down (-32769), which must use ALFI with 2^32 - 32769.
+define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%res) {
+; CHECK-LABEL: f6:
+; CHECK: alfi %r3, 4294934527
+; CHECK-DAG: st %r3, 0(%r4)
+; CHECK-DAG: ipm [[REG2:%r[0-5]]]
+; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35
+; CHECK: br %r14
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 -32769)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32 *%res
+  ret i1 %obit
+}
+
+; Check using the overflow result for a branch: foo is called only on overflow.
+define void @f7(i32 %dummy, i32 %a, i32 *%res) {
+; CHECK-LABEL: f7:
+; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 1
+; CHECK-DAG: st [[REG1]], 0(%r4)
+; CHECK: bler %r14
+; CHECK: jg foo@PLT
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32 *%res
+  br i1 %obit, label %call, label %exit
+
+call:
+  tail call i32 @foo()
+  br label %exit
+
+exit:
+  ret void
+}
+
+; ... and the same with the inverted direction: foo is called when no overflow.
+define void @f8(i32 %dummy, i32 %a, i32 *%res) {
+; CHECK-LABEL: f8:
+; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 1
+; CHECK-DAG: st [[REG1]], 0(%r4)
+; CHECK: bnler %r14
+; CHECK: jg foo@PLT
+  %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1)
+  %val = extractvalue {i32, i1} %t, 0
+  %obit = extractvalue {i32, i1} %t, 1
+  store i32 %val, i32 *%res
+  br i1 %obit, label %exit, label %call
+
+call:
+  tail call i32 @foo()
+  br label %exit
+
+exit:
+  ret void
+}
+
+
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+