[SystemZ] computeKnownBitsForTargetNode() / ComputeNumSignBitsForTargetNode()
Improve/implement these methods to help DAG combining. This mainly
concerns intrinsics.
Some constant operands of SystemZISD nodes have been marked Opaque to keep
DAGCombiner from transforming them back and forth between generic and target
nodes indefinitely.
Review: Ulrich Weigand
llvm-svn: 327765
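
Since the diff below contains only the tests, here is a minimal sketch of
the shape of the first hook. It is illustrative only, not the contents of
this patch: the SystemZISD::UNPACKL_* opcode names are assumptions, and the
patch itself mainly covers the llvm.s390.* intrinsics exercised by the
tests. A matching sketch of ComputeNumSignBitsForTargetNode() appears
before the sign-bit tests further down.

  // Illustrative sketch (not the actual patch). For vector nodes, the
  // KnownBits object describes a single element, so its bit width equals
  // the result element size.
  void SystemZTargetLowering::computeKnownBitsForTargetNode(
      const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
      const SelectionDAG &DAG, unsigned Depth) const {
    Known.resetAll();
    switch (Op.getOpcode()) {
    case SystemZISD::UNPACKL_HIGH:
    case SystemZISD::UNPACKL_LOW:
      // A logical unpack zero-extends each source element into an element
      // of twice the width, so the top half of every result element is
      // known to be zero.
      Known.Zero.setHighBits(Known.getBitWidth() / 2);
      break;
    default:
      break;
    }
  }

With such facts available, DAGCombiner can simplify an AND whose mask only
keeps bits the hook has proven zero, which is what the vgbm-of-0 checks in
the tests below rely on.
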
diff --git a/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll b/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll
new file mode 100644
index 0000000..3bcbbb4
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll
@@ -0,0 +1,460 @@
+; Test that computeKnownBitsForTargetNode() helps DAGCombiner with
+; vector intrinsics.
+;
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
+
+declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>)
+declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>)
+declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>)
+
+; PACKS_CC (operand elements are 0): i64 -> i32
+define <4 x i32> @f0() {
+; CHECK-LABEL: f0:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
+ %extr = extractvalue {<4 x i32>, i32} %call, 0
+ %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+; PACKS_CC (operand elements are 1): i64 -> i32
+; NOTE: The vector AND is optimized away, but vrepig+vpksgs is still used
+; instead of a single vrepif. The same applies to more test cases below.
+define <4 x i32> @f1() {
+; CHECK-LABEL: f1:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepig %v0, 1
+; CHECK-NEXT: vpksgs %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
+ %extr = extractvalue {<4 x i32>, i32} %call, 0
+ %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+; PACKS_CC (operand elements are 0): i32 -> i16
+define <8 x i16> @f2() {
+; CHECK-LABEL: f2:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
+ <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ %extr = extractvalue {<8 x i16>, i32} %call, 0
+ %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+; PACKS_CC (operand elements are 1): i32 -> i16
+define <8 x i16> @f3() {
+; CHECK-LABEL: f3:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepif %v0, 1
+; CHECK-NEXT: vpksfs %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
+ <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+ %extr = extractvalue {<8 x i16>, i32} %call, 0
+ %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+; PACKS_CC (operand elements are 0): i16 -> i8
+define <16 x i8> @f4() {
+; CHECK-LABEL: f4:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(
+ <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
+ <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+ %extr = extractvalue {<16 x i8>, i32} %call, 0
+ %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %and
+}
+
+; PACKS_CC (operand elements are 1): i16 -> i8
+define <16 x i8> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepih %v0, 1
+; CHECK-NEXT: vpkshs %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(
+ <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
+ <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ %extr = extractvalue {<16 x i8>, i32} %call, 0
+ %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %and
+}
+
+declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>)
+declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>)
+declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>)
+
+; PACKLS_CC (operand elements are 0): i64 -> i32
+define <4 x i32> @f6() {
+; CHECK-LABEL: f6:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
+ %extr = extractvalue {<4 x i32>, i32} %call, 0
+ %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+; PACKLS_CC (operand elements are 1): i64 -> i32
+define <4 x i32> @f7() {
+; CHECK-LABEL: f7:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepig %v0, 1
+; CHECK-NEXT: vpklsgs %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
+ %extr = extractvalue {<4 x i32>, i32} %call, 0
+ %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+; PACKLS_CC (operand elements are 0): i32 -> i16
+define <8 x i16> @f8() {
+; CHECK-LABEL: f8:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
+ <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ %extr = extractvalue {<8 x i16>, i32} %call, 0
+ %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+; PACKLS_CC (operand elements are 1): i32 -> i16
+define <8 x i16> @f9() {
+; CHECK-LABEL: f9:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepif %v0, 1
+; CHECK-NEXT: vpklsfs %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
+ <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+ %extr = extractvalue {<8 x i16>, i32} %call, 0
+ %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+; PACKLS_CC (operand elements are 0): i16 -> i8
+define <16 x i8> @f10() {
+; CHECK-LABEL: f10:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(
+ <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
+ <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+ %extr = extractvalue {<16 x i8>, i32} %call, 0
+ %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %and
+}
+
+; PACKLS_CC (operand elements are 1): i16 -> i8
+define <16 x i8> @f11() {
+; CHECK-LABEL: f11:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepih %v0, 1
+; CHECK-NEXT: vpklshs %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(
+ <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
+ <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ %extr = extractvalue {<16 x i8>, i32} %call, 0
+ %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %and
+}
+
+declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>)
+
+; PACKS (operand elements are 0): i64 -> i32
+define <4 x i32> @f12() {
+; CHECK-LABEL: f12:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
+ %and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+; PACKS (operand elements are 1): i64 -> i32
+define <4 x i32> @f13() {
+; CHECK-LABEL: f13:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepig %v0, 1
+; CHECK-NEXT: vpksg %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
+ %and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+; PACKS (operand elements are 0): i32 -> i16
+define <8 x i16> @f14() {
+; CHECK-LABEL: f14:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
+ <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+; PACKS (operand elements are 1): i32 -> i16
+define <8 x i16> @f15() {
+; CHECK-LABEL: f15:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepif %v0, 1
+; CHECK-NEXT: vpksf %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
+ <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+ %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+; PACKS (operand elements are 0): i16 -> i8
+define <16 x i8> @f16() {
+; CHECK-LABEL: f16:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %call = call <16 x i8> @llvm.s390.vpksh(
+ <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
+ <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+ %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %and
+}
+
+; PACKS (operand elements are 1): i16 -> i8
+define <16 x i8> @f17() {
+; CHECK-LABEL: f17:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepih %v0, 1
+; CHECK-NEXT: vpksh %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call <16 x i8> @llvm.s390.vpksh(
+ <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
+ <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %and
+}
+
+declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>)
+
+; PACKLS (operand elements are 0): i64 -> i32
+define <4 x i32> @f18() {
+; CHECK-LABEL: f18:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
+ %and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+; PACKLS (operand elements are 1): i64 -> i32
+define <4 x i32> @f19() {
+; CHECK-LABEL: f19:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepig %v0, 1
+; CHECK-NEXT: vpklsg %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
+ %and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+; PACKLS (operand elements are 0): i32 -> i16
+define <8 x i16> @f20() {
+; CHECK-LABEL: f20:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
+ <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+ %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+; PACKLS (operand elements are 1): i32 -> i16
+define <8 x i16> @f21() {
+; CHECK-LABEL: f21:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepif %v0, 1
+; CHECK-NEXT: vpklsf %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
+ <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+ %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+; PACKLS (operand elements are 0): i16 -> i8
+define <16 x i8> @f22() {
+; CHECK-LABEL: f22:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %call = call <16 x i8> @llvm.s390.vpklsh(
+ <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
+ <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+ %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %and
+}
+
+; PACKLS (operand elements are 1): i16 -> i8
+define <16 x i8> @f23() {
+; CHECK-LABEL: f23:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepih %v0, 1
+; CHECK-NEXT: vpklsh %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call <16 x i8> @llvm.s390.vpklsh(
+ <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
+ <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+ %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %and
+}
+
+declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32)
+
+; VPDI (operand elements are 0):
+define <2 x i64> @f24() {
+; CHECK-LABEL: f24:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 0, i64 0>,
+ <2 x i64> <i64 0, i64 0>, i32 0)
+ %res = and <2 x i64> %perm, <i64 1, i64 1>
+ ret <2 x i64> %res
+}
+
+; VPDI (operand elements are 1):
+define <2 x i64> @f25() {
+; CHECK-LABEL: f25:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepig %v0, 1
+; CHECK-NEXT: vpdi %v24, %v0, %v0, 0
+; CHECK-NEXT: br %r14
+ %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 1, i64 1>,
+ <2 x i64> <i64 1, i64 1>, i32 0)
+ %res = and <2 x i64> %perm, <i64 1, i64 1>
+ ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32)
+
+; VSLDB (operand elements are 0):
+define <16 x i8> @f26() {
+; CHECK-LABEL: f26:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8>
+ <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8>
+ <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
+ i32 1)
+
+ %res = and <16 x i8> %shfd, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %res
+}
+
+; VSLDB (operand elements are 1):
+define <16 x i8> @f27() {
+; CHECK-LABEL: f27:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepib %v0, 1
+; CHECK-NEXT: vsldb %v24, %v0, %v0, 1
+; CHECK-NEXT: br %r14
+ %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8>
+ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8>
+ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>,
+ i32 1)
+
+ %res = and <16 x i8> %shfd, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %res
+}
+
+; Test that intrinsic CC result is recognized.
+define i32 @f28(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: f28:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b)
+ %cc = extractvalue {<8 x i16>, i32} %call, 1
+ %res = and i32 %cc, -4
+ ret i32 %res
+}
+
+declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>)
+
+; Test VPERM (operand elements are 0):
+define <16 x i8> @f29() {
+; CHECK-LABEL: f29:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %perm = call <16 x i8> @llvm.s390.vperm(
+ <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
+ <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
+ <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+ %res = and <16 x i8> %perm, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %res
+}
+
+; Test VPERM (operand elements are 1):
+define <16 x i8> @f30() {
+; CHECK-LABEL: f30:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vrepib %v1, 1
+; CHECK-NEXT: vperm %v24, %v1, %v1, %v0
+; CHECK-NEXT: br %r14
+ %perm = call <16 x i8> @llvm.s390.vperm(
+ <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>,
+ <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>,
+ <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+ i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+ %res = and <16 x i8> %perm, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ ret <16 x i8> %res
+}
diff --git a/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-unpack.ll b/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-unpack.ll
new file mode 100644
index 0000000..1966340
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-unpack.ll
@@ -0,0 +1,384 @@
+; Test that computeKnownBitsForTargetNode() helps DAGCombiner with
+; vector intrinsics.
+;
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
+
+declare <8 x i16> @llvm.s390.vuphb(<16 x i8>)
+declare <8 x i16> @llvm.s390.vuplhb(<16 x i8>)
+
+; VUPHB (used operand elements are 0)
+define <8 x i16> @f0() {
+; CHECK-LABEL: f0:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %unp = call <8 x i16> @llvm.s390.vuphb(<16 x i8>
+ <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+; VUPHB (used operand elements are 1)
+; NOTE: The AND is optimized away, but instead of replicating '1' into
+; <8 x i16>, the original vector constant is loaded from the constant pool
+; and then unpacked (the same applies to more test cases below).
+define <8 x i16> @f1() {
+; CHECK-LABEL: f1:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI
+; CHECK-NEXT: vl %v0, 0(%r1)
+; CHECK-NEXT: vuphb %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <8 x i16> @llvm.s390.vuphb(<16 x i8>
+ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+ %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+; VUPLHB (used operand elements are 0)
+define <8 x i16> @f2() {
+; CHECK-LABEL: f2:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %unp = call <8 x i16> @llvm.s390.vuplhb(<16 x i8>
+ <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+; VUPLHB (used operand elements are 1)
+define <8 x i16> @f3() {
+; CHECK-LABEL: f3:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI
+; CHECK-NEXT: vl %v0, 0(%r1)
+; CHECK-NEXT: vuplhb %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <8 x i16> @llvm.s390.vuplhb(<16 x i8>
+ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+ %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+declare <4 x i32> @llvm.s390.vuphh(<8 x i16>)
+declare <4 x i32> @llvm.s390.vuplhh(<8 x i16>)
+
+; VUPHH (used operand elements are 0)
+define <4 x i32> @f4() {
+; CHECK-LABEL: f4:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %unp = call <4 x i32> @llvm.s390.vuphh(<8 x i16>
+ <i16 0, i16 0, i16 0, i16 0,
+ i16 1, i16 1, i16 1, i16 1>)
+ %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+; VUPHH (used operand elements are 1)
+define <4 x i32> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI
+; CHECK-NEXT: vl %v0, 0(%r1)
+; CHECK-NEXT: vuphh %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <4 x i32> @llvm.s390.vuphh(<8 x i16>
+ <i16 1, i16 1, i16 1, i16 1,
+ i16 0, i16 0, i16 0, i16 0>)
+ %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+; VUPLHH (used operand elements are 0)
+define <4 x i32> @f6() {
+; CHECK-LABEL: f6:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %unp = call <4 x i32> @llvm.s390.vuplhh(<8 x i16>
+ <i16 0, i16 0, i16 0, i16 0,
+ i16 1, i16 1, i16 1, i16 1>)
+ %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+; VUPLHH (used operand elements are 1)
+define <4 x i32> @f7() {
+; CHECK-LABEL: f7:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI
+; CHECK-NEXT: vl %v0, 0(%r1)
+; CHECK-NEXT: vuplhh %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <4 x i32> @llvm.s390.vuplhh(<8 x i16>
+ <i16 1, i16 1, i16 1, i16 1,
+ i16 0, i16 0, i16 0, i16 0>)
+ %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+declare <2 x i64> @llvm.s390.vuphf(<4 x i32>)
+declare <2 x i64> @llvm.s390.vuplhf(<4 x i32>)
+
+; VUPHF (used operand elements are 0)
+define <2 x i64> @f8() {
+; CHECK-LABEL: f8:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>)
+ %and = and <2 x i64> %unp, <i64 1, i64 1>
+ ret <2 x i64> %and
+}
+
+; VUPHF (used operand elements are 1)
+define <2 x i64> @f9() {
+; CHECK-LABEL: f9:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI
+; CHECK-NEXT: vl %v0, 0(%r1)
+; CHECK-NEXT: vuphf %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>)
+ %and = and <2 x i64> %unp, <i64 1, i64 1>
+ ret <2 x i64> %and
+}
+
+; VUPLHF (used operand elements are 0)
+define <2 x i64> @f10() {
+; CHECK-LABEL: f10:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %unp = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>)
+ %and = and <2 x i64> %unp, <i64 1, i64 1>
+ ret <2 x i64> %and
+}
+
+; VUPLHF (used operand elements are 1)
+define <2 x i64> @f11() {
+; CHECK-LABEL: f11:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI
+; CHECK-NEXT: vl %v0, 0(%r1)
+; CHECK-NEXT: vuplhf %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>)
+ %and = and <2 x i64> %unp, <i64 1, i64 1>
+ ret <2 x i64> %and
+}
+
+declare <8 x i16> @llvm.s390.vuplb(<16 x i8>)
+declare <8 x i16> @llvm.s390.vupllb(<16 x i8>)
+
+; VUPLB (used operand elements are 0)
+define <8 x i16> @f12() {
+; CHECK-LABEL: f12:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %unp = call <8 x i16> @llvm.s390.vuplb(<16 x i8>
+ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+
+ %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+; VUPLB (used operand elements are 1)
+define <8 x i16> @f13() {
+; CHECK-LABEL: f13:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI
+; CHECK-NEXT: vl %v0, 0(%r1)
+; CHECK-NEXT: vuplb %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <8 x i16> @llvm.s390.vuplb(<16 x i8>
+ <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+; VUPLLB (used operand elements are 0)
+define <8 x i16> @f14() {
+; CHECK-LABEL: f14:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %unp = call <8 x i16> @llvm.s390.vupllb(<16 x i8>
+ <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+ i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+ %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+; VUPLLB (used operand elements are 1)
+define <8 x i16> @f15() {
+; CHECK-LABEL: f15:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI
+; CHECK-NEXT: vl %v0, 0(%r1)
+; CHECK-NEXT: vupllb %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <8 x i16> @llvm.s390.vupllb(<16 x i8>
+ <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+ i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+ %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ ret <8 x i16> %and
+}
+
+declare <4 x i32> @llvm.s390.vuplhw(<8 x i16>)
+declare <4 x i32> @llvm.s390.vupllh(<8 x i16>)
+
+; VUPLHW (used operand elements are 0)
+define <4 x i32> @f16() {
+; CHECK-LABEL: f16:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %unp = call <4 x i32> @llvm.s390.vuplhw(<8 x i16>
+ <i16 1, i16 1, i16 1, i16 1,
+ i16 0, i16 0, i16 0, i16 0>)
+
+ %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+; VUPLHW (used operand elements are 1)
+define <4 x i32> @f17() {
+; CHECK-LABEL: f17:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI
+; CHECK-NEXT: vl %v0, 0(%r1)
+; CHECK-NEXT: vuplhw %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <4 x i32> @llvm.s390.vuplhw(<8 x i16>
+ <i16 0, i16 0, i16 0, i16 0,
+ i16 1, i16 1, i16 1, i16 1>)
+ %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+; VUPLLH (used operand elements are 0)
+define <4 x i32> @f18() {
+; CHECK-LABEL: f18:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %unp = call <4 x i32> @llvm.s390.vupllh(<8 x i16>
+ <i16 1, i16 1, i16 1, i16 1,
+ i16 0, i16 0, i16 0, i16 0>)
+ %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+; VUPLLH (used operand elements are 1)
+define <4 x i32> @f19() {
+; CHECK-LABEL: f19:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI
+; CHECK-NEXT: vl %v0, 0(%r1)
+; CHECK-NEXT: vupllh %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <4 x i32> @llvm.s390.vupllh(<8 x i16>
+ <i16 0, i16 0, i16 0, i16 0,
+ i16 1, i16 1, i16 1, i16 1>)
+ %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+ ret <4 x i32> %and
+}
+
+declare <2 x i64> @llvm.s390.vuplf(<4 x i32>)
+declare <2 x i64> @llvm.s390.vupllf(<4 x i32>)
+
+; VUPLF (used operand elements are 0)
+define <2 x i64> @f20() {
+; CHECK-LABEL: f20:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %unp = call <2 x i64> @llvm.s390.vuplf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>)
+ %and = and <2 x i64> %unp, <i64 1, i64 1>
+ ret <2 x i64> %and
+}
+
+; VUPLF (used operand elements are 1)
+define <2 x i64> @f21() {
+; CHECK-LABEL: f21:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI
+; CHECK-NEXT: vl %v0, 0(%r1)
+; CHECK-NEXT: vuplf %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <2 x i64> @llvm.s390.vuplf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>)
+ %and = and <2 x i64> %unp, <i64 1, i64 1>
+ ret <2 x i64> %and
+}
+
+; VUPLLF (used operand elements are 0)
+define <2 x i64> @f22() {
+; CHECK-LABEL: f22:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %unp = call <2 x i64> @llvm.s390.vupllf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>)
+ %and = and <2 x i64> %unp, <i64 1, i64 1>
+ ret <2 x i64> %and
+}
+
+; VUPLLF (used operand elements are 1)
+define <2 x i64> @f23() {
+; CHECK-LABEL: f23:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI
+; CHECK-NEXT: vl %v0, 0(%r1)
+; CHECK-NEXT: vupllf %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <2 x i64> @llvm.s390.vupllf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>)
+ %and = and <2 x i64> %unp, <i64 1, i64 1>
+ ret <2 x i64> %and
+}
+
+; Test that signed unpacking of positive elements gives known zeros in the high part.
+define <2 x i64> @f24() {
+; CHECK-LABEL: f24:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>)
+ %and = and <2 x i64> %unp, <i64 -4294967296, ; = 0xffffffff00000000
+ i64 -4294967296>
+ ret <2 x i64> %and
+}
+
+; Test that signed unpacking of negative elements gives known ones in the high part.
+define <2 x i64> @f25() {
+; CHECK-LABEL: f25:
+; CHECK-LABEL: # %bb.0:
+; 61680 = 0xf0f0
+; CHECK-NEXT: vgbm %v24, 61680
+; CHECK-NEXT: br %r14
+ %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 -1, i32 -1, i32 0, i32 0>)
+ %and = and <2 x i64> %unp, <i64 -4294967296, ; = 0xffffffff00000000
+ i64 -4294967296>
+ ret <2 x i64> %and
+}
+
+; Test that logical unpacking of negative elements gives known zeros in the high part.
+define <2 x i64> @f26() {
+; CHECK-LABEL: f26:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+ %unp = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> <i32 -1, i32 -1, i32 0, i32 0>)
+ %and = and <2 x i64> %unp, <i64 -4294967296, ; = 0xffffffff00000000
+ i64 -4294967296>
+ ret <2 x i64> %and
+}
diff --git a/llvm/test/CodeGen/SystemZ/knownbits.ll b/llvm/test/CodeGen/SystemZ/knownbits.ll
new file mode 100644
index 0000000..703c0bf
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/knownbits.ll
@@ -0,0 +1,51 @@
+; Test that computeKnownBitsForTargetNode() helps DAGCombiner.
+;
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
+
+; SystemZISD::REPLICATE
+define i32 @f0() {
+; CHECK-LABEL: f0:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vlgvf
+; CHECK-NOT: lhi %r2, 0
+; CHECK-NOT: chi %r0, 0
+; CHECK-NOT: lochilh %r2, 1
+; CHECK: br %r14
+ %cmp0 = icmp ne <4 x i32> undef, zeroinitializer
+ %zxt0 = zext <4 x i1> %cmp0 to <4 x i32>
+ %ext0 = extractelement <4 x i32> %zxt0, i32 3
+ br label %exit
+
+exit:
+; The vector icmp+zext involves a REPLICATE of 1's. If KnownBits reflects
+; this, DAGCombiner can see that the i32 icmp and zext here are not needed.
+ %cmp1 = icmp ne i32 %ext0, 0
+ %zxt1 = zext i1 %cmp1 to i32
+ ret i32 %zxt1
+}
+
+; SystemZISD::JOIN_DWORDS (and REPLICATE)
+define void @f1() {
+; The DAG XOR has JOIN_DWORDS and REPLICATE operands. With KnownBits properly set
+; for both these nodes, ICMP is used instead of TM during lowering because
+; adjustForRedundantAnd() succeeds.
+; CHECK-LABEL: f1:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NOT: tmll
+; CHECK-NOT: jne
+; CHECK: cijlh
+ %1 = load i16, i16* null, align 2
+ %2 = icmp eq i16 %1, 0
+ %3 = insertelement <2 x i1> undef, i1 %2, i32 0
+ %4 = insertelement <2 x i1> %3, i1 true, i32 1
+ %5 = xor <2 x i1> %4, <i1 true, i1 true>
+ %6 = extractelement <2 x i1> %5, i32 0
+ %7 = or i1 %6, undef
+ br i1 %7, label %9, label %8
+
+; <label>:8: ; preds = %0
+ unreachable
+
+; <label>:9: ; preds = %0
+ unreachable
+}
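
Before the sign-bit tests that follow, a matching sketch of the second
hook, in the same illustrative spirit (opcode names are assumptions; this
is not the actual patch contents):

  // Illustrative sketch (not the actual patch). A sign-extending unpack
  // widens each source element by replicating its sign bit, so every sign
  // bit of the source element is still a sign bit of the result element.
  unsigned SystemZTargetLowering::ComputeNumSignBitsForTargetNode(
      SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
      unsigned Depth) const {
    switch (Op.getOpcode()) {
    case SystemZISD::UNPACK_HIGH:
    case SystemZISD::UNPACK_LOW: {
      SDValue Src = Op.getOperand(0);
      unsigned SrcBits = Src.getValueType().getScalarSizeInBits();
      // The widening adds SrcBits copies of the sign bit on top of the
      // sign bits the source element already had.
      return SrcBits + DAG.ComputeNumSignBits(Src, Depth + 1);
    }
    default:
      return 1; // Nothing known beyond the sign bit itself.
    }
  }

This is what lets the trunc+sext pairs in the tests below fold away: the
bits removed by the trunc are already copies of the remaining sign bit.
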
diff --git a/llvm/test/CodeGen/SystemZ/signbits-intrinsics-binop.ll b/llvm/test/CodeGen/SystemZ/signbits-intrinsics-binop.ll
new file mode 100644
index 0000000..1fc1496
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/signbits-intrinsics-binop.ll
@@ -0,0 +1,236 @@
+; Test that ComputeNumSignBitsForTargetNode() helps DAGCombiner with
+; vector intrinsics.
+;
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
+
+declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>)
+declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>)
+declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>)
+
+; PACKS_CC: i64 -> i32
+define <4 x i32> @f0() {
+; CHECK-LABEL: f0:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vpksgs %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 0, i64 1>)
+ %extr = extractvalue {<4 x i32>, i32} %call, 0
+ %trunc = trunc <4 x i32> %extr to <4 x i16>
+ %ret = sext <4 x i16> %trunc to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; PACKS_CC: i32 -> i16
+define <8 x i16> @f1() {
+; CHECK-LABEL: f1:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vpksfs %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
+ <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
+ %extr = extractvalue {<8 x i16>, i32} %call, 0
+ %trunc = trunc <8 x i16> %extr to <8 x i8>
+ %ret = sext <8 x i8> %trunc to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; PACKS_CC: i16 -> i8
+define <16 x i8> @f2() {
+; CHECK-LABEL: f2:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vpkshs %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(
+ <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
+ <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
+ %extr = extractvalue {<16 x i8>, i32} %call, 0
+ %trunc = trunc <16 x i8> %extr to <16 x i4>
+ %ret = sext <16 x i4> %trunc to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>)
+declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>)
+declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>)
+
+; PACKLS_CC: i64 -> i32
+define <4 x i32> @f3() {
+; CHECK-LABEL: f3:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vpklsgs %v24, %v1, %v0
+; CHECK-NEXT: br %r14
+ %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>)
+ %extr = extractvalue {<4 x i32>, i32} %call, 0
+ %trunc = trunc <4 x i32> %extr to <4 x i16>
+ %ret = sext <4 x i16> %trunc to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; PACKLS_CC: i32 -> i16
+define <8 x i16> @f4() {
+; CHECK-LABEL: f4:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vpklsfs %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
+ <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
+ %extr = extractvalue {<8 x i16>, i32} %call, 0
+ %trunc = trunc <8 x i16> %extr to <8 x i8>
+ %ret = sext <8 x i8> %trunc to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; PACKLS_CC: i16 -> i8
+define <16 x i8> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vpklshs %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(
+ <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
+ <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
+ %extr = extractvalue {<16 x i8>, i32} %call, 0
+ %trunc = trunc <16 x i8> %extr to <16 x i4>
+ %ret = sext <16 x i4> %trunc to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>)
+
+; PACKS: i64 -> i32
+define <4 x i32> @f6() {
+; CHECK-LABEL: f6:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vpksg %v24, %v1, %v0
+; CHECK-NEXT: br %r14
+ %call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>)
+ %trunc = trunc <4 x i32> %call to <4 x i16>
+ %ret = sext <4 x i16> %trunc to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; PACKS: i32 -> i16
+define <8 x i16> @f7() {
+; CHECK-LABEL: f7:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vpksf %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
+ <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
+ %trunc = trunc <8 x i16> %call to <8 x i8>
+ %ret = sext <8 x i8> %trunc to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; PACKS: i16 -> i8
+define <16 x i8> @f8() {
+; CHECK-LABEL: f8:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vpksh %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call <16 x i8> @llvm.s390.vpksh(
+ <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
+ <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
+ %trunc = trunc <16 x i8> %call to <16 x i4>
+ %ret = sext <16 x i4> %trunc to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>)
+
+; PACKLS: i64 -> i32
+define <4 x i32> @f9() {
+; CHECK-LABEL: f9:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vpklsg %v24, %v1, %v0
+; CHECK-NEXT: br %r14
+ %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>)
+ %trunc = trunc <4 x i32> %call to <4 x i16>
+ %ret = sext <4 x i16> %trunc to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+; PACKLS: i32 -> i16
+define <8 x i16> @f10() {
+; CHECK-LABEL: f10:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vpklsf %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
+ <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
+ %trunc = trunc <8 x i16> %call to <8 x i8>
+ %ret = sext <8 x i8> %trunc to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+; PACKLS: i16 -> i8
+define <16 x i8> @f11() {
+; CHECK-LABEL: f11:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vpklsh %v24, %v0, %v0
+; CHECK-NEXT: br %r14
+ %call = call <16 x i8> @llvm.s390.vpklsh(
+ <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
+ <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
+ %trunc = trunc <16 x i8> %call to <16 x i4>
+ %ret = sext <16 x i4> %trunc to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32)
+
+; VPDI:
+define <2 x i64> @f12() {
+; CHECK-LABEL: f12:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vpdi %v24, %v1, %v0, 0
+; CHECK-NEXT: br %r14
+ %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 0, i64 1>,
+ <2 x i64> <i64 1, i64 0>, i32 0)
+ %trunc = trunc <2 x i64> %perm to <2 x i32>
+ %ret = sext <2 x i32> %trunc to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32)
+
+; VSLDB:
+define <16 x i8> @f13() {
+; CHECK-LABEL: f13:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vsldb %v24, %v0, %v0, 1
+; CHECK-NEXT: br %r14
+ %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8>
+ <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
+ i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>, <16 x i8>
+ <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
+ i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>,
+ i32 1)
+ %trunc = trunc <16 x i8> %shfd to <16 x i4>
+ %ret = sext <16 x i4> %trunc to <16 x i8>
+ ret <16 x i8> %ret
+}
+
+declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>)
+
+; Test VPERM:
+define <16 x i8> @f14() {
+; CHECK-LABEL: f14:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vperm %v24, %v0, %v0, %v0
+; CHECK-NEXT: br %r14
+ %perm = call <16 x i8> @llvm.s390.vperm(
+ <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
+ i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>,
+ <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
+ i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>,
+ <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
+ i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>)
+ %trunc = trunc <16 x i8> %perm to <16 x i4>
+ %ret = sext <16 x i4> %trunc to <16 x i8>
+ ret <16 x i8> %ret
+}
diff --git a/llvm/test/CodeGen/SystemZ/signbits-intrinsics-unpack.ll b/llvm/test/CodeGen/SystemZ/signbits-intrinsics-unpack.ll
new file mode 100644
index 0000000..b37c1c7
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/signbits-intrinsics-unpack.ll
@@ -0,0 +1,97 @@
+; Test that ComputeNumSignBitsForTargetNode() helps DAGCombiner with
+; vector intrinsics.
+;
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
+
+declare <8 x i16> @llvm.s390.vuphb(<16 x i8>)
+
+; VUPHB
+define <8 x i16> @f0() {
+; CHECK-LABEL: f0:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vuphb %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <8 x i16> @llvm.s390.vuphb(<16 x i8>
+ <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1,
+ i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
+ %trunc = trunc <8 x i16> %unp to <8 x i8>
+ %ret = sext <8 x i8> %trunc to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+declare <4 x i32> @llvm.s390.vuphh(<8 x i16>)
+
+; VUPHH
+define <4 x i32> @f1() {
+; CHECK-LABEL: f1:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vuphh %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <4 x i32> @llvm.s390.vuphh(<8 x i16>
+ <i16 0, i16 1, i16 0, i16 1,
+ i16 0, i16 1, i16 0, i16 1>)
+ %trunc = trunc <4 x i32> %unp to <4 x i16>
+ %ret = sext <4 x i16> %trunc to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+declare <2 x i64> @llvm.s390.vuphf(<4 x i32>)
+
+; VUPHF
+define <2 x i64> @f2() {
+; CHECK-LABEL: f2:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vuphf %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 0, i32 1, i32 0, i32 1>)
+ %trunc = trunc <2 x i64> %unp to <2 x i32>
+ %ret = sext <2 x i32> %trunc to <2 x i64>
+ ret <2 x i64> %ret
+}
+
+declare <8 x i16> @llvm.s390.vuplb(<16 x i8>)
+
+; VUPLB
+define <8 x i16> @f3() {
+; CHECK-LABEL: f3:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vuplb %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <8 x i16> @llvm.s390.vuplb(<16 x i8>
+ <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1,
+ i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
+ %trunc = trunc <8 x i16> %unp to <8 x i8>
+ %ret = sext <8 x i8> %trunc to <8 x i16>
+ ret <8 x i16> %ret
+}
+
+declare <4 x i32> @llvm.s390.vuplhw(<8 x i16>)
+
+; VUPLHW
+define <4 x i32> @f4() {
+; CHECK-LABEL: f4:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vuplhw %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <4 x i32> @llvm.s390.vuplhw(<8 x i16>
+ <i16 1, i16 0, i16 1, i16 0,
+ i16 1, i16 0, i16 1, i16 0>)
+ %trunc = trunc <4 x i32> %unp to <4 x i16>
+ %ret = sext <4 x i16> %trunc to <4 x i32>
+ ret <4 x i32> %ret
+}
+
+declare <2 x i64> @llvm.s390.vuplf(<4 x i32>)
+
+; VUPLF
+define <2 x i64> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-LABEL: # %bb.0:
+; CHECK: vuplf %v24, %v0
+; CHECK-NEXT: br %r14
+ %unp = call <2 x i64> @llvm.s390.vuplf(<4 x i32> <i32 1, i32 0, i32 1, i32 0>)
+ %trunc = trunc <2 x i64> %unp to <2 x i32>
+ %ret = sext <2 x i32> %trunc to <2 x i64>
+ ret <2 x i64> %ret
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/signbits.ll b/llvm/test/CodeGen/SystemZ/signbits.ll
new file mode 100644
index 0000000..4c019a6
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/signbits.ll
@@ -0,0 +1,37 @@
+; Test that ComputeNumSignBitsForTargetNode() (SELECT_CCMASK) helps
+; DAGCombiner see that %sel0 is already sign-extended.
+;
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -debug-only=isel < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+%0 = type <{ %1*, i16, [6 x i8] }>
+%1 = type { i32 (...)** }
+
+define signext i16 @fun(%0* %Arg0, i16 signext %Arg1) {
+entry:
+ br i1 undef, label %lab0, label %lab1
+
+lab0:
+ %icmp0 = icmp eq i32 undef, 0
+ %sel0 = select i1 %icmp0, i16 %Arg1, i16 1
+ br label %lab1
+
+lab1:
+; CHECK: *** MachineFunction at end of ISel ***
+; CHECK-LABEL: bb.2.lab1:
+; CHECK-NOT: LHR
+; CHECK: BRC
+ %phi0 = phi i16 [ 2, %entry ], [ %sel0, %lab0 ]
+ %sext0 = sext i16 %phi0 to i32
+ br i1 undef, label %lab2, label %lab3
+
+lab2:
+ %and0 = and i32 %sext0, 8
+ %icmp1 = icmp eq i32 %and0, 0
+ %sel1 = select i1 %icmp1, i16 %phi0, i16 4
+ ret i16 %sel1
+
+lab3:
+ ret i16 8
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll
index fe4ae45..7da1ded 100644
--- a/llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll
@@ -17,8 +17,7 @@
; CHECK-NEXT: vlvgf [[REG2]], [[REG3]], 2
; CHECK-NEXT: vn [[REG2]], [[REG2]], [[REG0]]
; CHECK-NEXT: vlgvf [[REG4:%r[0-9]]], [[REG2]], 3
-; CHECK-NEXT: tmll [[REG4]], 1
-; CHECK-NEXT: jne .LBB0_1
+; CHECK-NEXT: cijlh [[REG4]], 0, .LBB0_1
; CHECK-NEXT: # %bb.2: # %CF36
; CHECK-NEXT: br %r14
BB: