[SystemZ] computeKnownBitsForTargetNode() / ComputeNumSignBitsForTargetNode()

Improve/implement these methods to give DAG combining better known-bits
and sign-bits information for SystemZ target nodes. This mainly concerns
the vector intrinsics.
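
For reference, a minimal sketch of what such an override can look like,
assuming the current TargetLowering hook signature (this is not the patch
itself; the actual implementation also handles the vector intrinsic nodes
exercised by the tests below, and ComputeNumSignBitsForTargetNode() is
implemented analogously):

  // Sketch only: report known bits for a REPLICATE of an immediate.
  void SystemZTargetLowering::computeKnownBitsForTargetNode(
      const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
      const SelectionDAG &DAG, unsigned Depth) const {
    unsigned BitWidth = Op.getScalarValueSizeInBits();
    Known = KnownBits(BitWidth); // nothing known yet
    if (Op.getOpcode() == SystemZISD::REPLICATE)
      if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
        // Every element is the same immediate, so all bits are known.
        APInt Val = C->getAPIntValue().zextOrTrunc(BitWidth);
        Known.One = Val;
        Known.Zero = ~Val;
      }
  }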

Some constant operands of SystemZISD nodes have been marked Opaque so
that DAGCombiner does not transform the nodes back and forth between
generic and target forms indefinitely.
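
A sketch of the mechanism, using the isOpaque flag of
SelectionDAG::getConstant() (Val, DL and the i32 type here are
placeholders, not taken from the patch):

  // An opaque constant is skipped by constant folding, so DAGCombiner
  // cannot rebuild the generic node that lowering just replaced.
  SDValue Imm = DAG.getConstant(Val, DL, MVT::i32,
                                /*isTarget=*/false, /*isOpaque=*/true);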

Review: Ulrich Weigand
llvm-svn: 327765
diff --git a/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll b/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll
new file mode 100644
index 0000000..3bcbbb4
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll
@@ -0,0 +1,460 @@
+; Test that DAGCombiner gets helped by computeKnownBitsForTargetNode() with
+; vector intrinsics.
+;
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
+
+declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>)
+declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>)
+declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>)
+
+; PACKS_CC (operand elements are 0): i64 -> i32
+define <4 x i32> @f0() {
+; CHECK-LABEL: f0:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
+  %extr = extractvalue {<4 x i32>, i32} %call, 0
+  %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+; PACKS_CC (operand elements are 1): i64 -> i32
+; NOTE: The vector AND is optimized away, but vrepig+vpksgs is used instead
+; of vrepif. Similarly for more test cases below.
+define <4 x i32> @f1() {
+; CHECK-LABEL: f1:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vrepig %v0, 1
+; CHECK-NEXT:  vpksgs %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
+  %extr = extractvalue {<4 x i32>, i32} %call, 0
+  %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+; PACKS_CC (operand elements are 0): i32 -> i16
+define <8 x i16> @f2() {
+; CHECK-LABEL: f2:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
+                                                  <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  %extr = extractvalue {<8 x i16>, i32} %call, 0
+  %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+; PACKS_CC (operand elements are 1): i32 -> i16
+define <8 x i16> @f3() {
+; CHECK-LABEL: f3:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vrepif %v0, 1
+; CHECK-NEXT:  vpksfs %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
+                                                  <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+  %extr = extractvalue {<8 x i16>, i32} %call, 0
+  %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+; PACKS_CC (operand elements are 0): i16 -> i8
+define <16 x i8> @f4() {
+; CHECK-LABEL: f4:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(
+                <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
+                <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+  %extr = extractvalue {<16 x i8>, i32} %call, 0
+  %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                               i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %and
+}
+
+; PACKS_CC (operand elements are 1): i16 -> i8
+define <16 x i8> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vrepih %v0, 1
+; CHECK-NEXT:  vpkshs %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(
+                <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
+                <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+  %extr = extractvalue {<16 x i8>, i32} %call, 0
+  %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                               i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %and
+}
+
+declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>)
+declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>)
+declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>)
+
+; PACKLS_CC (operand elements are 0): i64 -> i32
+define <4 x i32> @f6() {
+; CHECK-LABEL: f6:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
+  %extr = extractvalue {<4 x i32>, i32} %call, 0
+  %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+; PACKLS_CC (operand elements are 1): i64 -> i32
+define <4 x i32> @f7() {
+; CHECK-LABEL: f7:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vrepig %v0, 1
+; CHECK-NEXT:  vpklsgs %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
+  %extr = extractvalue {<4 x i32>, i32} %call, 0
+  %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+; PACKLS_CC (operand elements are 0): i32 -> i16
+define <8 x i16> @f8() {
+; CHECK-LABEL: f8:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
+                                                  <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  %extr = extractvalue {<8 x i16>, i32} %call, 0
+  %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+; PACKLS_CC (operand elements are 1): i32 -> i16
+define <8 x i16> @f9() {
+; CHECK-LABEL: f9:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vrepif %v0, 1
+; CHECK-NEXT:  vpklsfs %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
+                                                  <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+  %extr = extractvalue {<8 x i16>, i32} %call, 0
+  %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+; PACKLS_CC (operand elements are 0): i16 -> i8
+define <16 x i8> @f10() {
+; CHECK-LABEL: f10:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(
+                <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
+                <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+  %extr = extractvalue {<16 x i8>, i32} %call, 0
+  %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                               i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %and
+}
+
+; PACKLS_CC (operand elements are 1): i16 -> i8
+define <16 x i8> @f11() {
+; CHECK-LABEL: f11:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vrepih %v0, 1
+; CHECK-NEXT:  vpklshs %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(
+                <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
+                <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+  %extr = extractvalue {<16 x i8>, i32} %call, 0
+  %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                               i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %and
+}
+
+declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>)
+
+; PACKS (operand elements are 0): i64 -> i32
+define <4 x i32> @f12() {
+; CHECK-LABEL: f12:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
+  %and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+; PACKS (operand elements are 1): i64 -> i32
+define <4 x i32> @f13() {
+; CHECK-LABEL: f13:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vrepig %v0, 1
+; CHECK-NEXT:  vpksg %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
+  %and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+; PACKS (operand elements are 0): i32 -> i16
+define <8 x i16> @f14() {
+; CHECK-LABEL: f14:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
+                                          <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+; PACKS (operand elements are 1): i32 -> i16
+define <8 x i16> @f15() {
+; CHECK-LABEL: f15:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vrepif %v0, 1
+; CHECK-NEXT:  vpksf %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
+                                          <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+  %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+; PACKS (operand elements are 0): i16 -> i8
+define <16 x i8> @f16() {
+; CHECK-LABEL: f16:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %call = call <16 x i8> @llvm.s390.vpksh(
+                <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
+                <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+  %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                               i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %and
+}
+
+; PACKS (operand elements are 1): i16 -> i8
+define <16 x i8> @f17() {
+; CHECK-LABEL: f17:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vrepih %v0, 1
+; CHECK-NEXT:  vpksh %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call <16 x i8> @llvm.s390.vpksh(
+                <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
+                <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+  %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                               i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %and
+}
+
+declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>)
+
+; PACKLS (operand elements are 0): i64 -> i32
+define <4 x i32> @f18() {
+; CHECK-LABEL: f18:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>)
+  %and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+; PACKLS (operand elements are 1): i64 -> i32
+define <4 x i32> @f19() {
+; CHECK-LABEL: f19:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vrepig %v0, 1
+; CHECK-NEXT:  vpklsg %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>)
+  %and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+; PACKLS (operand elements are 0): i32 -> i16
+define <8 x i16> @f20() {
+; CHECK-LABEL: f20:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 0, i32 0, i32 0, i32 0>,
+                                           <4 x i32> <i32 0, i32 0, i32 0, i32 0>)
+  %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+; PACKLS (operand elements are 1): i32 -> i16
+define <8 x i16> @f21() {
+; CHECK-LABEL: f21:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vrepif %v0, 1
+; CHECK-NEXT:  vpklsf %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
+                                           <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
+  %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+; PACKLS (operand elements are 0): i16 -> i8
+define <16 x i8> @f22() {
+; CHECK-LABEL: f22:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %call = call <16 x i8> @llvm.s390.vpklsh(
+                <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>,
+                <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
+  %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                               i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %and
+}
+
+; PACKLS (operand elements are 1): i16 -> i8
+define <16 x i8> @f23() {
+; CHECK-LABEL: f23:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vrepih %v0, 1
+; CHECK-NEXT:  vpklsh %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call <16 x i8> @llvm.s390.vpklsh(
+                <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>,
+                <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>)
+  %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                               i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %and
+}
+
+declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32)
+
+; VPDI (operand elements are 0):
+define <2 x i64> @f24() {
+; CHECK-LABEL: f24:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+  %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 0, i64 0>,
+                                         <2 x i64> <i64 0, i64 0>, i32 0)
+  %res = and <2 x i64> %perm, <i64 1, i64 1>
+  ret <2 x i64> %res
+}
+
+; VPDI (operand elements are 1):
+define <2 x i64> @f25() {
+; CHECK-LABEL: f25:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepig %v0, 1
+; CHECK-NEXT: vpdi %v24, %v0, %v0, 0
+; CHECK-NEXT: br %r14
+  %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 1, i64 1>,
+                                         <2 x i64> <i64 1, i64 1>, i32 0)
+  %res = and <2 x i64> %perm, <i64 1, i64 1>
+  ret <2 x i64> %res
+}
+
+declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32)
+
+; VSLDB (operand elements are 0):
+define <16 x i8> @f26() {
+; CHECK-LABEL: f26:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+  %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8>
+                 <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+                  i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8>
+                 <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+                  i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
+                  i32 1)
+
+  %res = and <16 x i8> %shfd, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                               i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %res
+}
+
+; VSLDB (operand elements are 1):
+define <16 x i8> @f27() {
+; CHECK-LABEL: f27:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vrepib %v0, 1
+; CHECK-NEXT: vsldb %v24, %v0, %v0, 1
+; CHECK-NEXT: br %r14
+  %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8>
+                 <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                  i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8>
+                 <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                  i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>,
+                  i32 1)
+
+  %res = and <16 x i8> %shfd, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                               i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %res
+}
+
+; Test that intrinsic CC result is recognized.
+define i32 @f28(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: f28:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b)
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  %res = and i32 %cc, -4
+  ret i32 %res
+}
+
+declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>)
+
+; Test VPERM (operand elements are 0):
+define <16 x i8> @f29() {
+; CHECK-LABEL: f29:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+  %perm = call <16 x i8> @llvm.s390.vperm(
+                  <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+                             i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
+                  <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+                             i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>,
+                  <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+                             i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+  %res = and <16 x i8> %perm, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                               i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %res
+}
+
+; Test VPERM (operand elements are 1):
+define <16 x i8> @f30() {
+; CHECK-LABEL: f30:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vrepib %v1, 1
+; CHECK-NEXT: vperm %v24, %v1, %v1, %v0
+; CHECK-NEXT: br %r14
+  %perm = call <16 x i8> @llvm.s390.vperm(
+                  <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                             i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>,
+                  <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                             i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>,
+                  <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+                             i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+  %res = and <16 x i8> %perm, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                               i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %res
+}
diff --git a/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-unpack.ll b/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-unpack.ll
new file mode 100644
index 0000000..1966340
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-unpack.ll
@@ -0,0 +1,384 @@
+; Test that DAGCombiner gets helped by computeKnownBitsForTargetNode() with
+; vector intrinsics.
+;
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
+
+declare <8 x i16> @llvm.s390.vuphb(<16 x i8>)
+declare <8 x i16> @llvm.s390.vuplhb(<16 x i8>)
+
+; VUPHB (used operand elements are 0)
+define <8 x i16> @f0() {
+; CHECK-LABEL: f0:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %unp = call <8 x i16> @llvm.s390.vuphb(<16 x i8>
+                                         <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+                                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+  %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+; VUPHB (used operand elements are 1)
+; NOTE: The AND is optimized away, but instead of replicating '1' into <8 x
+; i16>, the original vector constant is put in the constant pool and then
+; unpacked (repeated in more test cases below).
+define <8 x i16> @f1() {
+; CHECK-LABEL: f1:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  larl %r1, .LCPI
+; CHECK-NEXT:  vl %v0, 0(%r1)
+; CHECK-NEXT:  vuphb %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <8 x i16> @llvm.s390.vuphb(<16 x i8>
+                                         <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                                          i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+  %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+; VUPLHB (used operand elements are 0)
+define <8 x i16> @f2() {
+; CHECK-LABEL: f2:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %unp = call <8 x i16> @llvm.s390.vuplhb(<16 x i8>
+                                          <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+                                           i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+  %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+; VUPLHB (used operand elements are 1)
+define <8 x i16> @f3() {
+; CHECK-LABEL: f3:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  larl %r1, .LCPI
+; CHECK-NEXT:  vl %v0, 0(%r1)
+; CHECK-NEXT:  vuplhb %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <8 x i16> @llvm.s390.vuplhb(<16 x i8>
+                                          <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                                           i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+  %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+declare <4 x i32> @llvm.s390.vuphh(<8 x i16>)
+declare <4 x i32> @llvm.s390.vuplhh(<8 x i16>)
+
+; VUPHH (used operand elements are 0)
+define <4 x i32> @f4() {
+; CHECK-LABEL: f4:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %unp = call <4 x i32> @llvm.s390.vuphh(<8 x i16>
+                                         <i16 0, i16 0, i16 0, i16 0,
+                                          i16 1, i16 1, i16 1, i16 1>)
+  %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+; VUPHH (used operand elements are 1)
+define <4 x i32> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  larl %r1, .LCPI
+; CHECK-NEXT:  vl %v0, 0(%r1)
+; CHECK-NEXT:  vuphh %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <4 x i32> @llvm.s390.vuphh(<8 x i16>
+                                         <i16 1, i16 1, i16 1, i16 1,
+                                          i16 0, i16 0, i16 0, i16 0>)
+  %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+; VUPLHH (used operand elements are 0)
+define <4 x i32> @f6() {
+; CHECK-LABEL: f6:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %unp = call <4 x i32> @llvm.s390.vuplhh(<8 x i16>
+                                          <i16 0, i16 0, i16 0, i16 0,
+                                           i16 1, i16 1, i16 1, i16 1>)
+  %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+; VUPLHH (used operand elements are 1)
+define <4 x i32> @f7() {
+; CHECK-LABEL: f7:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  larl %r1, .LCPI
+; CHECK-NEXT:  vl %v0, 0(%r1)
+; CHECK-NEXT:  vuplhh %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <4 x i32> @llvm.s390.vuplhh(<8 x i16>
+                                          <i16 1, i16 1, i16 1, i16 1,
+                                           i16 0, i16 0, i16 0, i16 0>)
+  %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+declare <2 x i64> @llvm.s390.vuphf(<4 x i32>)
+declare <2 x i64> @llvm.s390.vuplhf(<4 x i32>)
+
+; VUPHF (used operand elements are 0)
+define <2 x i64> @f8() {
+; CHECK-LABEL: f8:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>)
+  %and = and <2 x i64> %unp, <i64 1, i64 1>
+  ret <2 x i64> %and
+}
+
+; VUPHF (used operand elements are 1)
+define <2 x i64> @f9() {
+; CHECK-LABEL: f9:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  larl %r1, .LCPI
+; CHECK-NEXT:  vl %v0, 0(%r1)
+; CHECK-NEXT:  vuphf %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>)
+  %and = and <2 x i64> %unp, <i64 1, i64 1>
+  ret <2 x i64> %and
+}
+
+; VUPLHF (used operand elements are 0)
+define <2 x i64> @f10() {
+; CHECK-LABEL: f10:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %unp = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>)
+  %and = and <2 x i64> %unp, <i64 1, i64 1>
+  ret <2 x i64> %and
+}
+
+; VUPLHF (used operand elements are 1)
+define <2 x i64> @f11() {
+; CHECK-LABEL: f11:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  larl %r1, .LCPI
+; CHECK-NEXT:  vl %v0, 0(%r1)
+; CHECK-NEXT:  vuplhf %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>)
+  %and = and <2 x i64> %unp, <i64 1, i64 1>
+  ret <2 x i64> %and
+}
+
+declare <8 x i16> @llvm.s390.vuplb(<16 x i8>)
+declare <8 x i16> @llvm.s390.vupllb(<16 x i8>)
+
+; VUPLB (used operand elements are 0)
+define <8 x i16> @f12() {
+; CHECK-LABEL: f12:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %unp = call <8 x i16> @llvm.s390.vuplb(<16 x i8>
+                                         <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                                          i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+
+  %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+; VUPLB (used operand elements are 1)
+define <8 x i16> @f13() {
+; CHECK-LABEL: f13:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  larl %r1, .LCPI
+; CHECK-NEXT:  vl %v0, 0(%r1)
+; CHECK-NEXT:  vuplb %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <8 x i16> @llvm.s390.vuplb(<16 x i8>
+                                         <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+                                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+  %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+; VUPLLB (used operand elements are 0)
+define <8 x i16> @f14() {
+; CHECK-LABEL: f14:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %unp = call <8 x i16> @llvm.s390.vupllb(<16 x i8>
+                                         <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1,
+                                          i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>)
+  %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+; VUPLLB (used operand elements are 1)
+define <8 x i16> @f15() {
+; CHECK-LABEL: f15:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  larl %r1, .LCPI
+; CHECK-NEXT:  vl %v0, 0(%r1)
+; CHECK-NEXT:  vupllb %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <8 x i16> @llvm.s390.vupllb(<16 x i8>
+                                         <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0,
+                                          i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>)
+  %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %and
+}
+
+declare <4 x i32> @llvm.s390.vuplhw(<8 x i16>)
+declare <4 x i32> @llvm.s390.vupllh(<8 x i16>)
+
+; VUPLHW (used operand elements are 0)
+define <4 x i32> @f16() {
+; CHECK-LABEL: f16:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %unp = call <4 x i32> @llvm.s390.vuplhw(<8 x i16>
+                                          <i16 1, i16 1, i16 1, i16 1,
+                                           i16 0, i16 0, i16 0, i16 0>)
+
+  %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+; VUPLHW (used operand elements are 1)
+define <4 x i32> @f17() {
+; CHECK-LABEL: f17:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  larl %r1, .LCPI
+; CHECK-NEXT:  vl %v0, 0(%r1)
+; CHECK-NEXT:  vuplhw %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <4 x i32> @llvm.s390.vuplhw(<8 x i16>
+                                          <i16 0, i16 0, i16 0, i16 0,
+                                           i16 1, i16 1, i16 1, i16 1>)
+  %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+; VUPLLH (used operand elements are 0)
+define <4 x i32> @f18() {
+; CHECK-LABEL: f18:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %unp = call <4 x i32> @llvm.s390.vupllh(<8 x i16>
+                                          <i16 1, i16 1, i16 1, i16 1,
+                                           i16 0, i16 0, i16 0, i16 0>)
+  %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+; VUPLLH (used operand elements are 1)
+define <4 x i32> @f19() {
+; CHECK-LABEL: f19:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  larl %r1, .LCPI
+; CHECK-NEXT:  vl %v0, 0(%r1)
+; CHECK-NEXT:  vupllh %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <4 x i32> @llvm.s390.vupllh(<8 x i16>
+                                          <i16 0, i16 0, i16 0, i16 0,
+                                           i16 1, i16 1, i16 1, i16 1>)
+  %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %and
+}
+
+declare <2 x i64> @llvm.s390.vuplf(<4 x i32>)
+declare <2 x i64> @llvm.s390.vupllf(<4 x i32>)
+
+; VUPLF (used operand elements are 0)
+define <2 x i64> @f20() {
+; CHECK-LABEL: f20:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %unp = call <2 x i64> @llvm.s390.vuplf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>)
+  %and = and <2 x i64> %unp, <i64 1, i64 1>
+  ret <2 x i64> %and
+}
+
+; VUPLF (used operand elements are 1)
+define <2 x i64> @f21() {
+; CHECK-LABEL: f21:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  larl %r1, .LCPI
+; CHECK-NEXT:  vl %v0, 0(%r1)
+; CHECK-NEXT:  vuplf %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <2 x i64> @llvm.s390.vuplf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>)
+  %and = and <2 x i64> %unp, <i64 1, i64 1>
+  ret <2 x i64> %and
+}
+
+; VUPLLF (used operand elements are 0)
+define <2 x i64> @f22() {
+; CHECK-LABEL: f22:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %unp = call <2 x i64> @llvm.s390.vupllf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>)
+  %and = and <2 x i64> %unp, <i64 1, i64 1>
+  ret <2 x i64> %and
+}
+
+; VUPLLF (used operand elements are 1)
+define <2 x i64> @f23() {
+; CHECK-LABEL: f23:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  larl %r1, .LCPI
+; CHECK-NEXT:  vl %v0, 0(%r1)
+; CHECK-NEXT:  vupllf %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <2 x i64> @llvm.s390.vupllf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>)
+  %and = and <2 x i64> %unp, <i64 1, i64 1>
+  ret <2 x i64> %and
+}
+
+; Test that signed unpacking of positive elements gives known zeros in high part.
+define <2 x i64> @f24() {
+; CHECK-LABEL: f24:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>)
+  %and = and <2 x i64> %unp, <i64 -4294967296, ; = 0xffffffff00000000
+                              i64 -4294967296>
+  ret <2 x i64> %and
+}
+
+; Test that signed unpacking of negative elements gives known ones in high part.
+define <2 x i64> @f25() {
+; CHECK-LABEL: f25:
+; CHECK-LABEL: # %bb.0:
+;                         61680 = 0xf0f0
+; CHECK-NEXT:  vgbm %v24, 61680
+; CHECK-NEXT:  br %r14
+  %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 -1, i32 -1, i32 0, i32 0>)
+  %and = and <2 x i64> %unp, <i64 -4294967296, ; = 0xffffffff00000000
+                              i64 -4294967296>
+  ret <2 x i64> %and
+}
+
+; Test that logical unpacking of negative elements gives known zeros in high part.
+define <2 x i64> @f26() {
+; CHECK-LABEL: f26:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NEXT:  vgbm %v24, 0
+; CHECK-NEXT:  br %r14
+  %unp = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> <i32 -1, i32 -1, i32 0, i32 0>)
+  %and = and <2 x i64> %unp, <i64 -4294967296, ; = 0xffffffff00000000
+                              i64 -4294967296>
+  ret <2 x i64> %and
+}
diff --git a/llvm/test/CodeGen/SystemZ/knownbits.ll b/llvm/test/CodeGen/SystemZ/knownbits.ll
new file mode 100644
index 0000000..703c0bf
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/knownbits.ll
@@ -0,0 +1,51 @@
+; Test that DAGCombiner gets helped by computeKnownBitsForTargetNode().
+;
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
+
+; SystemZISD::REPLICATE
+define i32 @f0() {
+; CHECK-LABEL: f0:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vlgvf
+; CHECK-NOT:   lhi %r2, 0
+; CHECK-NOT:   chi %r0, 0
+; CHECK-NOT:   lochilh %r2, 1
+; CHECK: br %r14
+  %cmp0 = icmp ne <4 x i32> undef, zeroinitializer
+  %zxt0 = zext <4 x i1> %cmp0 to <4 x i32>
+  %ext0 = extractelement <4 x i32> %zxt0, i32 3
+  br label %exit
+
+exit:
+; The vector icmp+zext involves a REPLICATE of 1's. If KnownBits reflects
+; this, DAGCombiner can see that the i32 icmp and zext here are not needed.
+  %cmp1 = icmp ne i32 %ext0, 0
+  %zxt1 = zext i1 %cmp1 to i32
+  ret i32 %zxt1
+}
+
+; SystemZISD::JOIN_DWORDS (and REPLICATE)
+define void @f1() {
+; The DAG XOR has JOIN_DWORDS and REPLICATE operands. With KnownBits properly set
+; for both these nodes, ICMP is used instead of TM during lowering because
+; adjustForRedundantAnd() succeeds.
+; CHECK-LABEL: f1:
+; CHECK-LABEL: # %bb.0:
+; CHECK-NOT:   tmll
+; CHECK-NOT:   jne
+; CHECK:       cijlh
+  %1 = load i16, i16* null, align 2
+  %2 = icmp eq i16 %1, 0
+  %3 = insertelement <2 x i1> undef, i1 %2, i32 0
+  %4 = insertelement <2 x i1> %3, i1 true, i32 1
+  %5 = xor <2 x i1> %4, <i1 true, i1 true>
+  %6 = extractelement <2 x i1> %5, i32 0
+  %7 = or i1 %6, undef
+  br i1 %7, label %9, label %8
+
+; <label>:8:                                      ; preds = %0
+  unreachable
+
+; <label>:9:                                      ; preds = %0
+  unreachable
+}
diff --git a/llvm/test/CodeGen/SystemZ/signbits-intrinsics-binop.ll b/llvm/test/CodeGen/SystemZ/signbits-intrinsics-binop.ll
new file mode 100644
index 0000000..1fc1496
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/signbits-intrinsics-binop.ll
@@ -0,0 +1,236 @@
+; Test that DAGCombiner gets helped by ComputeNumSignBitsForTargetNode() with
+; vector intrinsics.
+;
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
+
+declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>)
+declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>)
+declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>)
+
+; PACKS_CC: i64 -> i32
+define <4 x i32> @f0() {
+; CHECK-LABEL: f0:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vpksgs %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 0, i64 1>)
+  %extr = extractvalue {<4 x i32>, i32} %call, 0
+  %trunc = trunc <4 x i32> %extr to <4 x i16>
+  %ret = sext <4 x i16> %trunc to <4 x i32>
+  ret <4 x i32> %ret
+}
+
+; PACKS_CC: i32 -> i16
+define <8 x i16> @f1() {
+; CHECK-LABEL: f1:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vpksfs %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
+                                                  <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
+  %extr = extractvalue {<8 x i16>, i32} %call, 0
+  %trunc = trunc <8 x i16> %extr to <8 x i8>
+  %ret = sext <8 x i8> %trunc to <8 x i16>
+  ret <8 x i16> %ret
+}
+
+; PACKS_CC: i16 -> i8
+define <16 x i8> @f2() {
+; CHECK-LABEL: f2:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vpkshs %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(
+                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
+                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
+  %extr = extractvalue {<16 x i8>, i32} %call, 0
+  %trunc = trunc <16 x i8> %extr to <16 x i4>
+  %ret = sext <16 x i4> %trunc to <16 x i8>
+  ret <16 x i8> %ret
+}
+
+declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>)
+declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>)
+declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>)
+
+; PACKLS_CC: i64 -> i32
+define <4 x i32> @f3() {
+; CHECK-LABEL: f3:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vpklsgs %v24, %v1, %v0
+; CHECK-NEXT:  br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>)
+  %extr = extractvalue {<4 x i32>, i32} %call, 0
+  %trunc = trunc <4 x i32> %extr to <4 x i16>
+  %ret = sext <4 x i16> %trunc to <4 x i32>
+  ret <4 x i32> %ret
+}
+
+; PACKLS_CC: i32 -> i16
+define <8 x i16> @f4() {
+; CHECK-LABEL: f4:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vpklsfs %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
+                                                   <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
+  %extr = extractvalue {<8 x i16>, i32} %call, 0
+  %trunc = trunc <8 x i16> %extr to <8 x i8>
+  %ret = sext <8 x i8> %trunc to <8 x i16>
+  ret <8 x i16> %ret
+}
+
+; PACKLS_CC: i16 -> i8
+define <16 x i8> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vpklshs %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(
+                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
+                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
+  %extr = extractvalue {<16 x i8>, i32} %call, 0
+  %trunc = trunc <16 x i8> %extr to <16 x i4>
+  %ret = sext <16 x i4> %trunc to <16 x i8>
+  ret <16 x i8> %ret
+}
+
+declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>)
+
+; PACKS: i64 -> i32
+define <4 x i32> @f6() {
+; CHECK-LABEL: f6:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vpksg %v24, %v1, %v0
+; CHECK-NEXT:  br %r14
+  %call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>)
+  %trunc = trunc <4 x i32> %call to <4 x i16>
+  %ret = sext <4 x i16> %trunc to <4 x i32>
+  ret <4 x i32> %ret
+}
+
+; PACKS: i32 -> i16
+define <8 x i16> @f7() {
+; CHECK-LABEL: f7:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vpksf %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
+                                          <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
+  %trunc = trunc <8 x i16> %call to <8 x i8>
+  %ret = sext <8 x i8> %trunc to <8 x i16>
+  ret <8 x i16> %ret
+}
+
+; PACKS: i16 -> i8
+define <16 x i8> @f8() {
+; CHECK-LABEL: f8:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vpksh %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call <16 x i8> @llvm.s390.vpksh(
+                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
+                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
+  %trunc = trunc <16 x i8> %call to <16 x i4>
+  %ret = sext <16 x i4> %trunc to <16 x i8>
+  ret <16 x i8> %ret
+}
+
+declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>)
+
+; PACKLS: i64 -> i32
+define <4 x i32> @f9() {
+; CHECK-LABEL: f9:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vpklsg %v24, %v1, %v0
+; CHECK-NEXT:  br %r14
+  %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>)
+  %trunc = trunc <4 x i32> %call to <4 x i16>
+  %ret = sext <4 x i16> %trunc to <4 x i32>
+  ret <4 x i32> %ret
+}
+
+; PACKLS: i32 -> i16
+define <8 x i16> @f10() {
+; CHECK-LABEL: f10:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vpklsf %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 0, i32 1, i32 1, i32 0>,
+                                           <4 x i32> <i32 0, i32 1, i32 1, i32 0>)
+  %trunc = trunc <8 x i16> %call to <8 x i8>
+  %ret = sext <8 x i8> %trunc to <8 x i16>
+  ret <8 x i16> %ret
+}
+
+; PACKLS: i16 -> i8
+define <16 x i8> @f11() {
+; CHECK-LABEL: f11:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vpklsh %v24, %v0, %v0
+; CHECK-NEXT:  br %r14
+  %call = call <16 x i8> @llvm.s390.vpklsh(
+                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>,
+                <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>)
+  %trunc = trunc <16 x i8> %call to <16 x i4>
+  %ret = sext <16 x i4> %trunc to <16 x i8>
+  ret <16 x i8> %ret
+}
+
+declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32)
+
+; VPDI:
+define <2 x i64> @f12() {
+; CHECK-LABEL: f12:
+; CHECK-LABEL: # %bb.0:
+; CHECK:      vpdi %v24, %v1, %v0, 0
+; CHECK-NEXT: br %r14
+  %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 0, i64 1>,
+                                         <2 x i64> <i64 1, i64 0>, i32 0)
+  %trunc = trunc <2 x i64> %perm to <2 x i32>
+  %ret = sext <2 x i32> %trunc to <2 x i64>
+  ret <2 x i64> %ret
+}
+
+declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32)
+
+; VSLDB:
+define <16 x i8> @f13() {
+; CHECK-LABEL: f13:
+; CHECK-LABEL: # %bb.0:
+; CHECK:      vsldb %v24, %v0, %v0, 1
+; CHECK-NEXT: br %r14
+  %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8>
+                 <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
+                  i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>, <16 x i8>
+                 <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
+                  i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>,
+                  i32 1)
+  %trunc = trunc <16 x i8> %shfd to <16 x i4>
+  %ret = sext <16 x i4> %trunc to <16 x i8>
+  ret <16 x i8> %ret
+}
+
+declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>)
+
+; Test VPERM:
+define <16 x i8> @f14() {
+; CHECK-LABEL: f14:
+; CHECK-LABEL: # %bb.0:
+; CHECK:      vperm %v24, %v0, %v0, %v0
+; CHECK-NEXT: br %r14
+  %perm = call <16 x i8> @llvm.s390.vperm(
+                  <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
+                             i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>,
+                  <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
+                             i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>,
+                  <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1,
+                             i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>)
+  %trunc = trunc <16 x i8> %perm to <16 x i4>
+  %ret = sext <16 x i4> %trunc to <16 x i8>
+  ret <16 x i8> %ret
+}
diff --git a/llvm/test/CodeGen/SystemZ/signbits-intrinsics-unpack.ll b/llvm/test/CodeGen/SystemZ/signbits-intrinsics-unpack.ll
new file mode 100644
index 0000000..b37c1c7
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/signbits-intrinsics-unpack.ll
@@ -0,0 +1,97 @@
+; Test that DAGCombiner gets helped by ComputeNumSignBitsForTargetNode() with
+; vector intrinsics.
+;
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s
+
+declare <8 x i16> @llvm.s390.vuphb(<16 x i8>)
+
+; VUPHB
+define <8 x i16> @f0() {
+; CHECK-LABEL: f0:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vuphb %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <8 x i16> @llvm.s390.vuphb(<16 x i8>
+                                         <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1,
+                                          i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
+  %trunc = trunc <8 x i16> %unp to <8 x i8>
+  %ret = sext <8 x i8> %trunc to <8 x i16>
+  ret <8 x i16> %ret
+}
+
+declare <4 x i32> @llvm.s390.vuphh(<8 x i16>)
+
+; VUPHH
+define <4 x i32> @f1() {
+; CHECK-LABEL: f1:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vuphh %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <4 x i32> @llvm.s390.vuphh(<8 x i16>
+                                         <i16 0, i16 1, i16 0, i16 1,
+                                          i16 0, i16 1, i16 0, i16 1>)
+  %trunc = trunc <4 x i32> %unp to <4 x i16>
+  %ret = sext <4 x i16> %trunc to <4 x i32>
+  ret <4 x i32> %ret
+}
+
+declare <2 x i64> @llvm.s390.vuphf(<4 x i32>)
+
+; VUPHF
+define <2 x i64> @f2() {
+; CHECK-LABEL: f2:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vuphf %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 0, i32 1, i32 0, i32 1>)
+  %trunc = trunc <2 x i64> %unp to <2 x i32>
+  %ret = sext <2 x i32> %trunc to <2 x i64>
+  ret <2 x i64> %ret
+}
+
+declare <8 x i16> @llvm.s390.vuplb(<16 x i8>)
+
+; VUPLB
+define <8 x i16> @f3() {
+; CHECK-LABEL: f3:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vuplb %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <8 x i16> @llvm.s390.vuplb(<16 x i8>
+                                         <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1,
+                                          i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>)
+  %trunc = trunc <8 x i16> %unp to <8 x i8>
+  %ret = sext <8 x i8> %trunc to <8 x i16>
+  ret <8 x i16> %ret
+}
+
+declare <4 x i32> @llvm.s390.vuplhw(<8 x i16>)
+
+; VUPLHW
+define <4 x i32> @f4() {
+; CHECK-LABEL: f4:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vuplhw %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <4 x i32> @llvm.s390.vuplhw(<8 x i16>
+                                          <i16 1, i16 0, i16 1, i16 0,
+                                           i16 1, i16 0, i16 1, i16 0>)
+  %trunc = trunc <4 x i32> %unp to <4 x i16>
+  %ret = sext <4 x i16> %trunc to <4 x i32>
+  ret <4 x i32> %ret
+}
+
+declare <2 x i64> @llvm.s390.vuplf(<4 x i32>)
+
+; VUPLF
+define <2 x i64> @f5() {
+; CHECK-LABEL: f5:
+; CHECK-LABEL: # %bb.0:
+; CHECK:       vuplf %v24, %v0
+; CHECK-NEXT:  br %r14
+  %unp = call <2 x i64> @llvm.s390.vuplf(<4 x i32> <i32 1, i32 0, i32 1, i32 0>)
+  %trunc = trunc <2 x i64> %unp to <2 x i32>
+  %ret = sext <2 x i32> %trunc to <2 x i64>
+  ret <2 x i64> %ret
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/signbits.ll b/llvm/test/CodeGen/SystemZ/signbits.ll
new file mode 100644
index 0000000..4c019a6
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/signbits.ll
@@ -0,0 +1,37 @@
+; Test that ComputeNumSignBitsForTargetNode() (SELECT_CCMASK) helps
+; DAGCombiner recognize that %sel0 is already sign extended.
+;
+; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -debug-only=isel < %s 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+%0 = type <{ %1*, i16, [6 x i8] }>
+%1 = type { i32 (...)** }
+
+define signext i16 @fun(%0* %Arg0, i16 signext %Arg1) {
+entry:
+  br i1 undef, label %lab0, label %lab1
+
+lab0:
+  %icmp0 = icmp eq i32 undef, 0
+  %sel0 = select i1 %icmp0, i16 %Arg1, i16 1
+  br label %lab1
+
+lab1:
+; CHECK: *** MachineFunction at end of ISel ***
+; CHECK-LABEL: bb.2.lab1:
+; CHECK-NOT:   LHR
+; CHECK:       BRC
+  %phi0 = phi i16 [ 2, %entry ], [ %sel0, %lab0 ]
+  %sext0 = sext i16 %phi0 to i32
+  br i1 undef, label %lab2, label %lab3
+
+lab2:
+  %and0 = and i32 %sext0, 8
+  %icmp1 = icmp eq i32 %and0, 0
+  %sel1 = select i1 %icmp1, i16 %phi0, i16 4
+  ret i16 %sel1
+
+lab3:
+  ret i16 8
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll
index fe4ae45..7da1ded 100644
--- a/llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll
@@ -17,8 +17,7 @@
 ; CHECK-NEXT:    vlvgf [[REG2]], [[REG3]], 2
 ; CHECK-NEXT:    vn [[REG2]], [[REG2]], [[REG0]]
 ; CHECK-NEXT:    vlgvf [[REG4:%r[0-9]]], [[REG2]], 3
-; CHECK-NEXT:    tmll [[REG4]], 1
-; CHECK-NEXT:    jne .LBB0_1
+; CHECK-NEXT:    cijlh [[REG4]], 0, .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %CF36
 ; CHECK-NEXT:    br %r14
 BB: