Teach X86 backend to create BMI2 BZHI instructions from (and X, (add (shl 1, Y), -1)). Fixes PR17038.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@189653 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 81008e9..ecbd24f 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -13473,6 +13473,7 @@
   case X86ISD::BLSI:               return "X86ISD::BLSI";
   case X86ISD::BLSMSK:             return "X86ISD::BLSMSK";
   case X86ISD::BLSR:               return "X86ISD::BLSR";
+  case X86ISD::BZHI:               return "X86ISD::BZHI";
   case X86ISD::MUL_IMM:            return "X86ISD::MUL_IMM";
   case X86ISD::PTEST:              return "X86ISD::PTEST";
   case X86ISD::TESTP:              return "X86ISD::TESTP";
@@ -17279,33 +17280,64 @@
   if (R.getNode())
     return R;
 
-  // Create BLSI, and BLSR instructions
+  // Create BLSI, BLSR, and BZHI instructions
   // BLSI is X & (-X)
   // BLSR is X & (X-1)
-  if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) {
+  // BZHI is X & ((1 << Y) - 1)
+  if (VT == MVT::i32 || VT == MVT::i64) {
     SDValue N0 = N->getOperand(0);
     SDValue N1 = N->getOperand(1);
     SDLoc DL(N);
 
-    // Check LHS for neg
-    if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
-        isZero(N0.getOperand(0)))
-      return DAG.getNode(X86ISD::BLSI, DL, VT, N1);
+    if (Subtarget->hasBMI()) {
+      // Check LHS for neg
+      if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 &&
+          isZero(N0.getOperand(0)))
+        return DAG.getNode(X86ISD::BLSI, DL, VT, N1);
 
-    // Check RHS for neg
-    if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 &&
-        isZero(N1.getOperand(0)))
-      return DAG.getNode(X86ISD::BLSI, DL, VT, N0);
+      // Check RHS for neg
+      if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 &&
+          isZero(N1.getOperand(0)))
+        return DAG.getNode(X86ISD::BLSI, DL, VT, N0);
 
-    // Check LHS for X-1
-    if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
-        isAllOnes(N0.getOperand(1)))
-      return DAG.getNode(X86ISD::BLSR, DL, VT, N1);
+      // Check LHS for X-1
+      if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 &&
+          isAllOnes(N0.getOperand(1)))
+        return DAG.getNode(X86ISD::BLSR, DL, VT, N1);
 
-    // Check RHS for X-1
-    if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
-        isAllOnes(N1.getOperand(1)))
-      return DAG.getNode(X86ISD::BLSR, DL, VT, N0);
+      // Check RHS for X-1
+      if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 &&
+          isAllOnes(N1.getOperand(1)))
+        return DAG.getNode(X86ISD::BLSR, DL, VT, N0);
+    }
+
+    if (Subtarget->hasBMI2()) {
+      // Check for (and (add (shl 1, Y), -1), X)
+      if (N0.getOpcode() == ISD::ADD && isAllOnes(N0.getOperand(1))) {
+        SDValue N00 = N0.getOperand(0);
+        if (N00.getOpcode() == ISD::SHL) {
+          SDValue N001 = N00.getOperand(1);
+          assert(N001.getValueType() == MVT::i8 && "unexpected type");
+          ConstantSDNode *C = dyn_cast<ConstantSDNode>(N00.getOperand(0));
+          if (C && C->getZExtValue() == 1)
+            return DAG.getNode(X86ISD::BZHI, DL, VT, N1,
+                               DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N001));
+        }
+      }
+
+      // Check for (and X, (add (shl 1, Y), -1))
+      if (N1.getOpcode() == ISD::ADD && isAllOnes(N1.getOperand(1))) {
+        SDValue N10 = N1.getOperand(0);
+        if (N10.getOpcode() == ISD::SHL) {
+          SDValue N101 = N10.getOperand(1);
+          assert(N101.getValueType() == MVT::i8 && "unexpected type");
+          ConstantSDNode *C = dyn_cast<ConstantSDNode>(N10.getOperand(0));
+          if (C && C->getZExtValue() == 1)
+            return DAG.getNode(X86ISD::BZHI, DL, VT, N0,
+                               DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N101));
+        }
+      }
+    }
 
     return SDValue();
   }
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 632a5b6..6a2e4d2 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -295,6 +295,7 @@
       BLSI,   // BLSI - Extract lowest set isolated bit
       BLSMSK, // BLSMSK - Get mask up to lowest set bit
       BLSR,   // BLSR - Reset lowest set bit
+      BZHI,   // BZHI - Zero high bits
 
       UMUL, // LOW, HI, FLAGS = umul LHS, RHS
 
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index c2d1807..41e2c44 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -253,6 +253,7 @@
 def X86blsi   : SDNode<"X86ISD::BLSI",   SDTIntUnaryOp>;
 def X86blsmsk : SDNode<"X86ISD::BLSMSK", SDTIntUnaryOp>;
 def X86blsr   : SDNode<"X86ISD::BLSR",   SDTIntUnaryOp>;
+def X86bzhi   : SDNode<"X86ISD::BZHI",   SDTIntBinOp>;
 
 def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>;
 
@@ -1856,6 +1857,15 @@
                                int_x86_bmi_bzhi_64, loadi64>, VEX_W;
 }
 
+def : Pat<(X86bzhi GR32:$src1, GR32:$src2),
+          (BZHI32rr GR32:$src1, GR32:$src2)>;
+def : Pat<(X86bzhi (loadi32 addr:$src1), GR32:$src2),
+          (BZHI32rm addr:$src1, GR32:$src2)>;
+def : Pat<(X86bzhi GR64:$src1, GR64:$src2),
+          (BZHI64rr GR64:$src1, GR64:$src2)>;
+def : Pat<(X86bzhi (loadi64 addr:$src1), GR64:$src2),
+          (BZHI64rm addr:$src1, GR64:$src2)>;
+
 multiclass bmi_pdep_pext<string mnemonic, RegisterClass RC,
                          X86MemOperand x86memop, Intrinsic Int,
                          PatFrag ld_frag> {
diff --git a/test/CodeGen/X86/bmi.ll b/test/CodeGen/X86/bmi.ll
index 4eda888..757e98d 100644
--- a/test/CodeGen/X86/bmi.ll
+++ b/test/CodeGen/X86/bmi.ll
@@ -146,6 +146,51 @@
 
 declare i64 @llvm.x86.bmi.bzhi.64(i64, i64) nounwind readnone
 
+define i32 @bzhi32b(i32 %x, i8 zeroext %index) #0 {
+entry:
+  %conv = zext i8 %index to i32
+  %shl = shl i32 1, %conv
+  %sub = add nsw i32 %shl, -1
+  %and = and i32 %sub, %x
+  ret i32 %and
+; CHECK-LABEL: bzhi32b:
+; CHECK: bzhil
+}
+
+define i32 @bzhi32b_load(i32* %w, i8 zeroext %index) #0 {
+entry:
+  %x = load i32* %w
+  %conv = zext i8 %index to i32
+  %shl = shl i32 1, %conv
+  %sub = add nsw i32 %shl, -1
+  %and = and i32 %sub, %x
+  ret i32 %and
+; CHECK-LABEL: bzhi32b_load:
+; CHECK: bzhil {{.*}}, ({{.*}}), {{.*}}
+}
+
+define i32 @bzhi32c(i32 %x, i8 zeroext %index) #0 {
+entry:
+  %conv = zext i8 %index to i32
+  %shl = shl i32 1, %conv
+  %sub = add nsw i32 %shl, -1
+  %and = and i32 %x, %sub
+  ret i32 %and
+; CHECK-LABEL: bzhi32c:
+; CHECK: bzhil
+}
+
+define i64 @bzhi64b(i64 %x, i8 zeroext %index) #0 {
+entry:
+  %conv = zext i8 %index to i64
+  %shl = shl i64 1, %conv
+  %sub = add nsw i64 %shl, -1
+  %and = and i64 %x, %sub
+  ret i64 %and
+; CHECK-LABEL: bzhi64b:
+; CHECK: bzhiq
+}
+
 define i32 @blsi32(i32 %x) nounwind readnone {
   %tmp = sub i32 0, %x
   %tmp2 = and i32 %x, %tmp