Add X86 BZHI instruction as well as BMI2 feature detection.

llvm-svn: 142122
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index ce24904..30ff1fd 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -387,20 +387,24 @@
     /// and the additional register is encoded in VEX_VVVV prefix.
     VEX_4V      = 1U << 2,
 
+    /// VEX_4VOp3 - Similar to VEX_4V, but used on instructions that encode
+    /// operand 3 with VEX.vvvv.
+    VEX_4VOp3   = 1U << 3,
+
     /// VEX_I8IMM - Specifies that the last register used in a AVX instruction,
     /// must be encoded in the i8 immediate field. This usually happens in
     /// instructions with 4 operands.
-    VEX_I8IMM   = 1U << 3,
+    VEX_I8IMM   = 1U << 4,
 
     /// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
     /// instruction uses 256-bit wide registers. This is usually auto detected
     /// if a VR256 register is used, but some AVX instructions also have this
     /// field marked when using a f256 memory references.
-    VEX_L       = 1U << 4,
+    VEX_L       = 1U << 5,
 
     // VEX_LIG - Specifies that this instruction ignores the L-bit in the VEX
     // prefix. Usually used for scalar instructions. Needed by disassembler.
-    VEX_LIG     = 1U << 5,
+    VEX_LIG     = 1U << 6,
 
     /// Has3DNow0F0FOpcode - This flag indicates that the instruction uses the
     /// wacky 0x0F 0x0F prefix for 3DNow! instructions.  The manual documents
@@ -408,7 +412,7 @@
     /// storing a classifier in the imm8 field.  To simplify our implementation,
     /// we handle this by storeing the classifier in the opcode field and using
     /// this flag to indicate that the encoder should do the wacky 3DNow! thing.
-    Has3DNow0F0FOpcode = 1U << 6
+    Has3DNow0F0FOpcode = 1U << 7
   };
 
   // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
@@ -477,12 +481,9 @@
     case X86II::MRMDestMem:
       return 0;
     case X86II::MRMSrcMem: {
-      // FIXME: BEXTR uses VEX.vvvv for Operand 3
-      bool IsBEXTR = (Opcode == X86::BEXTR32rr || Opcode == X86::BEXTR32rm ||
-                      Opcode == X86::BEXTR64rr || Opcode == X86::BEXTR64rm);
       bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
       unsigned FirstMemOp = 1;
-      if (HasVEX_4V && !IsBEXTR)
+      if (HasVEX_4V)
         ++FirstMemOp;// Skip the register source (which is encoded in VEX_VVVV).
 
       // FIXME: Maybe lea should have its own form?  This is a horrible hack.
diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 55c6010..91f672b 100644
--- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -386,9 +386,8 @@
                                            int MemOperand, const MCInst &MI,
                                            const MCInstrDesc &Desc,
                                            raw_ostream &OS) const {
-  bool HasVEX_4V = false;
-  if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
-    HasVEX_4V = true;
+  bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+  bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
 
   // VEX_R: opcode externsion equivalent to REX.R in
   // 1's complement (inverted) form
@@ -447,11 +446,6 @@
   //
   unsigned char VEX_PP = 0;
 
-  // FIXME: BEXTR uses VEX.vvvv for Operand 3 instead of Operand 2
-  unsigned Opcode = MI.getOpcode();
-  bool IsBEXTR = (Opcode == X86::BEXTR32rr || Opcode == X86::BEXTR32rm ||
-                  Opcode == X86::BEXTR64rr || Opcode == X86::BEXTR64rm);
-
   // Encode the operand size opcode prefix as needed.
   if (TSFlags & X86II::OpSize)
     VEX_PP = 0x01;
@@ -530,8 +524,7 @@
     if (X86II::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
       VEX_R = 0x0;
 
-    // FIXME: BEXTR uses VEX.vvvv for Operand 3
-    if (HasVEX_4V && !IsBEXTR)
+    if (HasVEX_4V)
       VEX_4V = getVEXRegisterEncoding(MI, 1);
 
     if (X86II::isX86_64ExtendedReg(
@@ -541,7 +534,7 @@
                MI.getOperand(MemOperand+X86::AddrIndexReg).getReg()))
       VEX_X = 0x0;
 
-    if (IsBEXTR)
+    if (HasVEX_4VOp3)
       VEX_4V = getVEXRegisterEncoding(MI, X86::AddrNumOperands+1);
     break;
   case X86II::MRM0m: case X86II::MRM1m:
@@ -572,13 +565,12 @@
       VEX_R = 0x0;
     CurOp++;
 
-    // FIXME: BEXTR uses VEX.vvvv for Operand 3
-    if (HasVEX_4V && !IsBEXTR)
+    if (HasVEX_4V)
       VEX_4V = getVEXRegisterEncoding(MI, CurOp++);
     if (X86II::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
       VEX_B = 0x0;
     CurOp++;
-    if (IsBEXTR)
+    if (HasVEX_4VOp3)
       VEX_4V = getVEXRegisterEncoding(MI, CurOp);
     break;
   case X86II::MRMDestReg:
@@ -874,15 +866,11 @@
   unsigned CurByte = 0;
 
   // Is this instruction encoded using the AVX VEX prefix?
-  bool HasVEXPrefix = false;
+  bool HasVEXPrefix = (TSFlags >> X86II::VEXShift) & X86II::VEX;
 
   // It uses the VEX.VVVV field?
-  bool HasVEX_4V = false;
-
-  if ((TSFlags >> X86II::VEXShift) & X86II::VEX)
-    HasVEXPrefix = true;
-  if ((TSFlags >> X86II::VEXShift) & X86II::VEX_4V)
-    HasVEX_4V = true;
+  bool HasVEX_4V = (TSFlags >> X86II::VEXShift) & X86II::VEX_4V;
+  bool HasVEX_4VOp3 = (TSFlags >> X86II::VEXShift) & X86II::VEX_4VOp3;
 
   // Determine where the memory operand starts, if present.
   int MemoryOperand = X86II::getMemoryOperandNo(TSFlags, Opcode);
@@ -898,10 +886,6 @@
   if ((TSFlags >> X86II::VEXShift) & X86II::Has3DNow0F0FOpcode)
     BaseOpcode = 0x0F;   // Weird 3DNow! encoding.
 
-  // FIXME: BEXTR uses VEX.vvvv for Operand 3 instead of Operand 2
-  bool IsBEXTR = (Opcode == X86::BEXTR32rr || Opcode == X86::BEXTR32rm ||
-                  Opcode == X86::BEXTR64rr || Opcode == X86::BEXTR64rm);
-
   unsigned SrcRegNum = 0;
   switch (TSFlags & X86II::FormMask) {
   case X86II::MRMInitReg:
@@ -956,20 +940,20 @@
     EmitByte(BaseOpcode, CurByte, OS);
     SrcRegNum = CurOp + 1;
 
-    if (HasVEX_4V && !IsBEXTR) // Skip 1st src (which is encoded in VEX_VVVV)
+    if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
       SrcRegNum++;
 
     EmitRegModRMByte(MI.getOperand(SrcRegNum),
                      GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
     CurOp = SrcRegNum + 1;
-    if (IsBEXTR)
+    if (HasVEX_4VOp3)
       ++CurOp;
     break;
 
   case X86II::MRMSrcMem: {
     int AddrOperands = X86::AddrNumOperands;
     unsigned FirstMemOp = CurOp+1;
-    if (HasVEX_4V && !IsBEXTR) {
+    if (HasVEX_4V) {
       ++AddrOperands;
       ++FirstMemOp;  // Skip the register source (which is encoded in VEX_VVVV).
     }
@@ -979,7 +963,7 @@
     EmitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
                      TSFlags, CurByte, OS, Fixups);
     CurOp += AddrOperands + 1;
-    if (IsBEXTR)
+    if (HasVEX_4VOp3)
       ++CurOp;
     break;
   }
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 104b91f..7ee7df5 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -106,6 +106,8 @@
                                       "Support LZCNT instruction">;
 def FeatureBMI     : SubtargetFeature<"bmi", "HasBMI", "true",
                                       "Support BMI instructions">;
+def FeatureBMI2    : SubtargetFeature<"bmi2", "HasBMI2", "true",
+                                      "Support BMI2 instructions">;
 
 //===----------------------------------------------------------------------===//
 // X86 processors supported.
@@ -164,7 +166,7 @@
 def : Proc<"core-avx2",       [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES,
                                FeatureCLMUL, FeatureRDRAND, FeatureF16C,
                                FeatureFMA3, FeatureMOVBE, FeatureLZCNT,
-                               FeatureBMI]>;
+                               FeatureBMI, FeatureBMI2]>;
 
 def : Proc<"k6",              [FeatureMMX]>;
 def : Proc<"k6-2",            [Feature3DNow]>;
diff --git a/llvm/lib/Target/X86/X86InstrFormats.td b/llvm/lib/Target/X86/X86InstrFormats.td
index 0a1590b..d291e43 100644
--- a/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/llvm/lib/Target/X86/X86InstrFormats.td
@@ -111,6 +111,7 @@
 class VEX    { bit hasVEXPrefix = 1; }
 class VEX_W  { bit hasVEX_WPrefix = 1; }
 class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
+class VEX_4VOp3 : VEX { bit hasVEX_4VOp3Prefix = 1; }
 class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
 class VEX_L  { bit hasVEX_L = 1; }
 class VEX_LIG { bit ignoresVEX_L = 1; }
@@ -148,6 +149,8 @@
   bit hasVEXPrefix = 0;     // Does this inst require a VEX prefix?
   bit hasVEX_WPrefix = 0;   // Does this inst set the VEX_W field?
   bit hasVEX_4VPrefix = 0;  // Does this inst require the VEX.VVVV field?
+  bit hasVEX_4VOp3Prefix = 0;  // Does this inst require the VEX.VVVV field to
+                               // encode the third operand?
   bit hasVEX_i8ImmReg = 0;  // Does this inst require the last source register
                             // to be encoded in a immediate field?
   bit hasVEX_L = 0;         // Does this inst use large (256-bit) registers?
@@ -169,10 +172,11 @@
   let TSFlags{33}    = hasVEXPrefix;
   let TSFlags{34}    = hasVEX_WPrefix;
   let TSFlags{35}    = hasVEX_4VPrefix;
-  let TSFlags{36}    = hasVEX_i8ImmReg;
-  let TSFlags{37}    = hasVEX_L;
-  let TSFlags{38}    = ignoresVEX_L;
-  let TSFlags{39}    = has3DNow0F0FOpcode;
+  let TSFlags{36}    = hasVEX_4VOp3Prefix;
+  let TSFlags{37}    = hasVEX_i8ImmReg;
+  let TSFlags{38}    = hasVEX_L;
+  let TSFlags{39}    = ignoresVEX_L;
+  let TSFlags{40}    = has3DNow0F0FOpcode;
 }
 
 class PseudoI<dag oops, dag iops, list<dag> pattern>
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index fd4ecd4..4a5912d 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -480,6 +480,7 @@
 def HasF16C      : Predicate<"Subtarget->hasF16C()">;
 def HasLZCNT     : Predicate<"Subtarget->hasLZCNT()">;
 def HasBMI       : Predicate<"Subtarget->hasBMI()">;
+def HasBMI2      : Predicate<"Subtarget->hasBMI2()">;
 def FPStackf32   : Predicate<"!Subtarget->hasXMM()">;
 def FPStackf64   : Predicate<"!Subtarget->hasXMMInt()">;
 def HasCmpxchg16b: Predicate<"Subtarget->hasCmpxchg16b()">;
@@ -1422,19 +1423,24 @@
   defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem>, VEX_W;
 }
 
-multiclass bmi_bextr<string mnemonic, RegisterClass RC,
-                     X86MemOperand x86memop> {
-  def rr : I<0xF7, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
+                          X86MemOperand x86memop> {
+  def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
              !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             []>, T8, VEX_4V;
-  def rm : I<0xF7, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+             []>, T8, VEX_4VOp3;
+  def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
              !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             []>, T8, VEX_4V;
+             []>, T8, VEX_4VOp3;
 }
 
 let Predicates = [HasBMI], Defs = [EFLAGS] in {
-  defm BEXTR32 : bmi_bextr<"bextr{l}", GR32, i32mem>;
-  defm BEXTR64 : bmi_bextr<"bextr{q}", GR64, i64mem>, VEX_W;
+  defm BEXTR32 : bmi_bextr_bzhi<0xF7, "bextr{l}", GR32, i32mem>;
+  defm BEXTR64 : bmi_bextr_bzhi<0xF7, "bextr{q}", GR64, i64mem>, VEX_W;
+}
+
+let Predicates = [HasBMI2], Defs = [EFLAGS] in {
+  defm BZHI32 : bmi_bextr_bzhi<0xF5, "bzhi{l}", GR32, i32mem>;
+  defm BZHI64 : bmi_bextr_bzhi<0xF5, "bzhi{q}", GR64, i64mem>, VEX_W;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 670073a..c08dac9 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -283,6 +283,10 @@
       HasBMI = true;
       ToggleFeature(X86::FeatureBMI);
     }
+    if ((EBX >> 8) & 0x1) {
+      HasBMI2 = true;
+      ToggleFeature(X86::FeatureBMI2);
+    }
   }
 }
 
@@ -307,6 +311,7 @@
   , HasF16C(false)
   , HasLZCNT(false)
   , HasBMI(false)
+  , HasBMI2(false)
   , IsBTMemSlow(false)
   , IsUAMemFast(false)
   , HasVectorUAMem(false)
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 3258d3d..81fa176 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -105,6 +105,9 @@
   /// HasBMI - Processor has BMI1 instructions.
   bool HasBMI;
 
+  /// HasBMI2 - Processor has BMI2 instructions.
+  bool HasBMI2;
+
   /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
   bool IsBTMemSlow;
 
@@ -192,6 +195,7 @@
   bool hasF16C() const { return HasF16C; }
   bool hasLZCNT() const { return HasLZCNT; }
   bool hasBMI() const { return HasBMI; }
+  bool hasBMI2() const { return HasBMI2; }
   bool isBTMemSlow() const { return IsBTMemSlow; }
   bool isUnalignedMemAccessFast() const { return IsUAMemFast; }
   bool hasVectorUAMem() const { return HasVectorUAMem; }