Add X86 PEXTR and PDEP instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142141 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index 30ff1fd..007e620 100644
--- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -295,8 +295,11 @@
     T8 = 13 << Op0Shift,  TA = 14 << Op0Shift,
     A6 = 15 << Op0Shift,  A7 = 16 << Op0Shift,
 
-    // TF - Prefix before and after 0x0F
-    TF = 17 << Op0Shift,
+    // T8XD - Prefix before and after 0x0F. Combination of T8 and XD.
+    T8XD = 17 << Op0Shift,
+
+    // T8XS - Prefix before and after 0x0F. Combination of T8 and XS.
+    T8XS = 18 << Op0Shift,
 
     //===------------------------------------------------------------------===//
     // REX_W - REX prefixes are instruction prefixes used in 64-bit mode.
diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 91f672b..8ae7a3c 100644
--- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -464,7 +464,11 @@
   case X86II::TA:  // 0F 3A
     VEX_5M = 0x3;
     break;
-  case X86II::TF:  // F2 0F 38
+  case X86II::T8XS: // F3 0F 38
+    VEX_PP = 0x2;
+    VEX_5M = 0x2;
+    break;
+  case X86II::T8XD: // F2 0F 38
     VEX_PP = 0x3;
     VEX_5M = 0x2;
     break;
@@ -790,7 +794,11 @@
   case X86II::A7:  // 0F A7
     Need0FPrefix = true;
     break;
-  case X86II::TF: // F2 0F 38
+  case X86II::T8XS: // F3 0F 38
+    EmitByte(0xF3, CurByte, OS);
+    Need0FPrefix = true;
+    break;
+  case X86II::T8XD: // F2 0F 38
     EmitByte(0xF2, CurByte, OS);
     Need0FPrefix = true;
     break;
@@ -825,7 +833,8 @@
 
   // FIXME: Pull this up into previous switch if REX can be moved earlier.
   switch (TSFlags & X86II::Op0Mask) {
-  case X86II::TF:    // F2 0F 38
+  case X86II::T8XS:  // F3 0F 38
+  case X86II::T8XD:  // F2 0F 38
   case X86II::T8:    // 0F 38
     EmitByte(0x38, CurByte, OS);
     break;
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index aeff03a..a150604 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -649,15 +649,13 @@
   case X86II::A7:  // 0F A7
     Need0FPrefix = true;
     break;
-  case X86II::TF: // F2 0F 38
-    MCE.emitByte(0xF2);
-    Need0FPrefix = true;
-    break;
   case X86II::REP: break; // already handled.
+  case X86II::T8XS: // F3 0F 38
   case X86II::XS:   // F3 0F
     MCE.emitByte(0xF3);
     Need0FPrefix = true;
     break;
+  case X86II::T8XD: // F2 0F 38
   case X86II::XD:   // F2 0F
     MCE.emitByte(0xF2);
     Need0FPrefix = true;
@@ -683,7 +681,8 @@
     MCE.emitByte(0x0F);
 
   switch (Desc->TSFlags & X86II::Op0Mask) {
-  case X86II::TF:    // F2 0F 38
+  case X86II::T8XD:  // F2 0F 38
+  case X86II::T8XS:  // F3 0F 38
   case X86II::T8:    // 0F 38
     MCE.emitByte(0x38);
     break;
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index d291e43..5b7adf3 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -107,7 +107,8 @@
 class TA     { bits<5> Prefix = 14; }
 class A6     { bits<5> Prefix = 15; }
 class A7     { bits<5> Prefix = 16; }
-class TF     { bits<5> Prefix = 17; }
+class T8XD   { bits<5> Prefix = 17; }
+class T8XS   { bits<5> Prefix = 18; }
 class VEX    { bit hasVEXPrefix = 1; }
 class VEX_W  { bit hasVEX_WPrefix = 1; }
 class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; }
@@ -424,10 +425,10 @@
       : I<o, F, outs, ins, asm, pattern, SSEPackedInt>, T8,
         Requires<[HasSSE42]>;
 
-//   SS42FI - SSE 4.2 instructions with TF prefix.
+//   SS42FI - SSE 4.2 instructions with T8XD prefix.
 class SS42FI<bits<8> o, Format F, dag outs, dag ins, string asm,
               list<dag> pattern>
-      : I<o, F, outs, ins, asm, pattern>, TF, Requires<[HasSSE42]>;
+      : I<o, F, outs, ins, asm, pattern>, T8XD, Requires<[HasSSE42]>;
       
 //   SS42AI = SSE 4.2 instructions with TA prefix
 class SS42AI<bits<8> o, Format F, dag outs, dag ins, string asm,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 4a5912d..1151b02 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1443,6 +1443,23 @@
   defm BZHI64 : bmi_bextr_bzhi<0xF5, "bzhi{q}", GR64, i64mem>, VEX_W;
 }
 
+multiclass bmi_pdep_pextr<string mnemonic, RegisterClass RC,
+                          X86MemOperand x86memop> {
+  def rr : I<0xF5, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             []>, VEX_4V;
+  def rm : I<0xF5, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
+             !strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+             []>, VEX_4V;
+}
+
+let Predicates = [HasBMI2] in {
+  defm PDEP32 : bmi_pdep_pextr<"pdep{l}", GR32, i32mem>, T8XD;
+  defm PDEP64 : bmi_pdep_pextr<"pdep{q}", GR64, i64mem>, T8XD, VEX_W;
+  defm PEXTR32 : bmi_pdep_pextr<"pextr{l}", GR32, i32mem>, T8XS;
+  defm PEXTR64 : bmi_pdep_pextr<"pextr{q}", GR64, i64mem>, T8XS, VEX_W;
+}
+
 //===----------------------------------------------------------------------===//
 // Subsystems.
 //===----------------------------------------------------------------------===//