- Add AVX forms of all SSE2 logical instructions
- Add VEX encoding bits to x86 MRM0r-MRM7r (sketches below)
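
For reference, VEX.vvvv carries the extra (non-destructive) source
register in 1's complement form, as the new comment in
X86MCCodeEmitter.cpp describes. A minimal standalone sketch of the
encoding (the helper name here is hypothetical, not the LLVM one):

    // vexVVVV: 1's complement encoding of a sequential XMM register
    // number (0-15), masked to the 4-bit VEX.vvvv field.
    static unsigned char vexVVVV(unsigned RegNum) {
      return static_cast<unsigned char>(~RegNum & 0xf);
    }
    // XMM9 -> ~9 & 0xf == 0x6; XMM0 -> 0xf, which is also the
    // all-ones value used when the field is unused.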

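For the MRM0r-MRM7r forms (e.g. vpsllw $3, %xmm2, %xmm1) the ModRM reg
field already holds the /0-/7 opcode extension, so the destination
register travels in VEX.vvvv and the emitter skips it before emitting
ModRM. A sketch of the resulting byte, under those assumptions (helper
name hypothetical):

    // modRM: mod=0b11 selects register-direct operands; reg holds the
    // opcode extension for MRM0r-MRM7r; rm holds the source register.
    static unsigned char modRM(unsigned Mod, unsigned RegOpc, unsigned RM) {
      return (unsigned char)((Mod << 6) | ((RegOpc & 7) << 3) | (RM & 7));
    }
    // vpsllw $3, %xmm2, %xmm1: MRM6r -> modRM(3, 6, 2) == 0xF2, with
    // the destination %xmm1 encoded as VEX.vvvv = ~1 & 0xf == 0xE.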

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107238 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 038d445..0b36214 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2440,6 +2440,68 @@
 // SSE2 - Packed Integer Logical Instructions
 //===---------------------------------------------------------------------===//
 
+let isAsmParserOnly = 1, Predicates = [HasAVX, HasSSE2] in {
+defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw",
+                                int_x86_sse2_psll_w, int_x86_sse2_pslli_w, 0>,
+                                VEX_4V;
+defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld",
+                                int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>,
+                                VEX_4V;
+defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq",
+                                int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>,
+                                VEX_4V;
+
+defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw",
+                                int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>,
+                                VEX_4V;
+defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld",
+                                int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>,
+                                VEX_4V;
+defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq",
+                                int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>,
+                                VEX_4V;
+
+defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw",
+                                int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>,
+                                VEX_4V;
+defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad",
+                                int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>,
+                                VEX_4V;
+
+let isCommutable = 1 in {
+defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 0>, VEX_4V;
+defm VPOR  : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 0>, VEX_4V;
+defm VPXOR : PDI_binop_rm_v2i64<0xEF, "vpxor", xor, 0>, VEX_4V;
+}
+
+let ExeDomain = SSEPackedInt in {
+  let neverHasSideEffects = 1 in {
+    // 128-bit logical shifts.
+    def VPSLLDQri : PDIi8<0x73, MRM7r,
+                      (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                      "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+                      VEX_4V;
+    def VPSRLDQri : PDIi8<0x73, MRM3r,
+                      (outs VR128:$dst), (ins VR128:$src1, i32i8imm:$src2),
+                      "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
+                      VEX_4V;
+    // PSRADQri doesn't exist in SSE[1-3].
+  }
+  def VPANDNrr : PDI<0xDF, MRMSrcReg,
+                    (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
+                    "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+                                              VR128:$src2)))]>, VEX_4V;
+
+  def VPANDNrm : PDI<0xDF, MRMSrcMem,
+                    (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
+                    "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}",
+                    [(set VR128:$dst, (v2i64 (and (vnot VR128:$src1),
+                                              (memopv2i64 addr:$src2))))]>,
+                                              VEX_4V;
+}
+}
+
 let Constraints = "$src1 = $dst" in {
 defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw",
                                int_x86_sse2_psll_w, int_x86_sse2_pslli_w>;
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index 5dd6684..80f56e1 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -60,6 +60,27 @@
   static unsigned GetX86RegNum(const MCOperand &MO) {
     return X86RegisterInfo::getX86RegNum(MO.getReg());
   }
+
+  // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the range
+  // 0-7 and the difference between the two groups is given by the REX prefix.
+  // In the VEX prefix, registers are seen sequentially from 0-15 and encoded
+  // in 1's complement form. For example:
+  //
+  //  ModRM field => XMM9 => 1
+  //  VEX.VVVV    => XMM9 => ~9
+  //
+  // See table 4-35 of Intel AVX Programming Reference for details.
+  static unsigned char getVEXRegisterEncoding(const MCInst &MI,
+                                              unsigned OpNum) {
+    unsigned SrcReg = MI.getOperand(OpNum).getReg();
+    unsigned SrcRegNum = GetX86RegNum(MI.getOperand(OpNum));
+    if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15)
+      SrcRegNum += 8;
+
+    // The registers represented through VEX_VVVV should
+    // be encoded in 1's complement form.
+    return (~SrcRegNum) & 0xf;
+  }
   
   void EmitByte(unsigned char C, unsigned &CurByte, raw_ostream &OS) const {
     OS << (char)C;
@@ -134,7 +155,6 @@
   return new X86MCCodeEmitter(TM, Ctx, true);
 }
 
-
 /// isDisp8 - Return true if this signed displacement fits in a 8-bit 
 /// sign-extended field. 
 static bool isDisp8(int Value) {
@@ -469,29 +489,12 @@
         X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(CurOp).getReg()))
       VEX_R = 0x0;
 
-    // If the memory destination has been checked first,
-    // go back to the first operand
+    // CurOp and NumOps are equal when VEX_R represents a register used
+    // to index a memory destination (which is the last operand).
     CurOp = (CurOp == NumOps) ? 0 : CurOp+1;
 
-    // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the
-    // range 0-7 and the difference between the 2 groups is given by the
-    // REX prefix. In the VEX prefix, registers are seen sequencially
-    // from 0-15 and encoded in 1's complement form, example:
-    //
-    //  ModRM field => XMM9 => 1
-    //  VEX.VVVV    => XMM9 => ~9
-    //
-    // See table 4-35 of Intel AVX Programming Reference for details.
     if (HasVEX_4V) {
-      unsigned SrcReg = MI.getOperand(CurOp).getReg();
-      unsigned SrcRegNum = GetX86RegNum(MI.getOperand(1));
-      if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15)
-        SrcRegNum += 8;
-
-      // The registers represented through VEX_VVVV should
-      // be encoded in 1's complement form.
-      VEX_4V = (~SrcRegNum) & 0xf;
-
+      VEX_4V = getVEXRegisterEncoding(MI, CurOp);
       CurOp++;
     }
 
@@ -505,7 +508,16 @@
         VEX_X = 0x0;
     }
     break;
-  default:
+  default: // MRM0r-MRM7r
+    if (HasVEX_4V)
+      VEX_4V = getVEXRegisterEncoding(MI, CurOp);
+
+    CurOp++;
+    for (; CurOp != NumOps; ++CurOp) {
+      const MCOperand &MO = MI.getOperand(CurOp);
+      if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+        VEX_B = 0x0;
+    }
+    break;
-    assert(0 && "Not implemented!");
   }
 
@@ -831,6 +844,8 @@
   case X86II::MRM2r: case X86II::MRM3r:
   case X86II::MRM4r: case X86II::MRM5r:
   case X86II::MRM6r: case X86II::MRM7r:
+    if (HasVEX_4V) // Skip the register dst (which is encoded in VEX_VVVV).
+      CurOp++;
     EmitByte(BaseOpcode, CurByte, OS);
     EmitRegModRMByte(MI.getOperand(CurOp++),
                      (TSFlags & X86II::FormMask)-X86II::MRM0r,