Initial AVX support for some instructions. No patterns matched
yet, only assembly encoding support.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105521 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c5fdf8f..0109562 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -56,7 +56,7 @@
 
   MachineInstr *MI = MBBI;
   MachineFunction &MF = *MI->getParent()->getParent();
-  unsigned TSFlags = MI->getDesc().TSFlags;
+  uint64_t TSFlags = MI->getDesc().TSFlags;
   bool isPre = false;
   switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
   default: return NULL;
@@ -488,7 +488,7 @@
 
   // Basic size info comes from the TSFlags field.
   const TargetInstrDesc &TID = MI->getDesc();
-  unsigned TSFlags = TID.TSFlags;
+  uint64_t TSFlags = TID.TSFlags;
 
   unsigned Opc = MI->getOpcode();
   switch ((TSFlags & ARMII::SizeMask) >> ARMII::SizeShift) {
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
index b1d90df..7d21256 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.h
@@ -137,25 +137,25 @@
 /// Various utilities for checking the target specific flags.
 
 /// A unary data processing instruction doesn't have an Rn operand.
-static inline bool isUnaryDP(unsigned TSFlags) {
+static inline bool isUnaryDP(uint64_t TSFlags) {
   return (TSFlags & ARMII::UnaryDP);
 }
 
 /// This four-bit field describes the addressing mode used.
 /// See also ARMBaseInstrInfo.h.
-static inline unsigned getAddrMode(unsigned TSFlags) {
+static inline unsigned getAddrMode(uint64_t TSFlags) {
   return (TSFlags & ARMII::AddrModeMask);
 }
 
 /// {IndexModePre, IndexModePost}
 /// Only valid for load and store ops.
 /// See also ARMBaseInstrInfo.h.
-static inline unsigned getIndexMode(unsigned TSFlags) {
+static inline unsigned getIndexMode(uint64_t TSFlags) {
   return (TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift;
 }
 
 /// Pre-/post-indexed operations define an extra $base_wb in the OutOperandList.
-static inline bool isPrePostLdSt(unsigned TSFlags) {
+static inline bool isPrePostLdSt(uint64_t TSFlags) {
   return (TSFlags & ARMII::IndexModeMask) != 0;
 }
 
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 66dfd4b..db11fde 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -78,7 +78,7 @@
   isLoad  = TID.mayLoad();
   isStore = TID.mayStore();
   
-  unsigned TSFlags = TID.TSFlags;
+  uint64_t TSFlags = TID.TSFlags;
   
   isFirst   = TSFlags & PPCII::PPC970_First;
   isSingle  = TSFlags & PPCII::PPC970_Single;
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index 93460ef..5fe21ac 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -232,7 +232,7 @@
 
   for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) {
     MachineInstr *MI = I;
-    unsigned Flags = MI->getDesc().TSFlags;
+    uint64_t Flags = MI->getDesc().TSFlags;
     
     unsigned FPInstClass = Flags & X86II::FPTypeMask;
     if (MI->isInlineAsm())
diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td
index c4522f3..1c4301c 100644
--- a/lib/Target/X86/X86InstrFormats.td
+++ b/lib/Target/X86/X86InstrFormats.td
@@ -83,6 +83,7 @@
 class OpSize { bit hasOpSizePrefix = 1; }
 class AdSize { bit hasAdSizePrefix = 1; }
 class REX_W  { bit hasREX_WPrefix = 1; }
+class VEX_4V { bit hasVEX_4VPrefix = 1; }
 class LOCK   { bit hasLockPrefix = 1; }
 class SegFS  { bits<2> SegOvrBits = 1; }
 class SegGS  { bits<2> SegOvrBits = 2; }
@@ -124,6 +125,7 @@
 
   bits<4> Prefix = 0;       // Which prefix byte does this inst have?
   bit hasREX_WPrefix  = 0;  // Does this inst requires the REX.W prefix?
+  bit hasVEX_4VPrefix  = 0;  // Does this inst requires the VEX.VVVV prefix?
   FPFormat FPForm = NotFP;  // What flavor of FP instruction is this?
   bit hasLockPrefix = 0;    // Does this inst have a 0xF0 prefix?
   bits<2> SegOvrBits = 0;   // Segment override prefix.
@@ -141,6 +143,7 @@
   let TSFlags{21-20} = SegOvrBits;
   let TSFlags{23-22} = ExeDomain.Value;
   let TSFlags{31-24} = Opcode;
+  let TSFlags{32}    = hasVEX_4VPrefix;
 }
 
 class I<bits<8> o, Format f, dag outs, dag ins, string asm,
@@ -216,6 +219,7 @@
 //   SSI   - SSE1 instructions with XS prefix.
 //   PSI   - SSE1 instructions with TB prefix.
 //   PSIi8 - SSE1 instructions with ImmT == Imm8 and TB prefix.
+//   VSSI  - SSE1 instructions with XS prefix in AVX form.
 
 class SSI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
       : I<o, F, outs, ins, asm, pattern>, XS, Requires<[HasSSE1]>;
@@ -229,6 +233,10 @@
             list<dag> pattern>
       : Ii8<o, F, outs, ins, asm, pattern, SSEPackedSingle>, TB,
         Requires<[HasSSE1]>;
+class VSSI<bits<8> o, Format F, dag outs, dag ins, string asm,
+           list<dag> pattern>
+      : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XS, VEX_4V,
+        Requires<[HasAVX, HasSSE1]>;
 
 // SSE2 Instruction Templates:
 // 
@@ -237,6 +245,7 @@
 //   SSDIi8 - SSE2 instructions with ImmT == Imm8 and XS prefix.
 //   PDI    - SSE2 instructions with TB and OpSize prefixes.
 //   PDIi8  - SSE2 instructions with ImmT == Imm8 and TB and OpSize prefixes.
+//   VSDI   - SSE2 instructions with XD prefix in AVX form.
 
 class SDI<bits<8> o, Format F, dag outs, dag ins, string asm, list<dag> pattern>
       : I<o, F, outs, ins, asm, pattern>, XD, Requires<[HasSSE2]>;
@@ -253,6 +262,10 @@
             list<dag> pattern>
       : Ii8<o, F, outs, ins, asm, pattern, SSEPackedDouble>, TB, OpSize,
         Requires<[HasSSE2]>;
+class VSDI<bits<8> o, Format F, dag outs, dag ins, string asm,
+           list<dag> pattern>
+      : I<o, F, outs, ins, !strconcat("v", asm), pattern>, XD, VEX_4V,
+        Requires<[HasAVX, HasSSE2]>;
 
 // SSE3 Instruction Templates:
 // 
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index f5c8022..9016c16 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -417,22 +417,36 @@
 
     OpcodeShift   = 24,
     OpcodeMask    = 0xFF << OpcodeShift
+
   };
   
+  // FIXME: The enum opcode space is over and more bits are needed. Anywhere
+  // those enums below are used, TSFlags must be shifted right by 32 first.
+  enum {
+    //===------------------------------------------------------------------===//
+    // VEX_4V - VEX prefixes are instruction prefixes used in AVX.
+    // VEX_4V is used to specify an additional AVX/SSE register. Several 2
+    // address instructions in SSE are represented as 3 address ones in AVX
+    // and the additional register is encoded in VEX_VVVV prefix.
+    //
+    VEXShift    = 0,
+    VEX_4V      = 1 << VEXShift
+  };
+
   // getBaseOpcodeFor - This function returns the "base" X86 opcode for the
   // specified machine instruction.
   //
-  static inline unsigned char getBaseOpcodeFor(unsigned TSFlags) {
+  static inline unsigned char getBaseOpcodeFor(uint64_t TSFlags) {
     return TSFlags >> X86II::OpcodeShift;
   }
   
-  static inline bool hasImm(unsigned TSFlags) {
+  static inline bool hasImm(uint64_t TSFlags) {
     return (TSFlags & X86II::ImmMask) != 0;
   }
   
   /// getSizeOfImm - Decode the "size of immediate" field from the TSFlags field
   /// of the specified instruction.
-  static inline unsigned getSizeOfImm(unsigned TSFlags) {
+  static inline unsigned getSizeOfImm(uint64_t TSFlags) {
     switch (TSFlags & X86II::ImmMask) {
     default: assert(0 && "Unknown immediate size");
     case X86II::Imm8:
@@ -446,7 +460,7 @@
   
   /// isImmPCRel - Return true if the immediate of the specified instruction's
   /// TSFlags indicates that it is pc relative.
-  static inline unsigned isImmPCRel(unsigned TSFlags) {
+  static inline unsigned isImmPCRel(uint64_t TSFlags) {
     switch (TSFlags & X86II::ImmMask) {
       default: assert(0 && "Unknown immediate size");
       case X86II::Imm8PCRel:
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 579e332..32358a3 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -673,6 +673,26 @@
     let isCommutable = Commutable;
   }
 
+  def V#NAME#SSrr : VSSI<opc, MRMSrcReg, (outs FR32:$dst),
+                 (ins FR32:$src1, FR32:$src2),
+                 !strconcat(OpcodeStr,
+                            "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                 []> {
+    let isCommutable = Commutable;
+    let Constraints = "";
+    let isAsmParserOnly = 1;
+  }
+
+  def V#NAME#SDrr : VSDI<opc, MRMSrcReg, (outs FR64:$dst),
+                 (ins FR64:$src1, FR64:$src2),
+                 !strconcat(OpcodeStr,
+                            "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                 []> {
+    let isCommutable = Commutable;
+    let Constraints = "";
+    let isAsmParserOnly = 1;
+  }
+
   // Scalar operation, reg+mem.
   def SSrm : SSI<opc, MRMSrcMem, (outs FR32:$dst),
                                  (ins FR32:$src1, f32mem:$src2),
diff --git a/lib/Target/X86/X86MCCodeEmitter.cpp b/lib/Target/X86/X86MCCodeEmitter.cpp
index a9681e6..f97ac2f 100644
--- a/lib/Target/X86/X86MCCodeEmitter.cpp
+++ b/lib/Target/X86/X86MCCodeEmitter.cpp
@@ -101,12 +101,19 @@
   
   void EmitMemModRMByte(const MCInst &MI, unsigned Op,
                         unsigned RegOpcodeField, 
-                        unsigned TSFlags, unsigned &CurByte, raw_ostream &OS,
+                        uint64_t TSFlags, unsigned &CurByte, raw_ostream &OS,
                         SmallVectorImpl<MCFixup> &Fixups) const;
   
   void EncodeInstruction(const MCInst &MI, raw_ostream &OS,
                          SmallVectorImpl<MCFixup> &Fixups) const;
   
+  void EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+                           const MCInst &MI, const TargetInstrDesc &Desc,
+                           raw_ostream &OS) const;
+
+  void EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+                        const MCInst &MI, const TargetInstrDesc &Desc,
+                        raw_ostream &OS) const;
 };
 
 } // end anonymous namespace
@@ -133,7 +140,7 @@
 
 /// getImmFixupKind - Return the appropriate fixup kind to use for an immediate
 /// in an instruction with the specified TSFlags.
-static MCFixupKind getImmFixupKind(unsigned TSFlags) {
+static MCFixupKind getImmFixupKind(uint64_t TSFlags) {
   unsigned Size = X86II::getSizeOfImm(TSFlags);
   bool isPCRel = X86II::isImmPCRel(TSFlags);
   
@@ -184,7 +191,7 @@
 
 void X86MCCodeEmitter::EmitMemModRMByte(const MCInst &MI, unsigned Op,
                                         unsigned RegOpcodeField,
-                                        unsigned TSFlags, unsigned &CurByte,
+                                        uint64_t TSFlags, unsigned &CurByte,
                                         raw_ostream &OS,
                                         SmallVectorImpl<MCFixup> &Fixups) const{
   const MCOperand &Disp     = MI.getOperand(Op+3);
@@ -324,10 +331,159 @@
     EmitImmediate(Disp, 4, FK_Data_4, CurByte, OS, Fixups);
 }
 
+/// EmitVEXOpcodePrefix - AVX instructions are encoded using a opcode prefix
+/// called VEX.
+void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+                            const MCInst &MI, const TargetInstrDesc &Desc,
+                            raw_ostream &OS) const {
+
+  // Pseudo instructions never have a VEX prefix.
+  if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
+    return;
+
+  // VEX_R: opcode externsion equivalent to REX.R in
+  // 1's complement (inverted) form
+  //
+  //  1: Same as REX_R=0 (must be 1 in 32-bit mode)
+  //  0: Same as REX_R=1 (64 bit mode only)
+  //
+  unsigned char VEX_R = 0x1;
+
+  // VEX_B:
+  //
+  //  1: Same as REX_B=0 (ignored in 32-bit mode)
+  //  0: Same as REX_B=1 (64 bit mode only)
+  //
+  unsigned char VEX_B = 0x1;
+
+  // VEX_W: opcode specific (use like REX.W, or used for
+  // opcode extension, or ignored, depending on the opcode byte)
+  unsigned char VEX_W = 0;
+
+  // VEX_5M (VEX m-mmmmm field):
+  //
+  //  0b00000: Reserved for future use
+  //  0b00001: implied 0F leading opcode
+  //  0b00010: implied 0F 38 leading opcode bytes
+  //  0b00011: implied 0F 3A leading opcode bytes
+  //  0b00100-0b11111: Reserved for future use
+  //
+  unsigned char VEX_5M = 0x1;
+
+  // VEX_4V (VEX vvvv field): a register specifier
+  // (in 1's complement form) or 1111 if unused.
+  unsigned char VEX_4V = 0xf;
+
+  // VEX_L (Vector Length):
+  //
+  //  0: scalar or 128-bit vector
+  //  1: 256-bit vector
+  //
+  unsigned char VEX_L = 0;
+
+  // VEX_PP: opcode extension providing equivalent
+  // functionality of a SIMD prefix
+  //
+  //  0b00: None
+  //  0b01: 66 (not handled yet)
+  //  0b10: F3
+  //  0b11: F2
+  //
+  unsigned char VEX_PP = 0;
+
+  switch (TSFlags & X86II::Op0Mask) {
+  default: assert(0 && "Invalid prefix!");
+  case 0: break;  // No prefix!
+  case X86II::T8:  // 0F 38
+    VEX_5M = 0x2;
+    break;
+  case X86II::TA:  // 0F 3A
+    VEX_5M = 0x3;
+    break;
+  case X86II::TF:  // F2 0F 38
+    VEX_PP = 0x3;
+    VEX_5M = 0x2;
+    break;
+  case X86II::XS:  // F3 0F
+    VEX_PP = 0x2;
+    break;
+  case X86II::XD:  // F2 0F
+    VEX_PP = 0x3;
+    break;
+  }
+
+  unsigned NumOps = MI.getNumOperands();
+  unsigned i = 0;
+  unsigned SrcReg = 0, SrcRegNum = 0;
+
+  switch (TSFlags & X86II::FormMask) {
+  case X86II::MRMInitReg: assert(0 && "FIXME: Remove this!");
+  case X86II::MRMSrcReg:
+    if (MI.getOperand(0).isReg() &&
+        X86InstrInfo::isX86_64ExtendedReg(MI.getOperand(0).getReg()))
+      VEX_R = 0x0;
+
+    // On regular x86, both XMM0-XMM7 and XMM8-XMM15 are encoded in the
+    // range 0-7 and the difference between the 2 groups is given by the
+    // REX prefix. In the VEX prefix, registers are seen sequencially
+    // from 0-15 and encoded in 1's complement form, example:
+    //
+    //  ModRM field => XMM9 => 1
+    //  VEX.VVVV    => XMM9 => ~9
+    //
+    // See table 4-35 of Intel AVX Programming Reference for details.
+    SrcReg = MI.getOperand(1).getReg();
+    SrcRegNum = GetX86RegNum(MI.getOperand(1));
+    if (SrcReg >= X86::XMM8 && SrcReg <= X86::XMM15)
+      SrcRegNum += 8;
+
+    // The registers represented through VEX_VVVV should
+    // be encoded in 1's complement form.
+    if ((TSFlags >> 32) & X86II::VEX_4V)
+      VEX_4V = (~SrcRegNum) & 0xf;
+
+    i = 2; // Skip the VEX.VVVV operand.
+    for (; i != NumOps; ++i) {
+      const MCOperand &MO = MI.getOperand(i);
+      if (MO.isReg() && X86InstrInfo::isX86_64ExtendedReg(MO.getReg()))
+        VEX_B = 0x0;
+    }
+    break;
+  default:
+    assert(0 && "Not implemented!");
+  }
+
+  // VEX opcode prefix can have 2 or 3 bytes
+  //
+  //  3 bytes:
+  //    +-----+ +--------------+ +-------------------+
+  //    | C4h | | RXB | m-mmmm | | W | vvvv | L | pp |
+  //    +-----+ +--------------+ +-------------------+
+  //  2 bytes:
+  //    +-----+ +-------------------+
+  //    | C5h | | R | vvvv | L | pp |
+  //    +-----+ +-------------------+
+  //
+  // Note: VEX.X isn't used so far
+  //
+  unsigned char LastByte = VEX_PP | (VEX_L << 2) | (VEX_4V << 3);
+
+  if (VEX_B /* & VEX_X */) { // 2 byte VEX prefix
+    EmitByte(0xC5, CurByte, OS);
+    EmitByte(LastByte | (VEX_R << 7), CurByte, OS);
+    return;
+  }
+
+  // 3 byte VEX prefix
+  EmitByte(0xC4, CurByte, OS);
+  EmitByte(VEX_R << 7 | 1 << 6 /* VEX_X = 1 */ | VEX_5M, CurByte, OS);
+  EmitByte(LastByte | (VEX_W << 7), CurByte, OS);
+}
+
 /// DetermineREXPrefix - Determine if the MCInst has to be encoded with a X86-64
 /// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand
 /// size, and 3) use of X86-64 extended registers.
-static unsigned DetermineREXPrefix(const MCInst &MI, unsigned TSFlags,
+static unsigned DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
                                    const TargetInstrDesc &Desc) {
   // Pseudo instructions never have a rex byte.
   if ((TSFlags & X86II::FormMask) == X86II::Pseudo)
@@ -422,18 +578,10 @@
   return REX;
 }
 
-void X86MCCodeEmitter::
-EncodeInstruction(const MCInst &MI, raw_ostream &OS,
-                  SmallVectorImpl<MCFixup> &Fixups) const {
-  unsigned Opcode = MI.getOpcode();
-  const TargetInstrDesc &Desc = TII.get(Opcode);
-  unsigned TSFlags = Desc.TSFlags;
-
-  // Keep track of the current byte being emitted.
-  unsigned CurByte = 0;
-  
-  // FIXME: We should emit the prefixes in exactly the same order as GAS does,
-  // in order to provide diffability.
+/// EmitOpcodePrefix - Emit all instruction prefixes prior to the opcode.
+void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
+                            const MCInst &MI, const TargetInstrDesc &Desc,
+                            raw_ostream &OS) const {
 
   // Emit the lock opcode prefix as needed.
   if (TSFlags & X86II::LOCK)
@@ -516,6 +664,30 @@
     EmitByte(0x3A, CurByte, OS);
     break;
   }
+}
+
+void X86MCCodeEmitter::
+EncodeInstruction(const MCInst &MI, raw_ostream &OS,
+                  SmallVectorImpl<MCFixup> &Fixups) const {
+  unsigned Opcode = MI.getOpcode();
+  const TargetInstrDesc &Desc = TII.get(Opcode);
+  uint64_t TSFlags = Desc.TSFlags;
+
+  // Keep track of the current byte being emitted.
+  unsigned CurByte = 0;
+  
+  // Is this instruction encoded in AVX form?
+  bool IsAVXForm = false;
+  if ((TSFlags >> 32) & X86II::VEX_4V)
+    IsAVXForm = true;
+
+  // FIXME: We should emit the prefixes in exactly the same order as GAS does,
+  // in order to provide diffability.
+
+  if (!IsAVXForm)
+    EmitOpcodePrefix(TSFlags, CurByte, MI, Desc, OS);
+  else
+    EmitVEXOpcodePrefix(TSFlags, CurByte, MI, Desc, OS);
   
   // If this is a two-address instruction, skip one of the register operands.
   unsigned NumOps = Desc.getNumOperands();
@@ -527,6 +699,7 @@
     --NumOps;
   
   unsigned char BaseOpcode = X86II::getBaseOpcodeFor(TSFlags);
+  unsigned SrcRegNum = 0;
   switch (TSFlags & X86II::FormMask) {
   case X86II::MRMInitReg:
     assert(0 && "FIXME: Remove this form when the JIT moves to MCCodeEmitter!");
@@ -558,9 +731,14 @@
       
   case X86II::MRMSrcReg:
     EmitByte(BaseOpcode, CurByte, OS);
-    EmitRegModRMByte(MI.getOperand(CurOp+1), GetX86RegNum(MI.getOperand(CurOp)),
-                     CurByte, OS);
-    CurOp += 2;
+    SrcRegNum = CurOp + 1;
+
+    if (IsAVXForm) // Skip 1st src (which is encoded in VEX_VVVV)
+      SrcRegNum++;
+
+    EmitRegModRMByte(MI.getOperand(SrcRegNum),
+                     GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+    CurOp = SrcRegNum + 1;
     break;
     
   case X86II::MRMSrcMem: {