Implement support for MMX movd instruction generation.

Based on http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20150202/257325.html
and http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20121029/154639.html
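
For illustration (not part of the diff below): the movd form being enabled here is the
one exposed by the standard <mmintrin.h> intrinsics, i.e. a 32-bit GPR value moved into
the low word of an MMX register with the high word zeroed, and the low word moved back
out. A minimal sketch, assuming a GCC/Clang x86 target with MMX:

    #include <mmintrin.h>
    #include <cstdio>

    int main()
    {
        unsigned int x = 0x12345678u;
        __m64 m = _mm_cvtsi32_si64(static_cast<int>(x));   // movd GPR -> MMX (high word zeroed)
        int low = _mm_cvtsi64_si32(m);                      // movd MMX low word -> GPR
        _mm_empty();                                        // emms: clear MMX state before x87 use
        std::printf("0x%08x\n", static_cast<unsigned int>(low));
        return 0;
    }

The new X86ISD::MMX_MOVD2W/MMX_MOVW2D nodes and DAG combines below let this pattern be
selected as a single movd instead of going through a store-load conversion.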

Change-Id: I098654245c06a975b8b0bc66e0feb5acea0e9c89
Reviewed-on: https://swiftshader-review.googlesource.com/4510
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/LLVM/lib/Target/X86/X86GenDAGISel.inc b/src/LLVM/lib/Target/X86/X86GenDAGISel.inc
index d5356c9..e28cdb6 100644
--- a/src/LLVM/lib/Target/X86/X86GenDAGISel.inc
+++ b/src/LLVM/lib/Target/X86/X86GenDAGISel.inc
Binary files differ
diff --git a/src/LLVM/lib/Target/X86/X86GenFastISel.inc b/src/LLVM/lib/Target/X86/X86GenFastISel.inc
index b7ada20..71ac967 100644
--- a/src/LLVM/lib/Target/X86/X86GenFastISel.inc
+++ b/src/LLVM/lib/Target/X86/X86GenFastISel.inc
@@ -1132,6 +1132,42 @@
   }
 }
 
+// FastEmit functions for X86ISD::MMX_MOVD2W.
+
+unsigned FastEmit_X86ISD_MMX_MOVD2W_MVT_x86mmx_r(MVT RetVT, unsigned Op0, bool Op0IsKill) {
+  if (RetVT.SimpleTy != MVT::i32)
+    return 0;
+  if ((Subtarget->hasMMX())) {
+    return FastEmitInst_r(X86::MMX_MOVD64grr, X86::GR32RegisterClass, Op0, Op0IsKill);
+  }
+  return 0;
+}
+
+unsigned FastEmit_X86ISD_MMX_MOVD2W_r(MVT VT, MVT RetVT, unsigned Op0, bool Op0IsKill) {
+  switch (VT.SimpleTy) {
+  case MVT::x86mmx: return FastEmit_X86ISD_MMX_MOVD2W_MVT_x86mmx_r(RetVT, Op0, Op0IsKill);
+  default: return 0;
+  }
+}
+
+// FastEmit functions for X86ISD::MMX_MOVW2D.
+
+unsigned FastEmit_X86ISD_MMX_MOVW2D_MVT_i32_r(MVT RetVT, unsigned Op0, bool Op0IsKill) {
+  if (RetVT.SimpleTy != MVT::x86mmx)
+    return 0;
+  if ((Subtarget->hasMMX())) {
+    return FastEmitInst_r(X86::MMX_MOVD64rr, X86::VR64RegisterClass, Op0, Op0IsKill);
+  }
+  return 0;
+}
+
+unsigned FastEmit_X86ISD_MMX_MOVW2D_r(MVT VT, MVT RetVT, unsigned Op0, bool Op0IsKill) {
+  switch (VT.SimpleTy) {
+  case MVT::i32: return FastEmit_X86ISD_MMX_MOVW2D_MVT_i32_r(RetVT, Op0, Op0IsKill);
+  default: return 0;
+  }
+}
+
 // FastEmit functions for X86ISD::MOVDDUP.
 
 unsigned FastEmit_X86ISD_MOVDDUP_MVT_v4i64_r(MVT RetVT, unsigned Op0, bool Op0IsKill) {
@@ -1389,6 +1425,8 @@
   case X86ISD::FRCP: return FastEmit_X86ISD_FRCP_r(VT, RetVT, Op0, Op0IsKill);
   case X86ISD::FRSQRT: return FastEmit_X86ISD_FRSQRT_r(VT, RetVT, Op0, Op0IsKill);
   case X86ISD::MEMBARRIER: return FastEmit_X86ISD_MEMBARRIER_r(VT, RetVT, Op0, Op0IsKill);
+  case X86ISD::MMX_MOVD2W: return FastEmit_X86ISD_MMX_MOVD2W_r(VT, RetVT, Op0, Op0IsKill);
+  case X86ISD::MMX_MOVW2D: return FastEmit_X86ISD_MMX_MOVW2D_r(VT, RetVT, Op0, Op0IsKill);
   case X86ISD::MOVDDUP: return FastEmit_X86ISD_MOVDDUP_r(VT, RetVT, Op0, Op0IsKill);
   case X86ISD::MOVDQ2Q: return FastEmit_X86ISD_MOVDQ2Q_r(VT, RetVT, Op0, Op0IsKill);
   case X86ISD::MOVQ2DQ: return FastEmit_X86ISD_MOVQ2DQ_r(VT, RetVT, Op0, Op0IsKill);
diff --git a/src/LLVM/lib/Target/X86/X86GenInstrInfo.inc b/src/LLVM/lib/Target/X86/X86GenInstrInfo.inc
index f7396ae..b6e99f4 100644
--- a/src/LLVM/lib/Target/X86/X86GenInstrInfo.inc
+++ b/src/LLVM/lib/Target/X86/X86GenInstrInfo.inc
@@ -5432,7 +5432,7 @@
   { 1248,	2,	0,	0,	0,	"MMX_MASKMOVQ", 0|(1<<MCID::MayLoad)|(1<<MCID::MayStore)|(1<<MCID::UnmodeledSideEffects), 0x1ee000105ULL, ImplicitList34, NULL, OperandInfo149 },  // Inst #1248 = MMX_MASKMOVQ
   { 1249,	2,	0,	0,	0,	"MMX_MASKMOVQ64", 0|(1<<MCID::MayLoad)|(1<<MCID::MayStore)|(1<<MCID::UnmodeledSideEffects), 0x1ee000105ULL, ImplicitList35, NULL, OperandInfo149 },  // Inst #1249 = MMX_MASKMOVQ64
   { 1250,	2,	1,	0,	0,	"MMX_MOVD64from64rr", 0|(1<<MCID::Bitcast), 0xfc002103ULL, NULL, NULL, OperandInfo150 },  // Inst #1250 = MMX_MOVD64from64rr
-  { 1251,	2,	0,	0,	0,	"MMX_MOVD64grr", 0|(1<<MCID::UnmodeledSideEffects), 0xfc000103ULL, NULL, NULL, OperandInfo151 },  // Inst #1251 = MMX_MOVD64grr
+  { 1251,	2,	1,	0,	0,	"MMX_MOVD64grr", 0, 0xfc000103ULL, NULL, NULL, OperandInfo151 },  // Inst #1251 = MMX_MOVD64grr
   { 1252,	6,	0,	0,	0,	"MMX_MOVD64mr", 0|(1<<MCID::MayStore)|(1<<MCID::UnmodeledSideEffects), 0xfc000104ULL, NULL, NULL, OperandInfo152 },  // Inst #1252 = MMX_MOVD64mr
   { 1253,	6,	1,	0,	0,	"MMX_MOVD64rm", 0|(1<<MCID::FoldableAsLoad)|(1<<MCID::MayLoad), 0xdc000106ULL, NULL, NULL, OperandInfo145 },  // Inst #1253 = MMX_MOVD64rm
   { 1254,	2,	1,	0,	0,	"MMX_MOVD64rr", 0, 0xdc000105ULL, NULL, NULL, OperandInfo153 },  // Inst #1254 = MMX_MOVD64rr
diff --git a/src/LLVM/lib/Target/X86/X86ISelLowering.cpp b/src/LLVM/lib/Target/X86/X86ISelLowering.cpp
index 7c8ce17..1be4bc5 100644
--- a/src/LLVM/lib/Target/X86/X86ISelLowering.cpp
+++ b/src/LLVM/lib/Target/X86/X86ISelLowering.cpp
@@ -1137,6 +1137,7 @@
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+  setTargetDAGCombine(ISD::BITCAST);
   setTargetDAGCombine(ISD::BUILD_VECTOR);
   setTargetDAGCombine(ISD::VSELECT);
   setTargetDAGCombine(ISD::SELECT);
@@ -12517,12 +12518,39 @@
   return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
 }
 
+/// \brief Detect bitcasts between i32 to x86mmx low word. Since MMX types are
+/// special and don't usually play with other vector types, it's better to
+/// handle them early to be sure we emit efficient code by avoiding
+/// store-load conversions.
+static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
+  if (N->getValueType(0) != MVT::x86mmx ||
+      N->getOperand(0)->getOpcode() != ISD::BUILD_VECTOR ||
+      N->getOperand(0)->getValueType(0) != MVT::v2i32)
+    return SDValue();
+
+  SDValue V = N->getOperand(0);
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
+  if (C && C->getZExtValue() == 0 && V.getOperand(0).getValueType() == MVT::i32)
+    return DAG.getNode(X86ISD::MMX_MOVW2D, V.getOperand(0).getDebugLoc(),
+                       N->getValueType(0), V.getOperand(0));
+
+  return SDValue();
+}
+
 /// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
 /// generation and convert it from being a bunch of shuffles and extracts
 /// to a simple store and scalar loads to extract the elements.
 static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
                                                 const TargetLowering &TLI) {
   SDValue InputVector = N->getOperand(0);
+  // Detect whether we are trying to convert from mmx to i32 and the bitcast
+  // from mmx to v2i32 has a single usage.
+  if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST &&
+      InputVector.getNode()->getOperand(0).getValueType() == MVT::x86mmx &&
+      InputVector.hasOneUse() && N->getValueType(0) == MVT::i32)
+    return DAG.getNode(X86ISD::MMX_MOVD2W, InputVector.getDebugLoc(),
+                       N->getValueType(0),
+                       InputVector.getNode()->getOperand(0));
 
   // Only operate on vectors of 4 elements, where the alternative shuffling
   // gets to be more expensive.
@@ -14170,6 +14198,7 @@
     return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
   case ISD::VSELECT:
   case ISD::SELECT:         return PerformSELECTCombine(N, DAG, Subtarget);
+  case ISD::BITCAST:        return PerformBITCASTCombine(N, DAG);
   case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI);
   case ISD::ADD:            return OptimizeConditionalInDecrement(N, DAG);
   case ISD::SUB:            return PerformSubCombine(N, DAG);
diff --git a/src/LLVM/lib/Target/X86/X86ISelLowering.h b/src/LLVM/lib/Target/X86/X86ISelLowering.h
index 342a5e6..bde9088 100644
--- a/src/LLVM/lib/Target/X86/X86ISelLowering.h
+++ b/src/LLVM/lib/Target/X86/X86ISelLowering.h
@@ -146,6 +146,14 @@
       /// mnemonic, so do I; blame Intel.
       MOVDQ2Q,
 
+      /// MMX_MOVD2W - Copies a 32-bit value from the low word of a MMX
+      /// vector to a GPR.
+      MMX_MOVD2W,
+
+      /// MMX_MOVW2D - Copies a GPR into the low 32-bit word of a MMX vector
+      /// and zero out the high word.
+      MMX_MOVW2D,
+
       /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
       /// i32, corresponds to X86::PEXTRB.
       PEXTRB,
diff --git a/src/LLVM/lib/Target/X86/X86InstrFragmentsSIMD.td b/src/LLVM/lib/Target/X86/X86InstrFragmentsSIMD.td
index af919fb..3380f9f 100644
--- a/src/LLVM/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/src/LLVM/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -11,6 +11,17 @@
 //
 //===----------------------------------------------------------------------===//
 
+// MMX specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+// Low word of MMX to GPR.
+def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
+                            [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
+
+// GPR to low word of MMX.
+def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1,
+                            [SDTCisVT<0, x86mmx>, SDTCisVT<1, i32>]>>;
+
 //===----------------------------------------------------------------------===//
 // MMX Pattern Fragments
 //===----------------------------------------------------------------------===//
diff --git a/src/LLVM/lib/Target/X86/X86InstrMMX.td b/src/LLVM/lib/Target/X86/X86InstrMMX.td
index 13dcf5a..e2ef2b4 100644
--- a/src/LLVM/lib/Target/X86/X86InstrMMX.td
+++ b/src/LLVM/lib/Target/X86/X86InstrMMX.td
@@ -141,11 +141,24 @@
                         "movd\t{$src, $dst|$dst, $src}",

               [(set VR64:$dst,

                (x86mmx (scalar_to_vector (loadi32 addr:$src))))]>;

+

+let Predicates = [HasMMX] in {

+  let AddedComplexity = 15 in

+    def : Pat<(x86mmx (MMX_X86movw2d GR32:$src)),

+              (MMX_MOVD64rr GR32:$src)>;

+  let AddedComplexity = 20 in

+    def : Pat<(x86mmx (MMX_X86movw2d (loadi32 addr:$src))),

+              (MMX_MOVD64rm addr:$src)>;

+}

+

 let mayStore = 1 in

 def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),

                         "movd\t{$src, $dst|$dst, $src}", []>;

-def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src),

-                        "movd\t{$src, $dst|$dst, $src}", []>;

+

+def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),

+                         "movd\t{$src, $dst|$dst, $src}",

+                         [(set GR32:$dst,

+                          (MMX_X86movd2w (x86mmx VR64:$src)))]>;

 

 let neverHasSideEffects = 1 in

 def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),

diff --git a/src/Reactor/Nucleus.cpp b/src/Reactor/Nucleus.cpp
index 52ee089..179afb7 100644
--- a/src/Reactor/Nucleus.cpp
+++ b/src/Reactor/Nucleus.cpp
@@ -4184,13 +4184,12 @@
 		return Type::getInt64Ty(*Nucleus::getContext());
 	}
 
-	Long1::Long1(const Reference<UInt> &cast)
+	Long1::Long1(const RValue<UInt> cast)
 	{
-		Value *uint = cast.loadValue();
-		Value *int64 = Nucleus::createZExt(uint, Long::getType());
-		Value *long1 = Nucleus::createBitCast(int64, Long1::getType());
+		Value *undefCast = Nucleus::createInsertElement(UndefValue::get(VectorType::get(Int::getType(), 2)), cast.value, 0);
+		Value *zeroCast = Nucleus::createInsertElement(undefCast, Nucleus::createConstantInt(0), 1);
 
-		storeValue(long1);
+		storeValue(Nucleus::createBitCast(zeroCast, Long1::getType()));
 	}
 
 	Long1::Long1(RValue<Long1> rhs)
diff --git a/src/Reactor/Nucleus.hpp b/src/Reactor/Nucleus.hpp
index a892e21..a7aaf79 100644
--- a/src/Reactor/Nucleus.hpp
+++ b/src/Reactor/Nucleus.hpp
@@ -1310,11 +1310,9 @@
 	//	explicit Long1(RValue<Short> cast);
 	//	explicit Long1(RValue<UShort> cast);
 	//	explicit Long1(RValue<Int> cast);
-	//	explicit Long1(RValue<UInt> cast);
+		explicit Long1(RValue<UInt> cast);
 	//	explicit Long1(RValue<Float> cast);
 
-		explicit Long1(const Reference<UInt> &cast);
-
 	//	Long1();
 	//	Long1(qword x);
 		Long1(RValue<Long1> rhs);