Implement support for MMX movd instruction generation.
Based on http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20150202/257325.html
and http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20121029/154639.html
Change-Id: I098654245c06a975b8b0bc66e0feb5acea0e9c89
Reviewed-on: https://swiftshader-review.googlesource.com/4510
Tested-by: Nicolas Capens <capn@google.com>
Reviewed-by: Alexis Hétu <sugoi@google.com>
Reviewed-by: Nicolas Capens <capn@google.com>
diff --git a/src/LLVM/lib/Target/X86/X86GenDAGISel.inc b/src/LLVM/lib/Target/X86/X86GenDAGISel.inc
index d5356c9..e28cdb6 100644
--- a/src/LLVM/lib/Target/X86/X86GenDAGISel.inc
+++ b/src/LLVM/lib/Target/X86/X86GenDAGISel.inc
Binary files differ
diff --git a/src/LLVM/lib/Target/X86/X86GenFastISel.inc b/src/LLVM/lib/Target/X86/X86GenFastISel.inc
index b7ada20..71ac967 100644
--- a/src/LLVM/lib/Target/X86/X86GenFastISel.inc
+++ b/src/LLVM/lib/Target/X86/X86GenFastISel.inc
@@ -1132,6 +1132,42 @@
}
}
+// FastEmit functions for X86ISD::MMX_MOVD2W.
+
+unsigned FastEmit_X86ISD_MMX_MOVD2W_MVT_x86mmx_r(MVT RetVT, unsigned Op0, bool Op0IsKill) {
+ if (RetVT.SimpleTy != MVT::i32)
+ return 0;
+ if ((Subtarget->hasMMX())) {
+ return FastEmitInst_r(X86::MMX_MOVD64grr, X86::GR32RegisterClass, Op0, Op0IsKill);
+ }
+ return 0;
+}
+
+unsigned FastEmit_X86ISD_MMX_MOVD2W_r(MVT VT, MVT RetVT, unsigned Op0, bool Op0IsKill) {
+ switch (VT.SimpleTy) {
+ case MVT::x86mmx: return FastEmit_X86ISD_MMX_MOVD2W_MVT_x86mmx_r(RetVT, Op0, Op0IsKill);
+ default: return 0;
+ }
+}
+
+// FastEmit functions for X86ISD::MMX_MOVW2D.
+
+unsigned FastEmit_X86ISD_MMX_MOVW2D_MVT_i32_r(MVT RetVT, unsigned Op0, bool Op0IsKill) {
+ if (RetVT.SimpleTy != MVT::x86mmx)
+ return 0;
+ if ((Subtarget->hasMMX())) {
+ return FastEmitInst_r(X86::MMX_MOVD64rr, X86::VR64RegisterClass, Op0, Op0IsKill);
+ }
+ return 0;
+}
+
+unsigned FastEmit_X86ISD_MMX_MOVW2D_r(MVT VT, MVT RetVT, unsigned Op0, bool Op0IsKill) {
+ switch (VT.SimpleTy) {
+ case MVT::i32: return FastEmit_X86ISD_MMX_MOVW2D_MVT_i32_r(RetVT, Op0, Op0IsKill);
+ default: return 0;
+ }
+}
+
// FastEmit functions for X86ISD::MOVDDUP.
unsigned FastEmit_X86ISD_MOVDDUP_MVT_v4i64_r(MVT RetVT, unsigned Op0, bool Op0IsKill) {
@@ -1389,6 +1425,8 @@
case X86ISD::FRCP: return FastEmit_X86ISD_FRCP_r(VT, RetVT, Op0, Op0IsKill);
case X86ISD::FRSQRT: return FastEmit_X86ISD_FRSQRT_r(VT, RetVT, Op0, Op0IsKill);
case X86ISD::MEMBARRIER: return FastEmit_X86ISD_MEMBARRIER_r(VT, RetVT, Op0, Op0IsKill);
+ case X86ISD::MMX_MOVD2W: return FastEmit_X86ISD_MMX_MOVD2W_r(VT, RetVT, Op0, Op0IsKill);
+ case X86ISD::MMX_MOVW2D: return FastEmit_X86ISD_MMX_MOVW2D_r(VT, RetVT, Op0, Op0IsKill);
case X86ISD::MOVDDUP: return FastEmit_X86ISD_MOVDDUP_r(VT, RetVT, Op0, Op0IsKill);
case X86ISD::MOVDQ2Q: return FastEmit_X86ISD_MOVDQ2Q_r(VT, RetVT, Op0, Op0IsKill);
case X86ISD::MOVQ2DQ: return FastEmit_X86ISD_MOVQ2DQ_r(VT, RetVT, Op0, Op0IsKill);
diff --git a/src/LLVM/lib/Target/X86/X86GenInstrInfo.inc b/src/LLVM/lib/Target/X86/X86GenInstrInfo.inc
index f7396ae..b6e99f4 100644
--- a/src/LLVM/lib/Target/X86/X86GenInstrInfo.inc
+++ b/src/LLVM/lib/Target/X86/X86GenInstrInfo.inc
@@ -5432,7 +5432,7 @@
{ 1248, 2, 0, 0, 0, "MMX_MASKMOVQ", 0|(1<<MCID::MayLoad)|(1<<MCID::MayStore)|(1<<MCID::UnmodeledSideEffects), 0x1ee000105ULL, ImplicitList34, NULL, OperandInfo149 }, // Inst #1248 = MMX_MASKMOVQ
{ 1249, 2, 0, 0, 0, "MMX_MASKMOVQ64", 0|(1<<MCID::MayLoad)|(1<<MCID::MayStore)|(1<<MCID::UnmodeledSideEffects), 0x1ee000105ULL, ImplicitList35, NULL, OperandInfo149 }, // Inst #1249 = MMX_MASKMOVQ64
{ 1250, 2, 1, 0, 0, "MMX_MOVD64from64rr", 0|(1<<MCID::Bitcast), 0xfc002103ULL, NULL, NULL, OperandInfo150 }, // Inst #1250 = MMX_MOVD64from64rr
- { 1251, 2, 0, 0, 0, "MMX_MOVD64grr", 0|(1<<MCID::UnmodeledSideEffects), 0xfc000103ULL, NULL, NULL, OperandInfo151 }, // Inst #1251 = MMX_MOVD64grr
+ { 1251, 2, 1, 0, 0, "MMX_MOVD64grr", 0, 0xfc000103ULL, NULL, NULL, OperandInfo151 }, // Inst #1251 = MMX_MOVD64grr
{ 1252, 6, 0, 0, 0, "MMX_MOVD64mr", 0|(1<<MCID::MayStore)|(1<<MCID::UnmodeledSideEffects), 0xfc000104ULL, NULL, NULL, OperandInfo152 }, // Inst #1252 = MMX_MOVD64mr
{ 1253, 6, 1, 0, 0, "MMX_MOVD64rm", 0|(1<<MCID::FoldableAsLoad)|(1<<MCID::MayLoad), 0xdc000106ULL, NULL, NULL, OperandInfo145 }, // Inst #1253 = MMX_MOVD64rm
{ 1254, 2, 1, 0, 0, "MMX_MOVD64rr", 0, 0xdc000105ULL, NULL, NULL, OperandInfo153 }, // Inst #1254 = MMX_MOVD64rr
diff --git a/src/LLVM/lib/Target/X86/X86ISelLowering.cpp b/src/LLVM/lib/Target/X86/X86ISelLowering.cpp
index 7c8ce17..1be4bc5 100644
--- a/src/LLVM/lib/Target/X86/X86ISelLowering.cpp
+++ b/src/LLVM/lib/Target/X86/X86ISelLowering.cpp
@@ -1137,6 +1137,7 @@
// We have target-specific dag combine patterns for the following nodes:
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::BITCAST);
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::VSELECT);
setTargetDAGCombine(ISD::SELECT);
@@ -12517,12 +12518,39 @@
return EltsFromConsecutiveLoads(VT, Elts, dl, DAG);
}
+/// \brief Detect bitcasts from i32 to the x86mmx low word. Since MMX types are
+/// special and don't usually play with other vector types, it's better to
+/// handle them early to be sure we emit efficient code by avoiding
+/// store-load conversions.
+static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
+ if (N->getValueType(0) != MVT::x86mmx ||
+ N->getOperand(0)->getOpcode() != ISD::BUILD_VECTOR ||
+ N->getOperand(0)->getValueType(0) != MVT::v2i32)
+ return SDValue();
+
+ SDValue V = N->getOperand(0);
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(V.getOperand(1));
+ if (C && C->getZExtValue() == 0 && V.getOperand(0).getValueType() == MVT::i32)
+ return DAG.getNode(X86ISD::MMX_MOVW2D, V.getOperand(0).getDebugLoc(),
+ N->getValueType(0), V.getOperand(0));
+
+ return SDValue();
+}
+
/// PerformEXTRACT_VECTOR_ELTCombine - Detect vector gather/scatter index
/// generation and convert it from being a bunch of shuffles and extracts
/// to a simple store and scalar loads to extract the elements.
static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI) {
SDValue InputVector = N->getOperand(0);
+ // Detect whether we are trying to convert from mmx to i32 and the bitcast
+ // from mmx to v2i32 has a single usage.
+ if (InputVector.getNode()->getOpcode() == llvm::ISD::BITCAST &&
+ InputVector.getNode()->getOperand(0).getValueType() == MVT::x86mmx &&
+ InputVector.hasOneUse() && N->getValueType(0) == MVT::i32)
+ return DAG.getNode(X86ISD::MMX_MOVD2W, InputVector.getDebugLoc(),
+ N->getValueType(0),
+ InputVector.getNode()->getOperand(0));
// Only operate on vectors of 4 elements, where the alternative shuffling
// gets to be more expensive.
@@ -14170,6 +14198,7 @@
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
case ISD::VSELECT:
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
+ case ISD::BITCAST: return PerformBITCASTCombine(N, DAG);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
case ISD::ADD: return OptimizeConditionalInDecrement(N, DAG);
case ISD::SUB: return PerformSubCombine(N, DAG);
diff --git a/src/LLVM/lib/Target/X86/X86ISelLowering.h b/src/LLVM/lib/Target/X86/X86ISelLowering.h
index 342a5e6..bde9088 100644
--- a/src/LLVM/lib/Target/X86/X86ISelLowering.h
+++ b/src/LLVM/lib/Target/X86/X86ISelLowering.h
@@ -146,6 +146,13 @@
/// mnemonic, so do I; blame Intel.
MOVDQ2Q,
+ /// MMX_MOVD2W - Copies a 32-bit value from the low word of a MMX
+ /// vector to a GPR.
+ MMX_MOVD2W,
+
+ /// MMX_MOVW2D - Copies a GPR into the low 32-bit word of a MMX vector
+ /// and zeroes out the high word.
+ MMX_MOVW2D,
+
/// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
/// i32, corresponds to X86::PEXTRB.
PEXTRB,
diff --git a/src/LLVM/lib/Target/X86/X86InstrFragmentsSIMD.td b/src/LLVM/lib/Target/X86/X86InstrFragmentsSIMD.td
index af919fb..3380f9f 100644
--- a/src/LLVM/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/src/LLVM/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -11,6 +11,17 @@
//
//===----------------------------------------------------------------------===//
+// MMX specific DAG Nodes.
+//===----------------------------------------------------------------------===//
+
+// Low word of MMX to GPR.
+def MMX_X86movd2w : SDNode<"X86ISD::MMX_MOVD2W", SDTypeProfile<1, 1,
+ [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;
+
+// GPR to low word of MMX.
+def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1,
+ [SDTCisVT<0, x86mmx>, SDTCisVT<1, i32>]>>;
+
//===----------------------------------------------------------------------===//
// MMX Pattern Fragments
//===----------------------------------------------------------------------===//
diff --git a/src/LLVM/lib/Target/X86/X86InstrMMX.td b/src/LLVM/lib/Target/X86/X86InstrMMX.td
index 13dcf5a..e2ef2b4 100644
--- a/src/LLVM/lib/Target/X86/X86InstrMMX.td
+++ b/src/LLVM/lib/Target/X86/X86InstrMMX.td
@@ -141,11 +141,24 @@
"movd\t{$src, $dst|$dst, $src}",
[(set VR64:$dst,
(x86mmx (scalar_to_vector (loadi32 addr:$src))))]>;
+
+let Predicates = [HasMMX] in {
+ let AddedComplexity = 15 in
+ def : Pat<(x86mmx (MMX_X86movw2d GR32:$src)),
+ (MMX_MOVD64rr GR32:$src)>;
+ let AddedComplexity = 20 in
+ def : Pat<(x86mmx (MMX_X86movw2d (loadi32 addr:$src))),
+ (MMX_MOVD64rm addr:$src)>;
+}
+
let mayStore = 1 in
def MMX_MOVD64mr : MMXI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR64:$src),
"movd\t{$src, $dst|$dst, $src}", []>;
-def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs), (ins GR32:$dst, VR64:$src),
- "movd\t{$src, $dst|$dst, $src}", []>;
+
+def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src),
+ "movd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst,
+ (MMX_X86movd2w (x86mmx VR64:$src)))]>;
let neverHasSideEffects = 1 in
def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
diff --git a/src/Reactor/Nucleus.cpp b/src/Reactor/Nucleus.cpp
index 52ee089..179afb7 100644
--- a/src/Reactor/Nucleus.cpp
+++ b/src/Reactor/Nucleus.cpp
@@ -4184,13 +4184,12 @@
return Type::getInt64Ty(*Nucleus::getContext());
}
- Long1::Long1(const Reference<UInt> &cast)
+ Long1::Long1(const RValue<UInt> cast)
{
- Value *uint = cast.loadValue();
- Value *int64 = Nucleus::createZExt(uint, Long::getType());
- Value *long1 = Nucleus::createBitCast(int64, Long1::getType());
+ Value *undefCast = Nucleus::createInsertElement(UndefValue::get(VectorType::get(Int::getType(), 2)), cast.value, 0);
+ Value *zeroCast = Nucleus::createInsertElement(undefCast, Nucleus::createConstantInt(0), 1);
- storeValue(long1);
+ storeValue(Nucleus::createBitCast(zeroCast, Long1::getType()));
}
Long1::Long1(RValue<Long1> rhs)
diff --git a/src/Reactor/Nucleus.hpp b/src/Reactor/Nucleus.hpp
index a892e21..a7aaf79 100644
--- a/src/Reactor/Nucleus.hpp
+++ b/src/Reactor/Nucleus.hpp
@@ -1310,11 +1310,9 @@
// explicit Long1(RValue<Short> cast);
// explicit Long1(RValue<UShort> cast);
// explicit Long1(RValue<Int> cast);
- // explicit Long1(RValue<UInt> cast);
+ explicit Long1(RValue<UInt> cast);
// explicit Long1(RValue<Float> cast);
- explicit Long1(const Reference<UInt> &cast);
-
// Long1();
// Long1(qword x);
Long1(RValue<Long1> rhs);