Recognize NEON VDUP shuffles (splats of element 0) during DAG legalization instead of instruction selection.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78852 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index f348fe2..f2b6686 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -487,6 +487,7 @@
case ARMISD::VREV64: return "ARMISD::VREV64";
case ARMISD::VREV32: return "ARMISD::VREV32";
case ARMISD::VREV16: return "ARMISD::VREV16";
+ case ARMISD::VSPLAT0: return "ARMISD::VSPLAT0";
}
}
@@ -2440,6 +2441,8 @@
DebugLoc dl = Op.getDebugLoc();
EVT VT = Op.getValueType();
+ if (SVN->isSplat() && SVN->getSplatIndex() == 0)
+ return DAG.getNode(ARMISD::VSPLAT0, dl, VT, SVN->getOperand(0));
if (isVREVMask(SVN, 64))
return DAG.getNode(ARMISD::VREV64, dl, VT, SVN->getOperand(0));
if (isVREVMask(SVN, 32))
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 328959a..57f3c4d 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -129,7 +129,8 @@
// Vector shuffles:
VREV64, // reverse elements within 64-bit doublewords
VREV32, // reverse elements within 32-bit words
- VREV16 // reverse elements within 16-bit halfwords
+ VREV16, // reverse elements within 16-bit halfwords
+ VSPLAT0 // duplicate element 0 into all elements
};
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 7654be7..38e8800 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -99,6 +99,7 @@
def NEONvrev64 : SDNode<"ARMISD::VREV64", SDTARMVSHUF>;
def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
+def NEONvsplat0 : SDNode<"ARMISD::VSPLAT0", SDTARMVSHUF>;
//===----------------------------------------------------------------------===//
// NEON operand definitions
@@ -1744,20 +1745,14 @@
// VDUP : Vector Duplicate (from ARM core register to all elements)
-def splat_lo : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- return SVOp->isSplat() && SVOp->getSplatIndex() == 0;
-}]>;
-
class VDUPD<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
: NVDup<opcod1, 0b1011, opcod3, (outs DPR:$dst), (ins GPR:$src),
NoItinerary, "vdup", !strconcat(asmSize, "\t$dst, $src"),
- [(set DPR:$dst, (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>;
+ [(set DPR:$dst, (Ty (NEONvsplat0 (scalar_to_vector GPR:$src))))]>;
class VDUPQ<bits<8> opcod1, bits<2> opcod3, string asmSize, ValueType Ty>
: NVDup<opcod1, 0b1011, opcod3, (outs QPR:$dst), (ins GPR:$src),
NoItinerary, "vdup", !strconcat(asmSize, "\t$dst, $src"),
- [(set QPR:$dst, (Ty (splat_lo (scalar_to_vector GPR:$src), undef)))]>;
+ [(set QPR:$dst, (Ty (NEONvsplat0 (scalar_to_vector GPR:$src))))]>;
def VDUP8d : VDUPD<0b11101100, 0b00, ".8", v8i8>;
def VDUP16d : VDUPD<0b11101000, 0b01, ".16", v4i16>;
@@ -1768,16 +1763,14 @@
def VDUPfd : NVDup<0b11101000, 0b1011, 0b00, (outs DPR:$dst), (ins GPR:$src),
NoItinerary, "vdup", ".32\t$dst, $src",
- [(set DPR:$dst, (v2f32 (splat_lo
- (scalar_to_vector
- (f32 (bitconvert GPR:$src))),
- undef)))]>;
+ [(set DPR:$dst,
+ (v2f32 (NEONvsplat0 (scalar_to_vector
+ (f32 (bitconvert GPR:$src))))))]>;
def VDUPfq : NVDup<0b11101010, 0b1011, 0b00, (outs QPR:$dst), (ins GPR:$src),
NoItinerary, "vdup", ".32\t$dst, $src",
- [(set QPR:$dst, (v4f32 (splat_lo
- (scalar_to_vector
- (f32 (bitconvert GPR:$src))),
- undef)))]>;
+ [(set QPR:$dst,
+ (v4f32 (NEONvsplat0 (scalar_to_vector
+ (f32 (bitconvert GPR:$src))))))]>;
// VDUP : Vector Duplicate Lane (from scalar to all elements)
@@ -1819,16 +1812,14 @@
def VDUPfdf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 0, 0,
(outs DPR:$dst), (ins SPR:$src),
NoItinerary, "vdup.32\t$dst, ${src:lane}", "",
- [(set DPR:$dst, (v2f32 (splat_lo
- (scalar_to_vector SPR:$src),
- undef)))]>;
+ [(set DPR:$dst, (v2f32 (NEONvsplat0
+ (scalar_to_vector SPR:$src))))]>;
def VDUPfqf : N2V<0b11, 0b11, 0b01, 0b00, 0b11000, 1, 0,
(outs QPR:$dst), (ins SPR:$src),
NoItinerary, "vdup.32\t$dst, ${src:lane}", "",
- [(set QPR:$dst, (v4f32 (splat_lo
- (scalar_to_vector SPR:$src),
- undef)))]>;
+ [(set QPR:$dst, (v4f32 (NEONvsplat0
+ (scalar_to_vector SPR:$src))))]>;
// VMOVN : Vector Narrowing Move
defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, "vmovn.i",