Try to lower memset/memcpy/memmove to vector instructions on ARM where the alignment permits.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143582 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 31e522d..222a399 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -8127,6 +8127,34 @@
}
}
+static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
+ unsigned AlignCheck) {
+ return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
+ (DstAlign == 0 || DstAlign % AlignCheck == 0));
+}
+
+EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool NonScalarIntSafe,
+ bool MemcpyStrSrc,
+ MachineFunction &MF) const {
+ const Function *F = MF.getFunction();
+
+ // See if we can use NEON instructions for this...
+ if (NonScalarIntSafe &&
+ !F->hasFnAttr(Attribute::NoImplicitFloat) &&
+ Subtarget->hasNEON()) {
+ if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) {
+ return MVT::v4i32;
+ } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) {
+ return MVT::v2i32;
+ }
+ }
+
+ // Let the target-independent logic figure it out.
+ return MVT::Other;
+}
+
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
return false;