Distribute (A + B) * C to (A * C) + (B * C) to make use of NEON multiplier
accumulator forwarding:
vadd d3, d0, d1
vmul d3, d3, d2
=>
vmul d3, d0, d2
vmla d3, d1, d2
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128665 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index bf4315f..e690e18 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -51,6 +51,12 @@
// to just not use them.
def FeatureHasSlowFPVMLx : SubtargetFeature<"slowfpvmlx", "SlowFPVMLx", "true",
"Disable VFP / NEON MAC instructions">;
+
+// Cortex-A8 / A9 Advanced SIMD has multiplier accumulator forwarding.
+def FeatureVMLxForwarding : SubtargetFeature<"vmlx-forwarding",
+ "HasVMLxForwarding", "true",
+ "Has multiplier accumulator forwarding">;
+
// Some processors benefit from using NEON instructions for scalar
// single-precision FP operations.
def FeatureNEONForFP : SubtargetFeature<"neonfp", "UseNEONForSinglePrecisionFP",
@@ -100,11 +106,12 @@
def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
"Cortex-A8 ARM processors",
[FeatureSlowFPBrcc, FeatureNEONForFP,
- FeatureHasSlowFPVMLx, FeatureT2XtPk]>;
+ FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
+ FeatureT2XtPk]>;
def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
"Cortex-A9 ARM processors",
- [FeatureHasSlowFPVMLx, FeatureT2XtPk,
- FeatureFP16]>;
+ [FeatureHasSlowFPVMLx, FeatureVMLxForwarding,
+ FeatureT2XtPk, FeatureFP16]>;
class ProcNoItin<string Name, list<SubtargetFeature> Features>
: Processor<Name, GenericItineraries, Features>;