Distribute (A + B) * C to (A * C) + (B * C) to make use of NEON multiplier
accumulator forwarding:
vadd d3, d0, d1
vmul d3, d3, d2
=>
vmul d3, d0, d2
vmla d3, d1, d2


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@128665 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 76c1c3f..e024182 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -61,6 +61,10 @@
   /// whether the FP VML[AS] instructions are slow (if so, don't use them).
   bool SlowFPVMLx;
 
+  /// HasVMLxForwarding - If true, NEON has special multiplier accumulator
+  /// forwarding that allows mul + mla to be issued back to back.
+  bool HasVMLxForwarding;
+
   /// SlowFPBrcc - True if floating point compare + branch is slow.
   bool SlowFPBrcc;
 
@@ -182,6 +186,7 @@
   bool hasT2ExtractPack() const { return HasT2ExtractPack; }
   bool hasDataBarrier() const { return HasDataBarrier; }
   bool useFPVMLx() const { return !SlowFPVMLx; }
+  bool hasVMLxForwarding() const { return HasVMLxForwarding; }
   bool isFPBrccSlow() const { return SlowFPBrcc; }
   bool isFPOnlySP() const { return FPOnlySP; }
   bool prefers32BitThumb() const { return Pref32BitThumb; }