[AArch64] Fix PR32384: bump up the number of stores per memset and memcpy
As suggested in https://bugs.llvm.org/show_bug.cgi?id=32384#c1, this change
makes the inlining of `memset()` and `memcpy()` more aggressive when
compiling for speed. The tuning remains the same when optimizing for size.
Patch by: Sebastian Pop <s.pop@samsung.com>
Evandro Menezes <e.menezes@samsung.com>
Differential revision: https://reviews.llvm.org/D45098
llvm-svn: 333429
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5bf5231..66f1b63 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -579,11 +579,17 @@
setTargetDAGCombine(ISD::GlobalAddress);
- MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 8;
- MaxGluedStoresPerMemcpy = 4;
+ // In case of strict alignment, avoid an excessive number of byte-wide stores.
+ MaxStoresPerMemsetOptSize = 8;
+ MaxStoresPerMemset = Subtarget->requiresStrictAlign()
+ ? MaxStoresPerMemsetOptSize : 32;
- MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 4;
- MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 4;
+ MaxGluedStoresPerMemcpy = 4;
+ MaxStoresPerMemcpyOptSize = 4;
+ MaxStoresPerMemcpy = Subtarget->requiresStrictAlign()
+ ? MaxStoresPerMemcpyOptSize : 16;
+
+ MaxStoresPerMemmoveOptSize = MaxStoresPerMemmove = 4;
setStackPointerRegisterToSaveRestore(AArch64::SP);