Call libc memcpy/memset if the array size is bigger than the threshold.
Copying a 100MB array (after a warmup) shows that the glibc 2.6.1 implementation on
x86-64 (Core 2) is 30% faster (from 0.270917s to 0.188079s).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@41479 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 6673c5f..75fbd44 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -3753,10 +3753,10 @@
if (Align == 0) Align = 1;
ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- // If not DWORD aligned, call memset if size is less than the threshold.
+ // If not DWORD aligned or size is more than the threshold, call memset.
// It knows how to align to the right boundary first.
if ((Align & 3) != 0 ||
- (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
+ (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
MVT::ValueType IntPtr = getPointerTy();
const Type *IntPtrTy = getTargetData()->getIntPtrType();
TargetLowering::ArgListTy Args;
@@ -3909,10 +3909,10 @@
if (Align == 0) Align = 1;
ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
- // If not DWORD aligned, call memcpy if size is less than the threshold.
+ // If not DWORD aligned or size is more than the threshold, call memcpy.
// It knows how to align to the right boundary first.
if ((Align & 3) != 0 ||
- (I && I->getValue() < Subtarget->getMinRepStrSizeThreshold())) {
+ (I && I->getValue() > Subtarget->getMinRepStrSizeThreshold())) {
MVT::ValueType IntPtr = getPointerTy();
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;