A bit more memset / memcpy optimization.
Lower them to calls to the memset / memcpy library functions if 1) the buffer(s) are not DWORD aligned, and
2) the size is not known to be greater than or equal to some minimum value (currently 128).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@26224 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 172b8a1..adaa986 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1772,6 +1772,25 @@
(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
if (Align == 0) Align = 1;
+ ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+ // If not DWORD aligned, call memset unless the size is a constant that is at
+ // least the threshold. It knows how to align to the right boundary first.
+ if ((Align & 3) != 0 &&
+ !(I && I->getValue() >= Subtarget->getMinRepStrSizeThreshold())) {
+ MVT::ValueType IntPtr = getPointerTy();
+ const Type *IntPtrTy = getTargetData().getIntPtrType();
+ std::vector<std::pair<SDOperand, const Type*> > Args;
+ Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
+ // Extend the ubyte argument to be an int value for the call.
+ SDOperand Val = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Op.getOperand(2));
+ Args.push_back(std::make_pair(Val, IntPtrTy));
+ Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
+ std::pair<SDOperand,SDOperand> CallResult =
+ LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
+ DAG.getExternalSymbol("memset", IntPtr), Args, DAG);
+ return CallResult.second;
+ }
+
MVT::ValueType AVT;
SDOperand Count;
if (ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2))) {
@@ -1782,7 +1801,7 @@
switch (Align & 3) {
case 2: // WORD aligned
AVT = MVT::i16;
- if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+ if (I)
Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
else
Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
@@ -1792,7 +1811,7 @@
break;
case 0: // DWORD aligned
AVT = MVT::i32;
- if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+ if (I)
Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
else
Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
@@ -1812,7 +1831,7 @@
InFlag);
InFlag = Chain.getValue(1);
} else {
- AVT = MVT::i8;
+ AVT = MVT::i8;
Count = Op.getOperand(3);
Chain = DAG.getCopyToReg(Chain, X86::AL, Op.getOperand(2), InFlag);
InFlag = Chain.getValue(1);
@@ -1832,20 +1851,37 @@
(unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
if (Align == 0) Align = 1;
+ ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+ // If not DWORD aligned, call memcpy unless the size is a constant that is at
+ // least the threshold. It knows how to align to the right boundary first.
+ if ((Align & 3) != 0 &&
+ !(I && I->getValue() >= Subtarget->getMinRepStrSizeThreshold())) {
+ MVT::ValueType IntPtr = getPointerTy();
+ const Type *IntPtrTy = getTargetData().getIntPtrType();
+ std::vector<std::pair<SDOperand, const Type*> > Args;
+ Args.push_back(std::make_pair(Op.getOperand(1), IntPtrTy));
+ Args.push_back(std::make_pair(Op.getOperand(2), IntPtrTy));
+ Args.push_back(std::make_pair(Op.getOperand(3), IntPtrTy));
+ std::pair<SDOperand,SDOperand> CallResult =
+ LowerCallTo(Chain, Type::VoidTy, false, CallingConv::C, false,
+ DAG.getExternalSymbol("memcpy", IntPtr), Args, DAG);
+ return CallResult.second;
+ }
+
MVT::ValueType AVT;
SDOperand Count;
switch (Align & 3) {
case 2: // WORD aligned
AVT = MVT::i16;
- if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+ if (I)
Count = DAG.getConstant(I->getValue() / 2, MVT::i32);
else
Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
DAG.getConstant(1, MVT::i8));
break;
case 0: // DWORD aligned
AVT = MVT::i32;
- if (ConstantSDNode *I = dyn_cast<ConstantSDNode>(Op.getOperand(3)))
+ if (I)
Count = DAG.getConstant(I->getValue() / 4, MVT::i32);
else
Count = DAG.getNode(ISD::SRL, MVT::i32, Op.getOperand(3),
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index d0c5c4d..b3fafbf 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -146,6 +146,8 @@
X86Subtarget::X86Subtarget(const Module &M, const std::string &FS) {
stackAlignment = 8;
+ // FIXME: this is a known good value for Yonah. Not sure about others.
+ MinRepStrSizeThreshold = 128;
indirectExternAndWeakGlobals = false;
X86SSELevel = NoMMXSSE;
X863DNowLevel = NoThreeDNow;
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 60cc49a..0cc06d5 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -44,6 +44,9 @@
/// entry to the function and which must be maintained by every function.
unsigned stackAlignment;
+ /// Minimum memset / memcpy size that is turned into rep/stos, rep/movs ops.
+ unsigned MinRepStrSizeThreshold;
+
/// Used by instruction selector
bool indirectExternAndWeakGlobals;
@@ -62,6 +65,12 @@
/// function for this subtarget.
unsigned getStackAlignment() const { return stackAlignment; }
+ /// getMinRepStrSizeThreshold - Returns the minimum memset / memcpy size
+ /// required to turn the operation into an X86 rep/stos or rep/movs
+ /// instruction. This is only consulted when the src / dst buffers are not
+ /// DWORD aligned.
+ unsigned getMinRepStrSizeThreshold() const { return MinRepStrSizeThreshold; }
+
/// Returns true if the instruction selector should treat global values
/// referencing external or weak symbols as indirect rather than direct
/// references.