Support for MEMCPY and MEMSET.
llvm-svn: 25226
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 65622af..a6a95c7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -147,6 +147,9 @@
setOperationAction(ISD::SHL_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRA_PARTS , MVT::i32 , Custom);
setOperationAction(ISD::SRL_PARTS , MVT::i32 , Custom);
+ // X86 wants to expand memset / memcpy itself.
+ setOperationAction(ISD::MEMSET , MVT::Other, Custom);
+ setOperationAction(ISD::MEMCPY , MVT::Other, Custom);
}
// We don't have line number support yet.
@@ -1614,6 +1617,109 @@
return DAG.getNode(X86ISD::RET_FLAG, MVT::Other, Op.getOperand(0),
DAG.getConstant(getBytesToPopOnReturn(), MVT::i16));
}
+ case ISD::MEMSET: {
+   // Lower ISD::MEMSET to an X86ISD::REP_STOS node.  Operands as read below:
+   // 0 = chain, 1 = destination pointer, 2 = fill value, 3 = byte count,
+   // 4 = alignment (must be a constant).
+   SDOperand InFlag;
+   SDOperand Chain = Op.getOperand(0);
+   unsigned Align =
+     (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
+   if (Align == 0) Align = 1;
+
+   ConstantSDNode *ValC = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+   ConstantSDNode *SizeC = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+
+   // Pick the element width in bytes.  A store wider than one byte is only
+   // used when the fill value is a constant (so it can be replicated), the
+   // alignment permits it, and the byte count is a compile-time constant
+   // that is an exact multiple of the width.  Dividing any other count by
+   // the width would silently leave the trailing bytes unwritten.
+   unsigned Width = 1;
+   if (ValC && SizeC) {
+     switch (Align & 3) {
+     case 0: Width = 4; break;  // DWORD aligned
+     case 2: Width = 2; break;  // WORD aligned
+     default: break;            // Byte aligned
+     }
+     // Narrow until the width divides the size; Width == 1 always does.
+     while (SizeC->getValue() % Width != 0)
+       Width >>= 1;
+   }
+
+   // Defaults describe the byte-wise form: value in AL, count in bytes.
+   MVT::ValueType AVT = MVT::i8;
+   unsigned ValReg = X86::AL;
+   SDOperand Count = Op.getOperand(3);
+   SDOperand FillVal = Op.getOperand(2);
+   if (ValC) {
+     // Replicate the low byte of the constant across the chosen width.
+     unsigned Val = ValC->getValue() & 255;
+     if (Width >= 2) {
+       Val = (Val << 8) | Val;
+       AVT = MVT::i16;
+       ValReg = X86::AX;
+     }
+     if (Width == 4) {
+       Val = (Val << 16) | Val;
+       AVT = MVT::i32;
+       ValReg = X86::EAX;
+     }
+     // The count register holds elements, not bytes.
+     if (Width > 1)
+       Count = DAG.getConstant(SizeC->getValue() / Width, MVT::i32);
+     FillVal = DAG.getConstant(Val, AVT);
+   }
+
+   // Thread the flag through the register copies so they stay glued to the
+   // REP_STOS node: value -> AL/AX/EAX, count -> ECX, destination -> EDI.
+   Chain = DAG.getCopyToReg(Chain, ValReg, FillVal, InFlag);
+   InFlag = Chain.getValue(1);
+   Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
+   InFlag = Chain.getValue(1);
+   Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
+   InFlag = Chain.getValue(1);
+
+   // The value-type operand tells instruction selection which REP_STOSx
+   // variant to pick.
+   return DAG.getNode(X86ISD::REP_STOS, MVT::Other, Chain,
+                      DAG.getValueType(AVT), InFlag);
+ }
+ case ISD::MEMCPY: {
+   // Lower ISD::MEMCPY to an X86ISD::REP_MOVS node.  Operands as read below:
+   // 0 = chain, 1 = destination pointer, 2 = source pointer, 3 = byte count,
+   // 4 = alignment (must be a constant).
+   SDOperand Chain = Op.getOperand(0);
+   unsigned Align =
+     (unsigned)cast<ConstantSDNode>(Op.getOperand(4))->getValue();
+   if (Align == 0) Align = 1;
+
+   // Pick the element width in bytes.  A move wider than one byte is only
+   // used when the alignment permits it and the byte count is a compile-time
+   // constant that is an exact multiple of the width.  Dividing any other
+   // count by the width would silently leave the trailing bytes uncopied.
+   unsigned Width = 1;
+   SDOperand Count = Op.getOperand(3);
+   if (ConstantSDNode *SizeC = dyn_cast<ConstantSDNode>(Op.getOperand(3))) {
+     switch (Align & 3) {
+     case 0: Width = 4; break;  // DWORD aligned
+     case 2: Width = 2; break;  // WORD aligned
+     default: break;            // Byte aligned
+     }
+     // Narrow until the width divides the size; Width == 1 always does.
+     while (SizeC->getValue() % Width != 0)
+       Width >>= 1;
+     // The count register holds elements, not bytes.
+     if (Width > 1)
+       Count = DAG.getConstant(SizeC->getValue() / Width, MVT::i32);
+   }
+
+   MVT::ValueType AVT = MVT::i8;
+   if (Width == 4)
+     AVT = MVT::i32;
+   else if (Width == 2)
+     AVT = MVT::i16;
+
+   // Thread the flag through the register copies so they stay glued to the
+   // REP_MOVS node: count -> ECX, destination -> EDI, source -> ESI.
+   SDOperand InFlag;
+   Chain = DAG.getCopyToReg(Chain, X86::ECX, Count, InFlag);
+   InFlag = Chain.getValue(1);
+   Chain = DAG.getCopyToReg(Chain, X86::EDI, Op.getOperand(1), InFlag);
+   InFlag = Chain.getValue(1);
+   Chain = DAG.getCopyToReg(Chain, X86::ESI, Op.getOperand(2), InFlag);
+   InFlag = Chain.getValue(1);
+
+   // The value-type operand tells instruction selection which REP_MOVSx
+   // variant to pick.
+   return DAG.getNode(X86ISD::REP_MOVS, MVT::Other, Chain,
+                      DAG.getValueType(AVT), InFlag);
+ }
case ISD::GlobalAddress: {
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
SDOperand GVOp = DAG.getTargetGlobalAddress(GV, getPointerTy());
@@ -1659,6 +1765,8 @@
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
+ case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
+ case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
}
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index a45c84e..a4d481a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -123,9 +123,15 @@
/// or TEST instruction.
BRCOND,
- /// Return with a flag operand. Operand 1 is the number of bytes of stack
- /// to pop, operand 2 is the chain and operand 3 is a flag operand.
+ /// Return with a flag operand. Operand 1 is the chain operand, operand
+ /// 2 is the number of bytes of stack to pop.
RET_FLAG,
+
+ /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
+ REP_STOS,
+
+ /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
+ REP_MOVS,
};
// X86 specific condition code. These correspond to X86_*_COND in
diff --git a/llvm/lib/Target/X86/X86ISelPattern.cpp b/llvm/lib/Target/X86/X86ISelPattern.cpp
index 563539d..6ca4078 100644
--- a/llvm/lib/Target/X86/X86ISelPattern.cpp
+++ b/llvm/lib/Target/X86/X86ISelPattern.cpp
@@ -3558,8 +3558,8 @@
Opcode = X86::REP_STOSB;
}
- // No matter what the alignment is, we put the source in ESI, the
- // destination in EDI, and the count in ECX.
+ // No matter what the alignment is, we put the destination in EDI, and the
+ // count in ECX.
unsigned TmpReg1 = SelectExpr(Node->getOperand(1));
BuildMI(BB, X86::MOV32rr, 1, X86::ECX).addReg(CountReg);
BuildMI(BB, X86::MOV32rr, 1, X86::EDI).addReg(TmpReg1);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 695dc84..70baa8a 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -52,6 +52,8 @@
SDTCisPtrTy<1>, SDTCisVT<2, OtherVT>]>;
def SDTX86Fild64m : SDTypeProfile<1, 1, [SDTCisVT<0, f64>, SDTCisPtrTy<1>]>;
+def SDTX86RepStr : SDTypeProfile<0, 1, [SDTCisVT<0, OtherVT>]>; // Type profile shared by X86rep_stos / X86rep_movs; operand 0 is constrained to OtherVT.
+
def SDTX86RdTsc : SDTypeProfile<0, 0, []>;
def X86addflag : SDNode<"X86ISD::ADD_FLAG", SDTIntBinOp ,
@@ -99,6 +101,11 @@
def X86fild64m : SDNode<"X86ISD::FILD64m", SDTX86Fild64m,
[SDNPHasChain]>;
+def X86rep_stos: SDNode<"X86ISD::REP_STOS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInFlag]>; // Matched by the REP_STOSB/W/D patterns; takes a chain and an incoming flag.
+def X86rep_movs: SDNode<"X86ISD::REP_MOVS", SDTX86RepStr,
+ [SDNPHasChain, SDNPInFlag]>; // Matched by the REP_MOVSB/W/D patterns; takes a chain and an incoming flag.
+
def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG",SDTX86RdTsc,
[SDNPHasChain, SDNPOutFlag]>;
@@ -549,18 +556,24 @@
"lea{l} {$src|$dst}, {$dst|$src}",
[(set R32:$dst, leaaddr:$src)]>;
-def REP_MOVSB : I<0xA4, RawFrm, (ops), "{rep;movsb|rep movsb}", []>,
+def REP_MOVSB : I<0xA4, RawFrm, (ops), "{rep;movsb|rep movsb}",
+ [(X86rep_movs i8)]>,
Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>, REP;
-def REP_MOVSW : I<0xA5, RawFrm, (ops), "{rep;movsw|rep movsw}", []>,
+def REP_MOVSW : I<0xA5, RawFrm, (ops), "{rep;movsw|rep movsw}",
+ [(X86rep_movs i16)]>,
Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>, REP, OpSize;
-def REP_MOVSD : I<0xA5, RawFrm, (ops), "{rep;movsd|rep movsd}", []>,
+def REP_MOVSD : I<0xA5, RawFrm, (ops), "{rep;movsd|rep movsd}",
+ [(X86rep_movs i32)]>,
Imp<[ECX,EDI,ESI], [ECX,EDI,ESI]>, REP;
-def REP_STOSB : I<0xAA, RawFrm, (ops), "{rep;stosb|rep stosb}", []>,
+def REP_STOSB : I<0xAA, RawFrm, (ops), "{rep;stosb|rep stosb}",
+ [(X86rep_stos i8)]>,
Imp<[AL,ECX,EDI], [ECX,EDI]>, REP;
-def REP_STOSW : I<0xAB, RawFrm, (ops), "{rep;stosw|rep stosw}", []>,
+def REP_STOSW : I<0xAB, RawFrm, (ops), "{rep;stosw|rep stosw}",
+ [(X86rep_stos i16)]>,
Imp<[AX,ECX,EDI], [ECX,EDI]>, REP, OpSize;
-def REP_STOSD : I<0xAB, RawFrm, (ops), "{rep;stosl|rep stosd}", []>,
+def REP_STOSD : I<0xAB, RawFrm, (ops), "{rep;stosl|rep stosd}",
+ [(X86rep_stos i32)]>,
Imp<[EAX,ECX,EDI], [ECX,EDI]>, REP;