CellSPU:
- Fix bugs 3194, 3195: i128 loads/stores produce correct code (although we
  still need to ensure that i128 is 16-byte aligned in real life), and i128
  zero-extends are supported.
- New td file: SPU128InstrInfo.td: this is where all new i128 support should
be put in the future.
- Continue to hammer on i64 operations and test cases; ensure that the only
remaining problem will be i64 mul.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61784 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/CellSPU/SPU128InstrInfo.td b/lib/Target/CellSPU/SPU128InstrInfo.td
new file mode 100644
index 0000000..6612901
--- /dev/null
+++ b/lib/Target/CellSPU/SPU128InstrInfo.td
@@ -0,0 +1,22 @@
+//===--- SPU128InstrInfo.td - Cell SPU 128-bit operations -*- tablegen -*--===//
+//
+// Cell SPU 128-bit operations
+//
+// Primary author: Scott Michel (scottm@aero.org)
+//===----------------------------------------------------------------------===//
+
+// zext 32->128: Zero extend 32-bit to 128-bit
+def : Pat<(i128 (zext R32C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 R32C:$rSrc, 12)>;
+
+// zext 64->128: Zero extend 64-bit to 128-bit
+def : Pat<(i128 (zext R64C:$rSrc)),
+ (ROTQMBYIr128_zext_r64 R64C:$rSrc, 8)>;
+
+// zext 16->128: Zero extend 16-bit to 128-bit
+def : Pat<(i128 (zext R16C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDi16i32 R16C:$rSrc, (ILAr32 0xffff)), 12)>;
+
+// zext 8->128: Zero extend 8-bit to 128-bit (mask low byte, then rotate
+def : Pat<(i128 (zext R8C:$rSrc)),
+ (ROTQMBYIr128_zext_r32 (ANDIi8i32 R8C:$rSrc, 0xff), 12)>;
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
index d6fc2bd..74c0eca 100644
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -48,8 +48,8 @@
// is in a 32-bit register that contains a select mask pattern (i.e., gather
// bits result):
-def : Pat<(select R32C:$rC, R64C:$rB, R64C:$rA),
- (SELBr64_cond R64C:$rA, R64C:$rB, (FSMr32 R32C:$rC))>;
+def : Pat<(select R32C:$rCond, R64C:$rFalse, R64C:$rTrue),
+ (SELBr64_cond R64C:$rTrue, R64C:$rFalse, (FSMr32 R32C:$rCond))>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// The i64 seteq fragment that does the scalar->vector conversion and
diff --git a/lib/Target/CellSPU/SPUCallingConv.td b/lib/Target/CellSPU/SPUCallingConv.td
index 4bad198..5213e42 100644
--- a/lib/Target/CellSPU/SPUCallingConv.td
+++ b/lib/Target/CellSPU/SPUCallingConv.td
@@ -21,10 +21,11 @@
// Return-value convention for Cell SPU: Everything can be passed back via $3:
def RetCC_SPU : CallingConv<[
- CCIfType<[i8], CCAssignToReg<[R3]>>,
- CCIfType<[i16], CCAssignToReg<[R3]>>,
- CCIfType<[i32], CCAssignToReg<[R3]>>,
- CCIfType<[i64], CCAssignToReg<[R3]>>,
+ CCIfType<[i8], CCAssignToReg<[R3]>>,
+ CCIfType<[i16], CCAssignToReg<[R3]>>,
+ CCIfType<[i32], CCAssignToReg<[R3]>>,
+ CCIfType<[i64], CCAssignToReg<[R3]>>,
+ CCIfType<[i128], CCAssignToReg<[R3]>>,
CCIfType<[f32, f64], CCAssignToReg<[R3]>>,
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToReg<[R3]>>
]>;
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index c13d696..7e63a87 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -114,7 +114,7 @@
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
// SPU's loads and stores have to be custom lowered:
- for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
+ for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
++sctype) {
MVT VT = (MVT::SimpleValueType)sctype;
@@ -947,6 +947,9 @@
case MVT::i64:
ArgRegClass = &SPU::R64CRegClass;
break;
+ case MVT::i128:
+ ArgRegClass = &SPU::GPRCRegClass;
+ break;
case MVT::f32:
ArgRegClass = &SPU::R32FPRegClass;
break;
@@ -1070,6 +1073,8 @@
switch (Arg.getValueType().getSimpleVT()) {
default: assert(0 && "Unexpected ValueType for argument!");
+ case MVT::i8:
+ case MVT::i16:
case MVT::i32:
case MVT::i64:
case MVT::i128:
@@ -1220,6 +1225,11 @@
ResultVals[0] = Chain.getValue(0);
NumResults = 1;
break;
+ case MVT::i128:
+ Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i128, InFlag).getValue(1);
+ ResultVals[0] = Chain.getValue(0);
+ NumResults = 1;
+ break;
case MVT::f32:
case MVT::f64:
Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
@@ -2182,24 +2192,48 @@
MVT Op0VT = Op0.getValueType();
MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
- assert(Op0VT == MVT::i32
- && "CellSPU: Zero/sign extending something other than i32");
-
- DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");
-
SDValue PromoteScalar =
DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
// Use a shuffle to zero extend the i32 to i64 directly:
- SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
- DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(0x00010203,
- MVT::i32), DAG.getConstant(0x80808080, MVT::i32), DAG.getConstant(
- 0x08090a0b, MVT::i32));
- SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT, PromoteScalar,
- PromoteScalar, shufMask);
+ SDValue shufMask;
- return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, DAG.getNode(ISD::BIT_CONVERT,
- VecVT, zextShuffle));
+ switch (Op0VT.getSimpleVT()) {
+ default:
+ cerr << "CellSPU LowerI64Math: Unhandled zero/any extend MVT\n";
+ abort();
+ /*NOTREACHED*/
+ break;
+ case MVT::i32:
+ shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ DAG.getConstant(0x80808080, MVT::i32),
+ DAG.getConstant(0x00010203, MVT::i32),
+ DAG.getConstant(0x80808080, MVT::i32),
+ DAG.getConstant(0x08090a0b, MVT::i32));
+ break;
+
+ case MVT::i16:
+ shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ DAG.getConstant(0x80808080, MVT::i32),
+ DAG.getConstant(0x80800203, MVT::i32),
+ DAG.getConstant(0x80808080, MVT::i32),
+ DAG.getConstant(0x80800a0b, MVT::i32));
+ break;
+
+ case MVT::i8:
+ shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ DAG.getConstant(0x80808080, MVT::i32),
+ DAG.getConstant(0x80808003, MVT::i32),
+ DAG.getConstant(0x80808080, MVT::i32),
+ DAG.getConstant(0x8080800b, MVT::i32));
+ break;
+ }
+
+ SDValue zextShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
+ PromoteScalar, PromoteScalar, shufMask);
+
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
+ DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
}
case ISD::ADD: {
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index d88b2e9..06ad507 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -130,7 +130,32 @@
case SPU::ORi32_v4i32:
case SPU::ORi64_v2i64:
case SPU::ORf32_v4f32:
- case SPU::ORf64_v2f64: {
+ case SPU::ORf64_v2f64:
+ case SPU::ORi128_r64:
+ case SPU::ORi128_f64:
+ case SPU::ORi128_r32:
+ case SPU::ORi128_f32:
+ case SPU::ORi128_r16:
+ case SPU::ORi128_r8:
+ case SPU::ORi128_vec:
+ case SPU::ORr64_i128:
+ case SPU::ORf64_i128:
+ case SPU::ORr32_i128:
+ case SPU::ORf32_i128:
+ case SPU::ORr16_i128:
+ case SPU::ORr8_i128:
+ case SPU::ORvec_i128:
+ case SPU::ORr16_r32:
+ case SPU::ORr8_r32:
+ case SPU::ORr32_r16:
+ case SPU::ORr32_r8:
+ case SPU::ORr32_r64:
+ case SPU::ORr16_r64:
+ case SPU::ORr8_r64:
+ case SPU::ORr64_r32:
+ case SPU::ORr64_r16:
+ case SPU::ORr64_r8:
+ {
assert(MI.getNumOperands() == 2 &&
MI.getOperand(0).isReg() &&
MI.getOperand(1).isReg() &&
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 1ceaf1a..6a0fde3 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -1140,48 +1140,66 @@
XSBHInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
[(set (v8i16 VECREG:$rDst), (sext (vectype VECREG:$rSrc)))]>;
-class XSBHInRegInst<RegisterClass rclass>:
+class XSBHInRegInst<RegisterClass rclass, list<dag> pattern>:
XSBHInst<(outs rclass:$rDst), (ins rclass:$rSrc),
- [(set rclass:$rDst, (sext_inreg rclass:$rSrc, i8))]>;
+ pattern>;
multiclass ExtendByteHalfword {
- def v16i8: XSBHVecInst<v8i16>;
- def r16: XSBHInRegInst<R16C>;
- def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
- [(set R16C:$rDst, (sext R8C:$rSrc))]>;
+ def v16i8: XSBHVecInst<v8i16>;
+ def r8: XSBHInst<(outs R16C:$rDst), (ins R8C:$rSrc),
+ [(set R16C:$rDst, (sext R8C:$rSrc))]>;
+ def r16: XSBHInRegInst<R16C,
+ [(set R16C:$rDst, (sext_inreg R16C:$rSrc, i8))]>;
// 32-bit form for XSBH: used to sign extend 8-bit quantities to 16-bit
// quantities to 32-bit quantities via a 32-bit register (see the sext 8->32
// pattern below). Intentionally doesn't match a pattern because we want the
// sext 8->32 pattern to do the work for us, namely because we need the extra
// XSHWr32.
- def r32: XSBHInRegInst<R32C>;
+ def r32: XSBHInRegInst<R32C, [/* no pattern */]>;
+
+ // Same as the 32-bit version, but for i64
+ def r64: XSBHInRegInst<R64C, [/* no pattern */]>;
}
defm XSBH : ExtendByteHalfword;
// Sign extend halfwords to words:
-def XSHWvec:
- RRForm_1<0b01101101010, (outs VECREG:$rDest), (ins VECREG:$rSrc),
- "xshw\t$rDest, $rSrc", IntegerOp,
- [(set (v4i32 VECREG:$rDest), (sext (v8i16 VECREG:$rSrc)))]>;
-def XSHWr32:
- RRForm_1<0b01101101010, (outs R32C:$rDst), (ins R32C:$rSrc),
- "xshw\t$rDst, $rSrc", IntegerOp,
- [(set R32C:$rDst, (sext_inreg R32C:$rSrc, i16))]>;
+class XSHWInst<dag OOL, dag IOL, list<dag> pattern>:
+ RRForm_1<0b01101101010, OOL, IOL, "xshw\t$rDest, $rSrc",
+ IntegerOp, pattern>;
-def XSHWr16:
- RRForm_1<0b01101101010, (outs R32C:$rDst), (ins R16C:$rSrc),
- "xshw\t$rDst, $rSrc", IntegerOp,
- [(set R32C:$rDst, (sext R16C:$rSrc))]>;
+class XSHWVecInst<ValueType in_vectype, ValueType out_vectype>:
+ XSHWInst<(outs VECREG:$rDest), (ins VECREG:$rSrc),
+ [(set (out_vectype VECREG:$rDest),
+ (sext (in_vectype VECREG:$rSrc)))]>;
+
+class XSHWInRegInst<RegisterClass rclass, list<dag> pattern>:
+ XSHWInst<(outs rclass:$rDest), (ins rclass:$rSrc),
+ pattern>;
+
+class XSHWRegInst<RegisterClass rclass>:
+ XSHWInst<(outs rclass:$rDest), (ins R16C:$rSrc),
+ [(set rclass:$rDest, (sext R16C:$rSrc))]>;
+
+multiclass ExtendHalfwordWord {
+ def v4i32: XSHWVecInst<v4i32, v8i16>;
+
+ def r16: XSHWRegInst<R32C>;
+
+ def r32: XSHWInRegInst<R32C,
+ [(set R32C:$rDest, (sext_inreg R32C:$rSrc, i16))]>;
+ def r64: XSHWInRegInst<R64C, [/* no pattern */]>;
+}
+
+defm XSHW : ExtendHalfwordWord;
// Sign-extend words to doublewords (32->64 bits)
class XSWDInst<dag OOL, dag IOL, list<dag> pattern>:
- RRForm_1<0b01100101010, OOL, IOL,
- "xswd\t$rDst, $rSrc", IntegerOp,
- pattern>;
+ RRForm_1<0b01100101010, OOL, IOL, "xswd\t$rDst, $rSrc",
+ IntegerOp, pattern>;
class XSWDVecInst<ValueType in_vectype, ValueType out_vectype>:
XSWDInst<(outs VECREG:$rDst), (ins VECREG:$rSrc),
@@ -1411,6 +1429,18 @@
class ORCvtGPRCReg<RegisterClass rclass>:
ORCvtForm<(outs rclass:$rT), (ins GPRC:$rA)>;
+
+class ORCvtFormR32Reg<RegisterClass rclass>:
+ ORCvtForm<(outs rclass:$rT), (ins R32C:$rA)>;
+
+class ORCvtFormRegR32<RegisterClass rclass>:
+ ORCvtForm<(outs R32C:$rT), (ins rclass:$rA)>;
+
+class ORCvtFormR64Reg<RegisterClass rclass>:
+ ORCvtForm<(outs rclass:$rT), (ins R64C:$rA)>;
+
+class ORCvtFormRegR64<RegisterClass rclass>:
+ ORCvtForm<(outs R64C:$rT), (ins rclass:$rA)>;
class ORCvtGPRCVec:
ORCvtForm<(outs VECREG:$rT), (ins GPRC:$rA)>;
@@ -1481,6 +1511,24 @@
// Conversion from vector to GPRC
def vec_i128: ORCvtGPRCVec;
+
+ // Conversion from register to R32C:
+ def r16_r32: ORCvtFormRegR32<R16C>;
+ def r8_r32: ORCvtFormRegR32<R8C>;
+
+ // Conversion from R32C to register
+ def r32_r16: ORCvtFormR32Reg<R16C>;
+ def r32_r8: ORCvtFormR32Reg<R8C>;
+
+ // Conversion from register to R64C:
+ def r32_r64: ORCvtFormR64Reg<R32C>;
+ def r16_r64: ORCvtFormR64Reg<R16C>;
+ def r8_r64: ORCvtFormR64Reg<R8C>;
+
+ // Conversion from R64C to register
+ def r64_r32: ORCvtFormRegR64<R32C>;
+ def r64_r16: ORCvtFormRegR64<R16C>;
+ def r64_r8: ORCvtFormRegR64<R8C>;
}
defm OR : BitwiseOr;
@@ -2682,7 +2730,7 @@
(ROTMIr32 R32C:$rA, uimm7:$val)>;
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
-// ROTQMBYvec: This is a vector form merely so that when used in an
+// ROTQMBY: This is a vector form merely so that when used in an
// instruction pattern, type checking will succeed. This instruction assumes
// that the user knew to negate $rB.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
@@ -2720,10 +2768,16 @@
ROTQMBYIInst<(outs VECREG:$rT), (ins VECREG:$rA, rotNeg7imm:$val),
[/* no pattern */]>;
-class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype, PatLeaf pred>:
+class ROTQMBYIRegInst<RegisterClass rclass, Operand optype, ValueType inttype,
+ PatLeaf pred>:
ROTQMBYIInst<(outs rclass:$rT), (ins rclass:$rA, optype:$val),
[/* no pattern */]>;
+// 128-bit zero extension form:
+class ROTQMBYIZExtInst<RegisterClass rclass, Operand optype, PatLeaf pred>:
+ ROTQMBYIInst<(outs GPRC:$rT), (ins rclass:$rA, optype:$val),
+ [/* no pattern */]>;
+
multiclass RotateQuadBytesImm
{
def v16i8: ROTQMBYIVecInst<v16i8>;
@@ -2733,6 +2787,11 @@
def r128: ROTQMBYIRegInst<GPRC, rotNeg7imm, i32, uimm7>;
def r64: ROTQMBYIRegInst<R64C, rotNeg7imm, i32, uimm7>;
+
+ def r128_zext_r8: ROTQMBYIZExtInst<R8C, rotNeg7imm, uimm7>;
+ def r128_zext_r16: ROTQMBYIZExtInst<R16C, rotNeg7imm, uimm7>;
+ def r128_zext_r32: ROTQMBYIZExtInst<R32C, rotNeg7imm, uimm7>;
+ def r128_zext_r64: ROTQMBYIZExtInst<R64C, rotNeg7imm, uimm7>;
}
defm ROTQMBYI : RotateQuadBytesImm;
@@ -4339,6 +4398,13 @@
def : Pat<(i32 (sext R8C:$rSrc)),
(XSHWr16 (XSBHr8 R8C:$rSrc))>;
+// sext 8->64: Sign extend bytes to double word
+def : Pat<(sext_inreg R64C:$rSrc, i8),
+ (XSWDr64_inreg (XSHWr64 (XSBHr64 R64C:$rSrc)))>;
+
+def : Pat<(i64 (sext R8C:$rSrc)),
+ (XSWDr64 (XSHWr16 (XSBHr8 R8C:$rSrc)))>;
+
// zext 8->16: Zero extend bytes to halfwords
def : Pat<(i16 (zext R8C:$rSrc)),
(ANDHIi8i16 R8C:$rSrc, 0xff)>;
@@ -4347,14 +4413,29 @@
def : Pat<(i32 (zext R8C:$rSrc)),
(ANDIi8i32 R8C:$rSrc, 0xff)>;
-// anyext 8->16: Extend 8->16 bits, irrespective of sign
+// zext 8->64: Zero extend bytes to double words
+def : Pat<(i64 (zext R8C:$rSrc)),
+ (ORi64_v2i64 (SELBv4i32 (ROTQMBYv4i32
+ (ORv4i32_i32 (ANDIi8i32 R8C:$rSrc, 0xff)),
+ 0x4),
+ (ILv4i32 0x0),
+ (FSMBIv4i32 0x0f0f)))>;
+
+// anyext 8->16: Extend 8->16 bits, irrespective of sign, preserves high bits
def : Pat<(i16 (anyext R8C:$rSrc)),
(ORHIi8i16 R8C:$rSrc, 0)>;
-// anyext 8->32: Extend 8->32 bits, irrespective of sign
+// anyext 8->32: Extend 8->32 bits, irrespective of sign, preserves high bits
def : Pat<(i32 (anyext R8C:$rSrc)),
(ORIi8i32 R8C:$rSrc, 0)>;
+// sext 16->64: Sign extend halfword to double word
+def : Pat<(sext_inreg R64C:$rSrc, i16),
+ (XSWDr64_inreg (XSHWr64 R64C:$rSrc))>;
+
+def : Pat<(i64 (sext R16C:$rSrc)),
+ (XSWDr64 (XSHWr16 R16C:$rSrc))>;
+
// zext 16->32: Zero extend halfwords to words
def : Pat<(i32 (zext R16C:$rSrc)),
(ANDi16i32 R16C:$rSrc, (ILAr32 0xffff))>;
@@ -4461,15 +4542,6 @@
(SPUlo tconstpool:$in, 0)),
(IOHLlo (ILHUhi tconstpool:$in), tconstpool:$in)>;
-/*
-def : Pat<(SPUindirect R32C:$sp, i32ImmSExt10:$imm),
- (AIr32 R32C:$sp, i32ImmSExt10:$imm)>;
-
-def : Pat<(SPUindirect R32C:$sp, imm:$imm),
- (Ar32 R32C:$sp,
- (IOHLr32 (ILHUr32 (HI16 imm:$imm)), (LO16 imm:$imm)))>;
- */
-
def : Pat<(add (SPUhi tglobaladdr:$in, 0), (SPUlo tglobaladdr:$in, 0)),
(IOHLlo (ILHUhi tglobaladdr:$in), tglobaladdr:$in)>;
@@ -4488,3 +4560,5 @@
include "SPUMathInstr.td"
// 64-bit "instructions"/support
include "SPU64InstrInfo.td"
+// 128-bit "instructions"/support
+include "SPU128InstrInfo.td"