- Remove Tilmann's custom truncate lowering: it completely hosed
DAGcombine's ability to find reasons to remove truncates when they were not
needed. Consequently, the CellSPU backend would produce correct, but _really
slow and horrible_, code. Replaced it with instruction sequences in
SPUInstrInfo.td that perform the equivalent truncation. (A toy illustration
follows at the end of this list.)
- Re-examine how unaligned loads and stores work. The generated unaligned
load code has been tested on CellSPU hardware; see i32operations.c and
i64operations.c in CodeGen/CellSPU/useful-harnesses. (While these are toy
test programs, they do prove that some real-world code compiles correctly.
A sketch of such a harness follows at the end of this list.)
- Fix truncating stores in bug 3193 (note: unpack_df.ll will still make llc
fault because i64 ult is not yet implemented).
- Added i64 eq and neq for setcc and select/setcc; started a new instruction
information file for them, SPU64InstrInfo.td. Additional i64 operations
should be added to this file and not to SPUInstrInfo.td. (A conceptual
sketch of the i64 eq expansion follows below.)
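
To illustrate the first point, a toy case (hypothetical, not from the test
suite) where generic truncate handling lets DAGcombine delete a truncate
outright:

    /* With the custom lowering gone, DAGcombine folds the truncate of the
     * extending load back into a plain byte load. */
    unsigned char roundtrip(unsigned char *p) {
      unsigned int x = *p;        /* zext-load i8 -> i32 */
      return (unsigned char) x;   /* trunc i32 -> i8: folds away entirely */
    }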
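
A minimal harness in the spirit of the unaligned-load tests (a sketch only,
not one of the checked-in files; recall that the SPU is big-endian):

    #include <stdio.h>
    #include <string.h>

    /* Read an i32 from an address with no particular alignment; the small
       fixed-size memcpy is typically lowered as a single, possibly
       unaligned, i32 load. */
    static unsigned int load_u32(const unsigned char *p) {
      unsigned int v;
      memcpy(&v, p, sizeof(v));
      return v;
    }

    int main(void) {
      unsigned char buf[8] = { 0, 0xde, 0xad, 0xbe, 0xef, 0, 0, 0 };
      unsigned int v = load_u32(&buf[1]);
      printf("v = 0x%08x (expect 0xdeadbeef)\n", v);
      return v != 0xdeadbeefU;
    }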
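
Conceptually, the new i64 seteq reduces to the SPU's native 32-bit word
compare (a sketch of the idea only, not the actual SPU64InstrInfo.td
patterns):

    /* An i64 compares equal iff both of its 32-bit halves do. On the SPU
     * this is a single v4i32 'ceq' producing per-word masks, after which
     * the two relevant word results are combined. */
    static int i64_eq(unsigned long long a, unsigned long long b) {
      unsigned int ahi = (unsigned int) (a >> 32), alo = (unsigned int) a;
      unsigned int bhi = (unsigned int) (b >> 32), blo = (unsigned int) b;
      return (ahi == bhi) & (alo == blo);
    }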
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61447 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index c3c31e0..e975d0d 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -15,6 +15,7 @@
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -77,37 +78,6 @@
return retval;
}
-
- //! Predicate that returns true if operand is a memory target
- /*!
- \arg Op Operand to test
- \return true if the operand is a memory target (i.e., global
- address, external symbol, constant pool) or an A-form
- address.
- */
- bool isMemoryOperand(const SDValue &Op)
- {
- const unsigned Opc = Op.getOpcode();
- return (Opc == ISD::GlobalAddress
- || Opc == ISD::GlobalTLSAddress
- || Opc == ISD::JumpTable
- || Opc == ISD::ConstantPool
- || Opc == ISD::ExternalSymbol
- || Opc == ISD::TargetGlobalAddress
- || Opc == ISD::TargetGlobalTLSAddress
- || Opc == ISD::TargetJumpTable
- || Opc == ISD::TargetConstantPool
- || Opc == ISD::TargetExternalSymbol
- || Opc == SPUISD::AFormAddr);
- }
-
- //! Predicate that returns true if the operand is an indirect target
- bool isIndirectOperand(const SDValue &Op)
- {
- const unsigned Opc = Op.getOpcode();
- return (Opc == ISD::Register
- || Opc == SPUISD::LDRESULT);
- }
}
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
@@ -135,20 +105,8 @@
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
- setTruncStoreAction(MVT::i8, MVT::i8, Custom);
- setTruncStoreAction(MVT::i16, MVT::i8, Custom);
- setTruncStoreAction(MVT::i32, MVT::i8, Custom);
- setTruncStoreAction(MVT::i64, MVT::i8, Custom);
- setTruncStoreAction(MVT::i128, MVT::i8, Custom);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
-
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);
// SPU constant load actions are custom lowered:
setOperationAction(ISD::Constant, MVT::i64, Custom);
@@ -160,11 +118,33 @@
++sctype) {
MVT VT = (MVT::SimpleValueType)sctype;
- setOperationAction(ISD::LOAD, VT, Custom);
- setOperationAction(ISD::STORE, VT, Custom);
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
+ MVT StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
}
- // Custom lower BRCOND for i8 to "promote" the result to i16
+  for (unsigned sctype = (unsigned) MVT::f32; sctype <= (unsigned) MVT::f64;
+ ++sctype) {
+ MVT VT = (MVT::SimpleValueType) sctype;
+
+ setOperationAction(ISD::LOAD, VT, Custom);
+ setOperationAction(ISD::STORE, VT, Custom);
+
+ for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
+ MVT StoreVT = (MVT::SimpleValueType) stype;
+ setTruncStoreAction(VT, StoreVT, Expand);
+ }
+ }
+
+  // Custom lower BRCOND for i8 to "promote" the condition to the type its
+  // operand had before truncation (or to i32):
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
// Expand the jumptable branches
@@ -176,14 +156,12 @@
setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
-#if 0
setOperationAction(ISD::SELECT_CC, MVT::i64, Custom);
-#endif
// SPU has no intrinsics for these particular operations:
setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
- // PowerPC has no SREM/UREM instructions
+ // SPU has no SREM/UREM instructions
setOperationAction(ISD::SREM, MVT::i32, Expand);
setOperationAction(ISD::UREM, MVT::i32, Expand);
setOperationAction(ISD::SREM, MVT::i64, Expand);
@@ -232,14 +210,6 @@
setOperationAction(ISD::MUL, MVT::i32, Custom);
setOperationAction(ISD::MUL, MVT::i64, Expand); // libcall
- // SMUL_LOHI, UMUL_LOHI
-#if 0
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
- setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
-#endif
-
// Need to custom handle (some) common i8, i64 math ops
setOperationAction(ISD::ADD, MVT::i64, Custom);
setOperationAction(ISD::SUB, MVT::i8, Custom);
@@ -265,12 +235,12 @@
setOperationAction(ISD::SELECT, MVT::i8, Legal);
setOperationAction(ISD::SELECT, MVT::i16, Legal);
setOperationAction(ISD::SELECT, MVT::i32, Legal);
- setOperationAction(ISD::SELECT, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT, MVT::i64, Legal);
setOperationAction(ISD::SETCC, MVT::i8, Legal);
setOperationAction(ISD::SETCC, MVT::i16, Legal);
- setOperationAction(ISD::SETCC, MVT::i32, Legal);
- setOperationAction(ISD::SETCC, MVT::i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::i64, Custom);
// Zero extension and sign extension for i64 have to be
// custom legalized
@@ -278,10 +248,7 @@
setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom);
- // Custom lower truncates
- setOperationAction(ISD::TRUNCATE, MVT::i8, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::i16, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::i32, Custom);
+ // Custom lower i128 -> i64 truncates
setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
// SPU has a legal FP -> signed INT instruction
@@ -292,7 +259,7 @@
// FDIV on SPU requires custom lowering
setOperationAction(ISD::FDIV, MVT::f32, Custom);
- //setOperationAction(ISD::FDIV, MVT::f64, Custom);
+ setOperationAction(ISD::FDIV, MVT::f64, Expand); // libcall
// SPU has [U|S]INT_TO_FP
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
@@ -402,7 +369,7 @@
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
setShiftAmountType(MVT::i32);
- setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanContents(ZeroOrNegativeOneBooleanContent);
setStackPointerRegisterToSaveRestore(SPU::R1);
@@ -435,7 +402,7 @@
node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
- node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
+  node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
@@ -471,9 +438,14 @@
return ((i != node_names.end()) ? i->second : 0);
}
+//! Return the Cell SPU's SETCC result type
+
MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
MVT VT = Op.getValueType();
- return (VT.isInteger() ? VT : MVT(MVT::i32));
+  // i8, i16 and i32 are valid SETCC result types
+ return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
}
//===----------------------------------------------------------------------===//
@@ -486,105 +458,6 @@
// LowerOperation implementation
//===----------------------------------------------------------------------===//
-/// Aligned load common code for CellSPU
-/*!
- \param[in] Op The SelectionDAG load or store operand
- \param[in] DAG The selection DAG
- \param[in] ST CellSPU subtarget information structure
- \param[in,out] alignment Caller initializes this to the load or store node's
- value from getAlignment(), may be updated while generating the aligned load
- \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
- offset (divisible by 16, modulo 16 == 0)
- \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
- offset of the preferred slot (modulo 16 != 0)
- \param[in,out] VT Caller initializes this value type to the the load or store
- node's loaded or stored value type; may be updated if an i1-extended load or
- store.
- \param[out] was16aligned true if the base pointer had 16-byte alignment,
- otherwise false. Can help to determine if the chunk needs to be rotated.
-
- Both load and store lowering load a block of data aligned on a 16-byte
- boundary. This is the common aligned load code shared between both.
- */
-static SDValue
-AlignedLoad(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST,
- LSBaseSDNode *LSN,
- unsigned &alignment, int &alignOffs, int &prefSlotOffs,
- MVT &VT, bool &was16aligned)
-{
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- const valtype_map_s *vtm = getValueTypeMapEntry(VT);
- SDValue basePtr = LSN->getBasePtr();
- SDValue chain = LSN->getChain();
-
- if (basePtr.getOpcode() == ISD::ADD) {
- SDValue Op1 = basePtr.getNode()->getOperand(1);
-
- if (Op1.getOpcode() == ISD::Constant
- || Op1.getOpcode() == ISD::TargetConstant) {
- const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));
-
- alignOffs = (int) CN->getZExtValue();
- prefSlotOffs = (int) (alignOffs & 0xf);
-
- // Adjust the rotation amount to ensure that the final result ends up in
- // the preferred slot:
- prefSlotOffs -= vtm->prefslot_byte;
- basePtr = basePtr.getOperand(0);
-
- // Loading from memory, can we adjust alignment?
- if (basePtr.getOpcode() == SPUISD::AFormAddr) {
- SDValue APtr = basePtr.getOperand(0);
- if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
- GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
- alignment = GSDN->getGlobal()->getAlignment();
- }
- }
- } else {
- alignOffs = 0;
- prefSlotOffs = -vtm->prefslot_byte;
- }
- } else if (basePtr.getOpcode() == ISD::FrameIndex) {
- FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
- alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
- prefSlotOffs = (int) (alignOffs & 0xf);
- prefSlotOffs -= vtm->prefslot_byte;
- } else {
- alignOffs = 0;
- prefSlotOffs = -vtm->prefslot_byte;
- }
-
- if (alignment == 16) {
- // Realign the base pointer as a D-Form address:
- if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
- basePtr = DAG.getNode(ISD::ADD, PtrVT,
- basePtr,
- DAG.getConstant((alignOffs & ~0xf), PtrVT));
- }
-
- // Emit the vector load:
- was16aligned = true;
- return DAG.getLoad(MVT::v16i8, chain, basePtr,
- LSN->getSrcValue(), LSN->getSrcValueOffset(),
- LSN->isVolatile(), 16);
- }
-
- // Unaligned load or we're using the "large memory" model, which means that
- // we have to be very pessimistic:
- if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
- basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
- DAG.getConstant(0, PtrVT));
- }
-
- // Add the offset
- basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
- DAG.getConstant((alignOffs & ~0xf), PtrVT));
- was16aligned = false;
- return DAG.getLoad(MVT::v16i8, chain, basePtr,
- LSN->getSrcValue(), LSN->getSrcValueOffset(),
- LSN->isVolatile(), 16);
-}
-
/// Custom lower loads for CellSPU
/*!
All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
@@ -605,43 +478,110 @@
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
LoadSDNode *LN = cast<LoadSDNode>(Op);
SDValue the_chain = LN->getChain();
+ MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
MVT InVT = LN->getMemoryVT();
MVT OutVT = Op.getValueType();
ISD::LoadExtType ExtType = LN->getExtensionType();
unsigned alignment = LN->getAlignment();
- SDValue Ops[8];
+ const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
switch (LN->getAddressingMode()) {
case ISD::UNINDEXED: {
- int offset, rotamt;
- bool was16aligned;
- SDValue result =
- AlignedLoad(Op, DAG, ST, LN,alignment, offset, rotamt, InVT,
- was16aligned);
+ SDValue result;
+ SDValue basePtr = LN->getBasePtr();
+ SDValue rotate;
- if (result.getNode() == 0)
- return result;
+ if (alignment == 16) {
+ ConstantSDNode *CN;
- the_chain = result.getValue(1);
- // Rotate the chunk if necessary
- if (rotamt < 0)
- rotamt += 16;
- if (rotamt != 0 || !was16aligned) {
- SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);
+ // Special cases for a known aligned load to simplify the base pointer
+ // and the rotation amount:
+ if (basePtr.getOpcode() == ISD::ADD
+          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+ int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
- Ops[0] = result;
- if (was16aligned) {
- Ops[1] = DAG.getConstant(rotamt, MVT::i16);
+ if (rotamt < 0)
+ rotamt += 16;
+
+ rotate = DAG.getConstant(rotamt, MVT::i16);
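+        // e.g. (hypothetical numbers) an i16 load at offset 6 from an
+        // aligned base: prefslot_byte == 2, so rotamt == (6 & 0xf) - 2 == 4,
+        // i.e. the quadword gets rotated left four bytes to land the
+        // halfword in its preferred slot.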
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
+ || (basePtr.getOpcode() == SPUISD::IndirectAddr
+ && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
+ && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
+ // Plain aligned a-form address: rotate into preferred slot
+ // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
+ int64_t rotamt = -vtm->prefslot_byte;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getConstant(rotamt, MVT::i16);
} else {
- MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
- LoadSDNode *LN1 = cast<LoadSDNode>(result);
- Ops[1] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ int64_t rotamt = -vtm->prefslot_byte;
+ if (rotamt < 0)
+ rotamt += 16;
+ rotate = DAG.getNode(ISD::ADD, PtrVT,
+ basePtr,
DAG.getConstant(rotamt, PtrVT));
}
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
- result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8, Ops, 2);
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Offset the rotate amount by the basePtr and the preferred slot
+ // byte offset
+ rotate = DAG.getNode(ISD::ADD, PtrVT,
+ basePtr,
+ DAG.getConstant(-vtm->prefslot_byte, PtrVT));
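+      // Only the low four bits of the rotate amount matter: ROTBYTES_LEFT
+      // maps onto rotqby, which reads just the low four bits of its count,
+      // so adding the full pointer value here is safe.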
}
+ // Re-emit as a v16i8 vector load
+ result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
+ LN->getSrcValue(), LN->getSrcValueOffset(),
+ LN->isVolatile(), 16);
+
+ // Update the chain
+ the_chain = result.getValue(1);
+
+ // Rotate into the preferred slot:
+ result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
+ result.getValue(0), rotate);
+
// Convert the loaded v16i8 vector to the appropriate vector type
// specified by the operand:
MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
@@ -704,23 +644,86 @@
switch (SN->getAddressingMode()) {
case ISD::UNINDEXED: {
- int chunk_offset, slot_offset;
- bool was16aligned;
-
// The vector type we really want to load from the 16-byte chunk.
MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
- SDValue alignLoadVec =
- AlignedLoad(Op, DAG, ST, SN, alignment,
- chunk_offset, slot_offset, VT, was16aligned);
+ SDValue alignLoadVec;
+ SDValue basePtr = SN->getBasePtr();
+ SDValue the_chain = SN->getChain();
+ SDValue insertEltOffs;
- if (alignLoadVec.getNode() == 0)
- return alignLoadVec;
+ if (alignment == 16) {
+ ConstantSDNode *CN;
+
+ // Special cases for a known aligned load to simplify the base pointer
+ // and insertion byte:
+ if (basePtr.getOpcode() == ISD::ADD
+ && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
+ // Known offset into basePtr
+ int64_t offset = CN->getSExtValue();
+
+ // Simplify the base pointer for this case:
+ basePtr = basePtr.getOperand(0);
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & 0xf), PtrVT));
+
+ if ((offset & ~0xf) > 0) {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant((offset & ~0xf), PtrVT));
+ }
+ } else {
+ // Otherwise, assume it's at byte 0 of basePtr
+ insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+ } else {
+ // Unaligned load: must be more pessimistic about addressing modes:
+ if (basePtr.getOpcode() == ISD::ADD) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineRegisterInfo &RegInfo = MF.getRegInfo();
+ unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
+ SDValue Flag;
+
+ SDValue Op0 = basePtr.getOperand(0);
+ SDValue Op1 = basePtr.getOperand(1);
+
+ if (isa<ConstantSDNode>(Op1)) {
+ // Convert the (add <ptr>, <const>) to an indirect address contained
+ // in a register. Note that this is done because we need to avoid
+ // creating a 0(reg) d-form address due to the SPU's block loads.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+ the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
+ basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
+ } else {
+ // Convert the (add <arg1>, <arg2>) to an indirect address, which
+ // will likely be lowered as a reg(reg) x-form address.
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
+ }
+ } else {
+ basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Insertion point is solely determined by basePtr's contents
+ insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
+ basePtr,
+ DAG.getConstant(0, PtrVT));
+ }
+
+ // Re-emit as a v16i8 vector load
+ alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
+ SN->getSrcValue(), SN->getSrcValueOffset(),
+ SN->isVolatile(), 16);
+
+ // Update the chain
+ the_chain = alignLoadVec.getValue(1);
LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
- SDValue basePtr = LN->getBasePtr();
- SDValue the_chain = alignLoadVec.getValue(1);
SDValue theValue = SN->getValue();
SDValue result;
@@ -732,29 +735,20 @@
theValue = theValue.getOperand(0);
}
- chunk_offset &= 0xf;
-
- SDValue insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
- SDValue insertEltPtr;
-
// If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
// Otherwise generate a D-form address with the slot offset relative
// to the stack pointer, which is always aligned.
- DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
- DEBUG(basePtr.getNode()->dump(&DAG));
- DEBUG(cerr << "\n");
-
- if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
- (basePtr.getOpcode() == ISD::ADD
- && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
- insertEltPtr = basePtr;
- } else {
- insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
- }
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "CellSPU LowerSTORE: basePtr = ";
+ basePtr.getNode()->dump(&DAG);
+ cerr << "\n";
+ }
+#endif
SDValue insertEltOp =
- DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltPtr);
+ DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
SDValue vectorizeOp =
DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
@@ -919,22 +913,31 @@
return SDValue();
}
-//! Lower MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
static SDValue
-LowerBRCOND(SDValue Op, SelectionDAG &DAG)
-{
+LowerBRCOND(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) {
SDValue Cond = Op.getOperand(1);
MVT CondVT = Cond.getValueType();
- MVT CondNVT;
+ unsigned CondOpc;
if (CondVT == MVT::i8) {
- CondNVT = MVT::i16;
+    if (Cond.getOpcode() == ISD::TRUNCATE) {
+      // Use the truncate's value type and ANY_EXTEND the condition (DAGcombine
+      // will then remove the truncate). Fetch the operand only after checking
+      // the opcode: not every i8 node has an operand to inspect.
+      CondVT = Cond.getOperand(0).getValueType();
+      CondOpc = ISD::ANY_EXTEND;
+ } else {
+ CondVT = MVT::i32; // default to something reasonable
+ CondOpc = ISD::ZERO_EXTEND;
+ }
+
+ Cond = DAG.getNode(CondOpc, CondVT, Op.getOperand(1));
+
return DAG.getNode(ISD::BRCOND, Op.getValueType(),
- Op.getOperand(0),
- DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
- Op.getOperand(2));
- } else
- return SDValue(); // Unchanged
+ Op.getOperand(0), Cond, Op.getOperand(2));
+ }
+
+ return SDValue(); // Unchanged
}
static SDValue
@@ -1896,7 +1899,7 @@
case MVT::i64:
case MVT::f32:
case MVT::f64:
- return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
+ return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
}
}
@@ -2274,9 +2277,11 @@
return result;
}
-static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
+static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
+ const TargetLowering &TLI)
{
SDValue N0 = Op.getOperand(0); // Everything has at least one operand
+ MVT ShiftVT = TLI.getShiftAmountTy();
assert(Op.getValueType() == MVT::i8);
switch (Opc) {
@@ -2290,11 +2295,11 @@
SDValue N1 = Op.getOperand(1);
N0 = (N0.getOpcode() != ISD::Constant
? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
- : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
+ : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
MVT::i16));
N1 = (N1.getOpcode() != ISD::Constant
? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+ : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
MVT::i16));
return DAG.getNode(ISD::TRUNCATE, MVT::i8,
DAG.getNode(Opc, MVT::i16, N0, N1));
@@ -2307,13 +2312,13 @@
? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
: DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
MVT::i16));
- N1Opc = N1.getValueType().bitsLT(MVT::i32)
+ N1Opc = N1.getValueType().bitsLT(ShiftVT)
? ISD::ZERO_EXTEND
: ISD::TRUNCATE;
N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(N1Opc, MVT::i32, N1)
+ ? DAG.getNode(N1Opc, ShiftVT, N1)
: DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- MVT::i32));
+                               ShiftVT));
SDValue ExpandArg =
DAG.getNode(ISD::OR, MVT::i16, N0,
DAG.getNode(ISD::SHL, MVT::i16,
@@ -2328,14 +2333,13 @@
N0 = (N0.getOpcode() != ISD::Constant
? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
: DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                                MVT::i16));
-      N1Opc = N1.getValueType().bitsLT(MVT::i16)
+      N1Opc = N1.getValueType().bitsLT(ShiftVT)
? ISD::ZERO_EXTEND
: ISD::TRUNCATE;
N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(N1Opc, MVT::i16, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- MVT::i16));
+ ? DAG.getNode(N1Opc, ShiftVT, N1)
+ : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
return DAG.getNode(ISD::TRUNCATE, MVT::i8,
DAG.getNode(Opc, MVT::i16, N0, N1));
}
@@ -2344,15 +2348,15 @@
unsigned N1Opc;
N0 = (N0.getOpcode() != ISD::Constant
? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
- : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
+ : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
MVT::i16));
- N1Opc = N1.getValueType().bitsLT(MVT::i16)
+ N1Opc = N1.getValueType().bitsLT(ShiftVT)
? ISD::SIGN_EXTEND
: ISD::TRUNCATE;
N1 = (N1.getOpcode() != ISD::Constant
- ? DAG.getNode(N1Opc, MVT::i16, N1)
+ ? DAG.getNode(N1Opc, ShiftVT, N1)
: DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
- MVT::i16));
+ ShiftVT));
return DAG.getNode(ISD::TRUNCATE, MVT::i8,
DAG.getNode(Opc, MVT::i16, N0, N1));
}
@@ -2366,7 +2370,7 @@
N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
N1 = (N1.getOpcode() != ISD::Constant
? DAG.getNode(N1Opc, MVT::i16, N1)
- : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
+ : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
MVT::i16));
return DAG.getNode(ISD::TRUNCATE, MVT::i8,
DAG.getNode(Opc, MVT::i16, N0, N1));
@@ -2397,7 +2401,7 @@
DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");
SDValue PromoteScalar =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
+ DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
if (Opc != ISD::SIGN_EXTEND) {
// Use a shuffle to zero extend the i32 to i64 directly:
@@ -2438,9 +2442,9 @@
// Turn operands into vectors to satisfy type checking (shufb works on
// vectors)
SDValue Op0 =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
+ DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
SDValue Op1 =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
+ DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
SmallVector<SDValue, 16> ShufBytes;
// Create the shuffle mask for "rotating" the borrow up one register slot
@@ -2467,9 +2471,9 @@
// Turn operands into vectors to satisfy type checking (shufb works on
// vectors)
SDValue Op0 =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
+ DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
SDValue Op1 =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
+ DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
SmallVector<SDValue, 16> ShufBytes;
// Create the shuffle mask for "rotating" the borrow up one register slot
@@ -2495,7 +2499,7 @@
case ISD::SHL: {
SDValue ShiftAmt = Op.getOperand(1);
MVT ShiftAmtVT = ShiftAmt.getValueType();
- SDValue Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
+ SDValue Op0Vec = DAG.getNode(SPUISD::PREFSLOT2VEC, VecVT, Op0);
SDValue MaskLower =
DAG.getNode(SPUISD::SELB, VecVT,
Op0Vec,
@@ -2540,7 +2544,7 @@
case ISD::SRA: {
// Promote Op0 to vector
SDValue Op0 =
- DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
+ DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
SDValue ShiftAmt = Op.getOperand(1);
MVT ShiftVT = ShiftAmt.getValueType();
@@ -2669,7 +2673,7 @@
SDValue N = Op.getOperand(0);
SDValue Elt0 = DAG.getConstant(0, MVT::i32);
- SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
@@ -2686,7 +2690,7 @@
SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
SDValue Shift1 = DAG.getConstant(8, MVT::i32);
- SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
// CNTB_result becomes the chain to which all of the virtual registers
@@ -2720,7 +2724,7 @@
SDValue Shift1 = DAG.getConstant(16, MVT::i32);
SDValue Shift2 = DAG.getConstant(8, MVT::i32);
- SDValue Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
+ SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
// CNTB_result becomes the chain to which all of the virtual registers
@@ -2760,6 +2764,32 @@
return SDValue();
}
+//! Lower ISD::SETCC
+/*!
+ Handle i64 condition codes on the SPU (still a work in progress).
+ */
+
+static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) {
+ MVT VT = Op.getValueType();
+ SDValue lhs = Op.getOperand(0);
+ SDValue rhs = Op.getOperand(1);
+ SDValue condition = Op.getOperand(2);
+
+ if (VT == MVT::i32 && lhs.getValueType() == MVT::i64) {
+ // Expand the i64 comparisons to what Cell can actually support,
+ // which is eq, ugt and sgt:
+    // TODO: inspect cast<CondCodeSDNode>(condition)->get() here and expand
+    // the remaining i64 condition codes in terms of eq, ugt and sgt.
+  }
+
+ return SDValue();
+}
+
//! Lower ISD::SELECT_CC
/*!
ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
@@ -2772,7 +2802,8 @@
   assumption, given the simplistic uses so far.
*/
-static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
+ const TargetLowering &TLI) {
MVT VT = Op.getValueType();
SDValue lhs = Op.getOperand(0);
SDValue rhs = Op.getOperand(1);
@@ -2780,12 +2811,20 @@
SDValue falseval = Op.getOperand(3);
SDValue condition = Op.getOperand(4);
+  // NOTE: SELB's arguments are $rA, $rB, $mask.
+  //
+  // SELB selects bits from $rA where bits in $mask are 0, and bits from $rB
+  // where bits in $mask are 1. The setcc result is all 1s where the
+  // condition is true and all 0s where it is false, so trueval belongs in
+  // the $rB slot. Hence, the arguments to SELB get reversed.
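+  // In other words: result = ($rA & ~$mask) | ($rB & $mask).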
+
// Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
// legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
// with another "cannot select select_cc" assert:
- SDValue compare = DAG.getNode(ISD::SETCC, VT, lhs, rhs, condition);
- return DAG.getNode(SPUISD::SELB, VT, trueval, falseval, compare);
+ SDValue compare = DAG.getNode(ISD::SETCC, TLI.getSetCCResultType(Op),
+ lhs, rhs, condition);
+ return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
}
//! Custom lower ISD::TRUNCATE
@@ -2799,89 +2838,29 @@
MVT Op0VT = Op0.getValueType();
MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
- SDValue PromoteScalar = DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
-
- unsigned maskLow;
- unsigned maskHigh;
-
// Create shuffle mask
- switch (Op0VT.getSimpleVT()) {
- case MVT::i128:
- switch (simpleVT) {
- case MVT::i64:
- // least significant doubleword of quadword
- maskHigh = 0x08090a0b;
- maskLow = 0x0c0d0e0f;
- break;
- case MVT::i32:
- // least significant word of quadword
- maskHigh = maskLow = 0x0c0d0e0f;
- break;
- case MVT::i16:
- // least significant halfword of quadword
- maskHigh = maskLow = 0x0e0f0e0f;
- break;
- case MVT::i8:
- // least significant byte of quadword
- maskHigh = maskLow = 0x0f0f0f0f;
- break;
- default:
- cerr << "Truncation to illegal type!";
- abort();
- }
- break;
- case MVT::i64:
- switch (simpleVT) {
- case MVT::i32:
- // least significant word of doubleword
- maskHigh = maskLow = 0x04050607;
- break;
- case MVT::i16:
- // least significant halfword of doubleword
- maskHigh = maskLow = 0x06070607;
- break;
- case MVT::i8:
- // least significant byte of doubleword
- maskHigh = maskLow = 0x07070707;
- break;
- default:
- cerr << "Truncation to illegal type!";
- abort();
- }
- break;
- case MVT::i32:
- case MVT::i16:
- switch (simpleVT) {
- case MVT::i16:
- // least significant halfword of word
- maskHigh = maskLow = 0x02030203;
- break;
- case MVT::i8:
- // least significant byte of word/halfword
- maskHigh = maskLow = 0x03030303;
- break;
- default:
- cerr << "Truncation to illegal type!";
- abort();
- }
- break;
- default:
- cerr << "Trying to lower truncation from illegal type!";
- abort();
+ if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
+ // least significant doubleword of quadword
+ unsigned maskHigh = 0x08090a0b;
+ unsigned maskLow = 0x0c0d0e0f;
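+    // (Each shuffle-mask byte selects one source byte: 0x08..0x0f picks
+    // bytes 8-15, the least significant doubleword on the big-endian SPU.)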
+ // Use a shuffle to perform the truncation
+ SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32),
+ DAG.getConstant(maskHigh, MVT::i32),
+ DAG.getConstant(maskLow, MVT::i32));
+
+ SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
+
+ SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
+ PromoteScalar, PromoteScalar, shufMask);
+
+ return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
+ DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
}
- // Use a shuffle to perform the truncation
- SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
- DAG.getConstant(maskHigh, MVT::i32),
- DAG.getConstant(maskLow, MVT::i32),
- DAG.getConstant(maskHigh, MVT::i32),
- DAG.getConstant(maskLow, MVT::i32));
-
- SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
- PromoteScalar, PromoteScalar, shufMask);
-
- return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
- DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
+ return SDValue(); // Leave the truncate unmolested
}
//! Custom (target-specific) lowering entry point
@@ -2921,7 +2900,7 @@
case ISD::ConstantFP:
return LowerConstantFP(Op, DAG);
case ISD::BRCOND:
- return LowerBRCOND(Op, DAG);
+ return LowerBRCOND(Op, DAG, *this);
case ISD::FORMAL_ARGUMENTS:
return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
case ISD::CALL:
@@ -2942,7 +2921,7 @@
case ISD::SHL:
case ISD::SRA: {
if (VT == MVT::i8)
- return LowerI8Math(Op, DAG, Opc);
+ return LowerI8Math(Op, DAG, Opc, *this);
else if (VT == MVT::i64)
return LowerI64Math(Op, DAG, Opc);
break;
@@ -2971,7 +2950,7 @@
if (VT.isVector())
return LowerVectorMUL(Op, DAG);
else if (VT == MVT::i8)
- return LowerI8Math(Op, DAG, Opc);
+ return LowerI8Math(Op, DAG, Opc, *this);
else
return LowerMUL(Op, DAG, VT, Opc);
@@ -2990,10 +2969,13 @@
return LowerCTPOP(Op, DAG);
case ISD::SELECT_CC:
- return LowerSELECT_CC(Op, DAG);
+ return LowerSELECT_CC(Op, DAG, *this);
case ISD::TRUNCATE:
return LowerTRUNCATE(Op, DAG);
+
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG);
}
return SDValue();
@@ -3036,7 +3018,7 @@
SelectionDAG &DAG = DCI.DAG;
SDValue Op0 = N->getOperand(0); // everything has at least one operand
MVT NodeVT = N->getValueType(0); // The node's value type
- MVT Op0VT = Op0.getValueType(); // The first operand's result
+  MVT Op0VT = Op0.getValueType();       // The first operand's result type
SDValue Result; // Initially, empty result
switch (N->getOpcode()) {
@@ -3044,49 +3026,53 @@
case ISD::ADD: {
SDValue Op1 = N->getOperand(1);
- if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
- SDValue Op01 = Op0.getOperand(1);
- if (Op01.getOpcode() == ISD::Constant
- || Op01.getOpcode() == ISD::TargetConstant) {
- // (add <const>, (SPUindirect <arg>, <const>)) ->
- // (SPUindirect <arg>, <const + const>)
- ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
- ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
- SDValue combinedConst =
- DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
+ if (Op0.getOpcode() == SPUISD::IndirectAddr
+ || Op1.getOpcode() == SPUISD::IndirectAddr) {
+ // Normalize the operands to reduce repeated code
+ SDValue IndirectArg = Op0, AddArg = Op1;
+
+ if (Op1.getOpcode() == SPUISD::IndirectAddr) {
+ IndirectArg = Op1;
+ AddArg = Op0;
+ }
+
+ if (isa<ConstantSDNode>(AddArg)) {
+        ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
+ SDValue IndOp1 = IndirectArg.getOperand(1);
+
+ if (CN0->isNullValue()) {
+ // (add (SPUindirect <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
#if !defined(NDEBUG)
- if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
cerr << "\n"
- << "Replace: (add " << CN0->getZExtValue() << ", "
- << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n"
- << "With: (SPUindirect <arg>, "
- << CN0->getZExtValue() + CN1->getZExtValue() << ")\n";
- }
+ << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
#endif
- return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
- Op0.getOperand(0), combinedConst);
- }
- } else if (isa<ConstantSDNode>(Op0)
- && Op1.getOpcode() == SPUISD::IndirectAddr) {
- SDValue Op11 = Op1.getOperand(1);
- if (Op11.getOpcode() == ISD::Constant
- || Op11.getOpcode() == ISD::TargetConstant) {
- // (add (SPUindirect <arg>, <const>), <const>) ->
- // (SPUindirect <arg>, <const + const>)
- ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
- ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
- SDValue combinedConst =
- DAG.getConstant(CN0->getZExtValue() + CN1->getZExtValue(), Op0VT);
+ return IndirectArg;
+ } else if (isa<ConstantSDNode>(IndOp1)) {
+ // (add (SPUindirect <arg>, <const>), <const>) ->
+ // (SPUindirect <arg>, <const + const>)
+          ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
+ int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
+ SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
- DEBUG(cerr << "Replace: (add " << CN0->getZExtValue() << ", "
- << "(SPUindirect <arg>, " << CN1->getZExtValue() << "))\n");
- DEBUG(cerr << "With: (SPUindirect <arg>, "
- << CN0->getZExtValue() + CN1->getZExtValue() << ")\n");
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\n"
+ << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
+ << "), " << CN0->getSExtValue() << ")\n"
+ << "With: (SPUindirect <arg>, "
+ << combinedConst << ")\n";
+ }
+#endif
- return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
- Op1.getOperand(0), combinedConst);
+          return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
+                             IndirectArg.getOperand(0), combinedValue);
+ }
}
}
break;
@@ -3127,6 +3113,25 @@
return Op0;
}
+ } else if (Op0.getOpcode() == ISD::ADD) {
+ SDValue Op1 = N->getOperand(1);
+ if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
+ // (SPUindirect (add <arg>, <arg>), 0) ->
+ // (SPUindirect <arg>, <arg>)
+ if (CN1->isNullValue()) {
+
+#if !defined(NDEBUG)
+ if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
+ cerr << "\n"
+ << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
+ << "With: (SPUindirect <arg>, <arg>)\n";
+ }
+#endif
+
+ return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
+ Op0.getOperand(0), Op0.getOperand(1));
+ }
+ }
}
break;
}
@@ -3136,19 +3141,19 @@
case SPUISD::VEC_SRL:
case SPUISD::VEC_SRA:
case SPUISD::ROTQUAD_RZ_BYTES:
- case SPUISD::ROTQUAD_RZ_BITS: {
+ case SPUISD::ROTQUAD_RZ_BITS:
+ case SPUISD::ROTBYTES_LEFT: {
SDValue Op1 = N->getOperand(1);
- if (isa<ConstantSDNode>(Op1)) {
- // Kill degenerate vector shifts:
- ConstantSDNode *CN = cast<ConstantSDNode>(Op1);
- if (CN->getZExtValue() == 0) {
+ // Kill degenerate vector shifts:
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
+ if (CN->isNullValue()) {
Result = Op0;
}
}
break;
}
- case SPUISD::PROMOTE_SCALAR: {
+ case SPUISD::PREFSLOT2VEC: {
switch (Op0.getOpcode()) {
default:
break;
@@ -3263,7 +3268,7 @@
case CNTB:
#endif
- case SPUISD::PROMOTE_SCALAR: {
+ case SPUISD::PREFSLOT2VEC: {
SDValue Op0 = Op.getOperand(0);
MVT Op0VT = Op0.getValueType();
unsigned Op0VTBits = Op0VT.getSizeInBits();
@@ -3306,7 +3311,25 @@
#endif
}
}
+
+unsigned
+SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
+ unsigned Depth) const {
+ switch (Op.getOpcode()) {
+ default:
+ return 1;
+ case ISD::SETCC: {
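+    // With ZeroOrNegativeOneBooleanContent, a SETCC result is all zeros or
+    // all ones, so every bit of the result is a copy of the sign bit.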
+ MVT VT = Op.getValueType();
+
+ if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
+ VT = MVT::i32;
+ }
+ return VT.getSizeInBits();
+ }
+ }
+}
+
// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,