//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
/// \file
/// \brief TargetLowering functions borrowed from AMDIL.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUISelLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
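// Configure the operation actions, scheduling preference, and memcpy/memset
// expansion thresholds that the AMDIL-derived portion of the backend relies
// on.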
void AMDGPUTargetLowering::InitAMDILLowering() {
  int types[] = {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  int IntTypes[] = {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  int FloatTypes[] = {
    (int)MVT::f32,
    (int)MVT::f64
  };

  int VectorTypes[] = {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t NumTypes = sizeof(types) / sizeof(*types);
  size_t NumFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t NumIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t NumVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDGPUSubtarget &STM =
      getTargetMachine().getSubtarget<AMDGPUSubtarget>();
  // These are the current register classes that are supported.

  for (unsigned int x = 0; x < NumTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types.
    // We cannot sign-extend in-register directly, so it is custom-lowered to
    // a shift pair (see LowerSIGN_EXTEND_INREG below).
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines.
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types.
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < NumIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // The GPU does not have a divrem instruction for signed or unsigned
    // operands.
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI as a single instruction.
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // GPU doesn't have a rotl, rotr, or byteswap instruction.
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators.
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < NumVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant, MVT::i64, Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDGPUDeviceInfo::DoubleOps)) {
    // We support loading/storing v2f64 but not operations on the type.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    // We want to expand vector conversions into their scalar counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works correctly for these types;
  // that requires vector comparisons.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::Constant, MVT::i32, Legal);

  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  maxStoresPerMemcpy = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset = 4096;
}

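// AMDIL exposes no target-specific memory intrinsics that need extra
// MachineMemOperand information, so this hook always declines.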
bool
AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                         const CallInst &I,
                                         unsigned Intrinsic) const {
  return false;
}

// The backend supports 32 and 64 bit floating point immediates.
bool
AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarVT == MVT::f32 || ScalarVT == MVT::f64;
}

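// Since both f32 and f64 immediates are legal (see isFPImmLegal above),
// nothing is gained by shrinking an f64 constant to f32.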
bool
AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
  MVT::SimpleValueType ScalarVT = VT.getScalarType().getSimpleVT().SimpleTy;
  return ScalarVT != MVT::f32 && ScalarVT != MVT::f64;
}

// computeMaskedBitsForTargetNode - Determine which bits of 'Op' are known to
// be zero or one. Op is expected to be a target-specific node. Used by the
// DAG combiner.
void
AMDGPUTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const {
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything.
  switch (Op.getOpcode()) {
  default:
    break;
  case ISD::SELECT_CC:
    DAG.ComputeMaskedBits(Op.getOperand(1), KnownZero, KnownOne, Depth + 1);
    DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1);
    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
    // A bit is only known if it is known in both the LHS and RHS.
    KnownOne &= KnownOne2;
    KnownZero &= KnownZero2;
    break;
  }
}

//===----------------------------------------------------------------------===//
// Other Lowering Hooks
//===----------------------------------------------------------------------===//

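// Dispatch ISD::SDIV to a width-specific lowering routine based on the
// scalar type; types without a custom routine are returned unchanged.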
SDValue
AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

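// Dispatch ISD::SREM to a width-specific lowering routine, mirroring
// LowerSDIV above.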
SDValue
AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

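// Lower SIGN_EXTEND_INREG as a shift pair: shifting the value left so the
// sign bit of the narrow type lands in the container's sign bit, then
// arithmetic-shifting it back replicates that sign bit through the upper
// bits. For example, sign-extending an i8 held in an i32:
//   sext_inreg(x, i8) == (x << 24) >> 24   (arithmetic shift right)
// Containers narrower than 32 bits are first widened to 32 bits.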
SDValue
AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to be extended to 32 bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted back to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
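
// Return an integer VT built from 32- or 64-bit elements whose total width
// covers 'size' * 'numEle' bits; e.g. genIntType(32, 2) yields v2i32, while
// genIntType(16, 2) packs into a single i32.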
EVT
AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const {
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}

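// Lower ISD::BRCOND (chain, cond, target) to the target's BRANCH_COND node,
// which takes its operands in (chain, target, cond) order.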
SDValue
AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);
  return DAG.getNode(AMDGPUISD::BRANCH_COND,
                     Op.getDebugLoc(),
                     Op.getValueType(),
                     Chain, Jump, Cond);
}

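// Lower signed division of 8- and 16-bit types through f32 arithmetic: the
// operands are sign-extended to i32, converted to float, and divided with the
// fast reciprocal-based DIV_INF. Values this narrow fit exactly in an f32
// mantissa, so the truncated quotient iq is at worst one short of the exact
// result; jq (+/-1, carrying the sign of ia^ib, i.e. of the quotient) is
// added back only when the remainder estimate |fr| >= |fb| indicates that.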
SDValue
AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY;
  MVT FLTTY;
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDGPUISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);

  // jq = (cv ? jq : 0);
  jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
                   DAG.getConstant(0, OVT));

  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}

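// Lower 32-bit signed division in terms of unsigned division, using the
// identity sdiv(a, b) = (udiv(|a|, |b|) + s) ^ s, where s is an all-ones mask
// when the operand signs differ and zero otherwise, and |x| is computed
// branchlessly as (x + m) ^ m with m = (x < 0 ? -1 : 0).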
SDValue
AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSDIV32 generates the equivalent of the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  return DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
}

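// 64-bit signed division has no custom lowering here yet; return the node
// unchanged.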
SDValue
AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
  return SDValue(Op.getNode(), 0);
}

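// Lower 8-bit signed remainder by promoting both operands to 32 bits (or the
// matching i32 vector type), taking SREM there, and truncating the result.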
SDValue
AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

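// Same promotion strategy as LowerSREM8, for 16-bit types.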
SDValue
AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

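// Lower 32-bit signed remainder via unsigned operations: with both operands
// made non-negative, |a| % |b| is computed as |a| - (|a| / |b|) * |b|, and
// the sign of the dividend (mask r10) is then restored, since the remainder
// takes the sign of the dividend.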
SDValue
AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSREM32 generates the equivalent of the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // sub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  return DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
}

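// 64-bit signed remainder has no custom lowering here yet; return the node
// unchanged.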
SDValue
AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
  return SDValue(Op.getNode(), 0);
}