//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

|  | #include "SPURegisterNames.h" | 
|  | #include "SPUISelLowering.h" | 
|  | #include "SPUTargetMachine.h" | 
|  | #include "SPUFrameInfo.h" | 
|  | #include "SPUMachineFunction.h" | 
|  | #include "llvm/Constants.h" | 
|  | #include "llvm/Function.h" | 
|  | #include "llvm/Intrinsics.h" | 
|  | #include "llvm/CallingConv.h" | 
|  | #include "llvm/CodeGen/CallingConvLower.h" | 
|  | #include "llvm/CodeGen/MachineFrameInfo.h" | 
|  | #include "llvm/CodeGen/MachineFunction.h" | 
|  | #include "llvm/CodeGen/MachineInstrBuilder.h" | 
|  | #include "llvm/CodeGen/MachineRegisterInfo.h" | 
|  | #include "llvm/CodeGen/SelectionDAG.h" | 
|  | #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" | 
|  | #include "llvm/Target/TargetOptions.h" | 
|  | #include "llvm/ADT/VectorExtras.h" | 
|  | #include "llvm/Support/Debug.h" | 
|  | #include "llvm/Support/ErrorHandling.h" | 
|  | #include "llvm/Support/MathExtras.h" | 
|  | #include "llvm/Support/raw_ostream.h" | 
|  | #include <map> | 
|  |  | 
|  | using namespace llvm; | 

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! EVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    EVT   valtype;
    int   prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
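
  // For example, an i16 value occupies bytes 2-3 of its 16-byte register,
  // so its preferred-slot byte offset is 2; i32/f32 and wider values start
  // at byte 0. LowerLOAD below rotates a loaded quadword so the requested
  // scalar lands in this slot.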

  const valtype_map_s *getValueTypeMapEntry(EVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      report_fatal_error("getValueTypeMapEntry returns NULL for " +
                         Twine(VT.getEVTString()));
    }
#endif

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */

  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      EVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy =
      Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                      0, TLI.getLibcallCallingConv(LC), false,
                      /*isReturnValueUsed=*/true,
                      Callee, Args, DAG, Op.getDebugLoc());

    return CallInfo.first;
  }
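
  // Illustrative (hypothetical) use: expanding an f64 division into a call
  // to the __fast_divdf3 libcall registered in the constructor below; the
  // 'Hi' out-parameter is unused for single-result calls.
  //
  //   SDValue Dummy;
  //   SDValue Quot = ExpandLibCall(RTLIB::DIV_F64, Op, DAG,
  //                                /*isSigned=*/false, Dummy, TLI);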
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {
  // Signed division by a power of two is cheap (shift-based) on SPU, so
  // tell the DAG combiner not to rewrite it.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign- or zero-extended loads for i1 (extending loads for
  // i8 and i16 are custom lowered in the loop below):
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8,  Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,        MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::SREM,    MVT::i128, Expand);
  setOperationAction(ISD::UREM,    MVT::i128, Expand);
  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32!)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        .td files.
  setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32,    Legal);
  setOperationAction(ISD::ROTL, MVT::i16,    Legal);
  setOperationAction(ISD::ROTL, MVT::i8,     Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRA,  MVT::i8,     Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL,  MVT::i64,    Legal);
  setOperationAction(ISD::SRL,  MVT::i64,    Legal);
  setOperationAction(ISD::SRA,  MVT::i64,    Legal);

  // i8 multiplies must be custom lowered; i32 and i64 multiplies are legal
  setOperationAction(ISD::MUL,  MVT::i8,     Custom);
  setOperationAction(ISD::MUL,  MVT::i32,    Legal);
  setOperationAction(ISD::MUL,  MVT::i64,    Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);
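
  // ([US]MUL_LOHI produce both halves of the double-width product, e.g.
  // i32 x i32 -> {lo:i32, hi:i32}; marking them Expand makes the legalizer
  // synthesize them from operations SPU does support.)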

  // i8 add/sub need custom handling; i64 add/sub are legal
  setOperationAction(ISD::ADD,  MVT::i8,     Custom);
  setOperationAction(ISD::ADD,  MVT::i64,    Legal);
  setOperationAction(ISD::SUB,  MVT::i8,     Custom);
  setOperationAction(ISD::SUB,  MVT::i64,    Legal);

  // SPU does not have BSWAP. It does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
  setOperationAction(ISD::BSWAP, MVT::i64,   Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i128,  Expand);

  setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
  setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i128,  Expand);

  setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
  setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
  setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
  setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
  setOperationAction(ISD::CTLZ , MVT::i128,  Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,   Legal);
  setOperationAction(ISD::SELECT, MVT::i16,  Legal);
  setOperationAction(ISD::SELECT, MVT::i32,  Legal);
  setOperationAction(ISD::SELECT, MVT::i64,  Legal);

  setOperationAction(ISD::SETCC, MVT::i8,    Legal);
  setOperationAction(ISD::SETCC, MVT::i16,   Legal);
  setOperationAction(ISD::SETCC, MVT::i32,   Legal);
  setOperationAction(ISD::SETCC, MVT::i64,   Legal);
  setOperationAction(ISD::SETCC, MVT::f64,   Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // Custom lower i32/i64 -> i128 sign extend
  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // f64 FDIV has no hardware support; expand it to a libcall
  setOperationAction(ISD::FDIV, MVT::f64, Expand);

  // SPU has [U|S]INT_TO_FP for i32->f32, but not for i32->f64 or i64->f64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress,  VT, Custom);
    setOperationAction(ISD::ConstantPool,   VT, Custom);
    setOperationAction(ISD::JumpTable,      VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD,     VT, Legal);
    setOperationAction(ISD::SUB,     VT, Legal);
    // vector mul is also legal (matched during instruction selection).
    setOperationAction(ISD::MUL,     VT, Legal);

    setOperationAction(ISD::AND,     VT, Legal);
    setOperationAction(ISD::OR,      VT, Legal);
    setOperationAction(ISD::XOR,     VT, Legal);
    setOperationAction(ISD::LOAD,    VT, Legal);
    setOperationAction(ISD::SELECT,  VT, Legal);
    setOperationAction(ISD::STORE,   VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV,    VT, Expand);
    setOperationAction(ISD::SREM,    VT, Expand);
    setOperationAction(ISD::UDIV,    VT, Expand);
    setOperationAction(ISD::UREM,    VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR (Sched::RegPressure),
  // which produces slightly better code than the default (could also be
  // TDRR, but TargetLowering.h needs a mod to support that model):
  setSchedulingPreference(Sched::RegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
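  // Log2 value 3 == 8-byte function alignment.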
  return 3;
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
          VT.getSimpleVT().SimpleTy :
          MVT::i32);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for
 elements within a 16-byte block, we have to rotate to extract the requested
 element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
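
 As a concrete example of the aligned case (a sketch, not verified codegen
 output): an i32 load from offset 8 within a quadword becomes a v16i8 load,
 a left rotate by 8 bytes to bring the word into preferred slot 0, a
 bitconvert to v4i32, and a vec2prefslot to extract the scalar.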
 */
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;
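
        // e.g. an i16 load at (base + 6): rotamt = (6 & 0xf) - 2 = 4, so
        // the quadword is rotated left 4 bytes to put the halfword into its
        // preferred slot (bytes 2-3).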

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), LN->isNonTemporal(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 InVT, (128 / InVT.getSizeInBits()));
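
    // e.g. a 32-bit scalar is viewed as v4i32 (128 / 32 == 4 lanes) before
    // the preferred slot is extracted.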
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
                         "than UNINDEXED\n" +
                         Twine((unsigned)LN->getAddressingMode()));
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
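
 For example (a sketch of the sequence, not verified codegen output):
 storing an i32 at (base + 4) loads the containing quadword, builds a
 shuffle mask from the insertion offset, shuffles the scalar into bytes
 4-7, and stores the whole v16i8 block back.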
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 VT, (128 / VT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), SN->isNonTemporal(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      errs() << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      errs() << "\n";
    }
#endif

    SDValue insertEltOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->isNonTemporal(),
                          LN->getAlignment());

|  | #if 0 && !defined(NDEBUG) | 
|  | if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { | 
|  | const SDValue ¤tRoot = DAG.getRoot(); | 
|  |  | 
|  | DAG.setRoot(result); | 
|  | errs() << "------- CellSPU:LowerStore result:\n"; | 
|  | DAG.dump(); | 
|  | errs() << "-------\n"; | 
|  | DAG.setRoot(currentRoot); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | return result; | 
|  | /*UNREACHED*/ | 
|  | } | 
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      report_fatal_error("LowerSTORE: Got a StoreSDNode with an addr mode "
                         "other than UNINDEXED\n" +
                         Twine((unsigned)SN->getAddressingMode()));
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
                                          PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    report_fatal_error("LowerGlobalAddress: Relocation model other than "
                       "static not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

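    // Splat the constant's bit pattern into both lanes of a v2i64, view it
    // as v2f64, then extract the preferred slot; e.g. 1.0 becomes the i64
    // 0x3FF0000000000000 in both lanes.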
    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}

SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv,
                                        bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;
    CCValAssign &VA = ArgLocs[ArgNo];

    if (VA.isRegLoc()) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default:
        report_fatal_error("LowerFormalArguments Unhandled argument type: " +
                           Twine(ObjectVT.getEVTString()));
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(VA.getLocReg(), VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    // Update the chain
    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // FIXME: we should be able to query the argument registers from
    //        tablegen generated code.
    static const unsigned ArgRegs[] = {
      SPU::R3,  SPU::R4,  SPU::R5,  SPU::R6,  SPU::R7,  SPU::R8,  SPU::R9,
      SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
      SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
      SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
      SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
      SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
      SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
      SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
      SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
      SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
      SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
    };
    // size of ArgRegs array (R3..R79 == 77 registers)
    const unsigned NumArgRegs = sizeof(ArgRegs) / sizeof(ArgRegs[0]);

    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      FuncInfo->setVarArgsFrameIndex(
        MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
      SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
      SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
                                   false, false, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.
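
  // i.e. the address must be a 4-byte-aligned value that fits in an 18-bit
  // signed immediate: e.g. 0x1FFFC passes, while 0x20000 does not survive
  // the (Addr << 14 >> 14) round trip.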

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}

SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  // CellSPU target does not yet support tail call optimization.
  isTailCall = false;

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps     = Outs.size();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);

  const unsigned NumArgRegs = ArgLocs.size();

  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
    SDValue Arg = OutVals[ArgRegIdx];
    CCValAssign &VA = ArgLocs[ArgRegIdx];

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
    case MVT::f32:
    case MVT::f64:
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
                                           false, false, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area.  According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();

  // Insert a call sequence start
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with an
      // X-form address pair:
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // If the call has results, copy the values out of the ret val registers.
  switch (Ins[0].VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      InVals.push_back(Chain.getValue(0));
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
    }
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i64:
  case MVT::i128:
  case MVT::f32:
  case MVT::f64:
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  }

  return Chain;
}
|  |  | 
|  | SDValue | 
|  | SPUTargetLowering::LowerReturn(SDValue Chain, | 
|  | CallingConv::ID CallConv, bool isVarArg, | 
|  | const SmallVectorImpl<ISD::OutputArg> &Outs, | 
|  | const SmallVectorImpl<SDValue> &OutVals, | 
|  | DebugLoc dl, SelectionDAG &DAG) const { | 
|  |  | 
|  | SmallVector<CCValAssign, 16> RVLocs; | 
|  | CCState CCInfo(CallConv, isVarArg, getTargetMachine(), | 
|  | RVLocs, *DAG.getContext()); | 
|  | CCInfo.AnalyzeReturn(Outs, RetCC_SPU); | 
|  |  | 
|  | // If this is the first return lowered for this function, add the regs to the | 
|  | // liveout set for the function. | 
|  | if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { | 
|  | for (unsigned i = 0; i != RVLocs.size(); ++i) | 
|  | DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); | 
|  | } | 
|  |  | 
|  | SDValue Flag; | 
|  |  | 
|  | // Copy the result values into the output registers. | 
|  | for (unsigned i = 0; i != RVLocs.size(); ++i) { | 
|  | CCValAssign &VA = RVLocs[i]; | 
|  | assert(VA.isRegLoc() && "Can only return in registers!"); | 
|  | Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), | 
|  | OutVals[i], Flag); | 
|  | Flag = Chain.getValue(1); | 
|  | } | 
|  |  | 
|  | if (Flag.getNode()) | 
|  | return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag); | 
|  | else | 
|  | return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain); | 
|  | } | 
|  |  | 
|  |  | 
|  | //===----------------------------------------------------------------------===// | 
|  | // Vector related lowering: | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | static ConstantSDNode * | 
|  | getVecImm(SDNode *N) { | 
|  | SDValue OpVal(0, 0); | 
|  |  | 
|  | // Check to see if this buildvec has a single non-undef value in its elements. | 
|  | for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { | 
|  | if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; | 
|  | if (OpVal.getNode() == 0) | 
|  | OpVal = N->getOperand(i); | 
|  | else if (OpVal != N->getOperand(i)) | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | if (OpVal.getNode() != 0) { | 
|  | if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { | 
|  | return CN; | 
|  | } | 
|  | } | 
|  |  | 
|  | return 0; | 
|  | } | 
|  |  | 
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
|  | /// and the value fits into an unsigned 18-bit constant, and if so, return the | 
|  | /// constant | 
|  | SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG, | 
|  | EVT ValueType) { | 
|  | if (ConstantSDNode *CN = getVecImm(N)) { | 
|  | uint64_t Value = CN->getZExtValue(); | 
|  | if (ValueType == MVT::i64) { | 
|  | uint64_t UValue = CN->getZExtValue(); | 
|  | uint32_t upper = uint32_t(UValue >> 32); | 
|  | uint32_t lower = uint32_t(UValue); | 
|  | if (upper != lower) | 
|  | return SDValue(); | 
|  | Value = Value >> 32; | 
|  | } | 
|  | if (Value <= 0x3ffff) | 
|  | return DAG.getTargetConstant(Value, ValueType); | 
|  | } | 
|  |  | 
|  | return SDValue(); | 
|  | } | 
|  |  | 
|  | /// get_vec_i16imm - Test if this vector is a vector filled with the same value | 
|  | /// and the value fits into a signed 16-bit constant, and if so, return the | 
|  | /// constant | 
|  | SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG, | 
|  | EVT ValueType) { | 
|  | if (ConstantSDNode *CN = getVecImm(N)) { | 
|  | int64_t Value = CN->getSExtValue(); | 
|  | if (ValueType == MVT::i64) { | 
|  | uint64_t UValue = CN->getZExtValue(); | 
|  | uint32_t upper = uint32_t(UValue >> 32); | 
|  | uint32_t lower = uint32_t(UValue); | 
|  | if (upper != lower) | 
|  | return SDValue(); | 
|  | Value = Value >> 32; | 
|  | } | 
if (isInt<16>(Value)) {
|  | return DAG.getTargetConstant(Value, ValueType); | 
|  | } | 
|  | } | 
|  |  | 
|  | return SDValue(); | 
|  | } | 
|  |  | 
|  | /// get_vec_i10imm - Test if this vector is a vector filled with the same value | 
|  | /// and the value fits into a signed 10-bit constant, and if so, return the | 
|  | /// constant | 
|  | SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG, | 
|  | EVT ValueType) { | 
|  | if (ConstantSDNode *CN = getVecImm(N)) { | 
|  | int64_t Value = CN->getSExtValue(); | 
|  | if (ValueType == MVT::i64) { | 
|  | uint64_t UValue = CN->getZExtValue(); | 
|  | uint32_t upper = uint32_t(UValue >> 32); | 
|  | uint32_t lower = uint32_t(UValue); | 
|  | if (upper != lower) | 
|  | return SDValue(); | 
|  | Value = Value >> 32; | 
|  | } | 
|  | if (isInt<10>(Value)) | 
|  | return DAG.getTargetConstant(Value, ValueType); | 
|  | } | 
|  |  | 
|  | return SDValue(); | 
|  | } | 
|  |  | 
|  | /// get_vec_i8imm - Test if this vector is a vector filled with the same value | 
|  | /// and the value fits into a signed 8-bit constant, and if so, return the | 
|  | /// constant. | 
|  | /// | 
|  | /// @note: The incoming vector is v16i8 because that's the only way we can load | 
|  | /// constant vectors. Thus, we test to see if the upper and lower bytes are the | 
|  | /// same value. | 
|  | SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG, | 
|  | EVT ValueType) { | 
|  | if (ConstantSDNode *CN = getVecImm(N)) { | 
|  | int Value = (int) CN->getZExtValue(); | 
|  | if (ValueType == MVT::i16 | 
|  | && Value <= 0xffff                 /* truncated from uint64_t */ | 
|  | && ((short) Value >> 8) == ((short) Value & 0xff)) | 
|  | return DAG.getTargetConstant(Value & 0xff, ValueType); | 
|  | else if (ValueType == MVT::i8 | 
|  | && (Value & 0xff) == Value) | 
|  | return DAG.getTargetConstant(Value, ValueType); | 
|  | } | 
|  |  | 
|  | return SDValue(); | 
|  | } | 
|  |  | 
/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
/// value whose lower 16 bits are zero, so that it can be loaded with an ILHU
/// instruction; if so, return the upper 16 bits of the value as the constant
|  | SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG, | 
|  | EVT ValueType) { | 
|  | if (ConstantSDNode *CN = getVecImm(N)) { | 
|  | uint64_t Value = CN->getZExtValue(); | 
|  | if ((ValueType == MVT::i32 | 
|  | && ((unsigned) Value & 0xffff0000) == (unsigned) Value) | 
|  | || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value)) | 
|  | return DAG.getTargetConstant(Value >> 16, ValueType); | 
|  | } | 
|  |  | 
|  | return SDValue(); | 
|  | } | 
|  |  | 
|  | /// get_v4i32_imm - Catch-all for general 32-bit constant vectors | 
|  | SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) { | 
|  | if (ConstantSDNode *CN = getVecImm(N)) { | 
|  | return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32); | 
|  | } | 
|  |  | 
|  | return SDValue(); | 
|  | } | 
|  |  | 
/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
|  | SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) { | 
|  | if (ConstantSDNode *CN = getVecImm(N)) { | 
// Note: no truncating cast here; the splat constant is a full 64-bit value.
return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
|  | } | 
|  |  | 
|  | return SDValue(); | 
|  | } | 
|  |  | 
//! Lower a BUILD_VECTOR node by matching splat constants to SPU immediate forms:
|  | static SDValue | 
|  | LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) { | 
|  | EVT VT = Op.getValueType(); | 
|  | EVT EltVT = VT.getVectorElementType(); | 
|  | DebugLoc dl = Op.getDebugLoc(); | 
|  | BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode()); | 
|  | assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR"); | 
|  | unsigned minSplatBits = EltVT.getSizeInBits(); | 
|  |  | 
|  | if (minSplatBits < 16) | 
|  | minSplatBits = 16; | 
|  |  | 
|  | APInt APSplatBits, APSplatUndef; | 
|  | unsigned SplatBitSize; | 
|  | bool HasAnyUndefs; | 
|  |  | 
|  | if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, | 
|  | HasAnyUndefs, minSplatBits) | 
|  | || minSplatBits < SplatBitSize) | 
|  | return SDValue();   // Wasn't a constant vector or splat exceeded min | 
|  |  | 
|  | uint64_t SplatBits = APSplatBits.getZExtValue(); | 
|  |  | 
|  | switch (VT.getSimpleVT().SimpleTy) { | 
|  | default: | 
|  | report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " + | 
|  | Twine(VT.getEVTString())); | 
|  | /*NOTREACHED*/ | 
|  | case MVT::v4f32: { | 
|  | uint32_t Value32 = uint32_t(SplatBits); | 
|  | assert(SplatBitSize == 32 | 
|  | && "LowerBUILD_VECTOR: Unexpected floating point vector element."); | 
|  | // NOTE: pretend the constant is an integer. LLVM won't load FP constants | 
|  | SDValue T = DAG.getConstant(Value32, MVT::i32); | 
|  | return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32, | 
|  | DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T)); | 
|  | break; | 
|  | } | 
|  | case MVT::v2f64: { | 
|  | uint64_t f64val = uint64_t(SplatBits); | 
|  | assert(SplatBitSize == 64 | 
|  | && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes."); | 
|  | // NOTE: pretend the constant is an integer. LLVM won't load FP constants | 
|  | SDValue T = DAG.getConstant(f64val, MVT::i64); | 
|  | return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, | 
|  | DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T)); | 
|  | break; | 
|  | } | 
|  | case MVT::v16i8: { | 
// 8-bit constants have to be expanded to 16 bits
|  | unsigned short Value16 = SplatBits /* | (SplatBits << 8) */; | 
|  | SmallVector<SDValue, 8> Ops; | 
|  |  | 
|  | Ops.assign(8, DAG.getConstant(Value16, MVT::i16)); | 
|  | return DAG.getNode(ISD::BIT_CONVERT, dl, VT, | 
|  | DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size())); | 
|  | } | 
|  | case MVT::v8i16: { | 
|  | unsigned short Value16 = SplatBits; | 
|  | SDValue T = DAG.getConstant(Value16, EltVT); | 
|  | SmallVector<SDValue, 8> Ops; | 
|  |  | 
|  | Ops.assign(8, T); | 
|  | return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size()); | 
|  | } | 
|  | case MVT::v4i32: { | 
|  | SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); | 
|  | return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T); | 
|  | } | 
|  | case MVT::v2i32: { | 
|  | SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType()); | 
|  | return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T); | 
|  | } | 
|  | case MVT::v2i64: { | 
|  | return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl); | 
|  | } | 
|  | } | 
|  |  | 
|  | return SDValue(); | 
|  | } | 
|  |  | 
|  | /*! | 
|  | */ | 
|  | SDValue | 
|  | SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal, | 
|  | DebugLoc dl) { | 
|  | uint32_t upper = uint32_t(SplatVal >> 32); | 
|  | uint32_t lower = uint32_t(SplatVal); | 
|  |  | 
|  | if (upper == lower) { | 
// Magic constant that can be matched by IL, ILA, et al.
|  | SDValue Val = DAG.getTargetConstant(upper, MVT::i32); | 
|  | return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, | 
|  | DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, | 
|  | Val, Val, Val, Val)); | 
|  | } else { | 
|  | bool upper_special, lower_special; | 
|  |  | 
|  | // NOTE: This code creates common-case shuffle masks that can be easily | 
|  | // detected as common expressions. It is not attempting to create highly | 
|  | // specialized masks to replace any and all 0's, 0xff's and 0x80's. | 
|  |  | 
|  | // Detect if the upper or lower half is a special shuffle mask pattern: | 
|  | upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000); | 
|  | lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000); | 
|  |  | 
|  | // Both upper and lower are special, lower to a constant pool load: | 
|  | if (lower_special && upper_special) { | 
|  | SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64); | 
|  | return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, | 
|  | SplatValCN, SplatValCN); | 
|  | } | 
|  |  | 
|  | SDValue LO32; | 
|  | SDValue HI32; | 
|  | SmallVector<SDValue, 16> ShufBytes; | 
|  | SDValue Result; | 
|  |  | 
|  | // Create lower vector if not a special pattern | 
|  | if (!lower_special) { | 
|  | SDValue LO32C = DAG.getConstant(lower, MVT::i32); | 
|  | LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, | 
|  | DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, | 
|  | LO32C, LO32C, LO32C, LO32C)); | 
|  | } | 
|  |  | 
|  | // Create upper vector if not a special pattern | 
|  | if (!upper_special) { | 
|  | SDValue HI32C = DAG.getConstant(upper, MVT::i32); | 
|  | HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT, | 
|  | DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, | 
|  | HI32C, HI32C, HI32C, HI32C)); | 
|  | } | 
|  |  | 
|  | // If either upper or lower are special, then the two input operands are | 
|  | // the same (basically, one of them is a "don't care") | 
|  | if (lower_special) | 
|  | LO32 = HI32; | 
|  | if (upper_special) | 
|  | HI32 = LO32; | 
|  |  | 
|  | for (int i = 0; i < 4; ++i) { | 
|  | uint64_t val = 0; | 
|  | for (int j = 0; j < 4; ++j) { | 
|  | SDValue V; | 
|  | bool process_upper, process_lower; | 
|  | val <<= 8; | 
|  | process_upper = (upper_special && (i & 1) == 0); | 
|  | process_lower = (lower_special && (i & 1) == 1); | 
|  |  | 
|  | if (process_upper || process_lower) { | 
|  | if ((process_upper && upper == 0) | 
|  | || (process_lower && lower == 0)) | 
|  | val |= 0x80; | 
|  | else if ((process_upper && upper == 0xffffffff) | 
|  | || (process_lower && lower == 0xffffffff)) | 
|  | val |= 0xc0; | 
|  | else if ((process_upper && upper == 0x80000000) | 
|  | || (process_lower && lower == 0x80000000)) | 
|  | val |= (j == 0 ? 0xe0 : 0x80); | 
|  | } else | 
|  | val |= i * 4 + j + ((i & 1) * 16); | 
|  | } | 
|  |  | 
|  | ShufBytes.push_back(DAG.getConstant(val, MVT::i32)); | 
|  | } | 
|  |  | 
|  | return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32, | 
|  | DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, | 
|  | &ShufBytes[0], ShufBytes.size())); | 
|  | } | 
|  | } | 
|  |  | 
|  | /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on | 
|  | /// which the Cell can operate. The code inspects V3 to ascertain whether the | 
|  | /// permutation vector, V3, is monotonically increasing with one "exception" | 
|  | /// element, e.g., (0, 1, _, 3). If this is the case, then generate a | 
|  | /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool. | 
|  | /// In either case, the net result is going to eventually invoke SHUFB to | 
|  | /// permute/shuffle the bytes from V1 and V2. | 
|  | /// \note | 
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
/// generate the control word for byte/halfword/word insertion. This takes care
/// of a single element move from V2 into V1.
|  | /// \note | 
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
|  | static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { | 
|  | const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op); | 
|  | SDValue V1 = Op.getOperand(0); | 
|  | SDValue V2 = Op.getOperand(1); | 
|  | DebugLoc dl = Op.getDebugLoc(); | 
|  |  | 
|  | if (V2.getOpcode() == ISD::UNDEF) V2 = V1; | 
|  |  | 
|  | // If we have a single element being moved from V1 to V2, this can be handled | 
|  | // using the C*[DX] compute mask instructions, but the vector elements have | 
|  | // to be monotonically increasing with one exception element. | 
|  | EVT VecVT = V1.getValueType(); | 
|  | EVT EltVT = VecVT.getVectorElementType(); | 
|  | unsigned EltsFromV2 = 0; | 
|  | unsigned V2Elt = 0; | 
|  | unsigned V2EltIdx0 = 0; | 
|  | unsigned CurrElt = 0; | 
|  | unsigned MaxElts = VecVT.getVectorNumElements(); | 
|  | unsigned PrevElt = 0; | 
|  | unsigned V0Elt = 0; | 
|  | bool monotonic = true; | 
|  | bool rotate = true; | 
|  | EVT maskVT;             // which of the c?d instructions to use | 
|  |  | 
|  | if (EltVT == MVT::i8) { | 
|  | V2EltIdx0 = 16; | 
|  | maskVT = MVT::v16i8; | 
|  | } else if (EltVT == MVT::i16) { | 
|  | V2EltIdx0 = 8; | 
|  | maskVT = MVT::v8i16; | 
|  | } else if (EltVT == MVT::i32 || EltVT == MVT::f32) { | 
|  | V2EltIdx0 = 4; | 
|  | maskVT = MVT::v4i32; | 
|  | } else if (EltVT == MVT::i64 || EltVT == MVT::f64) { | 
|  | V2EltIdx0 = 2; | 
|  | maskVT = MVT::v2i64; | 
|  | } else | 
|  | llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE"); | 
|  |  | 
|  | for (unsigned i = 0; i != MaxElts; ++i) { | 
|  | if (SVN->getMaskElt(i) < 0) | 
|  | continue; | 
|  |  | 
|  | unsigned SrcElt = SVN->getMaskElt(i); | 
|  |  | 
|  | if (monotonic) { | 
|  | if (SrcElt >= V2EltIdx0) { | 
if (++EltsFromV2 <= 1) {
|  | V2Elt = (V2EltIdx0 - SrcElt) << 2; | 
|  | } | 
|  | } else if (CurrElt != SrcElt) { | 
|  | monotonic = false; | 
|  | } | 
|  |  | 
|  | ++CurrElt; | 
|  | } | 
|  |  | 
|  | if (rotate) { | 
|  | if (PrevElt > 0 && SrcElt < MaxElts) { | 
|  | if ((PrevElt == SrcElt - 1) | 
|  | || (PrevElt == MaxElts - 1 && SrcElt == 0)) { | 
|  | PrevElt = SrcElt; | 
|  | if (SrcElt == 0) | 
|  | V0Elt = i; | 
|  | } else { | 
|  | rotate = false; | 
|  | } | 
|  | } else if (i == 0) { | 
|  | // First time through, need to keep track of previous element | 
|  | PrevElt = SrcElt; | 
|  | } else { | 
// This isn't a rotation; it takes elements from vector 2.
|  | rotate = false; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | if (EltsFromV2 == 1 && monotonic) { | 
|  | // Compute mask and shuffle | 
|  | EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); | 
|  |  | 
// Since SHUFFLE_MASK becomes a c?d instruction, feed it an address.
// R1 ($sp) is used here only because its low-order bits are guaranteed
// to be zero.
|  | SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, | 
|  | DAG.getRegister(SPU::R1, PtrVT), | 
|  | DAG.getConstant(V2Elt, MVT::i32)); | 
|  | SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, | 
|  | maskVT, Pointer); | 
|  |  | 
|  | // Use shuffle mask in SHUFB synthetic instruction: | 
|  | return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1, | 
|  | ShufMaskOp); | 
|  | } else if (rotate) { | 
|  | int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8; | 
|  |  | 
|  | return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(), | 
|  | V1, DAG.getConstant(rotamt, MVT::i16)); | 
|  | } else { | 
// Convert the VECTOR_SHUFFLE mask's input element units to the
|  | // actual bytes. | 
|  | unsigned BytesPerElement = EltVT.getSizeInBits()/8; | 
|  |  | 
|  | SmallVector<SDValue, 16> ResultMask; | 
|  | for (unsigned i = 0, e = MaxElts; i != e; ++i) { | 
|  | unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i); | 
|  |  | 
|  | for (unsigned j = 0; j < BytesPerElement; ++j) | 
|  | ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8)); | 
|  | } | 
|  |  | 
|  | SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, | 
|  | &ResultMask[0], ResultMask.size()); | 
|  | return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask); | 
|  | } | 
|  | } | 
|  |  | 
|  | static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) { | 
|  | SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar | 
|  | DebugLoc dl = Op.getDebugLoc(); | 
|  |  | 
|  | if (Op0.getNode()->getOpcode() == ISD::Constant) { | 
|  | // For a constant, build the appropriate constant vector, which will | 
|  | // eventually simplify to a vector register load. | 
|  |  | 
|  | ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode()); | 
|  | SmallVector<SDValue, 16> ConstVecValues; | 
|  | EVT VT; | 
|  | size_t n_copies; | 
|  |  | 
|  | // Create a constant vector: | 
|  | switch (Op.getValueType().getSimpleVT().SimpleTy) { | 
|  | default: llvm_unreachable("Unexpected constant value type in " | 
|  | "LowerSCALAR_TO_VECTOR"); | 
|  | case MVT::v16i8: n_copies = 16; VT = MVT::i8; break; | 
|  | case MVT::v8i16: n_copies = 8; VT = MVT::i16; break; | 
|  | case MVT::v4i32: n_copies = 4; VT = MVT::i32; break; | 
|  | case MVT::v4f32: n_copies = 4; VT = MVT::f32; break; | 
|  | case MVT::v2i64: n_copies = 2; VT = MVT::i64; break; | 
|  | case MVT::v2f64: n_copies = 2; VT = MVT::f64; break; | 
|  | } | 
|  |  | 
|  | SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT); | 
|  | for (size_t j = 0; j < n_copies; ++j) | 
|  | ConstVecValues.push_back(CValue); | 
|  |  | 
|  | return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(), | 
|  | &ConstVecValues[0], ConstVecValues.size()); | 
|  | } else { | 
|  | // Otherwise, copy the value from one register to another: | 
|  | switch (Op0.getValueType().getSimpleVT().SimpleTy) { | 
|  | default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR"); | 
|  | case MVT::i8: | 
|  | case MVT::i16: | 
|  | case MVT::i32: | 
|  | case MVT::i64: | 
|  | case MVT::f32: | 
|  | case MVT::f64: | 
|  | return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0); | 
|  | } | 
|  | } | 
|  |  | 
|  | return SDValue(); | 
|  | } | 
|  |  | 
|  | static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { | 
|  | EVT VT = Op.getValueType(); | 
|  | SDValue N = Op.getOperand(0); | 
|  | SDValue Elt = Op.getOperand(1); | 
|  | DebugLoc dl = Op.getDebugLoc(); | 
|  | SDValue retval; | 
|  |  | 
|  | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { | 
|  | // Constant argument: | 
|  | int EltNo = (int) C->getZExtValue(); | 
|  |  | 
|  | // sanity checks: | 
|  | if (VT == MVT::i8 && EltNo >= 16) | 
|  | llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15"); | 
|  | else if (VT == MVT::i16 && EltNo >= 8) | 
|  | llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7"); | 
|  | else if (VT == MVT::i32 && EltNo >= 4) | 
|  | llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4"); | 
|  | else if (VT == MVT::i64 && EltNo >= 2) | 
|  | llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2"); | 
|  |  | 
|  | if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) { | 
|  | // i32 and i64: Element 0 is the preferred slot | 
|  | return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N); | 
|  | } | 
|  |  | 
|  | // Need to generate shuffle mask and extract: | 
|  | int prefslot_begin = -1, prefslot_end = -1; | 
|  | int elt_byte = EltNo * VT.getSizeInBits() / 8; | 
|  |  | 
|  | switch (VT.getSimpleVT().SimpleTy) { | 
|  | default: | 
|  | assert(false && "Invalid value type!"); | 
|  | case MVT::i8: { | 
|  | prefslot_begin = prefslot_end = 3; | 
|  | break; | 
|  | } | 
|  | case MVT::i16: { | 
|  | prefslot_begin = 2; prefslot_end = 3; | 
|  | break; | 
|  | } | 
|  | case MVT::i32: | 
|  | case MVT::f32: { | 
|  | prefslot_begin = 0; prefslot_end = 3; | 
|  | break; | 
|  | } | 
|  | case MVT::i64: | 
|  | case MVT::f64: { | 
|  | prefslot_begin = 0; prefslot_end = 7; | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | assert(prefslot_begin != -1 && prefslot_end != -1 && | 
|  | "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized"); | 
|  |  | 
|  | unsigned int ShufBytes[16] = { | 
|  | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 | 
|  | }; | 
|  | for (int i = 0; i < 16; ++i) { | 
// Zero-fill the upper part of the preferred slot; don't care about the
|  | // other slots: | 
|  | unsigned int mask_val; | 
|  | if (i <= prefslot_end) { | 
|  | mask_val = | 
|  | ((i < prefslot_begin) | 
|  | ? 0x80 | 
|  | : elt_byte + (i - prefslot_begin)); | 
|  |  | 
|  | ShufBytes[i] = mask_val; | 
|  | } else | 
|  | ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)]; | 
|  | } | 
|  |  | 
|  | SDValue ShufMask[4]; | 
|  | for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) { | 
|  | unsigned bidx = i * 4; | 
|  | unsigned int bits = ((ShufBytes[bidx] << 24) | | 
|  | (ShufBytes[bidx+1] << 16) | | 
|  | (ShufBytes[bidx+2] << 8) | | 
|  | ShufBytes[bidx+3]); | 
|  | ShufMask[i] = DAG.getConstant(bits, MVT::i32); | 
|  | } | 
|  |  | 
|  | SDValue ShufMaskVec = | 
|  | DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, | 
|  | &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0])); | 
|  |  | 
|  | retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, | 
|  | DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(), | 
|  | N, N, ShufMaskVec)); | 
|  | } else { | 
|  | // Variable index: Rotate the requested element into slot 0, then replicate | 
|  | // slot 0 across the vector | 
|  | EVT VecVT = N.getValueType(); | 
|  | if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) { | 
|  | report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit" | 
|  | "vector type!"); | 
|  | } | 
|  |  | 
|  | // Make life easier by making sure the index is zero-extended to i32 | 
|  | if (Elt.getValueType() != MVT::i32) | 
|  | Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt); | 
|  |  | 
|  | // Scale the index to a bit/byte shift quantity | 
|  | APInt scaleFactor = | 
|  | APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false); | 
|  | unsigned scaleShift = scaleFactor.logBase2(); | 
|  | SDValue vecShift; | 
|  |  | 
|  | if (scaleShift > 0) { | 
|  | // Scale the shift factor: | 
|  | Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt, | 
|  | DAG.getConstant(scaleShift, MVT::i32)); | 
|  | } | 
|  |  | 
|  | vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt); | 
|  |  | 
|  | // Replicate the bytes starting at byte 0 across the entire vector (for | 
|  | // consistency with the notion of a unified register set) | 
|  | SDValue replicate; | 
|  |  | 
|  | switch (VT.getSimpleVT().SimpleTy) { | 
|  | default: | 
|  | report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector" | 
|  | "type"); | 
|  | /*NOTREACHED*/ | 
|  | case MVT::i8: { | 
|  | SDValue factor = DAG.getConstant(0x00000000, MVT::i32); | 
|  | replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, | 
|  | factor, factor, factor, factor); | 
|  | break; | 
|  | } | 
|  | case MVT::i16: { | 
|  | SDValue factor = DAG.getConstant(0x00010001, MVT::i32); | 
|  | replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, | 
|  | factor, factor, factor, factor); | 
|  | break; | 
|  | } | 
|  | case MVT::i32: | 
|  | case MVT::f32: { | 
|  | SDValue factor = DAG.getConstant(0x00010203, MVT::i32); | 
|  | replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, | 
|  | factor, factor, factor, factor); | 
|  | break; | 
|  | } | 
|  | case MVT::i64: | 
|  | case MVT::f64: { | 
|  | SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32); | 
|  | SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32); | 
|  | replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, | 
|  | loFactor, hiFactor, loFactor, hiFactor); | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, | 
|  | DAG.getNode(SPUISD::SHUFB, dl, VecVT, | 
|  | vecShift, vecShift, replicate)); | 
|  | } | 
|  |  | 
|  | return retval; | 
|  | } | 
|  |  | 
|  | static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { | 
|  | SDValue VecOp = Op.getOperand(0); | 
|  | SDValue ValOp = Op.getOperand(1); | 
|  | SDValue IdxOp = Op.getOperand(2); | 
|  | DebugLoc dl = Op.getDebugLoc(); | 
|  | EVT VT = Op.getValueType(); | 
|  |  | 
// Use index 0 when the lane to insert into is 'undef'.
|  | int64_t Idx=0; | 
|  | if (IdxOp.getOpcode() != ISD::UNDEF) { | 
|  | ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp); | 
|  | assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!"); | 
|  | Idx = (CN->getSExtValue()); | 
|  | } | 
|  |  | 
|  | EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); | 
|  | // Use $sp ($1) because it's always 16-byte aligned and it's available: | 
|  | SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, | 
|  | DAG.getRegister(SPU::R1, PtrVT), | 
|  | DAG.getConstant(Idx, PtrVT)); | 
|  | SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer); | 
|  |  | 
|  | SDValue result = | 
|  | DAG.getNode(SPUISD::SHUFB, dl, VT, | 
|  | DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp), | 
|  | VecOp, | 
|  | DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask)); | 
|  |  | 
|  | return result; | 
|  | } | 
|  |  | 
|  | static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc, | 
|  | const TargetLowering &TLI) | 
|  | { | 
|  | SDValue N0 = Op.getOperand(0);      // Everything has at least one operand | 
|  | DebugLoc dl = Op.getDebugLoc(); | 
|  | EVT ShiftVT = TLI.getShiftAmountTy(); | 
|  |  | 
|  | assert(Op.getValueType() == MVT::i8); | 
|  | switch (Opc) { | 
|  | default: | 
|  | llvm_unreachable("Unhandled i8 math operator"); | 
|  | /*NOTREACHED*/ | 
|  | break; | 
|  | case ISD::ADD: { | 
|  | // 8-bit addition: Promote the arguments up to 16-bits and truncate | 
|  | // the result: | 
|  | SDValue N1 = Op.getOperand(1); | 
|  | N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); | 
|  | N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); | 
|  | return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, | 
|  | DAG.getNode(Opc, dl, MVT::i16, N0, N1)); | 
|  |  | 
|  | } | 
|  |  | 
|  | case ISD::SUB: { | 
|  | // 8-bit subtraction: Promote the arguments up to 16-bits and truncate | 
|  | // the result: | 
|  | SDValue N1 = Op.getOperand(1); | 
|  | N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); | 
|  | N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); | 
|  | return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, | 
|  | DAG.getNode(Opc, dl, MVT::i16, N0, N1)); | 
|  | } | 
|  | case ISD::ROTR: | 
|  | case ISD::ROTL: { | 
|  | SDValue N1 = Op.getOperand(1); | 
|  | EVT N1VT = N1.getValueType(); | 
|  |  | 
|  | N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); | 
|  | if (!N1VT.bitsEq(ShiftVT)) { | 
|  | unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT) | 
|  | ? ISD::ZERO_EXTEND | 
|  | : ISD::TRUNCATE; | 
|  | N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); | 
|  | } | 
|  |  | 
// Replicate the lower 8 bits into the upper 8:
|  | SDValue ExpandArg = | 
|  | DAG.getNode(ISD::OR, dl, MVT::i16, N0, | 
|  | DAG.getNode(ISD::SHL, dl, MVT::i16, | 
|  | N0, DAG.getConstant(8, MVT::i32))); | 
|  |  | 
|  | // Truncate back down to i8 | 
|  | return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, | 
|  | DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1)); | 
|  | } | 
|  | case ISD::SRL: | 
|  | case ISD::SHL: { | 
|  | SDValue N1 = Op.getOperand(1); | 
|  | EVT N1VT = N1.getValueType(); | 
|  |  | 
|  | N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0); | 
|  | if (!N1VT.bitsEq(ShiftVT)) { | 
|  | unsigned N1Opc = ISD::ZERO_EXTEND; | 
|  |  | 
|  | if (N1.getValueType().bitsGT(ShiftVT)) | 
|  | N1Opc = ISD::TRUNCATE; | 
|  |  | 
|  | N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); | 
|  | } | 
|  |  | 
|  | return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, | 
|  | DAG.getNode(Opc, dl, MVT::i16, N0, N1)); | 
|  | } | 
|  | case ISD::SRA: { | 
|  | SDValue N1 = Op.getOperand(1); | 
|  | EVT N1VT = N1.getValueType(); | 
|  |  | 
|  | N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); | 
|  | if (!N1VT.bitsEq(ShiftVT)) { | 
|  | unsigned N1Opc = ISD::SIGN_EXTEND; | 
|  |  | 
|  | if (N1VT.bitsGT(ShiftVT)) | 
|  | N1Opc = ISD::TRUNCATE; | 
|  | N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1); | 
|  | } | 
|  |  | 
|  | return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, | 
|  | DAG.getNode(Opc, dl, MVT::i16, N0, N1)); | 
|  | } | 
|  | case ISD::MUL: { | 
|  | SDValue N1 = Op.getOperand(1); | 
|  |  | 
|  | N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0); | 
|  | N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1); | 
|  | return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, | 
|  | DAG.getNode(Opc, dl, MVT::i16, N0, N1)); | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
|  | return SDValue(); | 
|  | } | 
|  |  | 
|  | //! Lower byte immediate operations for v16i8 vectors: | 
|  | static SDValue | 
|  | LowerByteImmed(SDValue Op, SelectionDAG &DAG) { | 
|  | SDValue ConstVec; | 
|  | SDValue Arg; | 
|  | EVT VT = Op.getValueType(); | 
|  | DebugLoc dl = Op.getDebugLoc(); | 
|  |  | 
|  | ConstVec = Op.getOperand(0); | 
|  | Arg = Op.getOperand(1); | 
|  | if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) { | 
|  | if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) { | 
|  | ConstVec = ConstVec.getOperand(0); | 
|  | } else { | 
|  | ConstVec = Op.getOperand(1); | 
|  | Arg = Op.getOperand(0); | 
|  | if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) { | 
|  | ConstVec = ConstVec.getOperand(0); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) { | 
|  | BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode()); | 
|  | assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed"); | 
|  |  | 
|  | APInt APSplatBits, APSplatUndef; | 
|  | unsigned SplatBitSize; | 
|  | bool HasAnyUndefs; | 
|  | unsigned minSplatBits = VT.getVectorElementType().getSizeInBits(); | 
|  |  | 
|  | if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, | 
|  | HasAnyUndefs, minSplatBits) | 
|  | && minSplatBits <= SplatBitSize) { | 
|  | uint64_t SplatBits = APSplatBits.getZExtValue(); | 
|  | SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8); | 
|  |  | 
|  | SmallVector<SDValue, 16> tcVec; | 
|  | tcVec.assign(16, tc); | 
|  | return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg, | 
|  | DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size())); | 
|  | } | 
|  | } | 
|  |  | 
// These operations (AND, OR, XOR) are legal; they just couldn't be custom
|  | // lowered.  Return the operation, rather than a null SDValue. | 
|  | return Op; | 
|  | } | 
|  |  | 
|  | //! Custom lowering for CTPOP (count population) | 
|  | /*! | 
Custom lowering code that counts the number of ones in the input
|  | operand. SPU has such an instruction, but it counts the number of | 
|  | ones per byte, which then have to be accumulated. | 
|  | */ | 
|  | static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) { | 
|  | EVT VT = Op.getValueType(); | 
|  | EVT vecVT = EVT::getVectorVT(*DAG.getContext(), | 
|  | VT, (128 / VT.getSizeInBits())); | 
|  | DebugLoc dl = Op.getDebugLoc(); | 
|  |  | 
|  | switch (VT.getSimpleVT().SimpleTy) { | 
|  | default: | 
|  | assert(false && "Invalid value type!"); | 
|  | case MVT::i8: { | 
|  | SDValue N = Op.getOperand(0); | 
|  | SDValue Elt0 = DAG.getConstant(0, MVT::i32); | 
|  |  | 
|  | SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); | 
|  | SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); | 
|  |  | 
|  | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0); | 
|  | } | 
|  |  | 
|  | case MVT::i16: { | 
|  | MachineFunction &MF = DAG.getMachineFunction(); | 
|  | MachineRegisterInfo &RegInfo = MF.getRegInfo(); | 
|  |  | 
|  | unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass); | 
|  |  | 
|  | SDValue N = Op.getOperand(0); | 
|  | SDValue Elt0 = DAG.getConstant(0, MVT::i16); | 
|  | SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16); | 
|  | SDValue Shift1 = DAG.getConstant(8, MVT::i32); | 
|  |  | 
|  | SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); | 
|  | SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); | 
|  |  | 
// CNTB_result becomes the chain to which the virtual register CNTB_reg
// becomes associated:
|  | SDValue CNTB_result = | 
|  | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0); | 
|  |  | 
|  | SDValue CNTB_rescopy = | 
|  | DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); | 
|  |  | 
|  | SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16); | 
|  |  | 
|  | return DAG.getNode(ISD::AND, dl, MVT::i16, | 
|  | DAG.getNode(ISD::ADD, dl, MVT::i16, | 
|  | DAG.getNode(ISD::SRL, dl, MVT::i16, | 
|  | Tmp1, Shift1), | 
|  | Tmp1), | 
|  | Mask0); | 
|  | } | 
|  |  | 
|  | case MVT::i32: { | 
|  | MachineFunction &MF = DAG.getMachineFunction(); | 
|  | MachineRegisterInfo &RegInfo = MF.getRegInfo(); | 
|  |  | 
|  | unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); | 
|  | unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass); | 
|  |  | 
|  | SDValue N = Op.getOperand(0); | 
|  | SDValue Elt0 = DAG.getConstant(0, MVT::i32); | 
|  | SDValue Mask0 = DAG.getConstant(0xff, MVT::i32); | 
|  | SDValue Shift1 = DAG.getConstant(16, MVT::i32); | 
|  | SDValue Shift2 = DAG.getConstant(8, MVT::i32); | 
|  |  | 
|  | SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N); | 
|  | SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote); | 
|  |  | 
|  | // CNTB_result becomes the chain to which all of the virtual registers | 
|  | // CNTB_reg, SUM1_reg become associated: | 
|  | SDValue CNTB_result = | 
|  | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0); | 
|  |  | 
|  | SDValue CNTB_rescopy = | 
|  | DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result); | 
|  |  | 
|  | SDValue Comp1 = | 
|  | DAG.getNode(ISD::SRL, dl, MVT::i32, | 
|  | DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32), | 
|  | Shift1); | 
|  |  | 
|  | SDValue Sum1 = | 
|  | DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1, | 
|  | DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32)); | 
|  |  | 
|  | SDValue Sum1_rescopy = | 
|  | DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1); | 
|  |  | 
|  | SDValue Comp2 = | 
|  | DAG.getNode(ISD::SRL, dl, MVT::i32, | 
|  | DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32), | 
|  | Shift2); | 
|  | SDValue Sum2 = | 
|  | DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2, | 
|  | DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32)); | 
|  |  | 
|  | return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0); | 
|  | } | 
|  |  | 
|  | case MVT::i64: | 
|  | break; | 
|  | } | 
|  |  | 
|  | return SDValue(); | 
|  | } | 
|  |  | 
|  | //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32 | 
|  | /*! | 
|  | f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall. | 
|  | All conversions to i64 are expanded to a libcall. | 
|  | */ | 
|  | static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, | 
|  | const SPUTargetLowering &TLI) { | 
|  | EVT OpVT = Op.getValueType(); | 
|  | SDValue Op0 = Op.getOperand(0); | 
|  | EVT Op0VT = Op0.getValueType(); | 
|  |  | 
|  | if ((OpVT == MVT::i32 && Op0VT == MVT::f64) | 
|  | || OpVT == MVT::i64) { | 
|  | // Convert f32 / f64 to i32 / i64 via libcall. | 
|  | RTLIB::Libcall LC = | 
|  | (Op.getOpcode() == ISD::FP_TO_SINT) | 
|  | ? RTLIB::getFPTOSINT(Op0VT, OpVT) | 
|  | : RTLIB::getFPTOUINT(Op0VT, OpVT); | 
|  | assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!"); | 
|  | SDValue Dummy; | 
|  | return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); | 
|  | } | 
|  |  | 
|  | return Op; | 
|  | } | 
|  |  | 
|  | //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32 | 
|  | /*! | 
|  | i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall. | 
|  | All conversions from i64 are expanded to a libcall. | 
|  | */ | 
|  | static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, | 
|  | const SPUTargetLowering &TLI) { | 
|  | EVT OpVT = Op.getValueType(); | 
|  | SDValue Op0 = Op.getOperand(0); | 
|  | EVT Op0VT = Op0.getValueType(); | 
|  |  | 
|  | if ((OpVT == MVT::f64 && Op0VT == MVT::i32) | 
|  | || Op0VT == MVT::i64) { | 
|  | // Convert i32, i64 to f64 via libcall: | 
|  | RTLIB::Libcall LC = | 
|  | (Op.getOpcode() == ISD::SINT_TO_FP) | 
|  | ? RTLIB::getSINTTOFP(Op0VT, OpVT) | 
|  | : RTLIB::getUINTTOFP(Op0VT, OpVT); | 
|  | assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!"); | 
|  | SDValue Dummy; | 
|  | return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI); | 
|  | } | 
|  |  | 
|  | return Op; | 
|  | } | 
|  |  | 
|  | //! Lower ISD::SETCC | 
|  | /*! | 
|  | This handles MVT::f64 (double floating point) condition lowering | 
|  | */ | 
|  | static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG, | 
|  | const TargetLowering &TLI) { | 
|  | CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2)); | 
|  | DebugLoc dl = Op.getDebugLoc(); | 
|  | assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n"); | 
|  |  | 
|  | SDValue lhs = Op.getOperand(0); | 
|  | SDValue rhs = Op.getOperand(1); | 
|  | EVT lhsVT = lhs.getValueType(); | 
|  | assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n"); | 
|  |  | 
|  | EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType()); | 
|  | EVT IntVT(MVT::i64); | 
|  |  | 
// Take advantage of the fact that (truncate (srl arg, 32)) is efficiently
|  | // selected to a NOP: | 
|  | SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs); | 
|  | SDValue lhsHi32 = | 
|  | DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, | 
|  | DAG.getNode(ISD::SRL, dl, IntVT, | 
|  | i64lhs, DAG.getConstant(32, MVT::i32))); | 
|  | SDValue lhsHi32abs = | 
|  | DAG.getNode(ISD::AND, dl, MVT::i32, | 
|  | lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32)); | 
|  | SDValue lhsLo32 = | 
|  | DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs); | 
|  |  | 
|  | // SETO and SETUO only use the lhs operand: | 
|  | if (CC->get() == ISD::SETO) { | 
|  | // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of | 
|  | // SETUO | 
|  | APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits()); | 
|  | return DAG.getNode(ISD::XOR, dl, ccResultVT, | 
|  | DAG.getSetCC(dl, ccResultVT, | 
|  | lhs, DAG.getConstantFP(0.0, lhsVT), | 
|  | ISD::SETUO), | 
|  | DAG.getConstant(ccResultAllOnes, ccResultVT)); | 
|  | } else if (CC->get() == ISD::SETUO) { | 
|  | // Evaluates to true if Op0 is [SQ]NaN | 
|  | return DAG.getNode(ISD::AND, dl, ccResultVT, | 
|  | DAG.getSetCC(dl, ccResultVT, | 
|  | lhsHi32abs, | 
|  | DAG.getConstant(0x7ff00000, MVT::i32), | 
|  | ISD::SETGE), | 
|  | DAG.getSetCC(dl, ccResultVT, | 
|  | lhsLo32, | 
|  | DAG.getConstant(0, MVT::i32), | 
|  | ISD::SETGT)); | 
|  | } | 
|  |  | 
|  | SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs); | 
|  | SDValue rhsHi32 = | 
|  | DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, | 
|  | DAG.getNode(ISD::SRL, dl, IntVT, | 
|  | i64rhs, DAG.getConstant(32, MVT::i32))); | 
|  |  | 
|  | // If a value is negative, subtract from the sign magnitude constant: | 
|  | SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT); | 
|  |  | 
|  | // Convert the sign-magnitude representation into 2's complement: | 
|  | SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, | 
|  | lhsHi32, DAG.getConstant(31, MVT::i32)); | 
|  | SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs); | 
|  | SDValue lhsSelect = | 
|  | DAG.getNode(ISD::SELECT, dl, IntVT, | 
|  | lhsSelectMask, lhsSignMag2TC, i64lhs); | 
|  |  | 
|  | SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT, | 
|  | rhsHi32, DAG.getConstant(31, MVT::i32)); | 
|  | SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs); | 
|  | SDValue rhsSelect = | 
|  | DAG.getNode(ISD::SELECT, dl, IntVT, | 
|  | rhsSelectMask, rhsSignMag2TC, i64rhs); | 
|  |  | 
|  | unsigned compareOp; | 
|  |  | 
|  | switch (CC->get()) { | 
|  | case ISD::SETOEQ: | 
|  | case ISD::SETUEQ: | 
|  | compareOp = ISD::SETEQ; break; | 
|  | case ISD::SETOGT: | 
|  | case ISD::SETUGT: | 
|  | compareOp = ISD::SETGT; break; | 
|  | case ISD::SETOGE: | 
|  | case ISD::SETUGE: | 
|  | compareOp = ISD::SETGE; break; | 
|  | case ISD::SETOLT: | 
|  | case ISD::SETULT: | 
|  | compareOp = ISD::SETLT; break; | 
|  | case ISD::SETOLE: | 
|  | case ISD::SETULE: | 
|  | compareOp = ISD::SETLE; break; | 
|  | case ISD::SETUNE: | 
|  | case ISD::SETONE: | 
|  | compareOp = ISD::SETNE; break; | 
|  | default: | 
|  | report_fatal_error("CellSPU ISel Select: unimplemented f64 condition"); | 
|  | } | 
|  |  | 
|  | SDValue result = | 
|  | DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect, | 
|  | (ISD::CondCode) compareOp); | 
|  |  | 
|  | if ((CC->get() & 0x8) == 0) { | 
|  | // Ordered comparison: | 
|  | SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT, | 
|  | lhs, DAG.getConstantFP(0.0, MVT::f64), | 
|  | ISD::SETO); | 
|  | SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT, | 
|  | rhs, DAG.getConstantFP(0.0, MVT::f64), | 
|  | ISD::SETO); | 
|  | SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN); | 
|  |  | 
|  | result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result); | 
|  | } | 
|  |  | 
|  | return result; | 
|  | } | 
|  |  | 
|  | //! Lower ISD::SELECT_CC | 
|  | /*! | 
|  | ISD::SELECT_CC can (generally) be implemented directly on the SPU using the | 
|  | SELB instruction. | 
|  |  | 
|  | \note Need to revisit this in the future: if the code path through the true | 
|  | and false value computations is longer than the latency of a branch (6 | 
|  | cycles), then it would be more advantageous to branch and insert a new basic | 
|  | block and branch on the condition. However, this code does not make that | 
assumption, given the simplistic uses so far.
|  | */ | 
|  |  | 
|  | static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG, | 
|  | const TargetLowering &TLI) { | 
|  | EVT VT = Op.getValueType(); | 
|  | SDValue lhs = Op.getOperand(0); | 
|  | SDValue rhs = Op.getOperand(1); | 
|  | SDValue trueval = Op.getOperand(2); | 
|  | SDValue falseval = Op.getOperand(3); | 
|  | SDValue condition = Op.getOperand(4); | 
|  | DebugLoc dl = Op.getDebugLoc(); | 
|  |  | 
// NOTE: SELB's arguments: $rA, $rB, $mask
//
// SELB selects bits from $rA where bits in $mask are 0, and bits from $rB
// where bits in $mask are 1. The SETCC result has 1s where the condition
// was true and 0s where it was false, so the true value must be fed to
// SELB as $rB and the false value as $rA; hence, the arguments appear
// reversed below.
|  |  | 
|  | // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's | 
|  | // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up | 
|  | // with another "cannot select select_cc" assert: | 
|  |  | 
|  | SDValue compare = DAG.getNode(ISD::SETCC, dl, | 
|  | TLI.getSetCCResultType(Op.getValueType()), | 
|  | lhs, rhs, condition); | 
|  | return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare); | 
|  | } | 
|  |  | 
|  | //! Custom lower ISD::TRUNCATE | 
|  | static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) | 
|  | { | 
|  | // Type to truncate to | 
|  | EVT VT = Op.getValueType(); | 
|  | MVT simpleVT = VT.getSimpleVT(); | 
|  | EVT VecVT = EVT::getVectorVT(*DAG.getContext(), | 
|  | VT, (128 / VT.getSizeInBits())); | 
|  | DebugLoc dl = Op.getDebugLoc(); | 
|  |  | 
|  | // Type to truncate from | 
|  | SDValue Op0 = Op.getOperand(0); | 
|  | EVT Op0VT = Op0.getValueType(); | 
|  |  | 
|  | if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) { | 
|  | // Create shuffle mask, least significant doubleword of quadword | 
|  | unsigned maskHigh = 0x08090a0b; | 
|  | unsigned maskLow = 0x0c0d0e0f; | 
|  | // Use a shuffle to perform the truncation | 
|  | SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, | 
|  | DAG.getConstant(maskHigh, MVT::i32), | 
|  | DAG.getConstant(maskLow, MVT::i32), | 
|  | DAG.getConstant(maskHigh, MVT::i32), | 
|  | DAG.getConstant(maskLow, MVT::i32)); | 
|  |  | 
|  | SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT, | 
|  | Op0, Op0, shufMask); | 
|  |  | 
|  | return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle); | 
|  | } | 
|  |  | 
|  | return SDValue();             // Leave the truncate unmolested | 
|  | } | 
|  |  | 
|  | /*! | 
|  | * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic | 
|  | * algorithm is to duplicate the sign bit using rotmai to generate at | 
|  | * least one byte full of sign bits. Then propagate the "sign-byte" into | 
|  | * the leftmost words and the i64/i32 into the rightmost words using shufb. | 
|  | * | 
|  | * @param Op The sext operand | 
|  | * @param DAG The current DAG | 
|  | * @return The SDValue with the entire instruction sequence | 
|  | */ | 
|  | static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) | 
|  | { | 
|  | DebugLoc dl = Op.getDebugLoc(); | 
|  |  | 
|  | // Type to extend to | 
|  | MVT OpVT = Op.getValueType().getSimpleVT(); | 
|  |  | 
|  | // Type to extend from | 
|  | SDValue Op0 = Op.getOperand(0); | 
|  | MVT Op0VT = Op0.getValueType().getSimpleVT(); | 
|  |  | 
|  | // The type to extend to needs to be a i128 and | 
|  | // the type to extend from needs to be i64 or i32. | 
|  | assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) && | 
|  | "LowerSIGN_EXTEND: input and/or output operand have wrong size"); | 
|  |  | 
|  | // Create shuffle mask | 
|  | unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7 | 
|  | unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11 | 
|  | unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15 | 
|  | SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, | 
|  | DAG.getConstant(mask1, MVT::i32), | 
|  | DAG.getConstant(mask1, MVT::i32), | 
|  | DAG.getConstant(mask2, MVT::i32), | 
|  | DAG.getConstant(mask3, MVT::i32)); | 
|  |  | 
|  | // Word wise arithmetic right shift to generate at least one byte | 
|  | // that contains sign bits. | 
|  | MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32; | 
|  | SDValue sraVal = DAG.getNode(ISD::SRA, | 
|  | dl, | 
|  | mvt, | 
|  | DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0), | 
|  | DAG.getConstant(31, MVT::i32)); | 
|  |  | 
|  | // Shuffle bytes - Copy the sign bits into the upper 64 bits | 
|  | // and the input value into the lower 64 bits. | 
|  | SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt, | 
|  | DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask); | 
|  |  | 
|  | return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle); | 
|  | } | 
|  |  | 
|  | //! Custom (target-specific) lowering entry point | 
|  | /*! | 
|  | This is where LLVM's DAG selection process calls to do target-specific | 
|  | lowering of nodes. | 
|  | */ | 
|  | SDValue | 
|  | SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const | 
|  | { | 
|  | unsigned Opc = (unsigned) Op.getOpcode(); | 
|  | EVT VT = Op.getValueType(); | 
|  |  | 
|  | switch (Opc) { | 
|  | default: { | 
|  | #ifndef NDEBUG | 
|  | errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n"; | 
|  | errs() << "Op.getOpcode() = " << Opc << "\n"; | 
|  | errs() << "*Op.getNode():\n"; | 
|  | Op.getNode()->dump(); | 
|  | #endif | 
|  | llvm_unreachable(0); | 
|  | } | 
|  | case ISD::LOAD: | 
|  | case ISD::EXTLOAD: | 
|  | case ISD::SEXTLOAD: | 
|  | case ISD::ZEXTLOAD: | 
|  | return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl()); | 
|  | case ISD::STORE: | 
|  | return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl()); | 
|  | case ISD::ConstantPool: | 
|  | return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl()); | 
|  | case ISD::GlobalAddress: | 
|  | return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl()); | 
|  | case ISD::JumpTable: | 
|  | return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl()); | 
|  | case ISD::ConstantFP: | 
|  | return LowerConstantFP(Op, DAG); | 
|  |  | 
|  | // i8, i64 math ops: | 
|  | case ISD::ADD: | 
|  | case ISD::SUB: | 
|  | case ISD::ROTR: | 
|  | case ISD::ROTL: | 
|  | case ISD::SRL: | 
|  | case ISD::SHL: | 
|  | case ISD::SRA: { | 
|  | if (VT == MVT::i8) | 
|  | return LowerI8Math(Op, DAG, Opc, *this); | 
|  | break; | 
|  | } | 
|  |  | 
|  | case ISD::FP_TO_SINT: | 
|  | case ISD::FP_TO_UINT: | 
|  | return LowerFP_TO_INT(Op, DAG, *this); | 
|  |  | 
|  | case ISD::SINT_TO_FP: | 
|  | case ISD::UINT_TO_FP: | 
|  | return LowerINT_TO_FP(Op, DAG, *this); | 
|  |  | 
|  | // Vector-related lowering. | 
|  | case ISD::BUILD_VECTOR: | 
|  | return LowerBUILD_VECTOR(Op, DAG); | 
|  | case ISD::SCALAR_TO_VECTOR: | 
|  | return LowerSCALAR_TO_VECTOR(Op, DAG); | 
|  | case ISD::VECTOR_SHUFFLE: | 
|  | return LowerVECTOR_SHUFFLE(Op, DAG); | 
|  | case ISD::EXTRACT_VECTOR_ELT: | 
|  | return LowerEXTRACT_VECTOR_ELT(Op, DAG); | 
|  | case ISD::INSERT_VECTOR_ELT: | 
|  | return LowerINSERT_VECTOR_ELT(Op, DAG); | 
|  |  | 
|  | // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately: | 
|  | case ISD::AND: | 
|  | case ISD::OR: | 
|  | case ISD::XOR: | 
|  | return LowerByteImmed(Op, DAG); | 
|  |  | 
|  | // Vector and i8 multiply: | 
|  | case ISD::MUL: | 
if (VT == MVT::i8)
return LowerI8Math(Op, DAG, Opc, *this);
break;    // Don't fall through into the CTPOP lowering below.

|  | case ISD::CTPOP: | 
|  | return LowerCTPOP(Op, DAG); | 
|  |  | 
|  | case ISD::SELECT_CC: | 
|  | return LowerSELECT_CC(Op, DAG, *this); | 
|  |  | 
|  | case ISD::SETCC: | 
|  | return LowerSETCC(Op, DAG, *this); | 
|  |  | 
|  | case ISD::TRUNCATE: | 
|  | return LowerTRUNCATE(Op, DAG); | 
|  |  | 
|  | case ISD::SIGN_EXTEND: | 
|  | return LowerSIGN_EXTEND(Op, DAG); | 
|  | } | 
|  |  | 
|  | return SDValue(); | 
|  | } | 
|  |  | 
|  | void SPUTargetLowering::ReplaceNodeResults(SDNode *N, | 
|  | SmallVectorImpl<SDValue>&Results, | 
|  | SelectionDAG &DAG) const | 
|  | { | 
|  | #if 0 | 
|  | unsigned Opc = (unsigned) N->getOpcode(); | 
|  | EVT OpVT = N->getValueType(0); | 
|  |  | 
|  | switch (Opc) { | 
|  | default: { | 
|  | errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n"; | 
|  | errs() << "Op.getOpcode() = " << Opc << "\n"; | 
|  | errs() << "*Op.getNode():\n"; | 
|  | N->dump(); | 
|  | abort(); | 
|  | /*NOTREACHED*/ | 
|  | } | 
|  | } | 
|  | #endif | 
|  |  | 
|  | /* Otherwise, return unchanged */ | 
|  | } | 
|  |  | 
|  | //===----------------------------------------------------------------------===// | 
|  | // Target Optimization Hooks | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | SDValue | 
|  | SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const | 
|  | { | 
|  | #if 0 | 
|  | TargetMachine &TM = getTargetMachine(); | 
|  | #endif | 
|  | const SPUSubtarget *ST = SPUTM.getSubtargetImpl(); | 
|  | SelectionDAG &DAG = DCI.DAG; | 
|  | SDValue Op0 = N->getOperand(0);       // everything has at least one operand | 
|  | EVT NodeVT = N->getValueType(0);      // The node's value type | 
|  | EVT Op0VT = Op0.getValueType();       // The first operand's result | 
|  | SDValue Result;                       // Initially, empty result | 
|  | DebugLoc dl = N->getDebugLoc(); | 
|  |  | 
|  | switch (N->getOpcode()) { | 
|  | default: break; | 
|  | case ISD::ADD: { | 
|  | SDValue Op1 = N->getOperand(1); | 
|  |  | 
|  | if (Op0.getOpcode() == SPUISD::IndirectAddr | 
|  | || Op1.getOpcode() == SPUISD::IndirectAddr) { | 
|  | // Normalize the operands to reduce repeated code | 
|  | SDValue IndirectArg = Op0, AddArg = Op1; | 
|  |  | 
|  | if (Op1.getOpcode() == SPUISD::IndirectAddr) { | 
|  | IndirectArg = Op1; | 
|  | AddArg = Op0; | 
|  | } | 
|  |  | 
|  | if (isa<ConstantSDNode>(AddArg)) { | 
|  | ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg); | 
|  | SDValue IndOp1 = IndirectArg.getOperand(1); | 
|  |  | 
|  | if (CN0->isNullValue()) { | 
|  | // (add (SPUindirect <arg>, <arg>), 0) -> | 
|  | // (SPUindirect <arg>, <arg>) | 
|  |  | 
|  | #if !defined(NDEBUG) | 
|  | if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { | 
|  | errs() << "\n" | 
|  | << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n" | 
|  | << "With:    (SPUindirect <arg>, <arg>)\n"; | 
|  | } | 
|  | #endif | 
|  |  | 
|  | return IndirectArg; | 
|  | } else if (isa<ConstantSDNode>(IndOp1)) { | 
|  | // (add (SPUindirect <arg>, <const>), <const>) -> | 
|  | // (SPUindirect <arg>, <const + const>) | 
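// Concretely, (add (SPUindirect R, 16), 4) folds to (SPUindirect R, 20).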
ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
|  | int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue(); | 
|  | SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT); | 
|  |  | 
|  | #if !defined(NDEBUG) | 
|  | if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { | 
|  | errs() << "\n" | 
|  | << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue() | 
|  | << "), " << CN0->getSExtValue() << ")\n" | 
|  | << "With:    (SPUindirect <arg>, " | 
|  | << combinedConst << ")\n"; | 
|  | } | 
|  | #endif | 
|  |  | 
|  | return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, | 
|  | IndirectArg, combinedValue); | 
|  | } | 
|  | } | 
|  | } | 
|  | break; | 
|  | } | 
|  | case ISD::SIGN_EXTEND: | 
|  | case ISD::ZERO_EXTEND: | 
|  | case ISD::ANY_EXTEND: { | 
|  | if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) { | 
|  | // (any_extend (SPUextract_elt0 <arg>)) -> | 
|  | // (SPUextract_elt0 <arg>) | 
// The fold is only valid when the extend and extract types match.
|  | #if !defined(NDEBUG) | 
|  | if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { | 
|  | errs() << "\nReplace: "; | 
|  | N->dump(&DAG); | 
|  | errs() << "\nWith:    "; | 
|  | Op0.getNode()->dump(&DAG); | 
|  | errs() << "\n"; | 
|  | } | 
|  | #endif | 
|  |  | 
|  | return Op0; | 
|  | } | 
|  | break; | 
|  | } | 
|  | case SPUISD::IndirectAddr: { | 
|  | if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) { | 
|  | ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)); | 
|  | if (CN != 0 && CN->isNullValue()) { | 
|  | // (SPUindirect (SPUaform <addr>, 0), 0) -> | 
|  | // (SPUaform <addr>, 0) | 
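// An A-form address is already absolute, so a zero-displacement
// indirection through it is redundant.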
|  |  | 
|  | DEBUG(errs() << "Replace: "); | 
|  | DEBUG(N->dump(&DAG)); | 
|  | DEBUG(errs() << "\nWith:    "); | 
|  | DEBUG(Op0.getNode()->dump(&DAG)); | 
|  | DEBUG(errs() << "\n"); | 
|  |  | 
|  | return Op0; | 
|  | } | 
|  | } else if (Op0.getOpcode() == ISD::ADD) { | 
|  | SDValue Op1 = N->getOperand(1); | 
|  | if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) { | 
|  | // (SPUindirect (add <arg>, <arg>), 0) -> | 
|  | // (SPUindirect <arg>, <arg>) | 
|  | if (CN1->isNullValue()) { | 
|  |  | 
|  | #if !defined(NDEBUG) | 
|  | if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) { | 
|  | errs() << "\n" | 
|  | << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n" | 
|  | << "With:    (SPUindirect <arg>, <arg>)\n"; | 
|  | } | 
|  | #endif | 
|  |  | 
|  | return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT, | 
|  | Op0.getOperand(0), Op0.getOperand(1)); | 
|  | } | 
|  | } | 
|  | } | 
|  | break; | 
|  | } | 
|  | case SPUISD::SHLQUAD_L_BITS: | 
|  | case SPUISD::SHLQUAD_L_BYTES: | 
|  | case SPUISD::ROTBYTES_LEFT: { | 
|  | SDValue Op1 = N->getOperand(1); | 
|  |  | 
// Remove degenerate (zero-amount) vector shifts, which are the identity:
|  | if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) { | 
|  | if (CN->isNullValue()) { | 
|  | Result = Op0; | 
|  | } | 
|  | } | 
|  | break; | 
|  | } | 
|  | case SPUISD::PREFSLOT2VEC: { | 
|  | switch (Op0.getOpcode()) { | 
|  | default: | 
|  | break; | 
|  | case ISD::ANY_EXTEND: | 
|  | case ISD::ZERO_EXTEND: | 
|  | case ISD::SIGN_EXTEND: { | 
|  | // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) -> | 
|  | // <arg> | 
|  | // but only if the SPUprefslot2vec and <arg> types match. | 
|  | SDValue Op00 = Op0.getOperand(0); | 
|  | if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) { | 
|  | SDValue Op000 = Op00.getOperand(0); | 
|  | if (Op000.getValueType() == NodeVT) { | 
|  | Result = Op000; | 
|  | } | 
|  | } | 
|  | break; | 
|  | } | 
|  | case SPUISD::VEC2PREFSLOT: { | 
|  | // (SPUprefslot2vec (SPUvec2prefslot <arg>)) -> | 
|  | // <arg> | 
|  | Result = Op0.getOperand(0); | 
|  | break; | 
|  | } | 
|  | } | 
|  | break; | 
|  | } | 
|  | } | 
|  |  | 
// Report the replacement, if any; an empty Result leaves N unchanged.
|  | #ifndef NDEBUG | 
|  | if (Result.getNode()) { | 
|  | DEBUG(errs() << "\nReplace.SPU: "); | 
|  | DEBUG(N->dump(&DAG)); | 
|  | DEBUG(errs() << "\nWith:        "); | 
|  | DEBUG(Result.getNode()->dump(&DAG)); | 
|  | DEBUG(errs() << "\n"); | 
|  | } | 
|  | #endif | 
|  |  | 
|  | return Result; | 
|  | } | 
|  |  | 
|  | //===----------------------------------------------------------------------===// | 
|  | // Inline Assembly Support | 
|  | //===----------------------------------------------------------------------===// | 
|  |  | 
|  | /// getConstraintType - Given a constraint letter, return the type of | 
|  | /// constraint it is for this target. | 
|  | SPUTargetLowering::ConstraintType | 
|  | SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const { | 
|  | if (ConstraintLetter.size() == 1) { | 
|  | switch (ConstraintLetter[0]) { | 
|  | default: break; | 
|  | case 'b': | 
|  | case 'r': | 
|  | case 'f': | 
|  | case 'v': | 
|  | case 'y': | 
|  | return C_RegisterClass; | 
|  | } | 
|  | } | 
|  | return TargetLowering::getConstraintType(ConstraintLetter); | 
|  | } | 
|  |  | 
|  | std::pair<unsigned, const TargetRegisterClass*> | 
|  | SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, | 
|  | EVT VT) const | 
|  | { | 
|  | if (Constraint.size() == 1) { | 
// Constraint letters, following the GCC RS6000/PowerPC conventions:
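// For example, an "r"-constrained i32 operand is allocated from R32C,
// while the same constraint on an i64 operand selects R64C.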
|  | switch (Constraint[0]) { | 
case 'b':   // base register (RS6000 R1-R31)
case 'r':   // general register (RS6000 R0-R31)
|  | if (VT == MVT::i64) | 
|  | return std::make_pair(0U, SPU::R64CRegisterClass); | 
|  | return std::make_pair(0U, SPU::R32CRegisterClass); | 
|  | case 'f': | 
|  | if (VT == MVT::f32) | 
|  | return std::make_pair(0U, SPU::R32FPRegisterClass); | 
|  | else if (VT == MVT::f64) | 
|  | return std::make_pair(0U, SPU::R64FPRegisterClass); | 
|  | break; | 
|  | case 'v': | 
|  | return std::make_pair(0U, SPU::GPRCRegisterClass); | 
|  | } | 
|  | } | 
|  |  | 
|  | return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); | 
|  | } | 
|  |  | 
|  | //! Compute used/known bits for a SPU operand | 
|  | void | 
|  | SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, | 
|  | const APInt &Mask, | 
|  | APInt &KnownZero, | 
|  | APInt &KnownOne, | 
|  | const SelectionDAG &DAG, | 
unsigned Depth) const {
// Currently no SPU-specific known-bits information is computed: KnownZero
// and KnownOne are left untouched, so all bits of target nodes (SHUFB,
// CNTB, the preferred-slot moves, the quad-word shifts/rotates, SELB,
// etc.) are treated as unknown.
|  | } | 
|  |  | 
|  | unsigned | 
|  | SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op, | 
|  | unsigned Depth) const { | 
|  | switch (Op.getOpcode()) { | 
|  | default: | 
|  | return 1; | 
|  |  | 
case ISD::SETCC: {
// SPU comparisons yield an all-ones or all-zeros value, so every bit of
// the (at least 32-bit wide) result is a copy of the sign bit.
EVT VT = Op.getValueType();
|  |  | 
|  | if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) { | 
|  | VT = MVT::i32; | 
|  | } | 
|  | return VT.getSizeInBits(); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | // LowerAsmOperandForConstraint | 
|  | void | 
|  | SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op, | 
|  | char ConstraintLetter, | 
|  | std::vector<SDValue> &Ops, | 
|  | SelectionDAG &DAG) const { | 
|  | // Default, for the time being, to the base class handler | 
|  | TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG); | 
|  | } | 
|  |  | 
|  | /// isLegalAddressImmediate - Return true if the integer value can be used | 
|  | /// as the offset of the target addressing mode. | 
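/// For example, V == 8192 is a legal offset, while V == 300000 exceeds
/// the 18-bit signed range and is not.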
|  | bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, | 
|  | const Type *Ty) const { | 
// The SPU local store is 256K, so address immediates fit in an 18-bit
// signed range (1 << 18 == 262144):
return (V > -(1 << 18) && V < (1 << 18) - 1);
|  | } | 
|  |  | 
|  | bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { | 
|  | return false; | 
|  | } | 
|  |  | 
|  | bool | 
|  | SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { | 
|  | // The SPU target isn't yet aware of offsets. | 
|  | return false; | 
|  | } |