blob: 42bfab148c92dfd8b43530108c578a48f8ff9bd6 [file] [log] [blame]
Justin Holewinskiae556d32012-05-04 20:18:50 +00001//
2// The LLVM Compiler Infrastructure
3//
4// This file is distributed under the University of Illinois Open Source
5// License. See LICENSE.TXT for details.
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that NVPTX uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
Justin Holewinskiae556d32012-05-04 20:18:50 +000014#include "NVPTXISelLowering.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000015#include "NVPTX.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000016#include "NVPTXTargetMachine.h"
17#include "NVPTXTargetObjectFile.h"
18#include "NVPTXUtilities.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000019#include "llvm/CodeGen/Analysis.h"
20#include "llvm/CodeGen/MachineFrameInfo.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/CodeGen/MachineInstrBuilder.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000024#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000025#include "llvm/IR/DerivedTypes.h"
26#include "llvm/IR/Function.h"
27#include "llvm/IR/GlobalValue.h"
28#include "llvm/IR/IntrinsicInst.h"
29#include "llvm/IR/Intrinsics.h"
30#include "llvm/IR/Module.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000031#include "llvm/MC/MCSectionELF.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000032#include "llvm/Support/CallSite.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/Debug.h"
35#include "llvm/Support/ErrorHandling.h"
36#include "llvm/Support/raw_ostream.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000037#include <sstream>
38
39#undef DEBUG_TYPE
40#define DEBUG_TYPE "nvptx-lower"
41
42using namespace llvm;
43
44static unsigned int uniqueCallSite = 0;
45
Justin Holewinski0497ab12013-03-30 14:29:21 +000046static cl::opt<bool> sched4reg(
47 "nvptx-sched4reg",
48 cl::desc("NVPTX Specific: schedule for register pressue"), cl::init(false));
Justin Holewinskiae556d32012-05-04 20:18:50 +000049
Justin Holewinskibe8dc642013-02-12 14:18:49 +000050static bool IsPTXVectorType(MVT VT) {
51 switch (VT.SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +000052 default:
53 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +000054 case MVT::v2i8:
55 case MVT::v4i8:
56 case MVT::v2i16:
57 case MVT::v4i16:
58 case MVT::v2i32:
59 case MVT::v4i32:
60 case MVT::v2i64:
61 case MVT::v2f32:
62 case MVT::v4f32:
63 case MVT::v2f64:
Justin Holewinski0497ab12013-03-30 14:29:21 +000064 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +000065 }
66}
67
Justin Holewinskiae556d32012-05-04 20:18:50 +000068// NVPTXTargetLowering Constructor.
69NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
Justin Holewinski0497ab12013-03-30 14:29:21 +000070 : TargetLowering(TM, new NVPTXTargetObjectFile()), nvTM(&TM),
71 nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
Justin Holewinskiae556d32012-05-04 20:18:50 +000072
73 // always lower memset, memcpy, and memmove intrinsics to load/store
74 // instructions, rather
75 // then generating calls to memset, mempcy or memmove.
Justin Holewinski0497ab12013-03-30 14:29:21 +000076 MaxStoresPerMemset = (unsigned) 0xFFFFFFFF;
77 MaxStoresPerMemcpy = (unsigned) 0xFFFFFFFF;
78 MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF;
Justin Holewinskiae556d32012-05-04 20:18:50 +000079
80 setBooleanContents(ZeroOrNegativeOneBooleanContent);
81
82 // Jump is Expensive. Don't create extra control flow for 'and', 'or'
83 // condition branches.
84 setJumpIsExpensive(true);
85
86 // By default, use the Source scheduling
87 if (sched4reg)
88 setSchedulingPreference(Sched::RegPressure);
89 else
90 setSchedulingPreference(Sched::Source);
91
92 addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
93 addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass);
94 addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
95 addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
96 addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
97 addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
98 addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);
99
Justin Holewinskiae556d32012-05-04 20:18:50 +0000100 // Operations not directly supported by NVPTX.
Justin Holewinski0497ab12013-03-30 14:29:21 +0000101 setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
102 setOperationAction(ISD::BR_CC, MVT::f32, Expand);
103 setOperationAction(ISD::BR_CC, MVT::f64, Expand);
104 setOperationAction(ISD::BR_CC, MVT::i1, Expand);
105 setOperationAction(ISD::BR_CC, MVT::i8, Expand);
106 setOperationAction(ISD::BR_CC, MVT::i16, Expand);
107 setOperationAction(ISD::BR_CC, MVT::i32, Expand);
108 setOperationAction(ISD::BR_CC, MVT::i64, Expand);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000109 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
110 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
111 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000112 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
113 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000114
115 if (nvptxSubtarget.hasROT64()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000116 setOperationAction(ISD::ROTL, MVT::i64, Legal);
117 setOperationAction(ISD::ROTR, MVT::i64, Legal);
118 } else {
119 setOperationAction(ISD::ROTL, MVT::i64, Expand);
120 setOperationAction(ISD::ROTR, MVT::i64, Expand);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000121 }
122 if (nvptxSubtarget.hasROT32()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000123 setOperationAction(ISD::ROTL, MVT::i32, Legal);
124 setOperationAction(ISD::ROTR, MVT::i32, Legal);
125 } else {
126 setOperationAction(ISD::ROTL, MVT::i32, Expand);
127 setOperationAction(ISD::ROTR, MVT::i32, Expand);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000128 }
129
Justin Holewinski0497ab12013-03-30 14:29:21 +0000130 setOperationAction(ISD::ROTL, MVT::i16, Expand);
131 setOperationAction(ISD::ROTR, MVT::i16, Expand);
132 setOperationAction(ISD::ROTL, MVT::i8, Expand);
133 setOperationAction(ISD::ROTR, MVT::i8, Expand);
134 setOperationAction(ISD::BSWAP, MVT::i16, Expand);
135 setOperationAction(ISD::BSWAP, MVT::i32, Expand);
136 setOperationAction(ISD::BSWAP, MVT::i64, Expand);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000137
138 // Indirect branch is not supported.
139 // This also disables Jump Table creation.
Justin Holewinski0497ab12013-03-30 14:29:21 +0000140 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
141 setOperationAction(ISD::BRIND, MVT::Other, Expand);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000142
Justin Holewinski0497ab12013-03-30 14:29:21 +0000143 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
144 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000145
146 // We want to legalize constant related memmove and memcopy
147 // intrinsics.
148 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
149
150 // Turn FP extload into load/fextend
151 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
152 // Turn FP truncstore into trunc + store.
153 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
154
155 // PTX does not support load / store predicate registers
Justin Holewinskic6462aa2012-11-14 19:19:16 +0000156 setOperationAction(ISD::LOAD, MVT::i1, Custom);
157 setOperationAction(ISD::STORE, MVT::i1, Custom);
158
Justin Holewinskiae556d32012-05-04 20:18:50 +0000159 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
160 setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000161 setTruncStoreAction(MVT::i64, MVT::i1, Expand);
162 setTruncStoreAction(MVT::i32, MVT::i1, Expand);
163 setTruncStoreAction(MVT::i16, MVT::i1, Expand);
164 setTruncStoreAction(MVT::i8, MVT::i1, Expand);
165
166 // This is legal in NVPTX
Justin Holewinski0497ab12013-03-30 14:29:21 +0000167 setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
168 setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000169
170 // TRAP can be lowered to PTX trap
Justin Holewinski0497ab12013-03-30 14:29:21 +0000171 setOperationAction(ISD::TRAP, MVT::Other, Legal);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000172
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000173 // Register custom handling for vector loads/stores
Justin Holewinski0497ab12013-03-30 14:29:21 +0000174 for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE;
175 ++i) {
176 MVT VT = (MVT::SimpleValueType) i;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000177 if (IsPTXVectorType(VT)) {
178 setOperationAction(ISD::LOAD, VT, Custom);
179 setOperationAction(ISD::STORE, VT, Custom);
180 setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
181 }
182 }
Justin Holewinskiae556d32012-05-04 20:18:50 +0000183
184 // Now deduce the information based on the above mentioned
185 // actions
186 computeRegisterProperties();
187}
188
Justin Holewinskiae556d32012-05-04 20:18:50 +0000189const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
190 switch (Opcode) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000191 default:
192 return 0;
193 case NVPTXISD::CALL:
194 return "NVPTXISD::CALL";
195 case NVPTXISD::RET_FLAG:
196 return "NVPTXISD::RET_FLAG";
197 case NVPTXISD::Wrapper:
198 return "NVPTXISD::Wrapper";
199 case NVPTXISD::NVBuiltin:
200 return "NVPTXISD::NVBuiltin";
201 case NVPTXISD::DeclareParam:
202 return "NVPTXISD::DeclareParam";
Justin Holewinskiae556d32012-05-04 20:18:50 +0000203 case NVPTXISD::DeclareScalarParam:
204 return "NVPTXISD::DeclareScalarParam";
Justin Holewinski0497ab12013-03-30 14:29:21 +0000205 case NVPTXISD::DeclareRet:
206 return "NVPTXISD::DeclareRet";
207 case NVPTXISD::DeclareRetParam:
208 return "NVPTXISD::DeclareRetParam";
209 case NVPTXISD::PrintCall:
210 return "NVPTXISD::PrintCall";
211 case NVPTXISD::LoadParam:
212 return "NVPTXISD::LoadParam";
Justin Holewinskife44314f2013-06-28 17:57:51 +0000213 case NVPTXISD::LoadParamV2:
214 return "NVPTXISD::LoadParamV2";
215 case NVPTXISD::LoadParamV4:
216 return "NVPTXISD::LoadParamV4";
Justin Holewinski0497ab12013-03-30 14:29:21 +0000217 case NVPTXISD::StoreParam:
218 return "NVPTXISD::StoreParam";
Justin Holewinskife44314f2013-06-28 17:57:51 +0000219 case NVPTXISD::StoreParamV2:
220 return "NVPTXISD::StoreParamV2";
221 case NVPTXISD::StoreParamV4:
222 return "NVPTXISD::StoreParamV4";
Justin Holewinski0497ab12013-03-30 14:29:21 +0000223 case NVPTXISD::StoreParamS32:
224 return "NVPTXISD::StoreParamS32";
225 case NVPTXISD::StoreParamU32:
226 return "NVPTXISD::StoreParamU32";
227 case NVPTXISD::MoveToParam:
228 return "NVPTXISD::MoveToParam";
229 case NVPTXISD::CallArgBegin:
230 return "NVPTXISD::CallArgBegin";
231 case NVPTXISD::CallArg:
232 return "NVPTXISD::CallArg";
233 case NVPTXISD::LastCallArg:
234 return "NVPTXISD::LastCallArg";
235 case NVPTXISD::CallArgEnd:
236 return "NVPTXISD::CallArgEnd";
237 case NVPTXISD::CallVoid:
238 return "NVPTXISD::CallVoid";
239 case NVPTXISD::CallVal:
240 return "NVPTXISD::CallVal";
241 case NVPTXISD::CallSymbol:
242 return "NVPTXISD::CallSymbol";
243 case NVPTXISD::Prototype:
244 return "NVPTXISD::Prototype";
245 case NVPTXISD::MoveParam:
246 return "NVPTXISD::MoveParam";
247 case NVPTXISD::MoveRetval:
248 return "NVPTXISD::MoveRetval";
249 case NVPTXISD::MoveToRetval:
250 return "NVPTXISD::MoveToRetval";
251 case NVPTXISD::StoreRetval:
252 return "NVPTXISD::StoreRetval";
Justin Holewinskife44314f2013-06-28 17:57:51 +0000253 case NVPTXISD::StoreRetvalV2:
254 return "NVPTXISD::StoreRetvalV2";
255 case NVPTXISD::StoreRetvalV4:
256 return "NVPTXISD::StoreRetvalV4";
Justin Holewinski0497ab12013-03-30 14:29:21 +0000257 case NVPTXISD::PseudoUseParam:
258 return "NVPTXISD::PseudoUseParam";
259 case NVPTXISD::RETURN:
260 return "NVPTXISD::RETURN";
261 case NVPTXISD::CallSeqBegin:
262 return "NVPTXISD::CallSeqBegin";
263 case NVPTXISD::CallSeqEnd:
264 return "NVPTXISD::CallSeqEnd";
265 case NVPTXISD::LoadV2:
266 return "NVPTXISD::LoadV2";
267 case NVPTXISD::LoadV4:
268 return "NVPTXISD::LoadV4";
269 case NVPTXISD::LDGV2:
270 return "NVPTXISD::LDGV2";
271 case NVPTXISD::LDGV4:
272 return "NVPTXISD::LDGV4";
273 case NVPTXISD::LDUV2:
274 return "NVPTXISD::LDUV2";
275 case NVPTXISD::LDUV4:
276 return "NVPTXISD::LDUV4";
277 case NVPTXISD::StoreV2:
278 return "NVPTXISD::StoreV2";
279 case NVPTXISD::StoreV4:
280 return "NVPTXISD::StoreV4";
Justin Holewinskiae556d32012-05-04 20:18:50 +0000281 }
282}
283
Justin Holewinskibc451192012-11-29 14:26:24 +0000284bool NVPTXTargetLowering::shouldSplitVectorElementType(EVT VT) const {
285 return VT == MVT::i1;
286}
Justin Holewinskiae556d32012-05-04 20:18:50 +0000287
288SDValue
289NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000290 SDLoc dl(Op);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000291 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
292 Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
293 return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
294}
295
Justin Holewinski0497ab12013-03-30 14:29:21 +0000296std::string NVPTXTargetLowering::getPrototype(
297 Type *retTy, const ArgListTy &Args,
298 const SmallVectorImpl<ISD::OutputArg> &Outs, unsigned retAlignment) const {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000299
300 bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
301
302 std::stringstream O;
303 O << "prototype_" << uniqueCallSite << " : .callprototype ";
304
305 if (retTy->getTypeID() == Type::VoidTyID)
306 O << "()";
307 else {
308 O << "(";
309 if (isABI) {
310 if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
311 unsigned size = 0;
312 if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
313 size = ITy->getBitWidth();
Justin Holewinski0497ab12013-03-30 14:29:21 +0000314 if (size < 32)
315 size = 32;
316 } else {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000317 assert(retTy->isFloatingPointTy() &&
318 "Floating point type expected here");
319 size = retTy->getPrimitiveSizeInBits();
320 }
321
322 O << ".param .b" << size << " _";
Justin Holewinski0497ab12013-03-30 14:29:21 +0000323 } else if (isa<PointerType>(retTy))
324 O << ".param .b" << getPointerTy().getSizeInBits() << " _";
Justin Holewinskiae556d32012-05-04 20:18:50 +0000325 else {
326 if ((retTy->getTypeID() == Type::StructTyID) ||
327 isa<VectorType>(retTy)) {
328 SmallVector<EVT, 16> vtparts;
329 ComputeValueVTs(*this, retTy, vtparts);
330 unsigned totalsz = 0;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000331 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000332 unsigned elems = 1;
333 EVT elemtype = vtparts[i];
334 if (vtparts[i].isVector()) {
335 elems = vtparts[i].getVectorNumElements();
336 elemtype = vtparts[i].getVectorElementType();
337 }
Justin Holewinski0497ab12013-03-30 14:29:21 +0000338 for (unsigned j = 0, je = elems; j != je; ++j) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000339 unsigned sz = elemtype.getSizeInBits();
Justin Holewinski0497ab12013-03-30 14:29:21 +0000340 if (elemtype.isInteger() && (sz < 8))
341 sz = 8;
342 totalsz += sz / 8;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000343 }
344 }
Justin Holewinski0497ab12013-03-30 14:29:21 +0000345 O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]";
346 } else {
347 assert(false && "Unknown return type");
Justin Holewinskiae556d32012-05-04 20:18:50 +0000348 }
349 }
Justin Holewinski0497ab12013-03-30 14:29:21 +0000350 } else {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000351 SmallVector<EVT, 16> vtparts;
352 ComputeValueVTs(*this, retTy, vtparts);
353 unsigned idx = 0;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000354 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000355 unsigned elems = 1;
356 EVT elemtype = vtparts[i];
357 if (vtparts[i].isVector()) {
358 elems = vtparts[i].getVectorNumElements();
359 elemtype = vtparts[i].getVectorElementType();
360 }
361
Justin Holewinski0497ab12013-03-30 14:29:21 +0000362 for (unsigned j = 0, je = elems; j != je; ++j) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000363 unsigned sz = elemtype.getSizeInBits();
Justin Holewinski0497ab12013-03-30 14:29:21 +0000364 if (elemtype.isInteger() && (sz < 32))
365 sz = 32;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000366 O << ".reg .b" << sz << " _";
Justin Holewinski0497ab12013-03-30 14:29:21 +0000367 if (j < je - 1)
368 O << ", ";
Justin Holewinskiae556d32012-05-04 20:18:50 +0000369 ++idx;
370 }
Justin Holewinski0497ab12013-03-30 14:29:21 +0000371 if (i < e - 1)
Justin Holewinskiae556d32012-05-04 20:18:50 +0000372 O << ", ";
373 }
374 }
375 O << ") ";
376 }
377 O << "_ (";
378
379 bool first = true;
380 MVT thePointerTy = getPointerTy();
381
Justin Holewinski0497ab12013-03-30 14:29:21 +0000382 for (unsigned i = 0, e = Args.size(); i != e; ++i) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000383 const Type *Ty = Args[i].Ty;
384 if (!first) {
385 O << ", ";
386 }
387 first = false;
388
389 if (Outs[i].Flags.isByVal() == false) {
390 unsigned sz = 0;
391 if (isa<IntegerType>(Ty)) {
392 sz = cast<IntegerType>(Ty)->getBitWidth();
Justin Holewinski0497ab12013-03-30 14:29:21 +0000393 if (sz < 32)
394 sz = 32;
395 } else if (isa<PointerType>(Ty))
Justin Holewinskiae556d32012-05-04 20:18:50 +0000396 sz = thePointerTy.getSizeInBits();
397 else
398 sz = Ty->getPrimitiveSizeInBits();
399 if (isABI)
400 O << ".param .b" << sz << " ";
401 else
402 O << ".reg .b" << sz << " ";
403 O << "_";
404 continue;
405 }
406 const PointerType *PTy = dyn_cast<PointerType>(Ty);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000407 assert(PTy && "Param with byval attribute should be a pointer type");
Justin Holewinskiae556d32012-05-04 20:18:50 +0000408 Type *ETy = PTy->getElementType();
409
410 if (isABI) {
411 unsigned align = Outs[i].Flags.getByValAlign();
Micah Villmowcdfe20b2012-10-08 16:38:25 +0000412 unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000413 O << ".param .align " << align << " .b8 ";
Justin Holewinskiae556d32012-05-04 20:18:50 +0000414 O << "_";
415 O << "[" << sz << "]";
416 continue;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000417 } else {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000418 SmallVector<EVT, 16> vtparts;
419 ComputeValueVTs(*this, ETy, vtparts);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000420 for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000421 unsigned elems = 1;
422 EVT elemtype = vtparts[i];
423 if (vtparts[i].isVector()) {
424 elems = vtparts[i].getVectorNumElements();
425 elemtype = vtparts[i].getVectorElementType();
426 }
427
Justin Holewinski0497ab12013-03-30 14:29:21 +0000428 for (unsigned j = 0, je = elems; j != je; ++j) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000429 unsigned sz = elemtype.getSizeInBits();
Justin Holewinski0497ab12013-03-30 14:29:21 +0000430 if (elemtype.isInteger() && (sz < 32))
431 sz = 32;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000432 O << ".reg .b" << sz << " ";
433 O << "_";
Justin Holewinski0497ab12013-03-30 14:29:21 +0000434 if (j < je - 1)
435 O << ", ";
Justin Holewinskiae556d32012-05-04 20:18:50 +0000436 }
Justin Holewinski0497ab12013-03-30 14:29:21 +0000437 if (i < e - 1)
Justin Holewinskiae556d32012-05-04 20:18:50 +0000438 O << ", ";
439 }
440 continue;
441 }
442 }
443 O << ");";
444 return O.str();
445}
446
Justin Holewinski0497ab12013-03-30 14:29:21 +0000447SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
448 SmallVectorImpl<SDValue> &InVals) const {
449 SelectionDAG &DAG = CLI.DAG;
Andrew Trickef9de2a2013-05-25 02:42:55 +0000450 SDLoc dl = CLI.DL;
Justin Holewinskiaa583972012-05-25 16:35:28 +0000451 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000452 SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
453 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
454 SDValue Chain = CLI.Chain;
455 SDValue Callee = CLI.Callee;
456 bool &isTailCall = CLI.IsTailCall;
457 ArgListTy &Args = CLI.Args;
458 Type *retTy = CLI.RetTy;
459 ImmutableCallSite *CS = CLI.CS;
Justin Holewinskiaa583972012-05-25 16:35:28 +0000460
Justin Holewinskiae556d32012-05-04 20:18:50 +0000461 bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
462
463 SDValue tempChain = Chain;
Andrew Trickad6d08a2013-05-29 22:03:55 +0000464 Chain = DAG.getCALLSEQ_START(Chain,
465 DAG.getIntPtrConstant(uniqueCallSite, true),
466 dl);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000467 SDValue InFlag = Chain.getValue(1);
468
469 assert((Outs.size() == Args.size()) &&
470 "Unexpected number of arguments to function call");
471 unsigned paramCount = 0;
472 // Declare the .params or .reg need to pass values
473 // to the function
Justin Holewinski0497ab12013-03-30 14:29:21 +0000474 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000475 EVT VT = Outs[i].VT;
476
477 if (Outs[i].Flags.isByVal() == false) {
478 // Plain scalar
479 // for ABI, declare .param .b<size> .param<n>;
480 // for nonABI, declare .reg .b<size> .param<n>;
481 unsigned isReg = 1;
482 if (isABI)
483 isReg = 0;
484 unsigned sz = VT.getSizeInBits();
Justin Holewinski0497ab12013-03-30 14:29:21 +0000485 if (VT.isInteger() && (sz < 32))
486 sz = 32;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000487 SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
488 SDValue DeclareParamOps[] = { Chain,
489 DAG.getConstant(paramCount, MVT::i32),
490 DAG.getConstant(sz, MVT::i32),
Justin Holewinski0497ab12013-03-30 14:29:21 +0000491 DAG.getConstant(isReg, MVT::i32), InFlag };
Justin Holewinskiae556d32012-05-04 20:18:50 +0000492 Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
493 DeclareParamOps, 5);
494 InFlag = Chain.getValue(1);
495 SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
496 SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
Justin Holewinski0497ab12013-03-30 14:29:21 +0000497 DAG.getConstant(0, MVT::i32), OutVals[i],
498 InFlag };
Justin Holewinskiae556d32012-05-04 20:18:50 +0000499
500 unsigned opcode = NVPTXISD::StoreParam;
501 if (isReg)
502 opcode = NVPTXISD::MoveToParam;
503 else {
504 if (Outs[i].Flags.isZExt())
505 opcode = NVPTXISD::StoreParamU32;
506 else if (Outs[i].Flags.isSExt())
507 opcode = NVPTXISD::StoreParamS32;
508 }
509 Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5);
510
511 InFlag = Chain.getValue(1);
512 ++paramCount;
513 continue;
514 }
515 // struct or vector
516 SmallVector<EVT, 16> vtparts;
517 const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000518 assert(PTy && "Type of a byval parameter should be pointer");
Justin Holewinskiae556d32012-05-04 20:18:50 +0000519 ComputeValueVTs(*this, PTy->getElementType(), vtparts);
520
521 if (isABI) {
522 // declare .param .align 16 .b8 .param<n>[<size>];
523 unsigned sz = Outs[i].Flags.getByValSize();
524 SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
525 // The ByValAlign in the Outs[i].Flags is alway set at this point, so we
526 // don't need to
527 // worry about natural alignment or not. See TargetLowering::LowerCallTo()
Justin Holewinski0497ab12013-03-30 14:29:21 +0000528 SDValue DeclareParamOps[] = {
529 Chain, DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
530 DAG.getConstant(paramCount, MVT::i32), DAG.getConstant(sz, MVT::i32),
531 InFlag
532 };
Justin Holewinskiae556d32012-05-04 20:18:50 +0000533 Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
534 DeclareParamOps, 5);
535 InFlag = Chain.getValue(1);
536 unsigned curOffset = 0;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000537 for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000538 unsigned elems = 1;
539 EVT elemtype = vtparts[j];
540 if (vtparts[j].isVector()) {
541 elems = vtparts[j].getVectorNumElements();
542 elemtype = vtparts[j].getVectorElementType();
543 }
Justin Holewinski0497ab12013-03-30 14:29:21 +0000544 for (unsigned k = 0, ke = elems; k != ke; ++k) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000545 unsigned sz = elemtype.getSizeInBits();
Justin Holewinski0497ab12013-03-30 14:29:21 +0000546 if (elemtype.isInteger() && (sz < 8))
547 sz = 8;
548 SDValue srcAddr =
549 DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
550 DAG.getConstant(curOffset, getPointerTy()));
551 SDValue theVal =
552 DAG.getLoad(elemtype, dl, tempChain, srcAddr,
553 MachinePointerInfo(), false, false, false, 0);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000554 SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000555 SDValue CopyParamOps[] = { Chain,
556 DAG.getConstant(paramCount, MVT::i32),
557 DAG.getConstant(curOffset, MVT::i32),
558 theVal, InFlag };
Justin Holewinskiae556d32012-05-04 20:18:50 +0000559 Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
560 CopyParamOps, 5);
561 InFlag = Chain.getValue(1);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000562 curOffset += sz / 8;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000563 }
564 }
565 ++paramCount;
566 continue;
567 }
568 // Non-abi, struct or vector
569 // Declare a bunch or .reg .b<size> .param<n>
570 unsigned curOffset = 0;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000571 for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000572 unsigned elems = 1;
573 EVT elemtype = vtparts[j];
574 if (vtparts[j].isVector()) {
575 elems = vtparts[j].getVectorNumElements();
576 elemtype = vtparts[j].getVectorElementType();
577 }
Justin Holewinski0497ab12013-03-30 14:29:21 +0000578 for (unsigned k = 0, ke = elems; k != ke; ++k) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000579 unsigned sz = elemtype.getSizeInBits();
Justin Holewinski0497ab12013-03-30 14:29:21 +0000580 if (elemtype.isInteger() && (sz < 32))
581 sz = 32;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000582 SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000583 SDValue DeclareParamOps[] = { Chain,
584 DAG.getConstant(paramCount, MVT::i32),
585 DAG.getConstant(sz, MVT::i32),
586 DAG.getConstant(1, MVT::i32), InFlag };
Justin Holewinskiae556d32012-05-04 20:18:50 +0000587 Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
588 DeclareParamOps, 5);
589 InFlag = Chain.getValue(1);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000590 SDValue srcAddr =
591 DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
592 DAG.getConstant(curOffset, getPointerTy()));
593 SDValue theVal =
594 DAG.getLoad(elemtype, dl, tempChain, srcAddr, MachinePointerInfo(),
595 false, false, false, 0);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000596 SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
597 SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
598 DAG.getConstant(0, MVT::i32), theVal,
599 InFlag };
600 Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs,
601 CopyParamOps, 5);
602 InFlag = Chain.getValue(1);
603 ++paramCount;
604 }
605 }
606 }
607
608 GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
609 unsigned retAlignment = 0;
610
611 // Handle Result
612 unsigned retCount = 0;
613 if (Ins.size() > 0) {
614 SmallVector<EVT, 16> resvtparts;
615 ComputeValueVTs(*this, retTy, resvtparts);
616
617 // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI or
618 // individual .reg .b<size> func_retval<0..> for non ABI
619 unsigned resultsz = 0;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000620 for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000621 unsigned elems = 1;
622 EVT elemtype = resvtparts[i];
623 if (resvtparts[i].isVector()) {
624 elems = resvtparts[i].getVectorNumElements();
625 elemtype = resvtparts[i].getVectorElementType();
626 }
Justin Holewinski0497ab12013-03-30 14:29:21 +0000627 for (unsigned j = 0, je = elems; j != je; ++j) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000628 unsigned sz = elemtype.getSizeInBits();
629 if (isABI == false) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000630 if (elemtype.isInteger() && (sz < 32))
631 sz = 32;
632 } else {
633 if (elemtype.isInteger() && (sz < 8))
634 sz = 8;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000635 }
636 if (isABI == false) {
637 SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
638 SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32),
639 DAG.getConstant(sz, MVT::i32),
640 DAG.getConstant(retCount, MVT::i32),
641 InFlag };
642 Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
643 DeclareRetOps, 5);
644 InFlag = Chain.getValue(1);
645 ++retCount;
646 }
647 resultsz += sz;
648 }
649 }
650 if (isABI) {
651 if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
Justin Holewinski0497ab12013-03-30 14:29:21 +0000652 retTy->isPointerTy()) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000653 // Scalar needs to be at least 32bit wide
654 if (resultsz < 32)
655 resultsz = 32;
656 SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
657 SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
658 DAG.getConstant(resultsz, MVT::i32),
659 DAG.getConstant(0, MVT::i32), InFlag };
660 Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
661 DeclareRetOps, 5);
662 InFlag = Chain.getValue(1);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000663 } else {
Justin Holewinskiaa583972012-05-25 16:35:28 +0000664 if (Func) { // direct call
665 if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
Micah Villmowcdfe20b2012-10-08 16:38:25 +0000666 retAlignment = getDataLayout()->getABITypeAlignment(retTy);
Justin Holewinskiaa583972012-05-25 16:35:28 +0000667 } else { // indirect call
668 const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
669 if (!llvm::getAlign(*CallI, 0, retAlignment))
Micah Villmowcdfe20b2012-10-08 16:38:25 +0000670 retAlignment = getDataLayout()->getABITypeAlignment(retTy);
Justin Holewinskiaa583972012-05-25 16:35:28 +0000671 }
Justin Holewinskiae556d32012-05-04 20:18:50 +0000672 SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000673 SDValue DeclareRetOps[] = { Chain,
674 DAG.getConstant(retAlignment, MVT::i32),
675 DAG.getConstant(resultsz / 8, MVT::i32),
676 DAG.getConstant(0, MVT::i32), InFlag };
Justin Holewinskiae556d32012-05-04 20:18:50 +0000677 Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
678 DeclareRetOps, 5);
679 InFlag = Chain.getValue(1);
680 }
681 }
682 }
683
684 if (!Func) {
685 // This is indirect function call case : PTX requires a prototype of the
686 // form
687 // proto_0 : .callprototype(.param .b32 _) _ (.param .b32 _);
688 // to be emitted, and the label has to used as the last arg of call
689 // instruction.
690 // The prototype is embedded in a string and put as the operand for an
691 // INLINEASM SDNode.
692 SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
693 std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000694 const char *asmstr = nvTM->getManagedStrPool()
695 ->getManagedString(proto_string.c_str())->c_str();
696 SDValue InlineAsmOps[] = {
697 Chain, DAG.getTargetExternalSymbol(asmstr, getPointerTy()),
698 DAG.getMDNode(0), DAG.getTargetConstant(0, MVT::i32), InFlag
699 };
Justin Holewinskiae556d32012-05-04 20:18:50 +0000700 Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
701 InFlag = Chain.getValue(1);
702 }
703 // Op to just print "call"
704 SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000705 SDValue PrintCallOps[] = {
706 Chain,
707 DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1) : retCount, MVT::i32),
708 InFlag
709 };
710 Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
711 dl, PrintCallVTs, PrintCallOps, 3);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000712 InFlag = Chain.getValue(1);
713
714 // Ops to print out the function name
715 SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
716 SDValue CallVoidOps[] = { Chain, Callee, InFlag };
717 Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3);
718 InFlag = Chain.getValue(1);
719
720 // Ops to print out the param list
721 SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
722 SDValue CallArgBeginOps[] = { Chain, InFlag };
723 Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
724 CallArgBeginOps, 2);
725 InFlag = Chain.getValue(1);
726
Justin Holewinski0497ab12013-03-30 14:29:21 +0000727 for (unsigned i = 0, e = paramCount; i != e; ++i) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000728 unsigned opcode;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000729 if (i == (e - 1))
Justin Holewinskiae556d32012-05-04 20:18:50 +0000730 opcode = NVPTXISD::LastCallArg;
731 else
732 opcode = NVPTXISD::CallArg;
733 SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
734 SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
Justin Holewinski0497ab12013-03-30 14:29:21 +0000735 DAG.getConstant(i, MVT::i32), InFlag };
Justin Holewinskiae556d32012-05-04 20:18:50 +0000736 Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
737 InFlag = Chain.getValue(1);
738 }
739 SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000740 SDValue CallArgEndOps[] = { Chain, DAG.getConstant(Func ? 1 : 0, MVT::i32),
Justin Holewinskiae556d32012-05-04 20:18:50 +0000741 InFlag };
Justin Holewinski0497ab12013-03-30 14:29:21 +0000742 Chain =
743 DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps, 3);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000744 InFlag = Chain.getValue(1);
745
746 if (!Func) {
747 SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000748 SDValue PrototypeOps[] = { Chain, DAG.getConstant(uniqueCallSite, MVT::i32),
Justin Holewinskiae556d32012-05-04 20:18:50 +0000749 InFlag };
750 Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
751 InFlag = Chain.getValue(1);
752 }
753
754 // Generate loads from param memory/moves from registers for result
755 if (Ins.size() > 0) {
756 if (isABI) {
757 unsigned resoffset = 0;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000758 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000759 unsigned sz = Ins[i].VT.getSizeInBits();
Justin Holewinski0497ab12013-03-30 14:29:21 +0000760 if (Ins[i].VT.isInteger() && (sz < 8))
761 sz = 8;
Benjamin Kramerfdf362b2013-03-07 20:33:29 +0000762 EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue };
Justin Holewinski0497ab12013-03-30 14:29:21 +0000763 SDValue LoadRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
764 DAG.getConstant(resoffset, MVT::i32), InFlag };
Justin Holewinskiae556d32012-05-04 20:18:50 +0000765 SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
Benjamin Kramerfdf362b2013-03-07 20:33:29 +0000766 LoadRetOps, array_lengthof(LoadRetOps));
Justin Holewinskiae556d32012-05-04 20:18:50 +0000767 Chain = retval.getValue(1);
768 InFlag = retval.getValue(2);
769 InVals.push_back(retval);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000770 resoffset += sz / 8;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000771 }
Justin Holewinski0497ab12013-03-30 14:29:21 +0000772 } else {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000773 SmallVector<EVT, 16> resvtparts;
774 ComputeValueVTs(*this, retTy, resvtparts);
775
776 assert(Ins.size() == resvtparts.size() &&
777 "Unexpected number of return values in non-ABI case");
778 unsigned paramNum = 0;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000779 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000780 assert(EVT(Ins[i].VT) == resvtparts[i] &&
781 "Unexpected EVT type in non-ABI case");
782 unsigned numelems = 1;
783 EVT elemtype = Ins[i].VT;
784 if (Ins[i].VT.isVector()) {
785 numelems = Ins[i].VT.getVectorNumElements();
786 elemtype = Ins[i].VT.getVectorElementType();
787 }
788 std::vector<SDValue> tempRetVals;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000789 for (unsigned j = 0; j < numelems; ++j) {
Benjamin Kramerfdf362b2013-03-07 20:33:29 +0000790 EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue };
Justin Holewinski0497ab12013-03-30 14:29:21 +0000791 SDValue MoveRetOps[] = { Chain, DAG.getConstant(0, MVT::i32),
792 DAG.getConstant(paramNum, MVT::i32),
793 InFlag };
Justin Holewinskiae556d32012-05-04 20:18:50 +0000794 SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
Benjamin Kramerfdf362b2013-03-07 20:33:29 +0000795 MoveRetOps, array_lengthof(MoveRetOps));
Justin Holewinskiae556d32012-05-04 20:18:50 +0000796 Chain = retval.getValue(1);
797 InFlag = retval.getValue(2);
798 tempRetVals.push_back(retval);
799 ++paramNum;
800 }
801 if (Ins[i].VT.isVector())
802 InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT,
803 &tempRetVals[0], tempRetVals.size()));
804 else
805 InVals.push_back(tempRetVals[0]);
806 }
807 }
808 }
Justin Holewinski0497ab12013-03-30 14:29:21 +0000809 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(uniqueCallSite, true),
810 DAG.getIntPtrConstant(uniqueCallSite + 1, true),
Andrew Trickad6d08a2013-05-29 22:03:55 +0000811 InFlag, dl);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000812 uniqueCallSite++;
813
814 // set isTailCall to false for now, until we figure out how to express
815 // tail call optimization in PTX
816 isTailCall = false;
817 return Chain;
818}
Justin Holewinskiae556d32012-05-04 20:18:50 +0000819
820// By default CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
821// (see LegalizeDAG.cpp). This is slow and uses local memory.
822// We use extract/insert/build vector just as what LegalizeOp() does in llvm 2.5
Justin Holewinski0497ab12013-03-30 14:29:21 +0000823SDValue
824NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000825 SDNode *Node = Op.getNode();
Andrew Trickef9de2a2013-05-25 02:42:55 +0000826 SDLoc dl(Node);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000827 SmallVector<SDValue, 8> Ops;
828 unsigned NumOperands = Node->getNumOperands();
Justin Holewinski0497ab12013-03-30 14:29:21 +0000829 for (unsigned i = 0; i < NumOperands; ++i) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000830 SDValue SubOp = Node->getOperand(i);
831 EVT VVT = SubOp.getNode()->getValueType(0);
832 EVT EltVT = VVT.getVectorElementType();
833 unsigned NumSubElem = VVT.getVectorNumElements();
Justin Holewinski0497ab12013-03-30 14:29:21 +0000834 for (unsigned j = 0; j < NumSubElem; ++j) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000835 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
836 DAG.getIntPtrConstant(j)));
837 }
838 }
Justin Holewinski0497ab12013-03-30 14:29:21 +0000839 return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Ops[0],
840 Ops.size());
Justin Holewinskiae556d32012-05-04 20:18:50 +0000841}
842
Justin Holewinski0497ab12013-03-30 14:29:21 +0000843SDValue
844NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000845 switch (Op.getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000846 case ISD::RETURNADDR:
847 return SDValue();
848 case ISD::FRAMEADDR:
849 return SDValue();
850 case ISD::GlobalAddress:
851 return LowerGlobalAddress(Op, DAG);
852 case ISD::INTRINSIC_W_CHAIN:
853 return Op;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000854 case ISD::BUILD_VECTOR:
855 case ISD::EXTRACT_SUBVECTOR:
856 return Op;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000857 case ISD::CONCAT_VECTORS:
858 return LowerCONCAT_VECTORS(Op, DAG);
859 case ISD::STORE:
860 return LowerSTORE(Op, DAG);
861 case ISD::LOAD:
862 return LowerLOAD(Op, DAG);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000863 default:
David Blaikie891d0a32012-05-04 22:34:16 +0000864 llvm_unreachable("Custom lowering not defined for operation");
Justin Holewinskiae556d32012-05-04 20:18:50 +0000865 }
866}
867
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000868SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
869 if (Op.getValueType() == MVT::i1)
870 return LowerLOADi1(Op, DAG);
871 else
872 return SDValue();
873}
874
Justin Holewinskic6462aa2012-11-14 19:19:16 +0000875// v = ld i1* addr
876// =>
877// v1 = ld i8* addr
878// v = trunc v1 to i1
Justin Holewinski0497ab12013-03-30 14:29:21 +0000879SDValue NVPTXTargetLowering::LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
Justin Holewinskic6462aa2012-11-14 19:19:16 +0000880 SDNode *Node = Op.getNode();
881 LoadSDNode *LD = cast<LoadSDNode>(Node);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000882 SDLoc dl(Node);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000883 assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
NAKAMURA Takumi5bbe0e12012-11-14 23:46:15 +0000884 assert(Node->getValueType(0) == MVT::i1 &&
885 "Custom lowering for i1 load only");
Justin Holewinski0497ab12013-03-30 14:29:21 +0000886 SDValue newLD =
887 DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
888 LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(),
889 LD->isInvariant(), LD->getAlignment());
Justin Holewinskic6462aa2012-11-14 19:19:16 +0000890 SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
891 // The legalizer (the caller) is expecting two values from the legalized
892 // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
893 // in LegalizeDAG.cpp which also uses MergeValues.
Justin Holewinski0497ab12013-03-30 14:29:21 +0000894 SDValue Ops[] = { result, LD->getChain() };
Justin Holewinskic6462aa2012-11-14 19:19:16 +0000895 return DAG.getMergeValues(Ops, 2, dl);
896}
897
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000898SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
899 EVT ValVT = Op.getOperand(1).getValueType();
900 if (ValVT == MVT::i1)
901 return LowerSTOREi1(Op, DAG);
902 else if (ValVT.isVector())
903 return LowerSTOREVector(Op, DAG);
904 else
905 return SDValue();
906}
907
908SDValue
909NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
910 SDNode *N = Op.getNode();
911 SDValue Val = N->getOperand(1);
Andrew Trickef9de2a2013-05-25 02:42:55 +0000912 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000913 EVT ValVT = Val.getValueType();
914
915 if (ValVT.isVector()) {
916 // We only handle "native" vector sizes for now, e.g. <4 x double> is not
917 // legal. We can (and should) split that into 2 stores of <2 x double> here
918 // but I'm leaving that as a TODO for now.
919 if (!ValVT.isSimple())
920 return SDValue();
921 switch (ValVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000922 default:
923 return SDValue();
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000924 case MVT::v2i8:
925 case MVT::v2i16:
926 case MVT::v2i32:
927 case MVT::v2i64:
928 case MVT::v2f32:
929 case MVT::v2f64:
930 case MVT::v4i8:
931 case MVT::v4i16:
932 case MVT::v4i32:
933 case MVT::v4f32:
934 // This is a "native" vector type
935 break;
936 }
937
938 unsigned Opcode = 0;
939 EVT EltVT = ValVT.getVectorElementType();
940 unsigned NumElts = ValVT.getVectorNumElements();
941
942 // Since StoreV2 is a target node, we cannot rely on DAG type legalization.
943 // Therefore, we must ensure the type is legal. For i1 and i8, we set the
944 // stored type to i16 and propogate the "real" type as the memory type.
945 bool NeedExt = false;
946 if (EltVT.getSizeInBits() < 16)
947 NeedExt = true;
948
949 switch (NumElts) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000950 default:
951 return SDValue();
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000952 case 2:
953 Opcode = NVPTXISD::StoreV2;
954 break;
955 case 4: {
956 Opcode = NVPTXISD::StoreV4;
957 break;
958 }
959 }
960
961 SmallVector<SDValue, 8> Ops;
962
963 // First is the chain
964 Ops.push_back(N->getOperand(0));
965
966 // Then the split values
967 for (unsigned i = 0; i < NumElts; ++i) {
968 SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
969 DAG.getIntPtrConstant(i));
970 if (NeedExt)
971 // ANY_EXTEND is correct here since the store will only look at the
972 // lower-order bits anyway.
973 ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
974 Ops.push_back(ExtVal);
975 }
976
977 // Then any remaining arguments
978 for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
979 Ops.push_back(N->getOperand(i));
980 }
981
982 MemSDNode *MemSD = cast<MemSDNode>(N);
983
Justin Holewinski0497ab12013-03-30 14:29:21 +0000984 SDValue NewSt = DAG.getMemIntrinsicNode(
985 Opcode, DL, DAG.getVTList(MVT::Other), &Ops[0], Ops.size(),
986 MemSD->getMemoryVT(), MemSD->getMemOperand());
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000987
988 //return DCI.CombineTo(N, NewSt, true);
989 return NewSt;
990 }
991
992 return SDValue();
993}
994
Justin Holewinskic6462aa2012-11-14 19:19:16 +0000995// st i1 v, addr
996// =>
997// v1 = zxt v to i8
998// st i8, addr
Justin Holewinski0497ab12013-03-30 14:29:21 +0000999SDValue NVPTXTargetLowering::LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
Justin Holewinskic6462aa2012-11-14 19:19:16 +00001000 SDNode *Node = Op.getNode();
Andrew Trickef9de2a2013-05-25 02:42:55 +00001001 SDLoc dl(Node);
Justin Holewinskic6462aa2012-11-14 19:19:16 +00001002 StoreSDNode *ST = cast<StoreSDNode>(Node);
1003 SDValue Tmp1 = ST->getChain();
1004 SDValue Tmp2 = ST->getBasePtr();
1005 SDValue Tmp3 = ST->getValue();
NAKAMURA Takumi5bbe0e12012-11-14 23:46:15 +00001006 assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
Justin Holewinskic6462aa2012-11-14 19:19:16 +00001007 unsigned Alignment = ST->getAlignment();
1008 bool isVolatile = ST->isVolatile();
1009 bool isNonTemporal = ST->isNonTemporal();
Justin Holewinski0497ab12013-03-30 14:29:21 +00001010 Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3);
1011 SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
1012 isVolatile, isNonTemporal, Alignment);
Justin Holewinskic6462aa2012-11-14 19:19:16 +00001013 return Result;
1014}
1015
Justin Holewinski0497ab12013-03-30 14:29:21 +00001016SDValue NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname,
1017 int idx, EVT v) const {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001018 std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
1019 std::stringstream suffix;
1020 suffix << idx;
1021 *name += suffix.str();
1022 return DAG.getTargetExternalSymbol(name->c_str(), v);
1023}
1024
1025SDValue
1026NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
1027 return getExtSymb(DAG, ".PARAM", idx, v);
1028}
1029
Justin Holewinski0497ab12013-03-30 14:29:21 +00001030SDValue NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001031 return getExtSymb(DAG, ".HLPPARAM", idx);
1032}
1033
1034// Check to see if the kernel argument is image*_t or sampler_t
1035
1036bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001037 static const char *const specialTypes[] = { "struct._image2d_t",
1038 "struct._image3d_t",
1039 "struct._sampler_t" };
Justin Holewinskiae556d32012-05-04 20:18:50 +00001040
1041 const Type *Ty = arg->getType();
1042 const PointerType *PTy = dyn_cast<PointerType>(Ty);
1043
1044 if (!PTy)
1045 return false;
1046
1047 if (!context)
1048 return false;
1049
1050 const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
Justin Holewinskifb711152012-12-05 20:50:28 +00001051 const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";
Justin Holewinskiae556d32012-05-04 20:18:50 +00001052
Craig Toppere4260f92012-05-24 04:22:05 +00001053 for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
Justin Holewinskiae556d32012-05-04 20:18:50 +00001054 if (TypeName == specialTypes[i])
1055 return true;
1056
1057 return false;
1058}
1059
Justin Holewinski0497ab12013-03-30 14:29:21 +00001060SDValue NVPTXTargetLowering::LowerFormalArguments(
1061 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001062 const SmallVectorImpl<ISD::InputArg> &Ins, SDLoc dl, SelectionDAG &DAG,
Justin Holewinski0497ab12013-03-30 14:29:21 +00001063 SmallVectorImpl<SDValue> &InVals) const {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001064 MachineFunction &MF = DAG.getMachineFunction();
Micah Villmowcdfe20b2012-10-08 16:38:25 +00001065 const DataLayout *TD = getDataLayout();
Justin Holewinskiae556d32012-05-04 20:18:50 +00001066
1067 const Function *F = MF.getFunction();
Bill Wendlinge94d8432012-12-07 23:16:57 +00001068 const AttributeSet &PAL = F->getAttributes();
Justin Holewinski44f5c602013-06-28 17:57:53 +00001069 const TargetLowering *TLI = nvTM->getTargetLowering();
Justin Holewinskiae556d32012-05-04 20:18:50 +00001070
1071 SDValue Root = DAG.getRoot();
1072 std::vector<SDValue> OutChains;
1073
1074 bool isKernel = llvm::isKernelFunction(*F);
1075 bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
Justin Holewinski44f5c602013-06-28 17:57:53 +00001076 assert(isABI && "Non-ABI compilation is not supported");
1077 if (!isABI)
1078 return Chain;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001079
1080 std::vector<Type *> argTypes;
1081 std::vector<const Argument *> theArgs;
1082 for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
Justin Holewinski0497ab12013-03-30 14:29:21 +00001083 I != E; ++I) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001084 theArgs.push_back(I);
1085 argTypes.push_back(I->getType());
1086 }
Justin Holewinski44f5c602013-06-28 17:57:53 +00001087 // argTypes.size() (or theArgs.size()) and Ins.size() need not match.
1088 // Ins.size() will be larger
1089 // * if there is an aggregate argument with multiple fields (each field
1090 // showing up separately in Ins)
1091 // * if there is a vector argument with more than typical vector-length
1092 // elements (generally if more than 4) where each vector element is
1093 // individually present in Ins.
1094 // So a different index should be used for indexing into Ins.
1095 // See similar issue in LowerCall.
1096 unsigned InsIdx = 0;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001097
1098 int idx = 0;
Justin Holewinski44f5c602013-06-28 17:57:53 +00001099 for (unsigned i = 0, e = theArgs.size(); i != e; ++i, ++idx, ++InsIdx) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001100 Type *Ty = argTypes[i];
Justin Holewinskiae556d32012-05-04 20:18:50 +00001101
1102 // If the kernel argument is image*_t or sampler_t, convert it to
1103 // a i32 constant holding the parameter position. This can later
1104 // matched in the AsmPrinter to output the correct mangled name.
Justin Holewinski0497ab12013-03-30 14:29:21 +00001105 if (isImageOrSamplerVal(
1106 theArgs[i],
1107 (theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
1108 : 0))) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001109 assert(isKernel && "Only kernels can have image/sampler params");
Justin Holewinski0497ab12013-03-30 14:29:21 +00001110 InVals.push_back(DAG.getConstant(i + 1, MVT::i32));
Justin Holewinskiae556d32012-05-04 20:18:50 +00001111 continue;
1112 }
1113
1114 if (theArgs[i]->use_empty()) {
1115 // argument is dead
Justin Holewinski44f5c602013-06-28 17:57:53 +00001116 if (Ty->isAggregateType()) {
1117 SmallVector<EVT, 16> vtparts;
1118
1119 ComputeValueVTs(*this, Ty, vtparts);
1120 assert(vtparts.size() > 0 && "empty aggregate type not expected");
1121 for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
1122 ++parti) {
1123 EVT partVT = vtparts[parti];
1124 InVals.push_back(DAG.getNode(ISD::UNDEF, dl, partVT));
1125 ++InsIdx;
Justin Holewinskie9884092013-03-24 21:17:47 +00001126 }
Justin Holewinski44f5c602013-06-28 17:57:53 +00001127 if (vtparts.size() > 0)
1128 --InsIdx;
1129 continue;
Justin Holewinskie9884092013-03-24 21:17:47 +00001130 }
Justin Holewinski44f5c602013-06-28 17:57:53 +00001131 if (Ty->isVectorTy()) {
1132 EVT ObjectVT = getValueType(Ty);
1133 unsigned NumRegs = TLI->getNumRegisters(F->getContext(), ObjectVT);
1134 for (unsigned parti = 0; parti < NumRegs; ++parti) {
1135 InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
1136 ++InsIdx;
1137 }
1138 if (NumRegs > 0)
1139 --InsIdx;
1140 continue;
1141 }
1142 InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT));
Justin Holewinskiae556d32012-05-04 20:18:50 +00001143 continue;
1144 }
1145
1146 // In the following cases, assign a node order of "idx+1"
Justin Holewinski44f5c602013-06-28 17:57:53 +00001147 // to newly created nodes. The SDNodes for params have to
Justin Holewinskiae556d32012-05-04 20:18:50 +00001148 // appear in the same order as their order of appearance
1149 // in the original function. "idx+1" holds that order.
Justin Holewinski0497ab12013-03-30 14:29:21 +00001150 if (PAL.hasAttribute(i + 1, Attribute::ByVal) == false) {
Justin Holewinski44f5c602013-06-28 17:57:53 +00001151 if (Ty->isAggregateType()) {
1152 SmallVector<EVT, 16> vtparts;
1153 SmallVector<uint64_t, 16> offsets;
1154
1155 ComputeValueVTs(*this, Ty, vtparts, &offsets, 0);
1156 assert(vtparts.size() > 0 && "empty aggregate type not expected");
1157 bool aggregateIsPacked = false;
1158 if (StructType *STy = llvm::dyn_cast<StructType>(Ty))
1159 aggregateIsPacked = STy->isPacked();
1160
1161 SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
1162 for (unsigned parti = 0, parte = vtparts.size(); parti != parte;
1163 ++parti) {
1164 EVT partVT = vtparts[parti];
1165 Value *srcValue = Constant::getNullValue(
1166 PointerType::get(partVT.getTypeForEVT(F->getContext()),
1167 llvm::ADDRESS_SPACE_PARAM));
1168 SDValue srcAddr =
1169 DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
1170 DAG.getConstant(offsets[parti], getPointerTy()));
1171 unsigned partAlign =
1172 aggregateIsPacked ? 1
1173 : TD->getABITypeAlignment(
1174 partVT.getTypeForEVT(F->getContext()));
1175 SDValue p = DAG.getLoad(partVT, dl, Root, srcAddr,
1176 MachinePointerInfo(srcValue), false, false,
1177 true, partAlign);
1178 if (p.getNode())
1179 p.getNode()->setIROrder(idx + 1);
1180 InVals.push_back(p);
1181 ++InsIdx;
Justin Holewinskie9884092013-03-24 21:17:47 +00001182 }
Justin Holewinski44f5c602013-06-28 17:57:53 +00001183 if (vtparts.size() > 0)
1184 --InsIdx;
Justin Holewinskie9884092013-03-24 21:17:47 +00001185 continue;
1186 }
Justin Holewinski44f5c602013-06-28 17:57:53 +00001187 if (Ty->isVectorTy()) {
1188 EVT ObjectVT = getValueType(Ty);
Justin Holewinskiaaaf2892013-06-25 12:22:21 +00001189 SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
Justin Holewinski44f5c602013-06-28 17:57:53 +00001190 unsigned NumElts = ObjectVT.getVectorNumElements();
1191 assert(TLI->getNumRegisters(F->getContext(), ObjectVT) == NumElts &&
1192 "Vector was not scalarized");
1193 unsigned Ofst = 0;
1194 EVT EltVT = ObjectVT.getVectorElementType();
1195
1196 // V1 load
1197 // f32 = load ...
1198 if (NumElts == 1) {
1199 // We only have one element, so just directly load it
1200 Value *SrcValue = Constant::getNullValue(PointerType::get(
1201 EltVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
1202 SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
1203 DAG.getConstant(Ofst, getPointerTy()));
1204 SDValue P = DAG.getLoad(
1205 EltVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
1206 false, true,
1207 TD->getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())));
1208 if (P.getNode())
1209 P.getNode()->setIROrder(idx + 1);
1210
1211 InVals.push_back(P);
1212 Ofst += TD->getTypeAllocSize(EltVT.getTypeForEVT(F->getContext()));
1213 ++InsIdx;
1214 } else if (NumElts == 2) {
1215 // V2 load
1216 // f32,f32 = load ...
1217 EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, 2);
1218 Value *SrcValue = Constant::getNullValue(PointerType::get(
1219 VecVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
1220 SDValue SrcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
1221 DAG.getConstant(Ofst, getPointerTy()));
1222 SDValue P = DAG.getLoad(
1223 VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
1224 false, true,
1225 TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
1226 if (P.getNode())
1227 P.getNode()->setIROrder(idx + 1);
1228
1229 SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
1230 DAG.getIntPtrConstant(0));
1231 SDValue Elt1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
1232 DAG.getIntPtrConstant(1));
1233 InVals.push_back(Elt0);
1234 InVals.push_back(Elt1);
1235 Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
1236 InsIdx += 2;
1237 } else {
1238 // V4 loads
1239 // We have at least 4 elements (<3 x Ty> expands to 4 elements) and
1240 // the
1241 // vector will be expanded to a power of 2 elements, so we know we can
1242 // always round up to the next multiple of 4 when creating the vector
1243 // loads.
1244 // e.g. 4 elem => 1 ld.v4
1245 // 6 elem => 2 ld.v4
1246 // 8 elem => 2 ld.v4
1247 // 11 elem => 3 ld.v4
1248 unsigned VecSize = 4;
1249 if (EltVT.getSizeInBits() == 64) {
1250 VecSize = 2;
1251 }
1252 EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize);
1253 for (unsigned i = 0; i < NumElts; i += VecSize) {
1254 Value *SrcValue = Constant::getNullValue(
1255 PointerType::get(VecVT.getTypeForEVT(F->getContext()),
1256 llvm::ADDRESS_SPACE_PARAM));
1257 SDValue SrcAddr =
1258 DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg,
1259 DAG.getConstant(Ofst, getPointerTy()));
1260 SDValue P = DAG.getLoad(
1261 VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue), false,
1262 false, true,
1263 TD->getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())));
1264 if (P.getNode())
1265 P.getNode()->setIROrder(idx + 1);
1266
1267 for (unsigned j = 0; j < VecSize; ++j) {
1268 if (i + j >= NumElts)
1269 break;
1270 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, P,
1271 DAG.getIntPtrConstant(j));
1272 InVals.push_back(Elt);
1273 }
1274 Ofst += TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext()));
1275 InsIdx += VecSize;
1276 }
1277 }
1278
1279 if (NumElts > 0)
1280 --InsIdx;
1281 continue;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001282 }
Justin Holewinski44f5c602013-06-28 17:57:53 +00001283 // A plain scalar.
1284 EVT ObjectVT = getValueType(Ty);
1285 assert(ObjectVT == Ins[InsIdx].VT &&
1286 "Ins type did not match function type");
1287 // If ABI, load from the param symbol
1288 SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
1289 Value *srcValue = Constant::getNullValue(PointerType::get(
1290 ObjectVT.getTypeForEVT(F->getContext()), llvm::ADDRESS_SPACE_PARAM));
1291 SDValue p = DAG.getLoad(
1292 ObjectVT, dl, Root, Arg, MachinePointerInfo(srcValue), false, false,
1293 true,
1294 TD->getABITypeAlignment(ObjectVT.getTypeForEVT(F->getContext())));
1295 if (p.getNode())
1296 p.getNode()->setIROrder(idx + 1);
1297 InVals.push_back(p);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001298 continue;
1299 }
1300
1301 // Param has ByVal attribute
Justin Holewinski44f5c602013-06-28 17:57:53 +00001302 // Return MoveParam(param symbol).
1303 // Ideally, the param symbol can be returned directly,
1304 // but when SDNode builder decides to use it in a CopyToReg(),
1305 // machine instruction fails because TargetExternalSymbol
1306 // (not lowered) is target dependent, and CopyToReg assumes
1307 // the source is lowered.
1308 EVT ObjectVT = getValueType(Ty);
1309 assert(ObjectVT == Ins[InsIdx].VT &&
1310 "Ins type did not match function type");
1311 SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
1312 SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
1313 if (p.getNode())
1314 p.getNode()->setIROrder(idx + 1);
1315 if (isKernel)
1316 InVals.push_back(p);
1317 else {
1318 SDValue p2 = DAG.getNode(
1319 ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
1320 DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32), p);
1321 InVals.push_back(p2);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001322 }
1323 }
1324
1325 // Clang will check explicit VarArg and issue error if any. However, Clang
1326 // will let code with
Justin Holewinski44f5c602013-06-28 17:57:53 +00001327 // implicit var arg like f() pass. See bug 617733.
Justin Holewinskiae556d32012-05-04 20:18:50 +00001328 // We treat this case as if the arg list is empty.
Justin Holewinski44f5c602013-06-28 17:57:53 +00001329 // if (F.isVarArg()) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001330 // assert(0 && "VarArg not supported yet!");
1331 //}
1332
1333 if (!OutChains.empty())
Justin Holewinski0497ab12013-03-30 14:29:21 +00001334 DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &OutChains[0],
1335 OutChains.size()));
Justin Holewinskiae556d32012-05-04 20:18:50 +00001336
1337 return Chain;
1338}
1339
Justin Holewinski44f5c602013-06-28 17:57:53 +00001340
Justin Holewinski0497ab12013-03-30 14:29:21 +00001341SDValue NVPTXTargetLowering::LowerReturn(
1342 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1343 const SmallVectorImpl<ISD::OutputArg> &Outs,
Andrew Trickef9de2a2013-05-25 02:42:55 +00001344 const SmallVectorImpl<SDValue> &OutVals, SDLoc dl,
Justin Holewinski0497ab12013-03-30 14:29:21 +00001345 SelectionDAG &DAG) const {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001346
1347 bool isABI = (nvptxSubtarget.getSmVersion() >= 20);
1348
1349 unsigned sizesofar = 0;
1350 unsigned idx = 0;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001351 for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001352 SDValue theVal = OutVals[i];
1353 EVT theValType = theVal.getValueType();
1354 unsigned numElems = 1;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001355 if (theValType.isVector())
1356 numElems = theValType.getVectorNumElements();
1357 for (unsigned j = 0, je = numElems; j != je; ++j) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001358 SDValue tmpval = theVal;
1359 if (theValType.isVector())
1360 tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
Justin Holewinski0497ab12013-03-30 14:29:21 +00001361 theValType.getVectorElementType(), tmpval,
1362 DAG.getIntPtrConstant(j));
1363 Chain = DAG.getNode(
1364 isABI ? NVPTXISD::StoreRetval : NVPTXISD::MoveToRetval, dl,
1365 MVT::Other, Chain, DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
Justin Holewinskiae556d32012-05-04 20:18:50 +00001366 tmpval);
1367 if (theValType.isVector())
Justin Holewinski0497ab12013-03-30 14:29:21 +00001368 sizesofar += theValType.getVectorElementType().getStoreSizeInBits() / 8;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001369 else
Justin Holewinski0497ab12013-03-30 14:29:21 +00001370 sizesofar += theValType.getStoreSizeInBits() / 8;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001371 ++idx;
1372 }
1373 }
1374
1375 return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
1376}
1377
Justin Holewinski0497ab12013-03-30 14:29:21 +00001378void NVPTXTargetLowering::LowerAsmOperandForConstraint(
1379 SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
1380 SelectionDAG &DAG) const {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001381 if (Constraint.length() > 1)
1382 return;
1383 else
1384 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
1385}
1386
1387// NVPTX suuport vector of legal types of any length in Intrinsics because the
1388// NVPTX specific type legalizer
1389// will legalize them to the PTX supported length.
Justin Holewinski0497ab12013-03-30 14:29:21 +00001390bool NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001391 if (isTypeLegal(VT))
1392 return true;
1393 if (VT.isVector()) {
1394 MVT eVT = VT.getVectorElementType();
1395 if (isTypeLegal(eVT))
1396 return true;
1397 }
1398 return false;
1399}
1400
Justin Holewinskiae556d32012-05-04 20:18:50 +00001401// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
1402// TgtMemIntrinsic
1403// because we need the information that is only available in the "Value" type
1404// of destination
1405// pointer. In particular, the address space information.
Justin Holewinski0497ab12013-03-30 14:29:21 +00001406bool NVPTXTargetLowering::getTgtMemIntrinsic(
1407 IntrinsicInfo &Info, const CallInst &I, unsigned Intrinsic) const {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001408 switch (Intrinsic) {
1409 default:
1410 return false;
1411
1412 case Intrinsic::nvvm_atomic_load_add_f32:
1413 Info.opc = ISD::INTRINSIC_W_CHAIN;
1414 Info.memVT = MVT::f32;
1415 Info.ptrVal = I.getArgOperand(0);
1416 Info.offset = 0;
1417 Info.vol = 0;
1418 Info.readMem = true;
1419 Info.writeMem = true;
1420 Info.align = 0;
1421 return true;
1422
1423 case Intrinsic::nvvm_atomic_load_inc_32:
1424 case Intrinsic::nvvm_atomic_load_dec_32:
1425 Info.opc = ISD::INTRINSIC_W_CHAIN;
1426 Info.memVT = MVT::i32;
1427 Info.ptrVal = I.getArgOperand(0);
1428 Info.offset = 0;
1429 Info.vol = 0;
1430 Info.readMem = true;
1431 Info.writeMem = true;
1432 Info.align = 0;
1433 return true;
1434
1435 case Intrinsic::nvvm_ldu_global_i:
1436 case Intrinsic::nvvm_ldu_global_f:
1437 case Intrinsic::nvvm_ldu_global_p:
1438
1439 Info.opc = ISD::INTRINSIC_W_CHAIN;
1440 if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
1441 Info.memVT = MVT::i32;
1442 else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
1443 Info.memVT = getPointerTy();
1444 else
1445 Info.memVT = MVT::f32;
1446 Info.ptrVal = I.getArgOperand(0);
1447 Info.offset = 0;
1448 Info.vol = 0;
1449 Info.readMem = true;
1450 Info.writeMem = false;
1451 Info.align = 0;
1452 return true;
1453
1454 }
1455 return false;
1456}
1457
1458/// isLegalAddressingMode - Return true if the addressing mode represented
1459/// by AM is legal for this target, for a load/store of the specified type.
1460/// Used to guide target specific optimizations, like loop strength reduction
1461/// (LoopStrengthReduce.cpp) and memory optimization for address mode
1462/// (CodeGenPrepare.cpp)
Justin Holewinski0497ab12013-03-30 14:29:21 +00001463bool NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
1464 Type *Ty) const {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001465
1466 // AddrMode - This represents an addressing mode of:
1467 // BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
1468 //
1469 // The legal address modes are
1470 // - [avar]
1471 // - [areg]
1472 // - [areg+immoff]
1473 // - [immAddr]
1474
1475 if (AM.BaseGV) {
1476 if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
1477 return false;
1478 return true;
1479 }
1480
1481 switch (AM.Scale) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001482 case 0: // "r", "r+i" or "i" is allowed
Justin Holewinskiae556d32012-05-04 20:18:50 +00001483 break;
1484 case 1:
Justin Holewinski0497ab12013-03-30 14:29:21 +00001485 if (AM.HasBaseReg) // "r+r+i" or "r+r" is not allowed.
Justin Holewinskiae556d32012-05-04 20:18:50 +00001486 return false;
1487 // Otherwise we have r+i.
1488 break;
1489 default:
1490 // No scale > 1 is allowed
1491 return false;
1492 }
1493 return true;
1494}
1495
1496//===----------------------------------------------------------------------===//
1497// NVPTX Inline Assembly Support
1498//===----------------------------------------------------------------------===//
1499
1500/// getConstraintType - Given a constraint letter, return the type of
1501/// constraint it is for this target.
1502NVPTXTargetLowering::ConstraintType
1503NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
1504 if (Constraint.size() == 1) {
1505 switch (Constraint[0]) {
1506 default:
1507 break;
1508 case 'r':
1509 case 'h':
1510 case 'c':
1511 case 'l':
1512 case 'f':
1513 case 'd':
1514 case '0':
1515 case 'N':
1516 return C_RegisterClass;
1517 }
1518 }
1519 return TargetLowering::getConstraintType(Constraint);
1520}
1521
Justin Holewinski0497ab12013-03-30 14:29:21 +00001522std::pair<unsigned, const TargetRegisterClass *>
Justin Holewinskiae556d32012-05-04 20:18:50 +00001523NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
Chad Rosier295bd432013-06-22 18:37:38 +00001524 MVT VT) const {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001525 if (Constraint.size() == 1) {
1526 switch (Constraint[0]) {
1527 case 'c':
1528 return std::make_pair(0U, &NVPTX::Int8RegsRegClass);
1529 case 'h':
1530 return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
1531 case 'r':
1532 return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
1533 case 'l':
1534 case 'N':
1535 return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
1536 case 'f':
1537 return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
1538 case 'd':
1539 return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
1540 }
1541 }
1542 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
1543}
1544
Justin Holewinskiae556d32012-05-04 20:18:50 +00001545/// getFunctionAlignment - Return the Log2 alignment of this function.
1546unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
1547 return 4;
1548}
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001549
1550/// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads.
1551static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
Justin Holewinski0497ab12013-03-30 14:29:21 +00001552 SmallVectorImpl<SDValue> &Results) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001553 EVT ResVT = N->getValueType(0);
Andrew Trickef9de2a2013-05-25 02:42:55 +00001554 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001555
1556 assert(ResVT.isVector() && "Vector load must have vector type");
1557
1558 // We only handle "native" vector sizes for now, e.g. <4 x double> is not
1559 // legal. We can (and should) split that into 2 loads of <2 x double> here
1560 // but I'm leaving that as a TODO for now.
1561 assert(ResVT.isSimple() && "Can only handle simple types");
1562 switch (ResVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001563 default:
1564 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001565 case MVT::v2i8:
1566 case MVT::v2i16:
1567 case MVT::v2i32:
1568 case MVT::v2i64:
1569 case MVT::v2f32:
1570 case MVT::v2f64:
1571 case MVT::v4i8:
1572 case MVT::v4i16:
1573 case MVT::v4i32:
1574 case MVT::v4f32:
1575 // This is a "native" vector type
1576 break;
1577 }
1578
1579 EVT EltVT = ResVT.getVectorElementType();
1580 unsigned NumElts = ResVT.getVectorNumElements();
1581
1582 // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
1583 // Therefore, we must ensure the type is legal. For i1 and i8, we set the
1584 // loaded type to i16 and propogate the "real" type as the memory type.
1585 bool NeedTrunc = false;
1586 if (EltVT.getSizeInBits() < 16) {
1587 EltVT = MVT::i16;
1588 NeedTrunc = true;
1589 }
1590
1591 unsigned Opcode = 0;
1592 SDVTList LdResVTs;
1593
1594 switch (NumElts) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001595 default:
1596 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001597 case 2:
1598 Opcode = NVPTXISD::LoadV2;
1599 LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
1600 break;
1601 case 4: {
1602 Opcode = NVPTXISD::LoadV4;
1603 EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
1604 LdResVTs = DAG.getVTList(ListVTs, 5);
1605 break;
1606 }
1607 }
1608
1609 SmallVector<SDValue, 8> OtherOps;
1610
1611 // Copy regular operands
1612 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1613 OtherOps.push_back(N->getOperand(i));
1614
1615 LoadSDNode *LD = cast<LoadSDNode>(N);
1616
1617 // The select routine does not have access to the LoadSDNode instance, so
1618 // pass along the extension information
1619 OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));
1620
1621 SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
1622 OtherOps.size(), LD->getMemoryVT(),
1623 LD->getMemOperand());
1624
1625 SmallVector<SDValue, 4> ScalarRes;
1626
1627 for (unsigned i = 0; i < NumElts; ++i) {
1628 SDValue Res = NewLD.getValue(i);
1629 if (NeedTrunc)
1630 Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
1631 ScalarRes.push_back(Res);
1632 }
1633
1634 SDValue LoadChain = NewLD.getValue(NumElts);
1635
Justin Holewinski0497ab12013-03-30 14:29:21 +00001636 SDValue BuildVec =
1637 DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001638
1639 Results.push_back(BuildVec);
1640 Results.push_back(LoadChain);
1641}
1642
Justin Holewinski0497ab12013-03-30 14:29:21 +00001643static void ReplaceINTRINSIC_W_CHAIN(SDNode *N, SelectionDAG &DAG,
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001644 SmallVectorImpl<SDValue> &Results) {
1645 SDValue Chain = N->getOperand(0);
1646 SDValue Intrin = N->getOperand(1);
Andrew Trickef9de2a2013-05-25 02:42:55 +00001647 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001648
1649 // Get the intrinsic ID
1650 unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
Justin Holewinski0497ab12013-03-30 14:29:21 +00001651 switch (IntrinNo) {
1652 default:
1653 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001654 case Intrinsic::nvvm_ldg_global_i:
1655 case Intrinsic::nvvm_ldg_global_f:
1656 case Intrinsic::nvvm_ldg_global_p:
1657 case Intrinsic::nvvm_ldu_global_i:
1658 case Intrinsic::nvvm_ldu_global_f:
1659 case Intrinsic::nvvm_ldu_global_p: {
1660 EVT ResVT = N->getValueType(0);
1661
1662 if (ResVT.isVector()) {
1663 // Vector LDG/LDU
1664
1665 unsigned NumElts = ResVT.getVectorNumElements();
1666 EVT EltVT = ResVT.getVectorElementType();
1667
1668 // Since LDU/LDG are target nodes, we cannot rely on DAG type legalization.
1669 // Therefore, we must ensure the type is legal. For i1 and i8, we set the
1670 // loaded type to i16 and propogate the "real" type as the memory type.
1671 bool NeedTrunc = false;
1672 if (EltVT.getSizeInBits() < 16) {
1673 EltVT = MVT::i16;
1674 NeedTrunc = true;
1675 }
1676
1677 unsigned Opcode = 0;
1678 SDVTList LdResVTs;
1679
1680 switch (NumElts) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001681 default:
1682 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001683 case 2:
Justin Holewinski0497ab12013-03-30 14:29:21 +00001684 switch (IntrinNo) {
1685 default:
1686 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001687 case Intrinsic::nvvm_ldg_global_i:
1688 case Intrinsic::nvvm_ldg_global_f:
1689 case Intrinsic::nvvm_ldg_global_p:
1690 Opcode = NVPTXISD::LDGV2;
1691 break;
1692 case Intrinsic::nvvm_ldu_global_i:
1693 case Intrinsic::nvvm_ldu_global_f:
1694 case Intrinsic::nvvm_ldu_global_p:
1695 Opcode = NVPTXISD::LDUV2;
1696 break;
1697 }
1698 LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
1699 break;
1700 case 4: {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001701 switch (IntrinNo) {
1702 default:
1703 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001704 case Intrinsic::nvvm_ldg_global_i:
1705 case Intrinsic::nvvm_ldg_global_f:
1706 case Intrinsic::nvvm_ldg_global_p:
1707 Opcode = NVPTXISD::LDGV4;
1708 break;
1709 case Intrinsic::nvvm_ldu_global_i:
1710 case Intrinsic::nvvm_ldu_global_f:
1711 case Intrinsic::nvvm_ldu_global_p:
1712 Opcode = NVPTXISD::LDUV4;
1713 break;
1714 }
1715 EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
1716 LdResVTs = DAG.getVTList(ListVTs, 5);
1717 break;
1718 }
1719 }
1720
1721 SmallVector<SDValue, 8> OtherOps;
1722
1723 // Copy regular operands
1724
1725 OtherOps.push_back(Chain); // Chain
Justin Holewinski0497ab12013-03-30 14:29:21 +00001726 // Skip operand 1 (intrinsic ID)
1727 // Others
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001728 for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
1729 OtherOps.push_back(N->getOperand(i));
1730
1731 MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
1732
Justin Holewinski0497ab12013-03-30 14:29:21 +00001733 SDValue NewLD = DAG.getMemIntrinsicNode(
1734 Opcode, DL, LdResVTs, &OtherOps[0], OtherOps.size(),
1735 MemSD->getMemoryVT(), MemSD->getMemOperand());
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001736
1737 SmallVector<SDValue, 4> ScalarRes;
1738
1739 for (unsigned i = 0; i < NumElts; ++i) {
1740 SDValue Res = NewLD.getValue(i);
1741 if (NeedTrunc)
Justin Holewinski0497ab12013-03-30 14:29:21 +00001742 Res =
1743 DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001744 ScalarRes.push_back(Res);
1745 }
1746
1747 SDValue LoadChain = NewLD.getValue(NumElts);
1748
Justin Holewinski0497ab12013-03-30 14:29:21 +00001749 SDValue BuildVec =
1750 DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0], NumElts);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001751
1752 Results.push_back(BuildVec);
1753 Results.push_back(LoadChain);
1754 } else {
1755 // i8 LDG/LDU
1756 assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
1757 "Custom handling of non-i8 ldu/ldg?");
1758
1759 // Just copy all operands as-is
1760 SmallVector<SDValue, 4> Ops;
1761 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1762 Ops.push_back(N->getOperand(i));
1763
1764 // Force output to i16
1765 SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);
1766
1767 MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);
1768
1769 // We make sure the memory type is i8, which will be used during isel
1770 // to select the proper instruction.
Justin Holewinski0497ab12013-03-30 14:29:21 +00001771 SDValue NewLD =
1772 DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, LdResVTs, &Ops[0],
1773 Ops.size(), MVT::i8, MemSD->getMemOperand());
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001774
1775 Results.push_back(NewLD.getValue(0));
1776 Results.push_back(NewLD.getValue(1));
1777 }
1778 }
1779 }
1780}
1781
Justin Holewinski0497ab12013-03-30 14:29:21 +00001782void NVPTXTargetLowering::ReplaceNodeResults(
1783 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001784 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001785 default:
1786 report_fatal_error("Unhandled custom legalization");
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001787 case ISD::LOAD:
1788 ReplaceLoadVector(N, DAG, Results);
1789 return;
1790 case ISD::INTRINSIC_W_CHAIN:
1791 ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
1792 return;
1793 }
1794}