//===-- NVPTXISelLowering.cpp - NVPTX DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that NVPTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "NVPTXISelLowering.h"
#include "NVPTX.h"
#include "NVPTXTargetMachine.h"
#include "NVPTXTargetObjectFile.h"
#include "NVPTXUtilities.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>

#undef DEBUG_TYPE
#define DEBUG_TYPE "nvptx-lower"

using namespace llvm;

static unsigned int uniqueCallSite = 0;

static cl::opt<bool>
sched4reg("nvptx-sched4reg",
          cl::desc("NVPTX Specific: schedule for register pressure"),
          cl::init(false));

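// Returns true if VT is one of the vector types that PTX can load or store
// with a single vector instruction: v2/v4 of the 8/16/32-bit integer and f32
// element types, plus v2 of i64 and f64.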
static bool IsPTXVectorType(MVT VT) {
  switch (VT.SimpleTy) {
  default: return false;
  case MVT::v2i8:
  case MVT::v4i8:
  case MVT::v2i16:
  case MVT::v4i16:
  case MVT::v2i32:
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v4f32:
  case MVT::v2f64:
    return true;
  }
}

// NVPTXTargetLowering Constructor.
NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM)
: TargetLowering(TM, new NVPTXTargetObjectFile()),
  nvTM(&TM),
  nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {

  // Always lower memset, memcpy, and memmove intrinsics to load/store
  // instructions, rather than generating calls to memset, memcpy, or memmove.
  MaxStoresPerMemset = (unsigned)0xFFFFFFFF;
  MaxStoresPerMemcpy = (unsigned)0xFFFFFFFF;
  MaxStoresPerMemmove = (unsigned)0xFFFFFFFF;

  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  // Jump is expensive. Don't create extra control flow for 'and' and 'or'
  // condition branches.
  setJumpIsExpensive(true);

  // By default, use Source scheduling.
  if (sched4reg)
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Source);

  addRegisterClass(MVT::i1, &NVPTX::Int1RegsRegClass);
  addRegisterClass(MVT::i8, &NVPTX::Int8RegsRegClass);
  addRegisterClass(MVT::i16, &NVPTX::Int16RegsRegClass);
  addRegisterClass(MVT::i32, &NVPTX::Int32RegsRegClass);
  addRegisterClass(MVT::i64, &NVPTX::Int64RegsRegClass);
  addRegisterClass(MVT::f32, &NVPTX::Float32RegsRegClass);
  addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass);

  // Operations not directly supported by NVPTX.
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i64, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (nvptxSubtarget.hasROT64()) {
    setOperationAction(ISD::ROTL, MVT::i64, Legal);
    setOperationAction(ISD::ROTR, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i64, Expand);
    setOperationAction(ISD::ROTR, MVT::i64, Expand);
  }
  if (nvptxSubtarget.hasROT32()) {
    setOperationAction(ISD::ROTL, MVT::i32, Legal);
    setOperationAction(ISD::ROTR, MVT::i32, Legal);
  } else {
    setOperationAction(ISD::ROTL, MVT::i32, Expand);
    setOperationAction(ISD::ROTR, MVT::i32, Expand);
  }

  setOperationAction(ISD::ROTL, MVT::i16, Expand);
  setOperationAction(ISD::ROTR, MVT::i16, Expand);
  setOperationAction(ISD::ROTL, MVT::i8, Expand);
  setOperationAction(ISD::ROTR, MVT::i8, Expand);
  setOperationAction(ISD::BSWAP, MVT::i16, Expand);
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  // Indirect branch is not supported.
  // This also disables jump table creation.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);

  // We want to legalize constant-related memmove and memcpy intrinsics.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

  // Turn FP extload into load/fextend.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  // Turn FP truncstore into trunc + store.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PTX does not support load / store of predicate registers.
  setOperationAction(ISD::LOAD, MVT::i1, Custom);
  setOperationAction(ISD::STORE, MVT::i1, Custom);

  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setTruncStoreAction(MVT::i64, MVT::i1, Expand);
  setTruncStoreAction(MVT::i32, MVT::i1, Expand);
  setTruncStoreAction(MVT::i16, MVT::i1, Expand);
  setTruncStoreAction(MVT::i8, MVT::i1, Expand);

  // This is legal in NVPTX.
  setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);

  // TRAP can be lowered to PTX trap.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Register custom handling for vector loads/stores.
  for (int i = MVT::FIRST_VECTOR_VALUETYPE;
       i <= MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;
    if (IsPTXVectorType(VT)) {
      setOperationAction(ISD::LOAD, VT, Custom);
      setOperationAction(ISD::STORE, VT, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, VT, Custom);
    }
  }

  // Now deduce the information based on the above-mentioned actions.
  computeRegisterProperties();
}


const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case NVPTXISD::CALL: return "NVPTXISD::CALL";
  case NVPTXISD::RET_FLAG: return "NVPTXISD::RET_FLAG";
  case NVPTXISD::Wrapper: return "NVPTXISD::Wrapper";
  case NVPTXISD::NVBuiltin: return "NVPTXISD::NVBuiltin";
  case NVPTXISD::DeclareParam: return "NVPTXISD::DeclareParam";
  case NVPTXISD::DeclareScalarParam:
    return "NVPTXISD::DeclareScalarParam";
  case NVPTXISD::DeclareRet: return "NVPTXISD::DeclareRet";
  case NVPTXISD::DeclareRetParam: return "NVPTXISD::DeclareRetParam";
  case NVPTXISD::PrintCall: return "NVPTXISD::PrintCall";
  case NVPTXISD::LoadParam: return "NVPTXISD::LoadParam";
  case NVPTXISD::StoreParam: return "NVPTXISD::StoreParam";
  case NVPTXISD::StoreParamS32: return "NVPTXISD::StoreParamS32";
  case NVPTXISD::StoreParamU32: return "NVPTXISD::StoreParamU32";
  case NVPTXISD::MoveToParam: return "NVPTXISD::MoveToParam";
  case NVPTXISD::CallArgBegin: return "NVPTXISD::CallArgBegin";
  case NVPTXISD::CallArg: return "NVPTXISD::CallArg";
  case NVPTXISD::LastCallArg: return "NVPTXISD::LastCallArg";
  case NVPTXISD::CallArgEnd: return "NVPTXISD::CallArgEnd";
  case NVPTXISD::CallVoid: return "NVPTXISD::CallVoid";
  case NVPTXISD::CallVal: return "NVPTXISD::CallVal";
  case NVPTXISD::CallSymbol: return "NVPTXISD::CallSymbol";
  case NVPTXISD::Prototype: return "NVPTXISD::Prototype";
  case NVPTXISD::MoveParam: return "NVPTXISD::MoveParam";
  case NVPTXISD::MoveRetval: return "NVPTXISD::MoveRetval";
  case NVPTXISD::MoveToRetval: return "NVPTXISD::MoveToRetval";
  case NVPTXISD::StoreRetval: return "NVPTXISD::StoreRetval";
  case NVPTXISD::PseudoUseParam: return "NVPTXISD::PseudoUseParam";
  case NVPTXISD::RETURN: return "NVPTXISD::RETURN";
  case NVPTXISD::CallSeqBegin: return "NVPTXISD::CallSeqBegin";
  case NVPTXISD::CallSeqEnd: return "NVPTXISD::CallSeqEnd";
  case NVPTXISD::LoadV2: return "NVPTXISD::LoadV2";
  case NVPTXISD::LoadV4: return "NVPTXISD::LoadV4";
  case NVPTXISD::LDGV2: return "NVPTXISD::LDGV2";
  case NVPTXISD::LDGV4: return "NVPTXISD::LDGV4";
  case NVPTXISD::LDUV2: return "NVPTXISD::LDUV2";
  case NVPTXISD::LDUV4: return "NVPTXISD::LDUV4";
  case NVPTXISD::StoreV2: return "NVPTXISD::StoreV2";
  case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4";
  }
}

bool NVPTXTargetLowering::shouldSplitVectorElementType(EVT VT) const {
  return VT == MVT::i1;
}

SDValue
NVPTXTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
  Op = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
  return DAG.getNode(NVPTXISD::Wrapper, dl, getPointerTy(), Op);
}

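// Builds the ".callprototype" string that PTX requires for indirect calls.
// As an illustrative sketch (assuming an sm_20 ABI target), a callee of LLVM
// type "i32 (float)" would produce roughly:
//   prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _);
// Integer returns and arguments narrower than 32 bits are widened to .b32.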
std::string NVPTXTargetLowering::getPrototype(Type *retTy,
                                              const ArgListTy &Args,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                              unsigned retAlignment) const {

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  std::stringstream O;
  O << "prototype_" << uniqueCallSite << " : .callprototype ";

  if (retTy->getTypeID() == Type::VoidTyID)
    O << "()";
  else {
    O << "(";
    if (isABI) {
      if (retTy->isPrimitiveType() || retTy->isIntegerTy()) {
        unsigned size = 0;
        if (const IntegerType *ITy = dyn_cast<IntegerType>(retTy)) {
          size = ITy->getBitWidth();
          if (size < 32) size = 32;
        } else {
          assert(retTy->isFloatingPointTy() &&
                 "Floating point type expected here");
          size = retTy->getPrimitiveSizeInBits();
        }

        O << ".param .b" << size << " _";
      } else if (isa<PointerType>(retTy))
        O << ".param .b" << getPointerTy().getSizeInBits() << " _";
      else {
        if ((retTy->getTypeID() == Type::StructTyID) ||
            isa<VectorType>(retTy)) {
          SmallVector<EVT, 16> vtparts;
          ComputeValueVTs(*this, retTy, vtparts);
          unsigned totalsz = 0;
          for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
            unsigned elems = 1;
            EVT elemtype = vtparts[i];
            if (vtparts[i].isVector()) {
              elems = vtparts[i].getVectorNumElements();
              elemtype = vtparts[i].getVectorElementType();
            }
            for (unsigned j = 0, je = elems; j != je; ++j) {
              unsigned sz = elemtype.getSizeInBits();
              if (elemtype.isInteger() && (sz < 8)) sz = 8;
              totalsz += sz/8;
            }
          }
          O << ".param .align " << retAlignment
            << " .b8 _[" << totalsz << "]";
        } else {
          assert(false && "Unknown return type");
        }
      }
    } else {
      SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*this, retTy, vtparts);
      unsigned idx = 0;
      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }

        for (unsigned j = 0, je = elems; j != je; ++j) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
          O << ".reg .b" << sz << " _";
          if (j < je - 1) O << ", ";
          ++idx;
        }
        if (i < e - 1)
          O << ", ";
      }
    }
    O << ") ";
  }
  O << "_ (";

  bool first = true;
  MVT thePointerTy = getPointerTy();

  for (unsigned i = 0, e = Args.size(); i != e; ++i) {
    const Type *Ty = Args[i].Ty;
    if (!first) {
      O << ", ";
    }
    first = false;

    if (Outs[i].Flags.isByVal() == false) {
      unsigned sz = 0;
      if (isa<IntegerType>(Ty)) {
        sz = cast<IntegerType>(Ty)->getBitWidth();
        if (sz < 32) sz = 32;
      } else if (isa<PointerType>(Ty))
        sz = thePointerTy.getSizeInBits();
      else
        sz = Ty->getPrimitiveSizeInBits();
      if (isABI)
        O << ".param .b" << sz << " ";
      else
        O << ".reg .b" << sz << " ";
      O << "_";
      continue;
    }
    const PointerType *PTy = dyn_cast<PointerType>(Ty);
    assert(PTy && "Param with byval attribute should be a pointer type");
    Type *ETy = PTy->getElementType();

    if (isABI) {
      unsigned align = Outs[i].Flags.getByValAlign();
      unsigned sz = getDataLayout()->getTypeAllocSize(ETy);
      O << ".param .align " << align << " .b8 ";
      O << "_";
      O << "[" << sz << "]";
      continue;
    } else {
      SmallVector<EVT, 16> vtparts;
      ComputeValueVTs(*this, ETy, vtparts);
      for (unsigned i = 0, e = vtparts.size(); i != e; ++i) {
        unsigned elems = 1;
        EVT elemtype = vtparts[i];
        if (vtparts[i].isVector()) {
          elems = vtparts[i].getVectorNumElements();
          elemtype = vtparts[i].getVectorElementType();
        }

        for (unsigned j = 0, je = elems; j != je; ++j) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
          O << ".reg .b" << sz << " ";
          O << "_";
          if (j < je - 1) O << ", ";
        }
        if (i < e - 1)
          O << ", ";
      }
      continue;
    }
  }
  O << ");";
  return O.str();
}


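// Lowers a call into the NVPTX pseudo-node sequence that the asm printer
// turns into a PTX call: declare the .param space (or virtual registers) for
// each argument via DeclareParam/DeclareScalarParam, copy the values in via
// StoreParam/MoveToParam, declare the return value via DeclareRet or
// DeclareRetParam, print the call itself (PrintCall, CallVoid,
// CallArgBegin..CallArgEnd), and finally read the results back via LoadParam.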
SDValue
NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                               SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  DebugLoc &dl = CLI.DL;
  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  ArgListTy &Args = CLI.Args;
  Type *retTy = CLI.RetTy;
  ImmutableCallSite *CS = CLI.CS;

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  SDValue tempChain = Chain;
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getIntPtrConstant(uniqueCallSite, true));
  SDValue InFlag = Chain.getValue(1);

  assert((Outs.size() == Args.size()) &&
         "Unexpected number of arguments to function call");
  unsigned paramCount = 0;
  // Declare the .param or .reg spaces needed to pass values to the function.
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    EVT VT = Outs[i].VT;

    if (Outs[i].Flags.isByVal() == false) {
      // Plain scalar
      // for ABI,    declare .param .b<size> .param<n>;
      // for nonABI, declare .reg .b<size> .param<n>;
      unsigned isReg = 1;
      if (isABI)
        isReg = 0;
      unsigned sz = VT.getSizeInBits();
      if (VT.isInteger() && (sz < 32)) sz = 32;
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue DeclareParamOps[] = { Chain,
                                    DAG.getConstant(paramCount, MVT::i32),
                                    DAG.getConstant(sz, MVT::i32),
                                    DAG.getConstant(isReg, MVT::i32),
                                    InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                          DeclareParamOps, 5);
      InFlag = Chain.getValue(1);
      SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                 DAG.getConstant(0, MVT::i32), OutVals[i],
                                 InFlag };

      unsigned opcode = NVPTXISD::StoreParam;
      if (isReg)
        opcode = NVPTXISD::MoveToParam;
      else {
        if (Outs[i].Flags.isZExt())
          opcode = NVPTXISD::StoreParamU32;
        else if (Outs[i].Flags.isSExt())
          opcode = NVPTXISD::StoreParamS32;
      }
      Chain = DAG.getNode(opcode, dl, CopyParamVTs, CopyParamOps, 5);

      InFlag = Chain.getValue(1);
      ++paramCount;
      continue;
    }
    // struct or vector
    SmallVector<EVT, 16> vtparts;
    const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty);
    assert(PTy && "Type of a byval parameter should be pointer");
    ComputeValueVTs(*this, PTy->getElementType(), vtparts);

    if (isABI) {
      // declare .param .align 16 .b8 .param<n>[<size>];
      unsigned sz = Outs[i].Flags.getByValSize();
      SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
      // The ByValAlign in Outs[i].Flags is always set at this point, so we
      // don't need to worry about natural alignment or not.
      // See TargetLowering::LowerCallTo().
      SDValue DeclareParamOps[] = { Chain,
                       DAG.getConstant(Outs[i].Flags.getByValAlign(), MVT::i32),
                                    DAG.getConstant(paramCount, MVT::i32),
                                    DAG.getConstant(sz, MVT::i32),
                                    InFlag };
      Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
                          DeclareParamOps, 5);
      InFlag = Chain.getValue(1);
      unsigned curOffset = 0;
      for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
        unsigned elems = 1;
        EVT elemtype = vtparts[j];
        if (vtparts[j].isVector()) {
          elems = vtparts[j].getVectorNumElements();
          elemtype = vtparts[j].getVectorElementType();
        }
        for (unsigned k = 0, ke = elems; k != ke; ++k) {
          unsigned sz = elemtype.getSizeInBits();
          if (elemtype.isInteger() && (sz < 8)) sz = 8;
          SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                        OutVals[i],
                                        DAG.getConstant(curOffset,
                                                        getPointerTy()));
          SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                       MachinePointerInfo(), false, false,
                                       false, 0);
          SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue CopyParamOps[] = { Chain,
                                     DAG.getConstant(paramCount, MVT::i32),
                                     DAG.getConstant(curOffset, MVT::i32),
                                     theVal, InFlag };
          Chain = DAG.getNode(NVPTXISD::StoreParam, dl, CopyParamVTs,
                              CopyParamOps, 5);
          InFlag = Chain.getValue(1);
          curOffset += sz/8;
        }
      }
      ++paramCount;
      continue;
    }
    // Non-ABI, struct or vector:
    // declare a bunch of .reg .b<size> .param<n>
    unsigned curOffset = 0;
    for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
      unsigned elems = 1;
      EVT elemtype = vtparts[j];
      if (vtparts[j].isVector()) {
        elems = vtparts[j].getVectorNumElements();
        elemtype = vtparts[j].getVectorElementType();
      }
      for (unsigned k = 0, ke = elems; k != ke; ++k) {
        unsigned sz = elemtype.getSizeInBits();
        if (elemtype.isInteger() && (sz < 32)) sz = 32;
        SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareParamOps[] = { Chain,
                                      DAG.getConstant(paramCount, MVT::i32),
                                      DAG.getConstant(sz, MVT::i32),
                                      DAG.getConstant(1, MVT::i32),
                                      InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareScalarParam, dl, DeclareParamVTs,
                            DeclareParamOps, 5);
        InFlag = Chain.getValue(1);
        SDValue srcAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[i],
                                      DAG.getConstant(curOffset,
                                                      getPointerTy()));
        SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr,
                                     MachinePointerInfo(), false, false,
                                     false, 0);
        SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32),
                                   DAG.getConstant(0, MVT::i32), theVal,
                                   InFlag };
        Chain = DAG.getNode(NVPTXISD::MoveToParam, dl, CopyParamVTs,
                            CopyParamOps, 5);
        InFlag = Chain.getValue(1);
        ++paramCount;
      }
    }
  }

  GlobalAddressSDNode *Func = dyn_cast<GlobalAddressSDNode>(Callee.getNode());
  unsigned retAlignment = 0;

  // Handle the result.
  unsigned retCount = 0;
  if (Ins.size() > 0) {
    SmallVector<EVT, 16> resvtparts;
    ComputeValueVTs(*this, retTy, resvtparts);

    // Declare one .param .align 16 .b8 func_retval0[<size>] for ABI, or
    // individual .reg .b<size> func_retval<0..> for non-ABI.
    unsigned resultsz = 0;
    for (unsigned i = 0, e = resvtparts.size(); i != e; ++i) {
      unsigned elems = 1;
      EVT elemtype = resvtparts[i];
      if (resvtparts[i].isVector()) {
        elems = resvtparts[i].getVectorNumElements();
        elemtype = resvtparts[i].getVectorElementType();
      }
      for (unsigned j = 0, je = elems; j != je; ++j) {
        unsigned sz = elemtype.getSizeInBits();
        if (isABI == false) {
          if (elemtype.isInteger() && (sz < 32)) sz = 32;
        } else {
          if (elemtype.isInteger() && (sz < 8)) sz = 8;
        }
        if (isABI == false) {
          SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
          SDValue DeclareRetOps[] = { Chain, DAG.getConstant(2, MVT::i32),
                                      DAG.getConstant(sz, MVT::i32),
                                      DAG.getConstant(retCount, MVT::i32),
                                      InFlag };
          Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                              DeclareRetOps, 5);
          InFlag = Chain.getValue(1);
          ++retCount;
        }
        resultsz += sz;
      }
    }
    if (isABI) {
      if (retTy->isPrimitiveType() || retTy->isIntegerTy() ||
          retTy->isPointerTy()) {
        // A scalar needs to be at least 32 bits wide.
        if (resultsz < 32)
          resultsz = 32;
        SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareRetOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                                    DAG.getConstant(resultsz, MVT::i32),
                                    DAG.getConstant(0, MVT::i32), InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareRet, dl, DeclareRetVTs,
                            DeclareRetOps, 5);
        InFlag = Chain.getValue(1);
      } else {
        if (Func) { // direct call
          if (!llvm::getAlign(*(CS->getCalledFunction()), 0, retAlignment))
            retAlignment = getDataLayout()->getABITypeAlignment(retTy);
        } else { // indirect call
          const CallInst *CallI = dyn_cast<CallInst>(CS->getInstruction());
          if (!llvm::getAlign(*CallI, 0, retAlignment))
            retAlignment = getDataLayout()->getABITypeAlignment(retTy);
        }
        SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue DeclareRetOps[] = { Chain,
                                    DAG.getConstant(retAlignment, MVT::i32),
                                    DAG.getConstant(resultsz/8, MVT::i32),
                                    DAG.getConstant(0, MVT::i32), InFlag };
        Chain = DAG.getNode(NVPTXISD::DeclareRetParam, dl, DeclareRetVTs,
                            DeclareRetOps, 5);
        InFlag = Chain.getValue(1);
      }
    }
  }

  if (!Func) {
    // This is the indirect-call case: PTX requires a prototype of the form
    //   prototype_0 : .callprototype (.param .b32 _) _ (.param .b32 _);
    // to be emitted, and the label has to be used as the last argument of the
    // call instruction. The prototype is embedded in a string and passed as
    // the operand of an INLINEASM SDNode.
    SDVTList InlineAsmVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    std::string proto_string = getPrototype(retTy, Args, Outs, retAlignment);
    const char *asmstr = nvTM->getManagedStrPool()->
        getManagedString(proto_string.c_str())->c_str();
    SDValue InlineAsmOps[] = { Chain,
                               DAG.getTargetExternalSymbol(asmstr,
                                                           getPointerTy()),
                               DAG.getMDNode(0),
                               DAG.getTargetConstant(0, MVT::i32), InFlag };
    Chain = DAG.getNode(ISD::INLINEASM, dl, InlineAsmVTs, InlineAsmOps, 5);
    InFlag = Chain.getValue(1);
  }
  // Op to just print "call"
  SDVTList PrintCallVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue PrintCallOps[] = { Chain,
                             DAG.getConstant(isABI ? ((Ins.size() == 0) ? 0 : 1)
                                                   : retCount, MVT::i32),
                             InFlag };
  Chain = DAG.getNode(Func ? (NVPTXISD::PrintCallUni) : (NVPTXISD::PrintCall),
                      dl, PrintCallVTs, PrintCallOps, 3);
  InFlag = Chain.getValue(1);

  // Ops to print out the function name
  SDVTList CallVoidVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallVoidOps[] = { Chain, Callee, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallVoid, dl, CallVoidVTs, CallVoidOps, 3);
  InFlag = Chain.getValue(1);

  // Ops to print out the param list
  SDVTList CallArgBeginVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgBeginOps[] = { Chain, InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgBegin, dl, CallArgBeginVTs,
                      CallArgBeginOps, 2);
  InFlag = Chain.getValue(1);

  for (unsigned i = 0, e = paramCount; i != e; ++i) {
    unsigned opcode;
    if (i == (e - 1))
      opcode = NVPTXISD::LastCallArg;
    else
      opcode = NVPTXISD::CallArg;
    SDVTList CallArgVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue CallArgOps[] = { Chain, DAG.getConstant(1, MVT::i32),
                             DAG.getConstant(i, MVT::i32),
                             InFlag };
    Chain = DAG.getNode(opcode, dl, CallArgVTs, CallArgOps, 4);
    InFlag = Chain.getValue(1);
  }
  SDVTList CallArgEndVTs = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue CallArgEndOps[] = { Chain,
                              DAG.getConstant(Func ? 1 : 0, MVT::i32),
                              InFlag };
  Chain = DAG.getNode(NVPTXISD::CallArgEnd, dl, CallArgEndVTs, CallArgEndOps,
                      3);
  InFlag = Chain.getValue(1);

  if (!Func) {
    SDVTList PrototypeVTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue PrototypeOps[] = { Chain,
                               DAG.getConstant(uniqueCallSite, MVT::i32),
                               InFlag };
    Chain = DAG.getNode(NVPTXISD::Prototype, dl, PrototypeVTs, PrototypeOps, 3);
    InFlag = Chain.getValue(1);
  }

  // Generate loads from param memory/moves from registers for result
  if (Ins.size() > 0) {
    if (isABI) {
      unsigned resoffset = 0;
      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
        unsigned sz = Ins[i].VT.getSizeInBits();
        if (Ins[i].VT.isInteger() && (sz < 8)) sz = 8;
        EVT LoadRetVTs[] = { Ins[i].VT, MVT::Other, MVT::Glue };
        SDValue LoadRetOps[] = {
          Chain,
          DAG.getConstant(1, MVT::i32),
          DAG.getConstant(resoffset, MVT::i32),
          InFlag
        };
        SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, LoadRetVTs,
                                     LoadRetOps, array_lengthof(LoadRetOps));
        Chain = retval.getValue(1);
        InFlag = retval.getValue(2);
        InVals.push_back(retval);
        resoffset += sz/8;
      }
    } else {
      SmallVector<EVT, 16> resvtparts;
      ComputeValueVTs(*this, retTy, resvtparts);

      assert(Ins.size() == resvtparts.size() &&
             "Unexpected number of return values in non-ABI case");
      unsigned paramNum = 0;
      for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
        assert(EVT(Ins[i].VT) == resvtparts[i] &&
               "Unexpected EVT type in non-ABI case");
        unsigned numelems = 1;
        EVT elemtype = Ins[i].VT;
        if (Ins[i].VT.isVector()) {
          numelems = Ins[i].VT.getVectorNumElements();
          elemtype = Ins[i].VT.getVectorElementType();
        }
        std::vector<SDValue> tempRetVals;
        for (unsigned j = 0; j < numelems; ++j) {
          EVT MoveRetVTs[] = { elemtype, MVT::Other, MVT::Glue };
          SDValue MoveRetOps[] = {
            Chain,
            DAG.getConstant(0, MVT::i32),
            DAG.getConstant(paramNum, MVT::i32),
            InFlag
          };
          SDValue retval = DAG.getNode(NVPTXISD::LoadParam, dl, MoveRetVTs,
                                       MoveRetOps, array_lengthof(MoveRetOps));
          Chain = retval.getValue(1);
          InFlag = retval.getValue(2);
          tempRetVals.push_back(retval);
          ++paramNum;
        }
        if (Ins[i].VT.isVector())
          InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, dl, Ins[i].VT,
                                       &tempRetVals[0], tempRetVals.size()));
        else
          InVals.push_back(tempRetVals[0]);
      }
    }
  }
  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getIntPtrConstant(uniqueCallSite, true),
                             DAG.getIntPtrConstant(uniqueCallSite+1, true),
                             InFlag);
  uniqueCallSite++;

  // Set isTailCall to false for now, until we figure out how to express
  // tail-call optimization in PTX.
  isTailCall = false;
  return Chain;
}

// By default, CONCAT_VECTORS is lowered by ExpandVectorBuildThroughStack()
// (see LegalizeDAG.cpp). This is slow and uses local memory.
// We use extract/insert/build-vector nodes instead, just as LegalizeOp() did
// in LLVM 2.5.
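// For example, concatenating two v2f32 values yields a v4f32 BUILD_VECTOR of
// the four extracted f32 elements.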
SDValue NVPTXTargetLowering::
LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  DebugLoc dl = Node->getDebugLoc();
  SmallVector<SDValue, 8> Ops;
  unsigned NumOperands = Node->getNumOperands();
  for (unsigned i = 0; i < NumOperands; ++i) {
    SDValue SubOp = Node->getOperand(i);
    EVT VVT = SubOp.getNode()->getValueType(0);
    EVT EltVT = VVT.getVectorElementType();
    unsigned NumSubElem = VVT.getVectorNumElements();
    for (unsigned j = 0; j < NumSubElem; ++j) {
      Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, SubOp,
                                DAG.getIntPtrConstant(j)));
    }
  }
  return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0),
                     &Ops[0], Ops.size());
}

SDValue NVPTXTargetLowering::
LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  case ISD::RETURNADDR: return SDValue();
  case ISD::FRAMEADDR: return SDValue();
  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
  case ISD::INTRINSIC_W_CHAIN: return Op;
  case ISD::BUILD_VECTOR:
  case ISD::EXTRACT_SUBVECTOR:
    return Op;
  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: return LowerLOAD(Op, DAG);
  default:
    llvm_unreachable("Custom lowering not defined for operation");
  }
}


SDValue NVPTXTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType() == MVT::i1)
    return LowerLOADi1(Op, DAG);
  else
    return SDValue();
}

// v = ld i1* addr
//   =>
// v1 = ld i8* addr
// v  = trunc v1 to i1
SDValue NVPTXTargetLowering::
LowerLOADi1(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  LoadSDNode *LD = cast<LoadSDNode>(Node);
  DebugLoc dl = Node->getDebugLoc();
  assert(LD->getExtensionType() == ISD::NON_EXTLOAD);
  assert(Node->getValueType(0) == MVT::i1 &&
         "Custom lowering for i1 load only");
  SDValue newLD = DAG.getLoad(MVT::i8, dl, LD->getChain(), LD->getBasePtr(),
                              LD->getPointerInfo(),
                              LD->isVolatile(), LD->isNonTemporal(),
                              LD->isInvariant(),
                              LD->getAlignment());
  SDValue result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, newLD);
  // The legalizer (the caller) is expecting two values from the legalized
  // load, so we build a MergeValues node for it. See ExpandUnalignedLoad()
  // in LegalizeDAG.cpp, which also uses MergeValues.
  SDValue Ops[] = { result, LD->getChain() };
  return DAG.getMergeValues(Ops, 2, dl);
}

SDValue NVPTXTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  EVT ValVT = Op.getOperand(1).getValueType();
  if (ValVT == MVT::i1)
    return LowerSTOREi1(Op, DAG);
  else if (ValVT.isVector())
    return LowerSTOREVector(Op, DAG);
  else
    return SDValue();
}

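// Splits a vector store into a target StoreV2/StoreV4 node whose operands
// are the extracted scalar elements. For example, a store of <4 x float>
// becomes a single StoreV4 with four f32 operands; elements narrower than 16
// bits are any-extended to i16 first, while the memory VT keeps the original
// element type.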
SDValue
NVPTXTargetLowering::LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  SDValue Val = N->getOperand(1);
  DebugLoc DL = N->getDebugLoc();
  EVT ValVT = Val.getValueType();

  if (ValVT.isVector()) {
    // We only handle "native" vector sizes for now, e.g. <4 x double> is not
    // legal. We can (and should) split that into 2 stores of <2 x double>
    // here, but I'm leaving that as a TODO for now.
    if (!ValVT.isSimple())
      return SDValue();
    switch (ValVT.getSimpleVT().SimpleTy) {
    default: return SDValue();
    case MVT::v2i8:
    case MVT::v2i16:
    case MVT::v2i32:
    case MVT::v2i64:
    case MVT::v2f32:
    case MVT::v2f64:
    case MVT::v4i8:
    case MVT::v4i16:
    case MVT::v4i32:
    case MVT::v4f32:
      // This is a "native" vector type
      break;
    }

    unsigned Opcode = 0;
    EVT EltVT = ValVT.getVectorElementType();
    unsigned NumElts = ValVT.getVectorNumElements();

    // Since StoreV2 is a target node, we cannot rely on DAG type
    // legalization. Therefore, we must ensure the type is legal. For i1 and
    // i8, we set the stored type to i16 and propagate the "real" type as the
    // memory type.
    bool NeedExt = false;
    if (EltVT.getSizeInBits() < 16)
      NeedExt = true;

    switch (NumElts) {
    default: return SDValue();
    case 2:
      Opcode = NVPTXISD::StoreV2;
      break;
    case 4: {
      Opcode = NVPTXISD::StoreV4;
      break;
    }
    }

    SmallVector<SDValue, 8> Ops;

    // First is the chain
    Ops.push_back(N->getOperand(0));

    // Then the split values
    for (unsigned i = 0; i < NumElts; ++i) {
      SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Val,
                                   DAG.getIntPtrConstant(i));
      if (NeedExt)
        // ANY_EXTEND is correct here since the store will only look at the
        // lower-order bits anyway.
        ExtVal = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i16, ExtVal);
      Ops.push_back(ExtVal);
    }

    // Then any remaining arguments
    for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i) {
      Ops.push_back(N->getOperand(i));
    }

    MemSDNode *MemSD = cast<MemSDNode>(N);

    SDValue NewSt = DAG.getMemIntrinsicNode(Opcode, DL,
                                            DAG.getVTList(MVT::Other), &Ops[0],
                                            Ops.size(), MemSD->getMemoryVT(),
                                            MemSD->getMemOperand());

    //return DCI.CombineTo(N, NewSt, true);
    return NewSt;
  }

  return SDValue();
}

// st i1 v, addr
//   =>
// v1 = zxt v to i8
// st i8, addr
SDValue NVPTXTargetLowering::
LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  DebugLoc dl = Node->getDebugLoc();
  StoreSDNode *ST = cast<StoreSDNode>(Node);
  SDValue Tmp1 = ST->getChain();
  SDValue Tmp2 = ST->getBasePtr();
  SDValue Tmp3 = ST->getValue();
  assert(Tmp3.getValueType() == MVT::i1 && "Custom lowering for i1 store only");
  unsigned Alignment = ST->getAlignment();
  bool isVolatile = ST->isVolatile();
  bool isNonTemporal = ST->isNonTemporal();
  Tmp3 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Tmp3);
  SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
                                ST->getPointerInfo(), isVolatile,
                                isNonTemporal, Alignment);
  return Result;
}


SDValue
NVPTXTargetLowering::getExtSymb(SelectionDAG &DAG, const char *inname, int idx,
                                EVT v) const {
  std::string *name = nvTM->getManagedStrPool()->getManagedString(inname);
  std::stringstream suffix;
  suffix << idx;
  *name += suffix.str();
  return DAG.getTargetExternalSymbol(name->c_str(), v);
}

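// Returns the symbol for parameter idx; for example, getParamSymbol(DAG, 3,
// MVT::i32) yields a TargetExternalSymbol named ".PARAM3".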
SDValue
NVPTXTargetLowering::getParamSymbol(SelectionDAG &DAG, int idx, EVT v) const {
  return getExtSymb(DAG, ".PARAM", idx, v);
}

SDValue
NVPTXTargetLowering::getParamHelpSymbol(SelectionDAG &DAG, int idx) {
  return getExtSymb(DAG, ".HLPPARAM", idx);
}

// Check to see if the kernel argument is image*_t or sampler_t.

bool llvm::isImageOrSamplerVal(const Value *arg, const Module *context) {
  static const char *const specialTypes[] = {
    "struct._image2d_t",
    "struct._image3d_t",
    "struct._sampler_t"
  };

  const Type *Ty = arg->getType();
  const PointerType *PTy = dyn_cast<PointerType>(Ty);

  if (!PTy)
    return false;

  if (!context)
    return false;

  const StructType *STy = dyn_cast<StructType>(PTy->getElementType());
  const std::string TypeName = STy && !STy->isLiteral() ? STy->getName() : "";

  for (int i = 0, e = array_lengthof(specialTypes); i != e; ++i)
    if (TypeName == specialTypes[i])
      return true;

  return false;
}

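// Lowers the formal arguments of the current function. On the ABI path each
// argument is loaded from its .param symbol (in the param address space); on
// the non-ABI path the param symbol is simply moved into a virtual register
// via NVPTXISD::MoveParam.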
SDValue
NVPTXTargetLowering::LowerFormalArguments(SDValue Chain,
                                          CallingConv::ID CallConv,
                                          bool isVarArg,
                                     const SmallVectorImpl<ISD::InputArg> &Ins,
                                          DebugLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const DataLayout *TD = getDataLayout();

  const Function *F = MF.getFunction();
  const AttributeSet &PAL = F->getAttributes();

  SDValue Root = DAG.getRoot();
  std::vector<SDValue> OutChains;

  bool isKernel = llvm::isKernelFunction(*F);
  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  std::vector<Type *> argTypes;
  std::vector<const Argument *> theArgs;
  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
       I != E; ++I) {
    theArgs.push_back(I);
    argTypes.push_back(I->getType());
  }
  assert(argTypes.size() == Ins.size() &&
         "Ins types and function types did not match");

  int idx = 0;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i, ++idx) {
    Type *Ty = argTypes[i];
    EVT ObjectVT = getValueType(Ty);
    assert(ObjectVT == Ins[i].VT &&
           "Ins type did not match function type");

    // If the kernel argument is image*_t or sampler_t, convert it to
    // an i32 constant holding the parameter position. This can later
    // be matched in the AsmPrinter to output the correct mangled name.
    if (isImageOrSamplerVal(theArgs[i],
                            (theArgs[i]->getParent() ?
                             theArgs[i]->getParent()->getParent() : 0))) {
      assert(isKernel && "Only kernels can have image/sampler params");
      InVals.push_back(DAG.getConstant(i+1, MVT::i32));
      continue;
    }

    if (theArgs[i]->use_empty()) {
      // argument is dead
      InVals.push_back(DAG.getNode(ISD::UNDEF, dl, ObjectVT));
      continue;
    }

    // In the following cases, assign a node order of "idx+1"
    // to newly created nodes. The SDNodes for params have to
    // appear in the same order as their order of appearance
    // in the original function. "idx+1" holds that order.
    if (PAL.hasAttribute(i+1, Attribute::ByVal) == false) {
      // A plain scalar.
      if (isABI || isKernel) {
        // If ABI, load from the param symbol.
        SDValue Arg = getParamSymbol(DAG, idx);
        // Conjure up a value that we can get the address space from.
        // FIXME: Using a constant here is a hack.
        Value *srcValue = Constant::getNullValue(PointerType::get(
                                       ObjectVT.getTypeForEVT(F->getContext()),
                                       llvm::ADDRESS_SPACE_PARAM));
        SDValue p = DAG.getLoad(ObjectVT, dl, Root, Arg,
                                MachinePointerInfo(srcValue), false, false,
                                false,
                                TD->getABITypeAlignment(ObjectVT.getTypeForEVT(
                                                        F->getContext())));
        if (p.getNode())
          DAG.AssignOrdering(p.getNode(), idx+1);
        InVals.push_back(p);
      } else {
        // If no ABI, just move the param symbol.
        SDValue Arg = getParamSymbol(DAG, idx, ObjectVT);
        SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
        if (p.getNode())
          DAG.AssignOrdering(p.getNode(), idx+1);
        InVals.push_back(p);
      }
      continue;
    }

    // Param has ByVal attribute
    if (isABI || isKernel) {
      // Return MoveParam(param symbol).
      // Ideally, the param symbol can be returned directly,
      // but when the SDNode builder decides to use it in a CopyToReg(),
      // machine instruction selection fails because TargetExternalSymbol
      // (not lowered) is target dependent, and CopyToReg assumes
      // the source is lowered.
      SDValue Arg = getParamSymbol(DAG, idx, getPointerTy());
      SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
      if (p.getNode())
        DAG.AssignOrdering(p.getNode(), idx+1);
      if (isKernel)
        InVals.push_back(p);
      else {
        SDValue p2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
                    DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, MVT::i32),
                                 p);
        InVals.push_back(p2);
      }
    } else {
      // Have to move a set of param symbols to registers,
      // store them locally, and return the local pointer in InVals.
      const PointerType *elemPtrType = dyn_cast<PointerType>(argTypes[i]);
      assert(elemPtrType &&
             "Byval parameter should be a pointer type");
      Type *elemType = elemPtrType->getElementType();
      // Compute the constituent parts
      SmallVector<EVT, 16> vtparts;
      SmallVector<uint64_t, 16> offsets;
      ComputeValueVTs(*this, elemType, vtparts, &offsets, 0);
      unsigned totalsize = 0;
      for (unsigned j = 0, je = vtparts.size(); j != je; ++j)
        totalsize += vtparts[j].getStoreSizeInBits();
      SDValue localcopy = DAG.getFrameIndex(MF.getFrameInfo()->
                                     CreateStackObject(totalsize/8, 16, false),
                                            getPointerTy());
      unsigned sizesofar = 0;
      std::vector<SDValue> theChains;
      for (unsigned j = 0, je = vtparts.size(); j != je; ++j) {
        unsigned numElems = 1;
        if (vtparts[j].isVector()) numElems = vtparts[j].getVectorNumElements();
        for (unsigned k = 0, ke = numElems; k != ke; ++k) {
          EVT tmpvt = vtparts[j];
          if (tmpvt.isVector()) tmpvt = tmpvt.getVectorElementType();
          SDValue arg = DAG.getNode(NVPTXISD::MoveParam, dl, tmpvt,
                                    getParamSymbol(DAG, idx, tmpvt));
          SDValue addr = DAG.getNode(ISD::ADD, dl, getPointerTy(), localcopy,
                                    DAG.getConstant(sizesofar, getPointerTy()));
          theChains.push_back(DAG.getStore(Chain, dl, arg, addr,
                                       MachinePointerInfo(), false, false, 0));
          sizesofar += tmpvt.getStoreSizeInBits()/8;
          ++idx;
        }
      }
      --idx;
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &theChains[0],
                          theChains.size());
      InVals.push_back(localcopy);
    }
  }

  // Clang will check explicit varargs and issue an error if present. However,
  // Clang will let code with an implicit vararg list like f() pass.
  // We treat this case as if the arg list is empty.
  //if (F.isVarArg()) {
  //  assert(0 && "VarArg not supported yet!");
  //}

  if (!OutChains.empty())
    DAG.setRoot(DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                            &OutChains[0], OutChains.size()));

  return Chain;
}

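// Lowers a return: on the ABI path each scalar (or extracted vector element)
// of the return value is emitted as a StoreRetval at its byte offset; on the
// non-ABI path it becomes a MoveToRetval to return register idx. For
// example, returning <2 x float> under the ABI produces two StoreRetval
// nodes at offsets 0 and 4.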
SDValue
NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                                 bool isVarArg,
                                 const SmallVectorImpl<ISD::OutputArg> &Outs,
                                 const SmallVectorImpl<SDValue> &OutVals,
                                 DebugLoc dl, SelectionDAG &DAG) const {

  bool isABI = (nvptxSubtarget.getSmVersion() >= 20);

  unsigned sizesofar = 0;
  unsigned idx = 0;
  for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
    SDValue theVal = OutVals[i];
    EVT theValType = theVal.getValueType();
    unsigned numElems = 1;
    if (theValType.isVector()) numElems = theValType.getVectorNumElements();
    for (unsigned j = 0, je = numElems; j != je; ++j) {
      SDValue tmpval = theVal;
      if (theValType.isVector())
        tmpval = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                             theValType.getVectorElementType(),
                             tmpval, DAG.getIntPtrConstant(j));
      Chain = DAG.getNode(isABI ? NVPTXISD::StoreRetval
                                : NVPTXISD::MoveToRetval,
                          dl, MVT::Other,
                          Chain,
                          DAG.getConstant(isABI ? sizesofar : idx, MVT::i32),
                          tmpval);
      if (theValType.isVector())
        sizesofar += theValType.getVectorElementType().getStoreSizeInBits()/8;
      else
        sizesofar += theValType.getStoreSizeInBits()/8;
      ++idx;
    }
  }

  return DAG.getNode(NVPTXISD::RET_FLAG, dl, MVT::Other, Chain);
}

void
NVPTXTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                  std::string &Constraint,
                                                  std::vector<SDValue> &Ops,
                                                  SelectionDAG &DAG) const {
  if (Constraint.length() > 1)
    return;
  else
    TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// NVPTX supports vectors of legal types of any length in intrinsics, because
// the NVPTX-specific type legalizer will legalize them to a PTX-supported
// length.
bool
NVPTXTargetLowering::isTypeSupportedInIntrinsic(MVT VT) const {
  if (isTypeLegal(VT))
    return true;
  if (VT.isVector()) {
    MVT eVT = VT.getVectorElementType();
    if (isTypeLegal(eVT))
      return true;
  }
  return false;
}


// llvm.ptx.memcpy.const and llvm.ptx.memmove.const need to be modeled as
// TgtMemIntrinsic because we need information that is only available in the
// "Value" type of the destination pointer. In particular, the address space
// information.
bool
NVPTXTargetLowering::getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
                                        unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;

  case Intrinsic::nvvm_atomic_load_add_f32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::f32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = true;
    Info.align = 0;
    return true;

  case Intrinsic::nvvm_atomic_load_inc_32:
  case Intrinsic::nvvm_atomic_load_dec_32:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::i32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = true;
    Info.align = 0;
    return true;

  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p:
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    if (Intrinsic == Intrinsic::nvvm_ldu_global_i)
      Info.memVT = MVT::i32;
    else if (Intrinsic == Intrinsic::nvvm_ldu_global_p)
      Info.memVT = getPointerTy();
    else
      Info.memVT = MVT::f32;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.vol = 0;
    Info.readMem = true;
    Info.writeMem = false;
    Info.align = 0;
    return true;

  }
  return false;
}

/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
/// Used to guide target-specific optimizations, like loop strength reduction
/// (LoopStrengthReduce.cpp) and memory optimization for address mode
/// (CodeGenPrepare.cpp).
bool
NVPTXTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                           Type *Ty) const {

  // AddrMode - This represents an addressing mode of:
  //    BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
  //
  // The legal address modes are
  // - [avar]
  // - [areg]
  // - [areg+immoff]
  // - [immAddr]

  if (AM.BaseGV) {
    if (AM.BaseOffs || AM.HasBaseReg || AM.Scale)
      return false;
    return true;
  }

  switch (AM.Scale) {
  case 0:  // "r", "r+i" or "i" is allowed
    break;
  case 1:
    if (AM.HasBaseReg)  // "r+r+i" or "r+r" is not allowed.
      return false;
    // Otherwise we have r+i.
    break;
  default:
    // No scale > 1 is allowed
    return false;
  }
  return true;
}

//===----------------------------------------------------------------------===//
// NVPTX Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
NVPTXTargetLowering::ConstraintType
NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'r':
    case 'h':
    case 'c':
    case 'l':
    case 'f':
    case 'd':
    case '0':
    case 'N':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}


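// Maps a single-letter inline asm constraint to a PTX register class. For
// example (illustrative), in
//   asm("mov.b32 %0, %1;" : "=r"(dst) : "r"(src));
// the 'r' constraint selects the 32-bit Int32Regs class.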
std::pair<unsigned, const TargetRegisterClass*>
NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                  EVT VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'c':
      return std::make_pair(0U, &NVPTX::Int8RegsRegClass);
    case 'h':
      return std::make_pair(0U, &NVPTX::Int16RegsRegClass);
    case 'r':
      return std::make_pair(0U, &NVPTX::Int32RegsRegClass);
    case 'l':
    case 'N':
      return std::make_pair(0U, &NVPTX::Int64RegsRegClass);
    case 'f':
      return std::make_pair(0U, &NVPTX::Float32RegsRegClass);
    case 'd':
      return std::make_pair(0U, &NVPTX::Float64RegsRegClass);
    }
  }
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}


/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const {
  return 4;
}

/// ReplaceLoadVector - Convert vector loads into multi-output scalar loads.
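/// For example, a load of <2 x i32> becomes a single NVPTXISD::LoadV2 node
/// producing two i32 results plus a chain, which are then recombined into a
/// BUILD_VECTOR.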
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG,
                              SmallVectorImpl<SDValue> &Results) {
  EVT ResVT = N->getValueType(0);
  DebugLoc DL = N->getDebugLoc();

  assert(ResVT.isVector() && "Vector load must have vector type");

  // We only handle "native" vector sizes for now, e.g. <4 x double> is not
  // legal. We can (and should) split that into 2 loads of <2 x double> here,
  // but I'm leaving that as a TODO for now.
  assert(ResVT.isSimple() && "Can only handle simple types");
  switch (ResVT.getSimpleVT().SimpleTy) {
  default: return;
  case MVT::v2i8:
  case MVT::v2i16:
  case MVT::v2i32:
  case MVT::v2i64:
  case MVT::v2f32:
  case MVT::v2f64:
  case MVT::v4i8:
  case MVT::v4i16:
  case MVT::v4i32:
  case MVT::v4f32:
    // This is a "native" vector type
    break;
  }

  EVT EltVT = ResVT.getVectorElementType();
  unsigned NumElts = ResVT.getVectorNumElements();

  // Since LoadV2 is a target node, we cannot rely on DAG type legalization.
  // Therefore, we must ensure the type is legal. For i1 and i8, we set the
  // loaded type to i16 and propagate the "real" type as the memory type.
  bool NeedTrunc = false;
  if (EltVT.getSizeInBits() < 16) {
    EltVT = MVT::i16;
    NeedTrunc = true;
  }

  unsigned Opcode = 0;
  SDVTList LdResVTs;

  switch (NumElts) {
  default: return;
  case 2:
    Opcode = NVPTXISD::LoadV2;
    LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
    break;
  case 4: {
    Opcode = NVPTXISD::LoadV4;
    EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
    LdResVTs = DAG.getVTList(ListVTs, 5);
    break;
  }
  }

  SmallVector<SDValue, 8> OtherOps;

  // Copy regular operands
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
    OtherOps.push_back(N->getOperand(i));

  LoadSDNode *LD = cast<LoadSDNode>(N);

  // The select routine does not have access to the LoadSDNode instance, so
  // pass along the extension information
  OtherOps.push_back(DAG.getIntPtrConstant(LD->getExtensionType()));

  SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs, &OtherOps[0],
                                          OtherOps.size(), LD->getMemoryVT(),
                                          LD->getMemOperand());

  SmallVector<SDValue, 4> ScalarRes;

  for (unsigned i = 0; i < NumElts; ++i) {
    SDValue Res = NewLD.getValue(i);
    if (NeedTrunc)
      Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(), Res);
    ScalarRes.push_back(Res);
  }

  SDValue LoadChain = NewLD.getValue(NumElts);

  SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT, &ScalarRes[0],
                                 NumElts);

  Results.push_back(BuildVec);
  Results.push_back(LoadChain);
}

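// Replaces an ldg/ldu INTRINSIC_W_CHAIN node: vector results become the
// corresponding LDGV2/LDGV4 or LDUV2/LDUV4 target node, while a scalar i8
// result is widened to i16 (keeping i8 as the memory type so isel picks the
// right instruction).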
static void ReplaceINTRINSIC_W_CHAIN(SDNode *N,
                                     SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &Results) {
  SDValue Chain = N->getOperand(0);
  SDValue Intrin = N->getOperand(1);
  DebugLoc DL = N->getDebugLoc();

  // Get the intrinsic ID
  unsigned IntrinNo = cast<ConstantSDNode>(Intrin.getNode())->getZExtValue();
  switch (IntrinNo) {
  default: return;
  case Intrinsic::nvvm_ldg_global_i:
  case Intrinsic::nvvm_ldg_global_f:
  case Intrinsic::nvvm_ldg_global_p:
  case Intrinsic::nvvm_ldu_global_i:
  case Intrinsic::nvvm_ldu_global_f:
  case Intrinsic::nvvm_ldu_global_p: {
    EVT ResVT = N->getValueType(0);

    if (ResVT.isVector()) {
      // Vector LDG/LDU

      unsigned NumElts = ResVT.getVectorNumElements();
      EVT EltVT = ResVT.getVectorElementType();

      // Since LDU/LDG are target nodes, we cannot rely on DAG type
      // legalization. Therefore, we must ensure the type is legal. For i1
      // and i8, we set the loaded type to i16 and propagate the "real" type
      // as the memory type.
      bool NeedTrunc = false;
      if (EltVT.getSizeInBits() < 16) {
        EltVT = MVT::i16;
        NeedTrunc = true;
      }

      unsigned Opcode = 0;
      SDVTList LdResVTs;

      switch (NumElts) {
      default: return;
      case 2:
        switch (IntrinNo) {
        default: return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV2;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV2;
          break;
        }
        LdResVTs = DAG.getVTList(EltVT, EltVT, MVT::Other);
        break;
      case 4: {
        switch (IntrinNo) {
        default: return;
        case Intrinsic::nvvm_ldg_global_i:
        case Intrinsic::nvvm_ldg_global_f:
        case Intrinsic::nvvm_ldg_global_p:
          Opcode = NVPTXISD::LDGV4;
          break;
        case Intrinsic::nvvm_ldu_global_i:
        case Intrinsic::nvvm_ldu_global_f:
        case Intrinsic::nvvm_ldu_global_p:
          Opcode = NVPTXISD::LDUV4;
          break;
        }
        EVT ListVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other };
        LdResVTs = DAG.getVTList(ListVTs, 5);
        break;
      }
      }

      SmallVector<SDValue, 8> OtherOps;

      // Copy regular operands

      OtherOps.push_back(Chain); // Chain
      // Skip operand 1 (intrinsic ID)
      // Others
      for (unsigned i = 2, e = N->getNumOperands(); i != e; ++i)
        OtherOps.push_back(N->getOperand(i));

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      SDValue NewLD = DAG.getMemIntrinsicNode(Opcode, DL, LdResVTs,
                                              &OtherOps[0], OtherOps.size(),
                                              MemSD->getMemoryVT(),
                                              MemSD->getMemOperand());

      SmallVector<SDValue, 4> ScalarRes;

      for (unsigned i = 0; i < NumElts; ++i) {
        SDValue Res = NewLD.getValue(i);
        if (NeedTrunc)
          Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT.getVectorElementType(),
                            Res);
        ScalarRes.push_back(Res);
      }

      SDValue LoadChain = NewLD.getValue(NumElts);

      SDValue BuildVec = DAG.getNode(ISD::BUILD_VECTOR, DL, ResVT,
                                     &ScalarRes[0], NumElts);

      Results.push_back(BuildVec);
      Results.push_back(LoadChain);
    } else {
      // i8 LDG/LDU
      assert(ResVT.isSimple() && ResVT.getSimpleVT().SimpleTy == MVT::i8 &&
             "Custom handling of non-i8 ldu/ldg?");

      // Just copy all operands as-is
      SmallVector<SDValue, 4> Ops;
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        Ops.push_back(N->getOperand(i));

      // Force the output to i16
      SDVTList LdResVTs = DAG.getVTList(MVT::i16, MVT::Other);

      MemIntrinsicSDNode *MemSD = cast<MemIntrinsicSDNode>(N);

      // We make sure the memory type is i8, which will be used during isel
      // to select the proper instruction.
      SDValue NewLD = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL,
                                              LdResVTs, &Ops[0],
                                              Ops.size(), MVT::i8,
                                              MemSD->getMemOperand());

      Results.push_back(NewLD.getValue(0));
      Results.push_back(NewLD.getValue(1));
    }
  }
  }
}

void NVPTXTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default: report_fatal_error("Unhandled custom legalization");
  case ISD::LOAD:
    ReplaceLoadVector(N, DAG, Results);
    return;
  case ISD::INTRINSIC_W_CHAIN:
    ReplaceINTRINSIC_W_CHAIN(N, DAG, Results);
    return;
  }
}