blob: 0dfbf10cd9d7da43b4f6f51bddccc96d707b9b62 [file] [log] [blame]
Justin Holewinskiae556d32012-05-04 20:18:50 +00001//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the NVPTX target.
11//
12//===----------------------------------------------------------------------===//
13
Justin Holewinskiae556d32012-05-04 20:18:50 +000014#include "NVPTXISelDAGToDAG.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000015#include "llvm/IR/GlobalValue.h"
16#include "llvm/IR/Instructions.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000017#include "llvm/Support/CommandLine.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000018#include "llvm/Support/Debug.h"
19#include "llvm/Support/ErrorHandling.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000020#include "llvm/Support/raw_ostream.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000021#include "llvm/Target/TargetIntrinsicInfo.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000022
Justin Holewinskiae556d32012-05-04 20:18:50 +000023using namespace llvm;
24
Chandler Carruth84e68b22014-04-22 02:41:26 +000025#define DEBUG_TYPE "nvptx-isel"
26
Justin Holewinskieafe26d2014-06-27 18:35:37 +000027unsigned FMAContractLevel = 0;
28
29static cl::opt<unsigned, true>
30FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden,
31 cl::desc("NVPTX Specific: FMA contraction (0: don't do it"
32 " 1: do it 2: do it aggressively"),
33 cl::location(FMAContractLevel),
34 cl::init(2));
Justin Holewinskiae556d32012-05-04 20:18:50 +000035
Justin Holewinski0497ab12013-03-30 14:29:21 +000036static cl::opt<int> UsePrecDivF32(
Nadav Rotem7f27e0b2013-10-18 23:38:13 +000037 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
Justin Holewinski0497ab12013-03-30 14:29:21 +000038 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
39 " IEEE Compliant F32 div.rnd if avaiable."),
40 cl::init(2));
Justin Holewinskiae556d32012-05-04 20:18:50 +000041
Justin Holewinski48f4ad32013-05-21 16:51:30 +000042static cl::opt<bool>
Nadav Rotem7f27e0b2013-10-18 23:38:13 +000043UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
Justin Holewinski48f4ad32013-05-21 16:51:30 +000044 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
45 cl::init(true));
46
Justin Holewinskicd069e62013-07-22 12:18:04 +000047static cl::opt<bool>
Nadav Rotem7f27e0b2013-10-18 23:38:13 +000048FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
Justin Holewinskicd069e62013-07-22 12:18:04 +000049 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
50 cl::init(false));
51
52
Justin Holewinskiae556d32012-05-04 20:18:50 +000053/// createNVPTXISelDag - This pass converts a legalized DAG into a
54/// NVPTX-specific DAG, ready for instruction scheduling.
55FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
56 llvm::CodeGenOpt::Level OptLevel) {
57 return new NVPTXDAGToDAGISel(TM, OptLevel);
58}
59
Justin Holewinskiae556d32012-05-04 20:18:50 +000060NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
61 CodeGenOpt::Level OptLevel)
Justin Holewinski0497ab12013-03-30 14:29:21 +000062 : SelectionDAGISel(tm, OptLevel),
63 Subtarget(tm.getSubtarget<NVPTXSubtarget>()) {
Justin Holewinskiae556d32012-05-04 20:18:50 +000064
Justin Holewinski0497ab12013-03-30 14:29:21 +000065 doFMAF32 = (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel >= 1);
66 doFMAF64 = (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel >= 1);
67 doFMAF32AGG =
68 (OptLevel > 0) && Subtarget.hasFMAF32() && (FMAContractLevel == 2);
69 doFMAF64AGG =
70 (OptLevel > 0) && Subtarget.hasFMAF64() && (FMAContractLevel == 2);
Justin Holewinskiae556d32012-05-04 20:18:50 +000071
Justin Holewinskicd069e62013-07-22 12:18:04 +000072 allowFMA = (FMAContractLevel >= 1);
Benjamin Kramera25a61b2012-05-05 11:22:02 +000073
Justin Holewinskiae556d32012-05-04 20:18:50 +000074 doMulWide = (OptLevel > 0);
Justin Holewinskicd069e62013-07-22 12:18:04 +000075}
Justin Holewinskiae556d32012-05-04 20:18:50 +000076
Justin Holewinskicd069e62013-07-22 12:18:04 +000077int NVPTXDAGToDAGISel::getDivF32Level() const {
78 if (UsePrecDivF32.getNumOccurrences() > 0) {
79 // If nvptx-prec-div32=N is used on the command-line, always honor it
80 return UsePrecDivF32;
81 } else {
82 // Otherwise, use div.approx if fast math is enabled
83 if (TM.Options.UnsafeFPMath)
84 return 0;
85 else
86 return 2;
87 }
88}
Justin Holewinskiae556d32012-05-04 20:18:50 +000089
Justin Holewinskicd069e62013-07-22 12:18:04 +000090bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
91 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
92 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
93 return UsePrecSqrtF32;
94 } else {
95 // Otherwise, use sqrt.approx if fast math is enabled
96 if (TM.Options.UnsafeFPMath)
97 return false;
98 else
99 return true;
100 }
101}
102
103bool NVPTXDAGToDAGISel::useF32FTZ() const {
104 if (FtzEnabled.getNumOccurrences() > 0) {
105 // If nvptx-f32ftz is used on the command-line, always honor it
106 return FtzEnabled;
107 } else {
108 const Function *F = MF->getFunction();
109 // Otherwise, check for an nvptx-f32ftz attribute on the function
110 if (F->hasFnAttribute("nvptx-f32ftz"))
111 return (F->getAttributes().getAttribute(AttributeSet::FunctionIndex,
112 "nvptx-f32ftz")
113 .getValueAsString() == "true");
114 else
115 return false;
116 }
Justin Holewinskiae556d32012-05-04 20:18:50 +0000117}
118
119/// Select - Select instructions not customized! Used for
120/// expanded, promoted and normal instructions.
Justin Holewinski0497ab12013-03-30 14:29:21 +0000121SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000122
Tim Northover31d093c2013-09-22 08:21:56 +0000123 if (N->isMachineOpcode()) {
124 N->setNodeId(-1);
Craig Topper062a2ba2014-04-25 05:30:21 +0000125 return nullptr; // Already selected.
Tim Northover31d093c2013-09-22 08:21:56 +0000126 }
Justin Holewinskiae556d32012-05-04 20:18:50 +0000127
Craig Topper062a2ba2014-04-25 05:30:21 +0000128 SDNode *ResNode = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000129 switch (N->getOpcode()) {
130 case ISD::LOAD:
131 ResNode = SelectLoad(N);
132 break;
133 case ISD::STORE:
134 ResNode = SelectStore(N);
135 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000136 case NVPTXISD::LoadV2:
137 case NVPTXISD::LoadV4:
138 ResNode = SelectLoadVector(N);
139 break;
140 case NVPTXISD::LDGV2:
141 case NVPTXISD::LDGV4:
142 case NVPTXISD::LDUV2:
143 case NVPTXISD::LDUV4:
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000144 ResNode = SelectLDGLDU(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000145 break;
146 case NVPTXISD::StoreV2:
147 case NVPTXISD::StoreV4:
148 ResNode = SelectStoreVector(N);
149 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000150 case NVPTXISD::LoadParam:
151 case NVPTXISD::LoadParamV2:
152 case NVPTXISD::LoadParamV4:
153 ResNode = SelectLoadParam(N);
154 break;
155 case NVPTXISD::StoreRetval:
156 case NVPTXISD::StoreRetvalV2:
157 case NVPTXISD::StoreRetvalV4:
158 ResNode = SelectStoreRetval(N);
159 break;
160 case NVPTXISD::StoreParam:
161 case NVPTXISD::StoreParamV2:
162 case NVPTXISD::StoreParamV4:
163 case NVPTXISD::StoreParamS32:
164 case NVPTXISD::StoreParamU32:
165 ResNode = SelectStoreParam(N);
166 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000167 case ISD::INTRINSIC_WO_CHAIN:
168 ResNode = SelectIntrinsicNoChain(N);
169 break;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000170 case ISD::INTRINSIC_W_CHAIN:
171 ResNode = SelectIntrinsicChain(N);
172 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000173 case NVPTXISD::Tex1DFloatI32:
174 case NVPTXISD::Tex1DFloatFloat:
175 case NVPTXISD::Tex1DFloatFloatLevel:
176 case NVPTXISD::Tex1DFloatFloatGrad:
177 case NVPTXISD::Tex1DI32I32:
178 case NVPTXISD::Tex1DI32Float:
179 case NVPTXISD::Tex1DI32FloatLevel:
180 case NVPTXISD::Tex1DI32FloatGrad:
181 case NVPTXISD::Tex1DArrayFloatI32:
182 case NVPTXISD::Tex1DArrayFloatFloat:
183 case NVPTXISD::Tex1DArrayFloatFloatLevel:
184 case NVPTXISD::Tex1DArrayFloatFloatGrad:
185 case NVPTXISD::Tex1DArrayI32I32:
186 case NVPTXISD::Tex1DArrayI32Float:
187 case NVPTXISD::Tex1DArrayI32FloatLevel:
188 case NVPTXISD::Tex1DArrayI32FloatGrad:
189 case NVPTXISD::Tex2DFloatI32:
190 case NVPTXISD::Tex2DFloatFloat:
191 case NVPTXISD::Tex2DFloatFloatLevel:
192 case NVPTXISD::Tex2DFloatFloatGrad:
193 case NVPTXISD::Tex2DI32I32:
194 case NVPTXISD::Tex2DI32Float:
195 case NVPTXISD::Tex2DI32FloatLevel:
196 case NVPTXISD::Tex2DI32FloatGrad:
197 case NVPTXISD::Tex2DArrayFloatI32:
198 case NVPTXISD::Tex2DArrayFloatFloat:
199 case NVPTXISD::Tex2DArrayFloatFloatLevel:
200 case NVPTXISD::Tex2DArrayFloatFloatGrad:
201 case NVPTXISD::Tex2DArrayI32I32:
202 case NVPTXISD::Tex2DArrayI32Float:
203 case NVPTXISD::Tex2DArrayI32FloatLevel:
204 case NVPTXISD::Tex2DArrayI32FloatGrad:
205 case NVPTXISD::Tex3DFloatI32:
206 case NVPTXISD::Tex3DFloatFloat:
207 case NVPTXISD::Tex3DFloatFloatLevel:
208 case NVPTXISD::Tex3DFloatFloatGrad:
209 case NVPTXISD::Tex3DI32I32:
210 case NVPTXISD::Tex3DI32Float:
211 case NVPTXISD::Tex3DI32FloatLevel:
212 case NVPTXISD::Tex3DI32FloatGrad:
213 ResNode = SelectTextureIntrinsic(N);
214 break;
215 case NVPTXISD::Suld1DI8Trap:
216 case NVPTXISD::Suld1DI16Trap:
217 case NVPTXISD::Suld1DI32Trap:
218 case NVPTXISD::Suld1DV2I8Trap:
219 case NVPTXISD::Suld1DV2I16Trap:
220 case NVPTXISD::Suld1DV2I32Trap:
221 case NVPTXISD::Suld1DV4I8Trap:
222 case NVPTXISD::Suld1DV4I16Trap:
223 case NVPTXISD::Suld1DV4I32Trap:
224 case NVPTXISD::Suld1DArrayI8Trap:
225 case NVPTXISD::Suld1DArrayI16Trap:
226 case NVPTXISD::Suld1DArrayI32Trap:
227 case NVPTXISD::Suld1DArrayV2I8Trap:
228 case NVPTXISD::Suld1DArrayV2I16Trap:
229 case NVPTXISD::Suld1DArrayV2I32Trap:
230 case NVPTXISD::Suld1DArrayV4I8Trap:
231 case NVPTXISD::Suld1DArrayV4I16Trap:
232 case NVPTXISD::Suld1DArrayV4I32Trap:
233 case NVPTXISD::Suld2DI8Trap:
234 case NVPTXISD::Suld2DI16Trap:
235 case NVPTXISD::Suld2DI32Trap:
236 case NVPTXISD::Suld2DV2I8Trap:
237 case NVPTXISD::Suld2DV2I16Trap:
238 case NVPTXISD::Suld2DV2I32Trap:
239 case NVPTXISD::Suld2DV4I8Trap:
240 case NVPTXISD::Suld2DV4I16Trap:
241 case NVPTXISD::Suld2DV4I32Trap:
242 case NVPTXISD::Suld2DArrayI8Trap:
243 case NVPTXISD::Suld2DArrayI16Trap:
244 case NVPTXISD::Suld2DArrayI32Trap:
245 case NVPTXISD::Suld2DArrayV2I8Trap:
246 case NVPTXISD::Suld2DArrayV2I16Trap:
247 case NVPTXISD::Suld2DArrayV2I32Trap:
248 case NVPTXISD::Suld2DArrayV4I8Trap:
249 case NVPTXISD::Suld2DArrayV4I16Trap:
250 case NVPTXISD::Suld2DArrayV4I32Trap:
251 case NVPTXISD::Suld3DI8Trap:
252 case NVPTXISD::Suld3DI16Trap:
253 case NVPTXISD::Suld3DI32Trap:
254 case NVPTXISD::Suld3DV2I8Trap:
255 case NVPTXISD::Suld3DV2I16Trap:
256 case NVPTXISD::Suld3DV2I32Trap:
257 case NVPTXISD::Suld3DV4I8Trap:
258 case NVPTXISD::Suld3DV4I16Trap:
259 case NVPTXISD::Suld3DV4I32Trap:
260 ResNode = SelectSurfaceIntrinsic(N);
261 break;
Justin Holewinskica7a4f12014-06-27 18:35:27 +0000262 case ISD::AND:
263 case ISD::SRA:
264 case ISD::SRL:
265 // Try to select BFE
266 ResNode = SelectBFE(N);
267 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000268 case ISD::ADDRSPACECAST:
269 ResNode = SelectAddrSpaceCast(N);
270 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000271 default:
272 break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000273 }
274 if (ResNode)
275 return ResNode;
276 return SelectCode(N);
277}
278
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000279SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
280 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
281 switch (IID) {
282 default:
283 return NULL;
284 case Intrinsic::nvvm_ldg_global_f:
285 case Intrinsic::nvvm_ldg_global_i:
286 case Intrinsic::nvvm_ldg_global_p:
287 case Intrinsic::nvvm_ldu_global_f:
288 case Intrinsic::nvvm_ldu_global_i:
289 case Intrinsic::nvvm_ldu_global_p:
290 return SelectLDGLDU(N);
291 }
292}
293
Justin Holewinski0497ab12013-03-30 14:29:21 +0000294static unsigned int getCodeAddrSpace(MemSDNode *N,
295 const NVPTXSubtarget &Subtarget) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +0000296 const Value *Src = N->getMemOperand()->getValue();
Justin Holewinskib96d1392013-06-10 13:29:47 +0000297
Justin Holewinskiae556d32012-05-04 20:18:50 +0000298 if (!Src)
Justin Holewinskib96d1392013-06-10 13:29:47 +0000299 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000300
301 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType())) {
302 switch (PT->getAddressSpace()) {
Justin Holewinskib96d1392013-06-10 13:29:47 +0000303 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
304 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
305 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
306 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
307 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
308 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
309 default: break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000310 }
311 }
Justin Holewinskib96d1392013-06-10 13:29:47 +0000312 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000313}
314
Justin Holewinski30d56a72014-04-09 15:39:15 +0000315SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
316 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
317 switch (IID) {
318 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000319 return nullptr;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000320 case Intrinsic::nvvm_texsurf_handle_internal:
321 return SelectTexSurfHandle(N);
322 }
323}
324
325SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
326 // Op 0 is the intrinsic ID
327 SDValue Wrapper = N->getOperand(1);
328 SDValue GlobalVal = Wrapper.getOperand(0);
329 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
330 GlobalVal);
331}
332
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000333SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
334 SDValue Src = N->getOperand(0);
335 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
336 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
337 unsigned DstAddrSpace = CastN->getDestAddressSpace();
338
339 assert(SrcAddrSpace != DstAddrSpace &&
340 "addrspacecast must be between different address spaces");
341
342 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
343 // Specific to generic
344 unsigned Opc;
345 switch (SrcAddrSpace) {
346 default: report_fatal_error("Bad address space in addrspacecast");
347 case ADDRESS_SPACE_GLOBAL:
348 Opc = Subtarget.is64Bit() ? NVPTX::cvta_global_yes_64
349 : NVPTX::cvta_global_yes;
350 break;
351 case ADDRESS_SPACE_SHARED:
352 Opc = Subtarget.is64Bit() ? NVPTX::cvta_shared_yes_64
353 : NVPTX::cvta_shared_yes;
354 break;
355 case ADDRESS_SPACE_CONST:
356 Opc = Subtarget.is64Bit() ? NVPTX::cvta_const_yes_64
357 : NVPTX::cvta_const_yes;
358 break;
359 case ADDRESS_SPACE_LOCAL:
360 Opc = Subtarget.is64Bit() ? NVPTX::cvta_local_yes_64
361 : NVPTX::cvta_local_yes;
362 break;
363 }
364 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
365 } else {
366 // Generic to specific
367 if (SrcAddrSpace != 0)
368 report_fatal_error("Cannot cast between two non-generic address spaces");
369 unsigned Opc;
370 switch (DstAddrSpace) {
371 default: report_fatal_error("Bad address space in addrspacecast");
372 case ADDRESS_SPACE_GLOBAL:
373 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_global_yes_64
374 : NVPTX::cvta_to_global_yes;
375 break;
376 case ADDRESS_SPACE_SHARED:
377 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_shared_yes_64
378 : NVPTX::cvta_to_shared_yes;
379 break;
380 case ADDRESS_SPACE_CONST:
381 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_const_yes_64
382 : NVPTX::cvta_to_const_yes;
383 break;
384 case ADDRESS_SPACE_LOCAL:
385 Opc = Subtarget.is64Bit() ? NVPTX::cvta_to_local_yes_64
386 : NVPTX::cvta_to_local_yes;
387 break;
388 }
389 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
390 }
391}
392
Justin Holewinski0497ab12013-03-30 14:29:21 +0000393SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000394 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000395 LoadSDNode *LD = cast<LoadSDNode>(N);
396 EVT LoadedVT = LD->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +0000397 SDNode *NVPTXLD = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000398
399 // do not support pre/post inc/dec
400 if (LD->isIndexed())
Craig Topper062a2ba2014-04-25 05:30:21 +0000401 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000402
403 if (!LoadedVT.isSimple())
Craig Topper062a2ba2014-04-25 05:30:21 +0000404 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000405
406 // Address Space Setting
407 unsigned int codeAddrSpace = getCodeAddrSpace(LD, Subtarget);
408
409 // Volatile Setting
410 // - .volatile is only availalble for .global and .shared
411 bool isVolatile = LD->isVolatile();
412 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
413 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
414 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
415 isVolatile = false;
416
417 // Vector Setting
418 MVT SimpleVT = LoadedVT.getSimpleVT();
419 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
420 if (SimpleVT.isVector()) {
421 unsigned num = SimpleVT.getVectorNumElements();
422 if (num == 2)
423 vecType = NVPTX::PTXLdStInstCode::V2;
424 else if (num == 4)
425 vecType = NVPTX::PTXLdStInstCode::V4;
426 else
Craig Topper062a2ba2014-04-25 05:30:21 +0000427 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000428 }
429
430 // Type Setting: fromType + fromTypeWidth
431 //
432 // Sign : ISD::SEXTLOAD
433 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
434 // type is integer
435 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
436 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +0000437 // Read at least 8 bits (predicates are stored as 8-bit values)
438 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskiae556d32012-05-04 20:18:50 +0000439 unsigned int fromType;
440 if ((LD->getExtensionType() == ISD::SEXTLOAD))
441 fromType = NVPTX::PTXLdStInstCode::Signed;
442 else if (ScalarVT.isFloatingPoint())
443 fromType = NVPTX::PTXLdStInstCode::Float;
444 else
445 fromType = NVPTX::PTXLdStInstCode::Unsigned;
446
447 // Create the machine instruction DAG
448 SDValue Chain = N->getOperand(0);
449 SDValue N1 = N->getOperand(1);
450 SDValue Addr;
451 SDValue Offset, Base;
452 unsigned Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +0000453 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000454
455 if (SelectDirectAddr(N1, Addr)) {
456 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000457 case MVT::i8:
458 Opcode = NVPTX::LD_i8_avar;
459 break;
460 case MVT::i16:
461 Opcode = NVPTX::LD_i16_avar;
462 break;
463 case MVT::i32:
464 Opcode = NVPTX::LD_i32_avar;
465 break;
466 case MVT::i64:
467 Opcode = NVPTX::LD_i64_avar;
468 break;
469 case MVT::f32:
470 Opcode = NVPTX::LD_f32_avar;
471 break;
472 case MVT::f64:
473 Opcode = NVPTX::LD_f64_avar;
474 break;
475 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000476 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000477 }
Justin Holewinski0497ab12013-03-30 14:29:21 +0000478 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
479 getI32Imm(vecType), getI32Imm(fromType),
480 getI32Imm(fromTypeWidth), Addr, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000481 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000482 } else if (Subtarget.is64Bit()
483 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
484 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000485 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000486 case MVT::i8:
487 Opcode = NVPTX::LD_i8_asi;
488 break;
489 case MVT::i16:
490 Opcode = NVPTX::LD_i16_asi;
491 break;
492 case MVT::i32:
493 Opcode = NVPTX::LD_i32_asi;
494 break;
495 case MVT::i64:
496 Opcode = NVPTX::LD_i64_asi;
497 break;
498 case MVT::f32:
499 Opcode = NVPTX::LD_f32_asi;
500 break;
501 case MVT::f64:
502 Opcode = NVPTX::LD_f64_asi;
503 break;
504 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000505 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000506 }
Justin Holewinski0497ab12013-03-30 14:29:21 +0000507 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
508 getI32Imm(vecType), getI32Imm(fromType),
509 getI32Imm(fromTypeWidth), Base, Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000510 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000511 } else if (Subtarget.is64Bit()
512 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
513 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000514 if (Subtarget.is64Bit()) {
515 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000516 case MVT::i8:
517 Opcode = NVPTX::LD_i8_ari_64;
518 break;
519 case MVT::i16:
520 Opcode = NVPTX::LD_i16_ari_64;
521 break;
522 case MVT::i32:
523 Opcode = NVPTX::LD_i32_ari_64;
524 break;
525 case MVT::i64:
526 Opcode = NVPTX::LD_i64_ari_64;
527 break;
528 case MVT::f32:
529 Opcode = NVPTX::LD_f32_ari_64;
530 break;
531 case MVT::f64:
532 Opcode = NVPTX::LD_f64_ari_64;
533 break;
534 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000535 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000536 }
537 } else {
538 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000539 case MVT::i8:
540 Opcode = NVPTX::LD_i8_ari;
541 break;
542 case MVT::i16:
543 Opcode = NVPTX::LD_i16_ari;
544 break;
545 case MVT::i32:
546 Opcode = NVPTX::LD_i32_ari;
547 break;
548 case MVT::i64:
549 Opcode = NVPTX::LD_i64_ari;
550 break;
551 case MVT::f32:
552 Opcode = NVPTX::LD_f32_ari;
553 break;
554 case MVT::f64:
555 Opcode = NVPTX::LD_f64_ari;
556 break;
557 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000558 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000559 }
Justin Holewinskiae556d32012-05-04 20:18:50 +0000560 }
Justin Holewinski0497ab12013-03-30 14:29:21 +0000561 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
562 getI32Imm(vecType), getI32Imm(fromType),
563 getI32Imm(fromTypeWidth), Base, Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000564 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000565 } else {
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000566 if (Subtarget.is64Bit()) {
567 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000568 case MVT::i8:
569 Opcode = NVPTX::LD_i8_areg_64;
570 break;
571 case MVT::i16:
572 Opcode = NVPTX::LD_i16_areg_64;
573 break;
574 case MVT::i32:
575 Opcode = NVPTX::LD_i32_areg_64;
576 break;
577 case MVT::i64:
578 Opcode = NVPTX::LD_i64_areg_64;
579 break;
580 case MVT::f32:
581 Opcode = NVPTX::LD_f32_areg_64;
582 break;
583 case MVT::f64:
584 Opcode = NVPTX::LD_f64_areg_64;
585 break;
586 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000587 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000588 }
589 } else {
590 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000591 case MVT::i8:
592 Opcode = NVPTX::LD_i8_areg;
593 break;
594 case MVT::i16:
595 Opcode = NVPTX::LD_i16_areg;
596 break;
597 case MVT::i32:
598 Opcode = NVPTX::LD_i32_areg;
599 break;
600 case MVT::i64:
601 Opcode = NVPTX::LD_i64_areg;
602 break;
603 case MVT::f32:
604 Opcode = NVPTX::LD_f32_areg;
605 break;
606 case MVT::f64:
607 Opcode = NVPTX::LD_f64_areg;
608 break;
609 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000610 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000611 }
Justin Holewinskiae556d32012-05-04 20:18:50 +0000612 }
Justin Holewinski0497ab12013-03-30 14:29:21 +0000613 SDValue Ops[] = { getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
614 getI32Imm(vecType), getI32Imm(fromType),
615 getI32Imm(fromTypeWidth), N1, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000616 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000617 }
618
Craig Topper062a2ba2014-04-25 05:30:21 +0000619 if (NVPTXLD) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000620 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
621 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
622 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
623 }
624
625 return NVPTXLD;
626}
627
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000628SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
629
630 SDValue Chain = N->getOperand(0);
631 SDValue Op1 = N->getOperand(1);
632 SDValue Addr, Offset, Base;
633 unsigned Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +0000634 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000635 SDNode *LD;
636 MemSDNode *MemSD = cast<MemSDNode>(N);
637 EVT LoadedVT = MemSD->getMemoryVT();
638
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000639 if (!LoadedVT.isSimple())
Craig Topper062a2ba2014-04-25 05:30:21 +0000640 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000641
642 // Address Space Setting
643 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
644
645 // Volatile Setting
646 // - .volatile is only availalble for .global and .shared
647 bool IsVolatile = MemSD->isVolatile();
648 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
649 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
650 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
651 IsVolatile = false;
652
653 // Vector Setting
654 MVT SimpleVT = LoadedVT.getSimpleVT();
655
656 // Type Setting: fromType + fromTypeWidth
657 //
658 // Sign : ISD::SEXTLOAD
659 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
660 // type is integer
661 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
662 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +0000663 // Read at least 8 bits (predicates are stored as 8-bit values)
664 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000665 unsigned int FromType;
666 // The last operand holds the original LoadSDNode::getExtensionType() value
Justin Holewinski0497ab12013-03-30 14:29:21 +0000667 unsigned ExtensionType = cast<ConstantSDNode>(
668 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000669 if (ExtensionType == ISD::SEXTLOAD)
670 FromType = NVPTX::PTXLdStInstCode::Signed;
671 else if (ScalarVT.isFloatingPoint())
672 FromType = NVPTX::PTXLdStInstCode::Float;
673 else
674 FromType = NVPTX::PTXLdStInstCode::Unsigned;
675
676 unsigned VecType;
677
678 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000679 case NVPTXISD::LoadV2:
680 VecType = NVPTX::PTXLdStInstCode::V2;
681 break;
682 case NVPTXISD::LoadV4:
683 VecType = NVPTX::PTXLdStInstCode::V4;
684 break;
685 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000686 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000687 }
688
689 EVT EltVT = N->getValueType(0);
690
691 if (SelectDirectAddr(Op1, Addr)) {
692 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000693 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000694 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000695 case NVPTXISD::LoadV2:
696 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000697 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000698 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000699 case MVT::i8:
700 Opcode = NVPTX::LDV_i8_v2_avar;
701 break;
702 case MVT::i16:
703 Opcode = NVPTX::LDV_i16_v2_avar;
704 break;
705 case MVT::i32:
706 Opcode = NVPTX::LDV_i32_v2_avar;
707 break;
708 case MVT::i64:
709 Opcode = NVPTX::LDV_i64_v2_avar;
710 break;
711 case MVT::f32:
712 Opcode = NVPTX::LDV_f32_v2_avar;
713 break;
714 case MVT::f64:
715 Opcode = NVPTX::LDV_f64_v2_avar;
716 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000717 }
718 break;
719 case NVPTXISD::LoadV4:
720 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000721 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000722 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000723 case MVT::i8:
724 Opcode = NVPTX::LDV_i8_v4_avar;
725 break;
726 case MVT::i16:
727 Opcode = NVPTX::LDV_i16_v4_avar;
728 break;
729 case MVT::i32:
730 Opcode = NVPTX::LDV_i32_v4_avar;
731 break;
732 case MVT::f32:
733 Opcode = NVPTX::LDV_f32_v4_avar;
734 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000735 }
736 break;
737 }
738
Justin Holewinski0497ab12013-03-30 14:29:21 +0000739 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
740 getI32Imm(VecType), getI32Imm(FromType),
741 getI32Imm(FromTypeWidth), Addr, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000742 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000743 } else if (Subtarget.is64Bit()
744 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
745 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000746 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000747 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000748 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000749 case NVPTXISD::LoadV2:
750 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000751 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000752 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000753 case MVT::i8:
754 Opcode = NVPTX::LDV_i8_v2_asi;
755 break;
756 case MVT::i16:
757 Opcode = NVPTX::LDV_i16_v2_asi;
758 break;
759 case MVT::i32:
760 Opcode = NVPTX::LDV_i32_v2_asi;
761 break;
762 case MVT::i64:
763 Opcode = NVPTX::LDV_i64_v2_asi;
764 break;
765 case MVT::f32:
766 Opcode = NVPTX::LDV_f32_v2_asi;
767 break;
768 case MVT::f64:
769 Opcode = NVPTX::LDV_f64_v2_asi;
770 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000771 }
772 break;
773 case NVPTXISD::LoadV4:
774 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000775 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000776 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000777 case MVT::i8:
778 Opcode = NVPTX::LDV_i8_v4_asi;
779 break;
780 case MVT::i16:
781 Opcode = NVPTX::LDV_i16_v4_asi;
782 break;
783 case MVT::i32:
784 Opcode = NVPTX::LDV_i32_v4_asi;
785 break;
786 case MVT::f32:
787 Opcode = NVPTX::LDV_f32_v4_asi;
788 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000789 }
790 break;
791 }
792
Justin Holewinski0497ab12013-03-30 14:29:21 +0000793 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
794 getI32Imm(VecType), getI32Imm(FromType),
795 getI32Imm(FromTypeWidth), Base, Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000796 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000797 } else if (Subtarget.is64Bit()
798 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
799 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000800 if (Subtarget.is64Bit()) {
801 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000802 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000803 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000804 case NVPTXISD::LoadV2:
805 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000806 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000807 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000808 case MVT::i8:
809 Opcode = NVPTX::LDV_i8_v2_ari_64;
810 break;
811 case MVT::i16:
812 Opcode = NVPTX::LDV_i16_v2_ari_64;
813 break;
814 case MVT::i32:
815 Opcode = NVPTX::LDV_i32_v2_ari_64;
816 break;
817 case MVT::i64:
818 Opcode = NVPTX::LDV_i64_v2_ari_64;
819 break;
820 case MVT::f32:
821 Opcode = NVPTX::LDV_f32_v2_ari_64;
822 break;
823 case MVT::f64:
824 Opcode = NVPTX::LDV_f64_v2_ari_64;
825 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000826 }
827 break;
828 case NVPTXISD::LoadV4:
829 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000830 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000831 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000832 case MVT::i8:
833 Opcode = NVPTX::LDV_i8_v4_ari_64;
834 break;
835 case MVT::i16:
836 Opcode = NVPTX::LDV_i16_v4_ari_64;
837 break;
838 case MVT::i32:
839 Opcode = NVPTX::LDV_i32_v4_ari_64;
840 break;
841 case MVT::f32:
842 Opcode = NVPTX::LDV_f32_v4_ari_64;
843 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000844 }
845 break;
846 }
847 } else {
848 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000849 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000850 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000851 case NVPTXISD::LoadV2:
852 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000853 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000854 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000855 case MVT::i8:
856 Opcode = NVPTX::LDV_i8_v2_ari;
857 break;
858 case MVT::i16:
859 Opcode = NVPTX::LDV_i16_v2_ari;
860 break;
861 case MVT::i32:
862 Opcode = NVPTX::LDV_i32_v2_ari;
863 break;
864 case MVT::i64:
865 Opcode = NVPTX::LDV_i64_v2_ari;
866 break;
867 case MVT::f32:
868 Opcode = NVPTX::LDV_f32_v2_ari;
869 break;
870 case MVT::f64:
871 Opcode = NVPTX::LDV_f64_v2_ari;
872 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000873 }
874 break;
875 case NVPTXISD::LoadV4:
876 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000877 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000878 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000879 case MVT::i8:
880 Opcode = NVPTX::LDV_i8_v4_ari;
881 break;
882 case MVT::i16:
883 Opcode = NVPTX::LDV_i16_v4_ari;
884 break;
885 case MVT::i32:
886 Opcode = NVPTX::LDV_i32_v4_ari;
887 break;
888 case MVT::f32:
889 Opcode = NVPTX::LDV_f32_v4_ari;
890 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000891 }
892 break;
893 }
894 }
895
Justin Holewinski0497ab12013-03-30 14:29:21 +0000896 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
897 getI32Imm(VecType), getI32Imm(FromType),
898 getI32Imm(FromTypeWidth), Base, Offset, Chain };
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000899
Michael Liaob53d8962013-04-19 22:22:57 +0000900 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000901 } else {
902 if (Subtarget.is64Bit()) {
903 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000904 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000905 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000906 case NVPTXISD::LoadV2:
907 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000908 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000909 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000910 case MVT::i8:
911 Opcode = NVPTX::LDV_i8_v2_areg_64;
912 break;
913 case MVT::i16:
914 Opcode = NVPTX::LDV_i16_v2_areg_64;
915 break;
916 case MVT::i32:
917 Opcode = NVPTX::LDV_i32_v2_areg_64;
918 break;
919 case MVT::i64:
920 Opcode = NVPTX::LDV_i64_v2_areg_64;
921 break;
922 case MVT::f32:
923 Opcode = NVPTX::LDV_f32_v2_areg_64;
924 break;
925 case MVT::f64:
926 Opcode = NVPTX::LDV_f64_v2_areg_64;
927 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000928 }
929 break;
930 case NVPTXISD::LoadV4:
931 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000932 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000933 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000934 case MVT::i8:
935 Opcode = NVPTX::LDV_i8_v4_areg_64;
936 break;
937 case MVT::i16:
938 Opcode = NVPTX::LDV_i16_v4_areg_64;
939 break;
940 case MVT::i32:
941 Opcode = NVPTX::LDV_i32_v4_areg_64;
942 break;
943 case MVT::f32:
944 Opcode = NVPTX::LDV_f32_v4_areg_64;
945 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000946 }
947 break;
948 }
949 } else {
950 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000951 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000952 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000953 case NVPTXISD::LoadV2:
954 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000955 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000956 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000957 case MVT::i8:
958 Opcode = NVPTX::LDV_i8_v2_areg;
959 break;
960 case MVT::i16:
961 Opcode = NVPTX::LDV_i16_v2_areg;
962 break;
963 case MVT::i32:
964 Opcode = NVPTX::LDV_i32_v2_areg;
965 break;
966 case MVT::i64:
967 Opcode = NVPTX::LDV_i64_v2_areg;
968 break;
969 case MVT::f32:
970 Opcode = NVPTX::LDV_f32_v2_areg;
971 break;
972 case MVT::f64:
973 Opcode = NVPTX::LDV_f64_v2_areg;
974 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000975 }
976 break;
977 case NVPTXISD::LoadV4:
978 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000979 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000980 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000981 case MVT::i8:
982 Opcode = NVPTX::LDV_i8_v4_areg;
983 break;
984 case MVT::i16:
985 Opcode = NVPTX::LDV_i16_v4_areg;
986 break;
987 case MVT::i32:
988 Opcode = NVPTX::LDV_i32_v4_areg;
989 break;
990 case MVT::f32:
991 Opcode = NVPTX::LDV_f32_v4_areg;
992 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000993 }
994 break;
995 }
996 }
997
Justin Holewinski0497ab12013-03-30 14:29:21 +0000998 SDValue Ops[] = { getI32Imm(IsVolatile), getI32Imm(CodeAddrSpace),
999 getI32Imm(VecType), getI32Imm(FromType),
1000 getI32Imm(FromTypeWidth), Op1, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00001001 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001002 }
1003
1004 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1005 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1006 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1007
1008 return LD;
1009}
1010
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001011SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001012
1013 SDValue Chain = N->getOperand(0);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001014 SDValue Op1;
1015 MemSDNode *Mem;
1016 bool IsLDG = true;
1017
1018 // If this is an LDG intrinsic, the address is the third operand. Its its an
1019 // LDG/LDU SD node (from custom vector handling), then its the second operand
1020 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1021 Op1 = N->getOperand(2);
1022 Mem = cast<MemIntrinsicSDNode>(N);
1023 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1024 switch (IID) {
1025 default:
1026 return NULL;
1027 case Intrinsic::nvvm_ldg_global_f:
1028 case Intrinsic::nvvm_ldg_global_i:
1029 case Intrinsic::nvvm_ldg_global_p:
1030 IsLDG = true;
1031 break;
1032 case Intrinsic::nvvm_ldu_global_f:
1033 case Intrinsic::nvvm_ldu_global_i:
1034 case Intrinsic::nvvm_ldu_global_p:
1035 IsLDG = false;
1036 break;
1037 }
1038 } else {
1039 Op1 = N->getOperand(1);
1040 Mem = cast<MemSDNode>(N);
1041 }
1042
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001043 unsigned Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001044 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001045 SDNode *LD;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001046 SDValue Base, Offset, Addr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00001047
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001048 EVT EltVT = Mem->getMemoryVT();
1049 if (EltVT.isVector()) {
1050 EltVT = EltVT.getVectorElementType();
1051 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001052
Justin Holewinskie40e9292013-07-01 12:58:52 +00001053 if (SelectDirectAddr(Op1, Addr)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001054 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001055 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001056 return nullptr;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001057 case ISD::INTRINSIC_W_CHAIN:
1058 if (IsLDG) {
1059 switch (EltVT.getSimpleVT().SimpleTy) {
1060 default:
1061 return nullptr;
1062 case MVT::i8:
1063 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1064 break;
1065 case MVT::i16:
1066 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1067 break;
1068 case MVT::i32:
1069 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1070 break;
1071 case MVT::i64:
1072 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1073 break;
1074 case MVT::f32:
1075 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1076 break;
1077 case MVT::f64:
1078 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1079 break;
1080 }
1081 } else {
1082 switch (EltVT.getSimpleVT().SimpleTy) {
1083 default:
1084 return nullptr;
1085 case MVT::i8:
1086 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1087 break;
1088 case MVT::i16:
1089 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1090 break;
1091 case MVT::i32:
1092 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1093 break;
1094 case MVT::i64:
1095 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1096 break;
1097 case MVT::f32:
1098 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1099 break;
1100 case MVT::f64:
1101 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1102 break;
1103 }
1104 }
1105 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001106 case NVPTXISD::LDGV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001107 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001108 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001109 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001110 case MVT::i8:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001111 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001112 break;
1113 case MVT::i16:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001114 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001115 break;
1116 case MVT::i32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001117 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001118 break;
1119 case MVT::i64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001120 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001121 break;
1122 case MVT::f32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001123 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001124 break;
1125 case MVT::f64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001126 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001127 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001128 }
1129 break;
1130 case NVPTXISD::LDUV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001131 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001132 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001133 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001134 case MVT::i8:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001135 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001136 break;
1137 case MVT::i16:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001138 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001139 break;
1140 case MVT::i32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001141 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001142 break;
1143 case MVT::i64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001144 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001145 break;
1146 case MVT::f32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001147 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001148 break;
1149 case MVT::f64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001150 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1151 break;
1152 }
1153 break;
1154 case NVPTXISD::LDGV4:
1155 switch (EltVT.getSimpleVT().SimpleTy) {
1156 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001157 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001158 case MVT::i8:
1159 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1160 break;
1161 case MVT::i16:
1162 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1163 break;
1164 case MVT::i32:
1165 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1166 break;
1167 case MVT::f32:
1168 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001169 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001170 }
1171 break;
1172 case NVPTXISD::LDUV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001173 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001174 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001175 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001176 case MVT::i8:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001177 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001178 break;
1179 case MVT::i16:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001180 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001181 break;
1182 case MVT::i32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001183 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001184 break;
1185 case MVT::f32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001186 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001187 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001188 }
1189 break;
1190 }
Justin Holewinskie40e9292013-07-01 12:58:52 +00001191
1192 SDValue Ops[] = { Addr, Chain };
Craig Topper2d2aa0c2014-04-30 07:17:30 +00001193 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001194 } else if (Subtarget.is64Bit()
1195 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1196 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1197 if (Subtarget.is64Bit()) {
1198 switch (N->getOpcode()) {
1199 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001200 return nullptr;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001201 case ISD::INTRINSIC_W_CHAIN:
1202 if (IsLDG) {
1203 switch (EltVT.getSimpleVT().SimpleTy) {
1204 default:
1205 return nullptr;
1206 case MVT::i8:
1207 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1208 break;
1209 case MVT::i16:
1210 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1211 break;
1212 case MVT::i32:
1213 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1214 break;
1215 case MVT::i64:
1216 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1217 break;
1218 case MVT::f32:
1219 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1220 break;
1221 case MVT::f64:
1222 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1223 break;
1224 }
1225 } else {
1226 switch (EltVT.getSimpleVT().SimpleTy) {
1227 default:
1228 return nullptr;
1229 case MVT::i8:
1230 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1231 break;
1232 case MVT::i16:
1233 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1234 break;
1235 case MVT::i32:
1236 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1237 break;
1238 case MVT::i64:
1239 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1240 break;
1241 case MVT::f32:
1242 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1243 break;
1244 case MVT::f64:
1245 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1246 break;
1247 }
1248 }
1249 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001250 case NVPTXISD::LDGV2:
1251 switch (EltVT.getSimpleVT().SimpleTy) {
1252 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001253 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001254 case MVT::i8:
1255 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1256 break;
1257 case MVT::i16:
1258 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1259 break;
1260 case MVT::i32:
1261 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1262 break;
1263 case MVT::i64:
1264 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1265 break;
1266 case MVT::f32:
1267 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1268 break;
1269 case MVT::f64:
1270 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1271 break;
1272 }
1273 break;
1274 case NVPTXISD::LDUV2:
1275 switch (EltVT.getSimpleVT().SimpleTy) {
1276 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001277 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001278 case MVT::i8:
1279 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1280 break;
1281 case MVT::i16:
1282 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1283 break;
1284 case MVT::i32:
1285 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1286 break;
1287 case MVT::i64:
1288 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1289 break;
1290 case MVT::f32:
1291 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1292 break;
1293 case MVT::f64:
1294 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1295 break;
1296 }
1297 break;
1298 case NVPTXISD::LDGV4:
1299 switch (EltVT.getSimpleVT().SimpleTy) {
1300 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001301 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001302 case MVT::i8:
1303 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1304 break;
1305 case MVT::i16:
1306 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1307 break;
1308 case MVT::i32:
1309 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1310 break;
1311 case MVT::f32:
1312 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1313 break;
1314 }
1315 break;
1316 case NVPTXISD::LDUV4:
1317 switch (EltVT.getSimpleVT().SimpleTy) {
1318 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001319 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001320 case MVT::i8:
1321 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1322 break;
1323 case MVT::i16:
1324 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1325 break;
1326 case MVT::i32:
1327 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1328 break;
1329 case MVT::f32:
1330 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1331 break;
1332 }
1333 break;
1334 }
1335 } else {
1336 switch (N->getOpcode()) {
1337 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001338 return nullptr;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001339 case ISD::INTRINSIC_W_CHAIN:
1340 if (IsLDG) {
1341 switch (EltVT.getSimpleVT().SimpleTy) {
1342 default:
1343 return nullptr;
1344 case MVT::i8:
1345 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1346 break;
1347 case MVT::i16:
1348 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1349 break;
1350 case MVT::i32:
1351 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1352 break;
1353 case MVT::i64:
1354 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1355 break;
1356 case MVT::f32:
1357 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1358 break;
1359 case MVT::f64:
1360 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1361 break;
1362 }
1363 } else {
1364 switch (EltVT.getSimpleVT().SimpleTy) {
1365 default:
1366 return nullptr;
1367 case MVT::i8:
1368 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1369 break;
1370 case MVT::i16:
1371 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1372 break;
1373 case MVT::i32:
1374 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1375 break;
1376 case MVT::i64:
1377 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1378 break;
1379 case MVT::f32:
1380 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1381 break;
1382 case MVT::f64:
1383 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1384 break;
1385 }
1386 }
1387 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001388 case NVPTXISD::LDGV2:
1389 switch (EltVT.getSimpleVT().SimpleTy) {
1390 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001391 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001392 case MVT::i8:
1393 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1394 break;
1395 case MVT::i16:
1396 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1397 break;
1398 case MVT::i32:
1399 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1400 break;
1401 case MVT::i64:
1402 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1403 break;
1404 case MVT::f32:
1405 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1406 break;
1407 case MVT::f64:
1408 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1409 break;
1410 }
1411 break;
1412 case NVPTXISD::LDUV2:
1413 switch (EltVT.getSimpleVT().SimpleTy) {
1414 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001415 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001416 case MVT::i8:
1417 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1418 break;
1419 case MVT::i16:
1420 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1421 break;
1422 case MVT::i32:
1423 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1424 break;
1425 case MVT::i64:
1426 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1427 break;
1428 case MVT::f32:
1429 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1430 break;
1431 case MVT::f64:
1432 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1433 break;
1434 }
1435 break;
1436 case NVPTXISD::LDGV4:
1437 switch (EltVT.getSimpleVT().SimpleTy) {
1438 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001439 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001440 case MVT::i8:
1441 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1442 break;
1443 case MVT::i16:
1444 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1445 break;
1446 case MVT::i32:
1447 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1448 break;
1449 case MVT::f32:
1450 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1451 break;
1452 }
1453 break;
1454 case NVPTXISD::LDUV4:
1455 switch (EltVT.getSimpleVT().SimpleTy) {
1456 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001457 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001458 case MVT::i8:
1459 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1460 break;
1461 case MVT::i16:
1462 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1463 break;
1464 case MVT::i32:
1465 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1466 break;
1467 case MVT::f32:
1468 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1469 break;
1470 }
1471 break;
1472 }
1473 }
1474
1475 SDValue Ops[] = { Base, Offset, Chain };
1476
Craig Topper2d2aa0c2014-04-30 07:17:30 +00001477 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001478 } else {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001479 if (Subtarget.is64Bit()) {
1480 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001481 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001482 return nullptr;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001483 case ISD::INTRINSIC_W_CHAIN:
1484 if (IsLDG) {
1485 switch (EltVT.getSimpleVT().SimpleTy) {
1486 default:
1487 return nullptr;
1488 case MVT::i8:
1489 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1490 break;
1491 case MVT::i16:
1492 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1493 break;
1494 case MVT::i32:
1495 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1496 break;
1497 case MVT::i64:
1498 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1499 break;
1500 case MVT::f32:
1501 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1502 break;
1503 case MVT::f64:
1504 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1505 break;
1506 }
1507 } else {
1508 switch (EltVT.getSimpleVT().SimpleTy) {
1509 default:
1510 return nullptr;
1511 case MVT::i8:
1512 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1513 break;
1514 case MVT::i16:
1515 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1516 break;
1517 case MVT::i32:
1518 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1519 break;
1520 case MVT::i64:
1521 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1522 break;
1523 case MVT::f32:
1524 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1525 break;
1526 case MVT::f64:
1527 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1528 break;
1529 }
1530 }
1531 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001532 case NVPTXISD::LDGV2:
1533 switch (EltVT.getSimpleVT().SimpleTy) {
1534 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001535 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001536 case MVT::i8:
1537 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1538 break;
1539 case MVT::i16:
1540 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1541 break;
1542 case MVT::i32:
1543 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1544 break;
1545 case MVT::i64:
1546 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1547 break;
1548 case MVT::f32:
1549 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1550 break;
1551 case MVT::f64:
1552 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1553 break;
1554 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001555 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001556 case NVPTXISD::LDUV2:
1557 switch (EltVT.getSimpleVT().SimpleTy) {
1558 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001559 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001560 case MVT::i8:
1561 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1562 break;
1563 case MVT::i16:
1564 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1565 break;
1566 case MVT::i32:
1567 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1568 break;
1569 case MVT::i64:
1570 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1571 break;
1572 case MVT::f32:
1573 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1574 break;
1575 case MVT::f64:
1576 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1577 break;
1578 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001579 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001580 case NVPTXISD::LDGV4:
1581 switch (EltVT.getSimpleVT().SimpleTy) {
1582 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001583 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001584 case MVT::i8:
1585 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1586 break;
1587 case MVT::i16:
1588 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1589 break;
1590 case MVT::i32:
1591 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1592 break;
1593 case MVT::f32:
1594 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1595 break;
1596 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001597 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001598 case NVPTXISD::LDUV4:
1599 switch (EltVT.getSimpleVT().SimpleTy) {
1600 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001601 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001602 case MVT::i8:
1603 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1604 break;
1605 case MVT::i16:
1606 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1607 break;
1608 case MVT::i32:
1609 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1610 break;
1611 case MVT::f32:
1612 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1613 break;
1614 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001615 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001616 }
Justin Holewinskie40e9292013-07-01 12:58:52 +00001617 } else {
1618 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001619 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001620 return nullptr;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001621 case ISD::INTRINSIC_W_CHAIN:
1622 if (IsLDG) {
1623 switch (EltVT.getSimpleVT().SimpleTy) {
1624 default:
1625 return nullptr;
1626 case MVT::i8:
1627 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1628 break;
1629 case MVT::i16:
1630 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1631 break;
1632 case MVT::i32:
1633 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1634 break;
1635 case MVT::i64:
1636 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1637 break;
1638 case MVT::f32:
1639 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1640 break;
1641 case MVT::f64:
1642 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1643 break;
1644 }
1645 } else {
1646 switch (EltVT.getSimpleVT().SimpleTy) {
1647 default:
1648 return nullptr;
1649 case MVT::i8:
1650 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1651 break;
1652 case MVT::i16:
1653 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1654 break;
1655 case MVT::i32:
1656 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1657 break;
1658 case MVT::i64:
1659 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1660 break;
1661 case MVT::f32:
1662 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1663 break;
1664 case MVT::f64:
1665 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1666 break;
1667 }
1668 }
1669 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001670 case NVPTXISD::LDGV2:
1671 switch (EltVT.getSimpleVT().SimpleTy) {
1672 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001673 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001674 case MVT::i8:
1675 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1676 break;
1677 case MVT::i16:
1678 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1679 break;
1680 case MVT::i32:
1681 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1682 break;
1683 case MVT::i64:
1684 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1685 break;
1686 case MVT::f32:
1687 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1688 break;
1689 case MVT::f64:
1690 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1691 break;
1692 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001693 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001694 case NVPTXISD::LDUV2:
1695 switch (EltVT.getSimpleVT().SimpleTy) {
1696 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001697 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001698 case MVT::i8:
1699 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1700 break;
1701 case MVT::i16:
1702 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1703 break;
1704 case MVT::i32:
1705 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
1706 break;
1707 case MVT::i64:
1708 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
1709 break;
1710 case MVT::f32:
1711 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
1712 break;
1713 case MVT::f64:
1714 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
1715 break;
1716 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001717 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001718 case NVPTXISD::LDGV4:
1719 switch (EltVT.getSimpleVT().SimpleTy) {
1720 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001721 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001722 case MVT::i8:
1723 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
1724 break;
1725 case MVT::i16:
1726 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
1727 break;
1728 case MVT::i32:
1729 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
1730 break;
1731 case MVT::f32:
1732 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
1733 break;
1734 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001735 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001736 case NVPTXISD::LDUV4:
1737 switch (EltVT.getSimpleVT().SimpleTy) {
1738 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001739 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001740 case MVT::i8:
1741 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
1742 break;
1743 case MVT::i16:
1744 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
1745 break;
1746 case MVT::i32:
1747 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
1748 break;
1749 case MVT::f32:
1750 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
1751 break;
1752 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001753 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001754 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001755 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001756
Justin Holewinskie40e9292013-07-01 12:58:52 +00001757 SDValue Ops[] = { Op1, Chain };
Craig Topper2d2aa0c2014-04-30 07:17:30 +00001758 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001759 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001760
1761 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001762 MemRefs0[0] = Mem->getMemOperand();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001763 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1764
1765 return LD;
1766}
1767
Justin Holewinski0497ab12013-03-30 14:29:21 +00001768SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001769 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001770 StoreSDNode *ST = cast<StoreSDNode>(N);
1771 EVT StoreVT = ST->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +00001772 SDNode *NVPTXST = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001773
1774 // do not support pre/post inc/dec
1775 if (ST->isIndexed())
Craig Topper062a2ba2014-04-25 05:30:21 +00001776 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001777
1778 if (!StoreVT.isSimple())
Craig Topper062a2ba2014-04-25 05:30:21 +00001779 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001780
1781 // Address Space Setting
1782 unsigned int codeAddrSpace = getCodeAddrSpace(ST, Subtarget);
1783
1784 // Volatile Setting
1785 // - .volatile is only availalble for .global and .shared
1786 bool isVolatile = ST->isVolatile();
1787 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1788 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1789 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1790 isVolatile = false;
1791
1792 // Vector Setting
1793 MVT SimpleVT = StoreVT.getSimpleVT();
1794 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
1795 if (SimpleVT.isVector()) {
1796 unsigned num = SimpleVT.getVectorNumElements();
1797 if (num == 2)
1798 vecType = NVPTX::PTXLdStInstCode::V2;
1799 else if (num == 4)
1800 vecType = NVPTX::PTXLdStInstCode::V4;
1801 else
Craig Topper062a2ba2014-04-25 05:30:21 +00001802 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001803 }
1804
1805 // Type Setting: toType + toTypeWidth
1806 // - for integer type, always use 'u'
1807 //
1808 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00001809 unsigned toTypeWidth = ScalarVT.getSizeInBits();
Justin Holewinskiae556d32012-05-04 20:18:50 +00001810 unsigned int toType;
1811 if (ScalarVT.isFloatingPoint())
1812 toType = NVPTX::PTXLdStInstCode::Float;
1813 else
1814 toType = NVPTX::PTXLdStInstCode::Unsigned;
1815
1816 // Create the machine instruction DAG
1817 SDValue Chain = N->getOperand(0);
1818 SDValue N1 = N->getOperand(1);
1819 SDValue N2 = N->getOperand(2);
1820 SDValue Addr;
1821 SDValue Offset, Base;
1822 unsigned Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +00001823 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001824
1825 if (SelectDirectAddr(N2, Addr)) {
1826 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001827 case MVT::i8:
1828 Opcode = NVPTX::ST_i8_avar;
1829 break;
1830 case MVT::i16:
1831 Opcode = NVPTX::ST_i16_avar;
1832 break;
1833 case MVT::i32:
1834 Opcode = NVPTX::ST_i32_avar;
1835 break;
1836 case MVT::i64:
1837 Opcode = NVPTX::ST_i64_avar;
1838 break;
1839 case MVT::f32:
1840 Opcode = NVPTX::ST_f32_avar;
1841 break;
1842 case MVT::f64:
1843 Opcode = NVPTX::ST_f64_avar;
1844 break;
1845 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001846 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001847 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001848 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1849 getI32Imm(vecType), getI32Imm(toType),
1850 getI32Imm(toTypeWidth), Addr, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00001851 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001852 } else if (Subtarget.is64Bit()
1853 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1854 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001855 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001856 case MVT::i8:
1857 Opcode = NVPTX::ST_i8_asi;
1858 break;
1859 case MVT::i16:
1860 Opcode = NVPTX::ST_i16_asi;
1861 break;
1862 case MVT::i32:
1863 Opcode = NVPTX::ST_i32_asi;
1864 break;
1865 case MVT::i64:
1866 Opcode = NVPTX::ST_i64_asi;
1867 break;
1868 case MVT::f32:
1869 Opcode = NVPTX::ST_f32_asi;
1870 break;
1871 case MVT::f64:
1872 Opcode = NVPTX::ST_f64_asi;
1873 break;
1874 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001875 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001876 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001877 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1878 getI32Imm(vecType), getI32Imm(toType),
1879 getI32Imm(toTypeWidth), Base, Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00001880 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001881 } else if (Subtarget.is64Bit()
1882 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1883 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001884 if (Subtarget.is64Bit()) {
1885 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001886 case MVT::i8:
1887 Opcode = NVPTX::ST_i8_ari_64;
1888 break;
1889 case MVT::i16:
1890 Opcode = NVPTX::ST_i16_ari_64;
1891 break;
1892 case MVT::i32:
1893 Opcode = NVPTX::ST_i32_ari_64;
1894 break;
1895 case MVT::i64:
1896 Opcode = NVPTX::ST_i64_ari_64;
1897 break;
1898 case MVT::f32:
1899 Opcode = NVPTX::ST_f32_ari_64;
1900 break;
1901 case MVT::f64:
1902 Opcode = NVPTX::ST_f64_ari_64;
1903 break;
1904 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001905 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001906 }
1907 } else {
1908 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001909 case MVT::i8:
1910 Opcode = NVPTX::ST_i8_ari;
1911 break;
1912 case MVT::i16:
1913 Opcode = NVPTX::ST_i16_ari;
1914 break;
1915 case MVT::i32:
1916 Opcode = NVPTX::ST_i32_ari;
1917 break;
1918 case MVT::i64:
1919 Opcode = NVPTX::ST_i64_ari;
1920 break;
1921 case MVT::f32:
1922 Opcode = NVPTX::ST_f32_ari;
1923 break;
1924 case MVT::f64:
1925 Opcode = NVPTX::ST_f64_ari;
1926 break;
1927 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001928 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001929 }
Justin Holewinskiae556d32012-05-04 20:18:50 +00001930 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001931 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1932 getI32Imm(vecType), getI32Imm(toType),
1933 getI32Imm(toTypeWidth), Base, Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00001934 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001935 } else {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001936 if (Subtarget.is64Bit()) {
1937 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001938 case MVT::i8:
1939 Opcode = NVPTX::ST_i8_areg_64;
1940 break;
1941 case MVT::i16:
1942 Opcode = NVPTX::ST_i16_areg_64;
1943 break;
1944 case MVT::i32:
1945 Opcode = NVPTX::ST_i32_areg_64;
1946 break;
1947 case MVT::i64:
1948 Opcode = NVPTX::ST_i64_areg_64;
1949 break;
1950 case MVT::f32:
1951 Opcode = NVPTX::ST_f32_areg_64;
1952 break;
1953 case MVT::f64:
1954 Opcode = NVPTX::ST_f64_areg_64;
1955 break;
1956 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001957 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001958 }
1959 } else {
1960 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001961 case MVT::i8:
1962 Opcode = NVPTX::ST_i8_areg;
1963 break;
1964 case MVT::i16:
1965 Opcode = NVPTX::ST_i16_areg;
1966 break;
1967 case MVT::i32:
1968 Opcode = NVPTX::ST_i32_areg;
1969 break;
1970 case MVT::i64:
1971 Opcode = NVPTX::ST_i64_areg;
1972 break;
1973 case MVT::f32:
1974 Opcode = NVPTX::ST_f32_areg;
1975 break;
1976 case MVT::f64:
1977 Opcode = NVPTX::ST_f64_areg;
1978 break;
1979 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001980 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001981 }
Justin Holewinskiae556d32012-05-04 20:18:50 +00001982 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001983 SDValue Ops[] = { N1, getI32Imm(isVolatile), getI32Imm(codeAddrSpace),
1984 getI32Imm(vecType), getI32Imm(toType),
1985 getI32Imm(toTypeWidth), N2, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00001986 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001987 }
1988
Craig Topper062a2ba2014-04-25 05:30:21 +00001989 if (NVPTXST) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00001990 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1991 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1992 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1993 }
1994
1995 return NVPTXST;
1996}
1997
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001998SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
1999 SDValue Chain = N->getOperand(0);
2000 SDValue Op1 = N->getOperand(1);
2001 SDValue Addr, Offset, Base;
2002 unsigned Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00002003 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002004 SDNode *ST;
2005 EVT EltVT = Op1.getValueType();
2006 MemSDNode *MemSD = cast<MemSDNode>(N);
2007 EVT StoreVT = MemSD->getMemoryVT();
2008
2009 // Address Space Setting
2010 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD, Subtarget);
2011
2012 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2013 report_fatal_error("Cannot store to pointer that points to constant "
2014 "memory space");
2015 }
2016
2017 // Volatile Setting
2018 // - .volatile is only availalble for .global and .shared
2019 bool IsVolatile = MemSD->isVolatile();
2020 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2021 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2022 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2023 IsVolatile = false;
2024
2025 // Type Setting: toType + toTypeWidth
2026 // - for integer type, always use 'u'
2027 assert(StoreVT.isSimple() && "Store value is not simple");
2028 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00002029 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002030 unsigned ToType;
2031 if (ScalarVT.isFloatingPoint())
2032 ToType = NVPTX::PTXLdStInstCode::Float;
2033 else
2034 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2035
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002036 SmallVector<SDValue, 12> StOps;
2037 SDValue N2;
2038 unsigned VecType;
2039
2040 switch (N->getOpcode()) {
2041 case NVPTXISD::StoreV2:
2042 VecType = NVPTX::PTXLdStInstCode::V2;
2043 StOps.push_back(N->getOperand(1));
2044 StOps.push_back(N->getOperand(2));
2045 N2 = N->getOperand(3);
2046 break;
2047 case NVPTXISD::StoreV4:
2048 VecType = NVPTX::PTXLdStInstCode::V4;
2049 StOps.push_back(N->getOperand(1));
2050 StOps.push_back(N->getOperand(2));
2051 StOps.push_back(N->getOperand(3));
2052 StOps.push_back(N->getOperand(4));
2053 N2 = N->getOperand(5);
2054 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002055 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002056 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002057 }
2058
2059 StOps.push_back(getI32Imm(IsVolatile));
2060 StOps.push_back(getI32Imm(CodeAddrSpace));
2061 StOps.push_back(getI32Imm(VecType));
2062 StOps.push_back(getI32Imm(ToType));
2063 StOps.push_back(getI32Imm(ToTypeWidth));
2064
2065 if (SelectDirectAddr(N2, Addr)) {
2066 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002067 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002068 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002069 case NVPTXISD::StoreV2:
2070 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002071 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002072 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002073 case MVT::i8:
2074 Opcode = NVPTX::STV_i8_v2_avar;
2075 break;
2076 case MVT::i16:
2077 Opcode = NVPTX::STV_i16_v2_avar;
2078 break;
2079 case MVT::i32:
2080 Opcode = NVPTX::STV_i32_v2_avar;
2081 break;
2082 case MVT::i64:
2083 Opcode = NVPTX::STV_i64_v2_avar;
2084 break;
2085 case MVT::f32:
2086 Opcode = NVPTX::STV_f32_v2_avar;
2087 break;
2088 case MVT::f64:
2089 Opcode = NVPTX::STV_f64_v2_avar;
2090 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002091 }
2092 break;
2093 case NVPTXISD::StoreV4:
2094 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002095 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002096 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002097 case MVT::i8:
2098 Opcode = NVPTX::STV_i8_v4_avar;
2099 break;
2100 case MVT::i16:
2101 Opcode = NVPTX::STV_i16_v4_avar;
2102 break;
2103 case MVT::i32:
2104 Opcode = NVPTX::STV_i32_v4_avar;
2105 break;
2106 case MVT::f32:
2107 Opcode = NVPTX::STV_f32_v4_avar;
2108 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002109 }
2110 break;
2111 }
2112 StOps.push_back(Addr);
Justin Holewinski0497ab12013-03-30 14:29:21 +00002113 } else if (Subtarget.is64Bit()
2114 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2115 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002116 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002117 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002118 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002119 case NVPTXISD::StoreV2:
2120 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002121 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002122 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002123 case MVT::i8:
2124 Opcode = NVPTX::STV_i8_v2_asi;
2125 break;
2126 case MVT::i16:
2127 Opcode = NVPTX::STV_i16_v2_asi;
2128 break;
2129 case MVT::i32:
2130 Opcode = NVPTX::STV_i32_v2_asi;
2131 break;
2132 case MVT::i64:
2133 Opcode = NVPTX::STV_i64_v2_asi;
2134 break;
2135 case MVT::f32:
2136 Opcode = NVPTX::STV_f32_v2_asi;
2137 break;
2138 case MVT::f64:
2139 Opcode = NVPTX::STV_f64_v2_asi;
2140 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002141 }
2142 break;
2143 case NVPTXISD::StoreV4:
2144 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002145 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002146 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002147 case MVT::i8:
2148 Opcode = NVPTX::STV_i8_v4_asi;
2149 break;
2150 case MVT::i16:
2151 Opcode = NVPTX::STV_i16_v4_asi;
2152 break;
2153 case MVT::i32:
2154 Opcode = NVPTX::STV_i32_v4_asi;
2155 break;
2156 case MVT::f32:
2157 Opcode = NVPTX::STV_f32_v4_asi;
2158 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002159 }
2160 break;
2161 }
2162 StOps.push_back(Base);
2163 StOps.push_back(Offset);
Justin Holewinski0497ab12013-03-30 14:29:21 +00002164 } else if (Subtarget.is64Bit()
2165 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2166 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002167 if (Subtarget.is64Bit()) {
2168 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002169 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002170 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002171 case NVPTXISD::StoreV2:
2172 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002173 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002174 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002175 case MVT::i8:
2176 Opcode = NVPTX::STV_i8_v2_ari_64;
2177 break;
2178 case MVT::i16:
2179 Opcode = NVPTX::STV_i16_v2_ari_64;
2180 break;
2181 case MVT::i32:
2182 Opcode = NVPTX::STV_i32_v2_ari_64;
2183 break;
2184 case MVT::i64:
2185 Opcode = NVPTX::STV_i64_v2_ari_64;
2186 break;
2187 case MVT::f32:
2188 Opcode = NVPTX::STV_f32_v2_ari_64;
2189 break;
2190 case MVT::f64:
2191 Opcode = NVPTX::STV_f64_v2_ari_64;
2192 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002193 }
2194 break;
2195 case NVPTXISD::StoreV4:
2196 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002197 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002198 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002199 case MVT::i8:
2200 Opcode = NVPTX::STV_i8_v4_ari_64;
2201 break;
2202 case MVT::i16:
2203 Opcode = NVPTX::STV_i16_v4_ari_64;
2204 break;
2205 case MVT::i32:
2206 Opcode = NVPTX::STV_i32_v4_ari_64;
2207 break;
2208 case MVT::f32:
2209 Opcode = NVPTX::STV_f32_v4_ari_64;
2210 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002211 }
2212 break;
2213 }
2214 } else {
2215 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002216 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002217 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002218 case NVPTXISD::StoreV2:
2219 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002220 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002221 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002222 case MVT::i8:
2223 Opcode = NVPTX::STV_i8_v2_ari;
2224 break;
2225 case MVT::i16:
2226 Opcode = NVPTX::STV_i16_v2_ari;
2227 break;
2228 case MVT::i32:
2229 Opcode = NVPTX::STV_i32_v2_ari;
2230 break;
2231 case MVT::i64:
2232 Opcode = NVPTX::STV_i64_v2_ari;
2233 break;
2234 case MVT::f32:
2235 Opcode = NVPTX::STV_f32_v2_ari;
2236 break;
2237 case MVT::f64:
2238 Opcode = NVPTX::STV_f64_v2_ari;
2239 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002240 }
2241 break;
2242 case NVPTXISD::StoreV4:
2243 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002244 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002245 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002246 case MVT::i8:
2247 Opcode = NVPTX::STV_i8_v4_ari;
2248 break;
2249 case MVT::i16:
2250 Opcode = NVPTX::STV_i16_v4_ari;
2251 break;
2252 case MVT::i32:
2253 Opcode = NVPTX::STV_i32_v4_ari;
2254 break;
2255 case MVT::f32:
2256 Opcode = NVPTX::STV_f32_v4_ari;
2257 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002258 }
2259 break;
2260 }
2261 }
2262 StOps.push_back(Base);
2263 StOps.push_back(Offset);
2264 } else {
2265 if (Subtarget.is64Bit()) {
2266 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002267 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002268 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002269 case NVPTXISD::StoreV2:
2270 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002271 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002272 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002273 case MVT::i8:
2274 Opcode = NVPTX::STV_i8_v2_areg_64;
2275 break;
2276 case MVT::i16:
2277 Opcode = NVPTX::STV_i16_v2_areg_64;
2278 break;
2279 case MVT::i32:
2280 Opcode = NVPTX::STV_i32_v2_areg_64;
2281 break;
2282 case MVT::i64:
2283 Opcode = NVPTX::STV_i64_v2_areg_64;
2284 break;
2285 case MVT::f32:
2286 Opcode = NVPTX::STV_f32_v2_areg_64;
2287 break;
2288 case MVT::f64:
2289 Opcode = NVPTX::STV_f64_v2_areg_64;
2290 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002291 }
2292 break;
2293 case NVPTXISD::StoreV4:
2294 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002295 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002296 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002297 case MVT::i8:
2298 Opcode = NVPTX::STV_i8_v4_areg_64;
2299 break;
2300 case MVT::i16:
2301 Opcode = NVPTX::STV_i16_v4_areg_64;
2302 break;
2303 case MVT::i32:
2304 Opcode = NVPTX::STV_i32_v4_areg_64;
2305 break;
2306 case MVT::f32:
2307 Opcode = NVPTX::STV_f32_v4_areg_64;
2308 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002309 }
2310 break;
2311 }
2312 } else {
2313 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002314 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002315 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002316 case NVPTXISD::StoreV2:
2317 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002318 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002319 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002320 case MVT::i8:
2321 Opcode = NVPTX::STV_i8_v2_areg;
2322 break;
2323 case MVT::i16:
2324 Opcode = NVPTX::STV_i16_v2_areg;
2325 break;
2326 case MVT::i32:
2327 Opcode = NVPTX::STV_i32_v2_areg;
2328 break;
2329 case MVT::i64:
2330 Opcode = NVPTX::STV_i64_v2_areg;
2331 break;
2332 case MVT::f32:
2333 Opcode = NVPTX::STV_f32_v2_areg;
2334 break;
2335 case MVT::f64:
2336 Opcode = NVPTX::STV_f64_v2_areg;
2337 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002338 }
2339 break;
2340 case NVPTXISD::StoreV4:
2341 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002342 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002343 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002344 case MVT::i8:
2345 Opcode = NVPTX::STV_i8_v4_areg;
2346 break;
2347 case MVT::i16:
2348 Opcode = NVPTX::STV_i16_v4_areg;
2349 break;
2350 case MVT::i32:
2351 Opcode = NVPTX::STV_i32_v4_areg;
2352 break;
2353 case MVT::f32:
2354 Opcode = NVPTX::STV_f32_v4_areg;
2355 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002356 }
2357 break;
2358 }
2359 }
2360 StOps.push_back(N2);
2361 }
2362
2363 StOps.push_back(Chain);
2364
Michael Liaob53d8962013-04-19 22:22:57 +00002365 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002366
2367 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2368 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2369 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2370
2371 return ST;
2372}
2373
Justin Holewinskif8f70912013-06-28 17:57:59 +00002374SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2375 SDValue Chain = Node->getOperand(0);
2376 SDValue Offset = Node->getOperand(2);
2377 SDValue Flag = Node->getOperand(3);
2378 SDLoc DL(Node);
2379 MemSDNode *Mem = cast<MemSDNode>(Node);
2380
2381 unsigned VecSize;
2382 switch (Node->getOpcode()) {
2383 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002384 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002385 case NVPTXISD::LoadParam:
2386 VecSize = 1;
2387 break;
2388 case NVPTXISD::LoadParamV2:
2389 VecSize = 2;
2390 break;
2391 case NVPTXISD::LoadParamV4:
2392 VecSize = 4;
2393 break;
2394 }
2395
2396 EVT EltVT = Node->getValueType(0);
2397 EVT MemVT = Mem->getMemoryVT();
2398
2399 unsigned Opc = 0;
2400
2401 switch (VecSize) {
2402 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002403 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002404 case 1:
2405 switch (MemVT.getSimpleVT().SimpleTy) {
2406 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002407 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002408 case MVT::i1:
2409 Opc = NVPTX::LoadParamMemI8;
2410 break;
2411 case MVT::i8:
2412 Opc = NVPTX::LoadParamMemI8;
2413 break;
2414 case MVT::i16:
2415 Opc = NVPTX::LoadParamMemI16;
2416 break;
2417 case MVT::i32:
2418 Opc = NVPTX::LoadParamMemI32;
2419 break;
2420 case MVT::i64:
2421 Opc = NVPTX::LoadParamMemI64;
2422 break;
2423 case MVT::f32:
2424 Opc = NVPTX::LoadParamMemF32;
2425 break;
2426 case MVT::f64:
2427 Opc = NVPTX::LoadParamMemF64;
2428 break;
2429 }
2430 break;
2431 case 2:
2432 switch (MemVT.getSimpleVT().SimpleTy) {
2433 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002434 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002435 case MVT::i1:
2436 Opc = NVPTX::LoadParamMemV2I8;
2437 break;
2438 case MVT::i8:
2439 Opc = NVPTX::LoadParamMemV2I8;
2440 break;
2441 case MVT::i16:
2442 Opc = NVPTX::LoadParamMemV2I16;
2443 break;
2444 case MVT::i32:
2445 Opc = NVPTX::LoadParamMemV2I32;
2446 break;
2447 case MVT::i64:
2448 Opc = NVPTX::LoadParamMemV2I64;
2449 break;
2450 case MVT::f32:
2451 Opc = NVPTX::LoadParamMemV2F32;
2452 break;
2453 case MVT::f64:
2454 Opc = NVPTX::LoadParamMemV2F64;
2455 break;
2456 }
2457 break;
2458 case 4:
2459 switch (MemVT.getSimpleVT().SimpleTy) {
2460 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002461 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002462 case MVT::i1:
2463 Opc = NVPTX::LoadParamMemV4I8;
2464 break;
2465 case MVT::i8:
2466 Opc = NVPTX::LoadParamMemV4I8;
2467 break;
2468 case MVT::i16:
2469 Opc = NVPTX::LoadParamMemV4I16;
2470 break;
2471 case MVT::i32:
2472 Opc = NVPTX::LoadParamMemV4I32;
2473 break;
2474 case MVT::f32:
2475 Opc = NVPTX::LoadParamMemV4F32;
2476 break;
2477 }
2478 break;
2479 }
2480
2481 SDVTList VTs;
2482 if (VecSize == 1) {
2483 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2484 } else if (VecSize == 2) {
2485 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2486 } else {
2487 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
Craig Topperabb4ac72014-04-16 06:10:51 +00002488 VTs = CurDAG->getVTList(EVTs);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002489 }
2490
2491 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2492
2493 SmallVector<SDValue, 2> Ops;
2494 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2495 Ops.push_back(Chain);
2496 Ops.push_back(Flag);
2497
2498 SDNode *Ret =
Justin Holewinskidff28d22013-07-01 12:59:01 +00002499 CurDAG->getMachineNode(Opc, DL, VTs, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002500 return Ret;
2501}
2502
2503SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2504 SDLoc DL(N);
2505 SDValue Chain = N->getOperand(0);
2506 SDValue Offset = N->getOperand(1);
2507 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2508 MemSDNode *Mem = cast<MemSDNode>(N);
2509
2510 // How many elements do we have?
2511 unsigned NumElts = 1;
2512 switch (N->getOpcode()) {
2513 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002514 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002515 case NVPTXISD::StoreRetval:
2516 NumElts = 1;
2517 break;
2518 case NVPTXISD::StoreRetvalV2:
2519 NumElts = 2;
2520 break;
2521 case NVPTXISD::StoreRetvalV4:
2522 NumElts = 4;
2523 break;
2524 }
2525
2526 // Build vector of operands
2527 SmallVector<SDValue, 6> Ops;
2528 for (unsigned i = 0; i < NumElts; ++i)
2529 Ops.push_back(N->getOperand(i + 2));
2530 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2531 Ops.push_back(Chain);
2532
2533 // Determine target opcode
2534 // If we have an i1, use an 8-bit store. The lowering code in
2535 // NVPTXISelLowering will have already emitted an upcast.
2536 unsigned Opcode = 0;
2537 switch (NumElts) {
2538 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002539 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002540 case 1:
2541 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2542 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002543 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002544 case MVT::i1:
2545 Opcode = NVPTX::StoreRetvalI8;
2546 break;
2547 case MVT::i8:
2548 Opcode = NVPTX::StoreRetvalI8;
2549 break;
2550 case MVT::i16:
2551 Opcode = NVPTX::StoreRetvalI16;
2552 break;
2553 case MVT::i32:
2554 Opcode = NVPTX::StoreRetvalI32;
2555 break;
2556 case MVT::i64:
2557 Opcode = NVPTX::StoreRetvalI64;
2558 break;
2559 case MVT::f32:
2560 Opcode = NVPTX::StoreRetvalF32;
2561 break;
2562 case MVT::f64:
2563 Opcode = NVPTX::StoreRetvalF64;
2564 break;
2565 }
2566 break;
2567 case 2:
2568 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2569 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002570 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002571 case MVT::i1:
2572 Opcode = NVPTX::StoreRetvalV2I8;
2573 break;
2574 case MVT::i8:
2575 Opcode = NVPTX::StoreRetvalV2I8;
2576 break;
2577 case MVT::i16:
2578 Opcode = NVPTX::StoreRetvalV2I16;
2579 break;
2580 case MVT::i32:
2581 Opcode = NVPTX::StoreRetvalV2I32;
2582 break;
2583 case MVT::i64:
2584 Opcode = NVPTX::StoreRetvalV2I64;
2585 break;
2586 case MVT::f32:
2587 Opcode = NVPTX::StoreRetvalV2F32;
2588 break;
2589 case MVT::f64:
2590 Opcode = NVPTX::StoreRetvalV2F64;
2591 break;
2592 }
2593 break;
2594 case 4:
2595 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2596 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002597 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002598 case MVT::i1:
2599 Opcode = NVPTX::StoreRetvalV4I8;
2600 break;
2601 case MVT::i8:
2602 Opcode = NVPTX::StoreRetvalV4I8;
2603 break;
2604 case MVT::i16:
2605 Opcode = NVPTX::StoreRetvalV4I16;
2606 break;
2607 case MVT::i32:
2608 Opcode = NVPTX::StoreRetvalV4I32;
2609 break;
2610 case MVT::f32:
2611 Opcode = NVPTX::StoreRetvalV4F32;
2612 break;
2613 }
2614 break;
2615 }
2616
2617 SDNode *Ret =
2618 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2619 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2620 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2621 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2622
2623 return Ret;
2624}
2625
2626SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2627 SDLoc DL(N);
2628 SDValue Chain = N->getOperand(0);
2629 SDValue Param = N->getOperand(1);
2630 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2631 SDValue Offset = N->getOperand(2);
2632 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2633 MemSDNode *Mem = cast<MemSDNode>(N);
2634 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2635
2636 // How many elements do we have?
2637 unsigned NumElts = 1;
2638 switch (N->getOpcode()) {
2639 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002640 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002641 case NVPTXISD::StoreParamU32:
2642 case NVPTXISD::StoreParamS32:
2643 case NVPTXISD::StoreParam:
2644 NumElts = 1;
2645 break;
2646 case NVPTXISD::StoreParamV2:
2647 NumElts = 2;
2648 break;
2649 case NVPTXISD::StoreParamV4:
2650 NumElts = 4;
2651 break;
2652 }
2653
2654 // Build vector of operands
2655 SmallVector<SDValue, 8> Ops;
2656 for (unsigned i = 0; i < NumElts; ++i)
2657 Ops.push_back(N->getOperand(i + 3));
2658 Ops.push_back(CurDAG->getTargetConstant(ParamVal, MVT::i32));
2659 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, MVT::i32));
2660 Ops.push_back(Chain);
2661 Ops.push_back(Flag);
2662
2663 // Determine target opcode
2664 // If we have an i1, use an 8-bit store. The lowering code in
2665 // NVPTXISelLowering will have already emitted an upcast.
2666 unsigned Opcode = 0;
2667 switch (N->getOpcode()) {
2668 default:
2669 switch (NumElts) {
2670 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002671 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002672 case 1:
2673 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2674 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002675 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002676 case MVT::i1:
2677 Opcode = NVPTX::StoreParamI8;
2678 break;
2679 case MVT::i8:
2680 Opcode = NVPTX::StoreParamI8;
2681 break;
2682 case MVT::i16:
2683 Opcode = NVPTX::StoreParamI16;
2684 break;
2685 case MVT::i32:
2686 Opcode = NVPTX::StoreParamI32;
2687 break;
2688 case MVT::i64:
2689 Opcode = NVPTX::StoreParamI64;
2690 break;
2691 case MVT::f32:
2692 Opcode = NVPTX::StoreParamF32;
2693 break;
2694 case MVT::f64:
2695 Opcode = NVPTX::StoreParamF64;
2696 break;
2697 }
2698 break;
2699 case 2:
2700 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2701 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002702 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002703 case MVT::i1:
2704 Opcode = NVPTX::StoreParamV2I8;
2705 break;
2706 case MVT::i8:
2707 Opcode = NVPTX::StoreParamV2I8;
2708 break;
2709 case MVT::i16:
2710 Opcode = NVPTX::StoreParamV2I16;
2711 break;
2712 case MVT::i32:
2713 Opcode = NVPTX::StoreParamV2I32;
2714 break;
2715 case MVT::i64:
2716 Opcode = NVPTX::StoreParamV2I64;
2717 break;
2718 case MVT::f32:
2719 Opcode = NVPTX::StoreParamV2F32;
2720 break;
2721 case MVT::f64:
2722 Opcode = NVPTX::StoreParamV2F64;
2723 break;
2724 }
2725 break;
2726 case 4:
2727 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2728 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002729 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002730 case MVT::i1:
2731 Opcode = NVPTX::StoreParamV4I8;
2732 break;
2733 case MVT::i8:
2734 Opcode = NVPTX::StoreParamV4I8;
2735 break;
2736 case MVT::i16:
2737 Opcode = NVPTX::StoreParamV4I16;
2738 break;
2739 case MVT::i32:
2740 Opcode = NVPTX::StoreParamV4I32;
2741 break;
2742 case MVT::f32:
2743 Opcode = NVPTX::StoreParamV4F32;
2744 break;
2745 }
2746 break;
2747 }
2748 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002749 // Special case: if we have a sign-extend/zero-extend node, insert the
2750 // conversion instruction first, and use that as the value operand to
2751 // the selected StoreParam node.
2752 case NVPTXISD::StoreParamU32: {
2753 Opcode = NVPTX::StoreParamI32;
2754 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2755 MVT::i32);
2756 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2757 MVT::i32, Ops[0], CvtNone);
2758 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002759 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002760 }
2761 case NVPTXISD::StoreParamS32: {
2762 Opcode = NVPTX::StoreParamI32;
2763 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2764 MVT::i32);
2765 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2766 MVT::i32, Ops[0], CvtNone);
2767 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002768 break;
2769 }
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002770 }
Justin Holewinskif8f70912013-06-28 17:57:59 +00002771
Justin Holewinskidff28d22013-07-01 12:59:01 +00002772 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002773 SDNode *Ret =
Justin Holewinskidff28d22013-07-01 12:59:01 +00002774 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002775 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2776 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2777 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2778
2779 return Ret;
2780}
2781
Justin Holewinski30d56a72014-04-09 15:39:15 +00002782SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
2783 SDValue Chain = N->getOperand(0);
2784 SDValue TexRef = N->getOperand(1);
2785 SDValue SampRef = N->getOperand(2);
Craig Topper062a2ba2014-04-25 05:30:21 +00002786 SDNode *Ret = nullptr;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002787 unsigned Opc = 0;
2788 SmallVector<SDValue, 8> Ops;
2789
2790 switch (N->getOpcode()) {
Craig Topper062a2ba2014-04-25 05:30:21 +00002791 default: return nullptr;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002792 case NVPTXISD::Tex1DFloatI32:
2793 Opc = NVPTX::TEX_1D_F32_I32;
2794 break;
2795 case NVPTXISD::Tex1DFloatFloat:
2796 Opc = NVPTX::TEX_1D_F32_F32;
2797 break;
2798 case NVPTXISD::Tex1DFloatFloatLevel:
2799 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2800 break;
2801 case NVPTXISD::Tex1DFloatFloatGrad:
2802 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2803 break;
2804 case NVPTXISD::Tex1DI32I32:
2805 Opc = NVPTX::TEX_1D_I32_I32;
2806 break;
2807 case NVPTXISD::Tex1DI32Float:
2808 Opc = NVPTX::TEX_1D_I32_F32;
2809 break;
2810 case NVPTXISD::Tex1DI32FloatLevel:
2811 Opc = NVPTX::TEX_1D_I32_F32_LEVEL;
2812 break;
2813 case NVPTXISD::Tex1DI32FloatGrad:
2814 Opc = NVPTX::TEX_1D_I32_F32_GRAD;
2815 break;
2816 case NVPTXISD::Tex1DArrayFloatI32:
2817 Opc = NVPTX::TEX_1D_ARRAY_F32_I32;
2818 break;
2819 case NVPTXISD::Tex1DArrayFloatFloat:
2820 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2821 break;
2822 case NVPTXISD::Tex1DArrayFloatFloatLevel:
2823 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2824 break;
2825 case NVPTXISD::Tex1DArrayFloatFloatGrad:
2826 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2827 break;
2828 case NVPTXISD::Tex1DArrayI32I32:
2829 Opc = NVPTX::TEX_1D_ARRAY_I32_I32;
2830 break;
2831 case NVPTXISD::Tex1DArrayI32Float:
2832 Opc = NVPTX::TEX_1D_ARRAY_I32_F32;
2833 break;
2834 case NVPTXISD::Tex1DArrayI32FloatLevel:
2835 Opc = NVPTX::TEX_1D_ARRAY_I32_F32_LEVEL;
2836 break;
2837 case NVPTXISD::Tex1DArrayI32FloatGrad:
2838 Opc = NVPTX::TEX_1D_ARRAY_I32_F32_GRAD;
2839 break;
2840 case NVPTXISD::Tex2DFloatI32:
2841 Opc = NVPTX::TEX_2D_F32_I32;
2842 break;
2843 case NVPTXISD::Tex2DFloatFloat:
2844 Opc = NVPTX::TEX_2D_F32_F32;
2845 break;
2846 case NVPTXISD::Tex2DFloatFloatLevel:
2847 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2848 break;
2849 case NVPTXISD::Tex2DFloatFloatGrad:
2850 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2851 break;
2852 case NVPTXISD::Tex2DI32I32:
2853 Opc = NVPTX::TEX_2D_I32_I32;
2854 break;
2855 case NVPTXISD::Tex2DI32Float:
2856 Opc = NVPTX::TEX_2D_I32_F32;
2857 break;
2858 case NVPTXISD::Tex2DI32FloatLevel:
2859 Opc = NVPTX::TEX_2D_I32_F32_LEVEL;
2860 break;
2861 case NVPTXISD::Tex2DI32FloatGrad:
2862 Opc = NVPTX::TEX_2D_I32_F32_GRAD;
2863 break;
2864 case NVPTXISD::Tex2DArrayFloatI32:
2865 Opc = NVPTX::TEX_2D_ARRAY_F32_I32;
2866 break;
2867 case NVPTXISD::Tex2DArrayFloatFloat:
2868 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2869 break;
2870 case NVPTXISD::Tex2DArrayFloatFloatLevel:
2871 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2872 break;
2873 case NVPTXISD::Tex2DArrayFloatFloatGrad:
2874 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2875 break;
2876 case NVPTXISD::Tex2DArrayI32I32:
2877 Opc = NVPTX::TEX_2D_ARRAY_I32_I32;
2878 break;
2879 case NVPTXISD::Tex2DArrayI32Float:
2880 Opc = NVPTX::TEX_2D_ARRAY_I32_F32;
2881 break;
2882 case NVPTXISD::Tex2DArrayI32FloatLevel:
2883 Opc = NVPTX::TEX_2D_ARRAY_I32_F32_LEVEL;
2884 break;
2885 case NVPTXISD::Tex2DArrayI32FloatGrad:
2886 Opc = NVPTX::TEX_2D_ARRAY_I32_F32_GRAD;
2887 break;
2888 case NVPTXISD::Tex3DFloatI32:
2889 Opc = NVPTX::TEX_3D_F32_I32;
2890 break;
2891 case NVPTXISD::Tex3DFloatFloat:
2892 Opc = NVPTX::TEX_3D_F32_F32;
2893 break;
2894 case NVPTXISD::Tex3DFloatFloatLevel:
2895 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2896 break;
2897 case NVPTXISD::Tex3DFloatFloatGrad:
2898 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2899 break;
2900 case NVPTXISD::Tex3DI32I32:
2901 Opc = NVPTX::TEX_3D_I32_I32;
2902 break;
2903 case NVPTXISD::Tex3DI32Float:
2904 Opc = NVPTX::TEX_3D_I32_F32;
2905 break;
2906 case NVPTXISD::Tex3DI32FloatLevel:
2907 Opc = NVPTX::TEX_3D_I32_F32_LEVEL;
2908 break;
2909 case NVPTXISD::Tex3DI32FloatGrad:
2910 Opc = NVPTX::TEX_3D_I32_F32_GRAD;
2911 break;
2912 }
2913
2914 Ops.push_back(TexRef);
2915 Ops.push_back(SampRef);
2916
2917 // Copy over indices
2918 for (unsigned i = 3; i < N->getNumOperands(); ++i) {
2919 Ops.push_back(N->getOperand(i));
2920 }
2921
2922 Ops.push_back(Chain);
2923 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
2924 return Ret;
2925}
2926
2927SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
2928 SDValue Chain = N->getOperand(0);
2929 SDValue TexHandle = N->getOperand(1);
Craig Topper062a2ba2014-04-25 05:30:21 +00002930 SDNode *Ret = nullptr;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002931 unsigned Opc = 0;
2932 SmallVector<SDValue, 8> Ops;
2933 switch (N->getOpcode()) {
Craig Topper062a2ba2014-04-25 05:30:21 +00002934 default: return nullptr;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002935 case NVPTXISD::Suld1DI8Trap:
2936 Opc = NVPTX::SULD_1D_I8_TRAP;
2937 Ops.push_back(TexHandle);
2938 Ops.push_back(N->getOperand(2));
2939 Ops.push_back(Chain);
2940 break;
2941 case NVPTXISD::Suld1DI16Trap:
2942 Opc = NVPTX::SULD_1D_I16_TRAP;
2943 Ops.push_back(TexHandle);
2944 Ops.push_back(N->getOperand(2));
2945 Ops.push_back(Chain);
2946 break;
2947 case NVPTXISD::Suld1DI32Trap:
2948 Opc = NVPTX::SULD_1D_I32_TRAP;
2949 Ops.push_back(TexHandle);
2950 Ops.push_back(N->getOperand(2));
2951 Ops.push_back(Chain);
2952 break;
2953 case NVPTXISD::Suld1DV2I8Trap:
2954 Opc = NVPTX::SULD_1D_V2I8_TRAP;
2955 Ops.push_back(TexHandle);
2956 Ops.push_back(N->getOperand(2));
2957 Ops.push_back(Chain);
2958 break;
2959 case NVPTXISD::Suld1DV2I16Trap:
2960 Opc = NVPTX::SULD_1D_V2I16_TRAP;
2961 Ops.push_back(TexHandle);
2962 Ops.push_back(N->getOperand(2));
2963 Ops.push_back(Chain);
2964 break;
2965 case NVPTXISD::Suld1DV2I32Trap:
2966 Opc = NVPTX::SULD_1D_V2I32_TRAP;
2967 Ops.push_back(TexHandle);
2968 Ops.push_back(N->getOperand(2));
2969 Ops.push_back(Chain);
2970 break;
2971 case NVPTXISD::Suld1DV4I8Trap:
2972 Opc = NVPTX::SULD_1D_V4I8_TRAP;
2973 Ops.push_back(TexHandle);
2974 Ops.push_back(N->getOperand(2));
2975 Ops.push_back(Chain);
2976 break;
2977 case NVPTXISD::Suld1DV4I16Trap:
2978 Opc = NVPTX::SULD_1D_V4I16_TRAP;
2979 Ops.push_back(TexHandle);
2980 Ops.push_back(N->getOperand(2));
2981 Ops.push_back(Chain);
2982 break;
2983 case NVPTXISD::Suld1DV4I32Trap:
2984 Opc = NVPTX::SULD_1D_V4I32_TRAP;
2985 Ops.push_back(TexHandle);
2986 Ops.push_back(N->getOperand(2));
2987 Ops.push_back(Chain);
2988 break;
2989 case NVPTXISD::Suld1DArrayI8Trap:
2990 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
2991 Ops.push_back(TexHandle);
2992 Ops.push_back(N->getOperand(2));
2993 Ops.push_back(N->getOperand(3));
2994 Ops.push_back(Chain);
2995 break;
2996 case NVPTXISD::Suld1DArrayI16Trap:
2997 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
2998 Ops.push_back(TexHandle);
2999 Ops.push_back(N->getOperand(2));
3000 Ops.push_back(N->getOperand(3));
3001 Ops.push_back(Chain);
3002 break;
3003 case NVPTXISD::Suld1DArrayI32Trap:
3004 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
3005 Ops.push_back(TexHandle);
3006 Ops.push_back(N->getOperand(2));
3007 Ops.push_back(N->getOperand(3));
3008 Ops.push_back(Chain);
3009 break;
3010 case NVPTXISD::Suld1DArrayV2I8Trap:
3011 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
3012 Ops.push_back(TexHandle);
3013 Ops.push_back(N->getOperand(2));
3014 Ops.push_back(N->getOperand(3));
3015 Ops.push_back(Chain);
3016 break;
3017 case NVPTXISD::Suld1DArrayV2I16Trap:
3018 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
3019 Ops.push_back(TexHandle);
3020 Ops.push_back(N->getOperand(2));
3021 Ops.push_back(N->getOperand(3));
3022 Ops.push_back(Chain);
3023 break;
3024 case NVPTXISD::Suld1DArrayV2I32Trap:
3025 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
3026 Ops.push_back(TexHandle);
3027 Ops.push_back(N->getOperand(2));
3028 Ops.push_back(N->getOperand(3));
3029 Ops.push_back(Chain);
3030 break;
3031 case NVPTXISD::Suld1DArrayV4I8Trap:
3032 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
3033 Ops.push_back(TexHandle);
3034 Ops.push_back(N->getOperand(2));
3035 Ops.push_back(N->getOperand(3));
3036 Ops.push_back(Chain);
3037 break;
3038 case NVPTXISD::Suld1DArrayV4I16Trap:
3039 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
3040 Ops.push_back(TexHandle);
3041 Ops.push_back(N->getOperand(2));
3042 Ops.push_back(N->getOperand(3));
3043 Ops.push_back(Chain);
3044 break;
3045 case NVPTXISD::Suld1DArrayV4I32Trap:
3046 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
3047 Ops.push_back(TexHandle);
3048 Ops.push_back(N->getOperand(2));
3049 Ops.push_back(N->getOperand(3));
3050 Ops.push_back(Chain);
3051 break;
3052 case NVPTXISD::Suld2DI8Trap:
3053 Opc = NVPTX::SULD_2D_I8_TRAP;
3054 Ops.push_back(TexHandle);
3055 Ops.push_back(N->getOperand(2));
3056 Ops.push_back(N->getOperand(3));
3057 Ops.push_back(Chain);
3058 break;
3059 case NVPTXISD::Suld2DI16Trap:
3060 Opc = NVPTX::SULD_2D_I16_TRAP;
3061 Ops.push_back(TexHandle);
3062 Ops.push_back(N->getOperand(2));
3063 Ops.push_back(N->getOperand(3));
3064 Ops.push_back(Chain);
3065 break;
3066 case NVPTXISD::Suld2DI32Trap:
3067 Opc = NVPTX::SULD_2D_I32_TRAP;
3068 Ops.push_back(TexHandle);
3069 Ops.push_back(N->getOperand(2));
3070 Ops.push_back(N->getOperand(3));
3071 Ops.push_back(Chain);
3072 break;
3073 case NVPTXISD::Suld2DV2I8Trap:
3074 Opc = NVPTX::SULD_2D_V2I8_TRAP;
3075 Ops.push_back(TexHandle);
3076 Ops.push_back(N->getOperand(2));
3077 Ops.push_back(N->getOperand(3));
3078 Ops.push_back(Chain);
3079 break;
3080 case NVPTXISD::Suld2DV2I16Trap:
3081 Opc = NVPTX::SULD_2D_V2I16_TRAP;
3082 Ops.push_back(TexHandle);
3083 Ops.push_back(N->getOperand(2));
3084 Ops.push_back(N->getOperand(3));
3085 Ops.push_back(Chain);
3086 break;
3087 case NVPTXISD::Suld2DV2I32Trap:
3088 Opc = NVPTX::SULD_2D_V2I32_TRAP;
3089 Ops.push_back(TexHandle);
3090 Ops.push_back(N->getOperand(2));
3091 Ops.push_back(N->getOperand(3));
3092 Ops.push_back(Chain);
3093 break;
3094 case NVPTXISD::Suld2DV4I8Trap:
3095 Opc = NVPTX::SULD_2D_V4I8_TRAP;
3096 Ops.push_back(TexHandle);
3097 Ops.push_back(N->getOperand(2));
3098 Ops.push_back(N->getOperand(3));
3099 Ops.push_back(Chain);
3100 break;
3101 case NVPTXISD::Suld2DV4I16Trap:
3102 Opc = NVPTX::SULD_2D_V4I16_TRAP;
3103 Ops.push_back(TexHandle);
3104 Ops.push_back(N->getOperand(2));
3105 Ops.push_back(N->getOperand(3));
3106 Ops.push_back(Chain);
3107 break;
3108 case NVPTXISD::Suld2DV4I32Trap:
3109 Opc = NVPTX::SULD_2D_V4I32_TRAP;
3110 Ops.push_back(TexHandle);
3111 Ops.push_back(N->getOperand(2));
3112 Ops.push_back(N->getOperand(3));
3113 Ops.push_back(Chain);
3114 break;
3115 case NVPTXISD::Suld2DArrayI8Trap:
3116 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
3117 Ops.push_back(TexHandle);
3118 Ops.push_back(N->getOperand(2));
3119 Ops.push_back(N->getOperand(3));
3120 Ops.push_back(N->getOperand(4));
3121 Ops.push_back(Chain);
3122 break;
3123 case NVPTXISD::Suld2DArrayI16Trap:
3124 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
3125 Ops.push_back(TexHandle);
3126 Ops.push_back(N->getOperand(2));
3127 Ops.push_back(N->getOperand(3));
3128 Ops.push_back(N->getOperand(4));
3129 Ops.push_back(Chain);
3130 break;
3131 case NVPTXISD::Suld2DArrayI32Trap:
3132 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
3133 Ops.push_back(TexHandle);
3134 Ops.push_back(N->getOperand(2));
3135 Ops.push_back(N->getOperand(3));
3136 Ops.push_back(N->getOperand(4));
3137 Ops.push_back(Chain);
3138 break;
3139 case NVPTXISD::Suld2DArrayV2I8Trap:
3140 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
3141 Ops.push_back(TexHandle);
3142 Ops.push_back(N->getOperand(2));
3143 Ops.push_back(N->getOperand(3));
3144 Ops.push_back(N->getOperand(4));
3145 Ops.push_back(Chain);
3146 break;
3147 case NVPTXISD::Suld2DArrayV2I16Trap:
3148 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
3149 Ops.push_back(TexHandle);
3150 Ops.push_back(N->getOperand(2));
3151 Ops.push_back(N->getOperand(3));
3152 Ops.push_back(N->getOperand(4));
3153 Ops.push_back(Chain);
3154 break;
3155 case NVPTXISD::Suld2DArrayV2I32Trap:
3156 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
3157 Ops.push_back(TexHandle);
3158 Ops.push_back(N->getOperand(2));
3159 Ops.push_back(N->getOperand(3));
3160 Ops.push_back(N->getOperand(4));
3161 Ops.push_back(Chain);
3162 break;
3163 case NVPTXISD::Suld2DArrayV4I8Trap:
3164 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
3165 Ops.push_back(TexHandle);
3166 Ops.push_back(N->getOperand(2));
3167 Ops.push_back(N->getOperand(3));
3168 Ops.push_back(N->getOperand(4));
3169 Ops.push_back(Chain);
3170 break;
3171 case NVPTXISD::Suld2DArrayV4I16Trap:
3172 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
3173 Ops.push_back(TexHandle);
3174 Ops.push_back(N->getOperand(2));
3175 Ops.push_back(N->getOperand(3));
3176 Ops.push_back(N->getOperand(4));
3177 Ops.push_back(Chain);
3178 break;
3179 case NVPTXISD::Suld2DArrayV4I32Trap:
3180 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
3181 Ops.push_back(TexHandle);
3182 Ops.push_back(N->getOperand(2));
3183 Ops.push_back(N->getOperand(3));
3184 Ops.push_back(N->getOperand(4));
3185 Ops.push_back(Chain);
3186 break;
3187 case NVPTXISD::Suld3DI8Trap:
3188 Opc = NVPTX::SULD_3D_I8_TRAP;
3189 Ops.push_back(TexHandle);
3190 Ops.push_back(N->getOperand(2));
3191 Ops.push_back(N->getOperand(3));
3192 Ops.push_back(N->getOperand(4));
3193 Ops.push_back(Chain);
3194 break;
3195 case NVPTXISD::Suld3DI16Trap:
3196 Opc = NVPTX::SULD_3D_I16_TRAP;
3197 Ops.push_back(TexHandle);
3198 Ops.push_back(N->getOperand(2));
3199 Ops.push_back(N->getOperand(3));
3200 Ops.push_back(N->getOperand(4));
3201 Ops.push_back(Chain);
3202 break;
3203 case NVPTXISD::Suld3DI32Trap:
3204 Opc = NVPTX::SULD_3D_I32_TRAP;
3205 Ops.push_back(TexHandle);
3206 Ops.push_back(N->getOperand(2));
3207 Ops.push_back(N->getOperand(3));
3208 Ops.push_back(N->getOperand(4));
3209 Ops.push_back(Chain);
3210 break;
3211 case NVPTXISD::Suld3DV2I8Trap:
3212 Opc = NVPTX::SULD_3D_V2I8_TRAP;
3213 Ops.push_back(TexHandle);
3214 Ops.push_back(N->getOperand(2));
3215 Ops.push_back(N->getOperand(3));
3216 Ops.push_back(N->getOperand(4));
3217 Ops.push_back(Chain);
3218 break;
3219 case NVPTXISD::Suld3DV2I16Trap:
3220 Opc = NVPTX::SULD_3D_V2I16_TRAP;
3221 Ops.push_back(TexHandle);
3222 Ops.push_back(N->getOperand(2));
3223 Ops.push_back(N->getOperand(3));
3224 Ops.push_back(N->getOperand(4));
3225 Ops.push_back(Chain);
3226 break;
3227 case NVPTXISD::Suld3DV2I32Trap:
3228 Opc = NVPTX::SULD_3D_V2I32_TRAP;
3229 Ops.push_back(TexHandle);
3230 Ops.push_back(N->getOperand(2));
3231 Ops.push_back(N->getOperand(3));
3232 Ops.push_back(N->getOperand(4));
3233 Ops.push_back(Chain);
3234 break;
3235 case NVPTXISD::Suld3DV4I8Trap:
3236 Opc = NVPTX::SULD_3D_V4I8_TRAP;
3237 Ops.push_back(TexHandle);
3238 Ops.push_back(N->getOperand(2));
3239 Ops.push_back(N->getOperand(3));
3240 Ops.push_back(N->getOperand(4));
3241 Ops.push_back(Chain);
3242 break;
3243 case NVPTXISD::Suld3DV4I16Trap:
3244 Opc = NVPTX::SULD_3D_V4I16_TRAP;
3245 Ops.push_back(TexHandle);
3246 Ops.push_back(N->getOperand(2));
3247 Ops.push_back(N->getOperand(3));
3248 Ops.push_back(N->getOperand(4));
3249 Ops.push_back(Chain);
3250 break;
3251 case NVPTXISD::Suld3DV4I32Trap:
3252 Opc = NVPTX::SULD_3D_V4I32_TRAP;
3253 Ops.push_back(TexHandle);
3254 Ops.push_back(N->getOperand(2));
3255 Ops.push_back(N->getOperand(3));
3256 Ops.push_back(N->getOperand(4));
3257 Ops.push_back(Chain);
3258 break;
3259 }
3260 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3261 return Ret;
3262}
3263
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003264/// SelectBFE - Look for instruction sequences that can be made more efficient
3265/// by using the 'bfe' (bit-field extract) PTX instruction
3266SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
3267 SDValue LHS = N->getOperand(0);
3268 SDValue RHS = N->getOperand(1);
3269 SDValue Len;
3270 SDValue Start;
3271 SDValue Val;
3272 bool IsSigned = false;
3273
3274 if (N->getOpcode() == ISD::AND) {
3275 // Canonicalize the operands
3276 // We want 'and %val, %mask'
3277 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3278 std::swap(LHS, RHS);
3279 }
3280
3281 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3282 if (!Mask) {
3283 // We need a constant mask on the RHS of the AND
3284 return NULL;
3285 }
3286
3287 // Extract the mask bits
3288 uint64_t MaskVal = Mask->getZExtValue();
3289 if (!isMask_64(MaskVal)) {
3290 // We *could* handle shifted masks here, but doing so would require an
3291 // 'and' operation to fix up the low-order bits so we would trade
3292 // shr+and for bfe+and, which has the same throughput
3293 return NULL;
3294 }
3295
3296 // How many bits are in our mask?
3297 uint64_t NumBits = CountTrailingOnes_64(MaskVal);
3298 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
3299
3300 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3301 // We have a 'srl/and' pair, extract the effective start bit and length
3302 Val = LHS.getNode()->getOperand(0);
3303 Start = LHS.getNode()->getOperand(1);
3304 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3305 if (StartConst) {
3306 uint64_t StartVal = StartConst->getZExtValue();
3307 // How many "good" bits do we have left? "good" is defined here as bits
3308 // that exist in the original value, not shifted in.
3309 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
3310 if (NumBits > GoodBits) {
3311 // Do not handle the case where bits have been shifted in. In theory
3312 // we could handle this, but the cost is likely higher than just
3313 // emitting the srl/and pair.
3314 return NULL;
3315 }
3316 Start = CurDAG->getTargetConstant(StartVal, MVT::i32);
3317 } else {
3318 // Do not handle the case where the shift amount (can be zero if no srl
3319 // was found) is not constant. We could handle this case, but it would
3320 // require run-time logic that would be more expensive than just
3321 // emitting the srl/and pair.
3322 return NULL;
3323 }
3324 } else {
3325 // Do not handle the case where the LHS of the and is not a shift. While
3326 // it would be trivial to handle this case, it would just transform
3327 // 'and' -> 'bfe', but 'and' has higher-throughput.
3328 return NULL;
3329 }
3330 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3331 if (LHS->getOpcode() == ISD::AND) {
3332 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3333 if (!ShiftCnst) {
3334 // Shift amount must be constant
3335 return NULL;
3336 }
3337
3338 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3339
3340 SDValue AndLHS = LHS->getOperand(0);
3341 SDValue AndRHS = LHS->getOperand(1);
3342
3343 // Canonicalize the AND to have the mask on the RHS
3344 if (isa<ConstantSDNode>(AndLHS)) {
3345 std::swap(AndLHS, AndRHS);
3346 }
3347
3348 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3349 if (!MaskCnst) {
3350 // Mask must be constant
3351 return NULL;
3352 }
3353
3354 uint64_t MaskVal = MaskCnst->getZExtValue();
3355 uint64_t NumZeros;
3356 uint64_t NumBits;
3357 if (isMask_64(MaskVal)) {
3358 NumZeros = 0;
3359 // The number of bits in the result bitfield will be the number of
3360 // trailing ones (the AND) minus the number of bits we shift off
3361 NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt;
3362 } else if (isShiftedMask_64(MaskVal)) {
3363 NumZeros = countTrailingZeros(MaskVal);
3364 unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros);
3365 // The number of bits in the result bitfield will be the number of
3366 // trailing zeros plus the number of set bits in the mask minus the
3367 // number of bits we shift off
3368 NumBits = NumZeros + NumOnes - ShiftAmt;
3369 } else {
3370 // This is not a mask we can handle
3371 return NULL;
3372 }
3373
3374 if (ShiftAmt < NumZeros) {
3375 // Handling this case would require extra logic that would make this
3376 // transformation non-profitable
3377 return NULL;
3378 }
3379
3380 Val = AndLHS;
3381 Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32);
3382 Len = CurDAG->getTargetConstant(NumBits, MVT::i32);
3383 } else if (LHS->getOpcode() == ISD::SHL) {
3384 // Here, we have a pattern like:
3385 //
3386 // (sra (shl val, NN), MM)
3387 // or
3388 // (srl (shl val, NN), MM)
3389 //
3390 // If MM >= NN, we can efficiently optimize this with bfe
3391 Val = LHS->getOperand(0);
3392
3393 SDValue ShlRHS = LHS->getOperand(1);
3394 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3395 if (!ShlCnst) {
3396 // Shift amount must be constant
3397 return NULL;
3398 }
3399 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3400
3401 SDValue ShrRHS = RHS;
3402 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3403 if (!ShrCnst) {
3404 // Shift amount must be constant
3405 return NULL;
3406 }
3407 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3408
3409 // To avoid extra codegen and be profitable, we need Outer >= Inner
3410 if (OuterShiftAmt < InnerShiftAmt) {
3411 return NULL;
3412 }
3413
3414 // If the outer shift is more than the type size, we have no bitfield to
3415 // extract (since we also check that the inner shift is <= the outer shift
3416 // then this also implies that the inner shift is < the type size)
3417 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
3418 return NULL;
3419 }
3420
3421 Start =
3422 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32);
3423 Len =
3424 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
3425 OuterShiftAmt, MVT::i32);
3426
3427 if (N->getOpcode() == ISD::SRA) {
3428 // If we have a arithmetic right shift, we need to use the signed bfe
3429 // variant
3430 IsSigned = true;
3431 }
3432 } else {
3433 // No can do...
3434 return NULL;
3435 }
3436 } else {
3437 // No can do...
3438 return NULL;
3439 }
3440
3441
3442 unsigned Opc;
3443 // For the BFE operations we form here from "and" and "srl", always use the
3444 // unsigned variants.
3445 if (Val.getValueType() == MVT::i32) {
3446 if (IsSigned) {
3447 Opc = NVPTX::BFE_S32rii;
3448 } else {
3449 Opc = NVPTX::BFE_U32rii;
3450 }
3451 } else if (Val.getValueType() == MVT::i64) {
3452 if (IsSigned) {
3453 Opc = NVPTX::BFE_S64rii;
3454 } else {
3455 Opc = NVPTX::BFE_U64rii;
3456 }
3457 } else {
3458 // We cannot handle this type
3459 return NULL;
3460 }
3461
3462 SDValue Ops[] = {
3463 Val, Start, Len
3464 };
3465
3466 SDNode *Ret =
3467 CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3468
3469 return Ret;
3470}
3471
Justin Holewinskiae556d32012-05-04 20:18:50 +00003472// SelectDirectAddr - Match a direct address for DAG.
3473// A direct address could be a globaladdress or externalsymbol.
3474bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3475 // Return true if TGA or ES.
Justin Holewinski0497ab12013-03-30 14:29:21 +00003476 if (N.getOpcode() == ISD::TargetGlobalAddress ||
3477 N.getOpcode() == ISD::TargetExternalSymbol) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003478 Address = N;
3479 return true;
3480 }
3481 if (N.getOpcode() == NVPTXISD::Wrapper) {
3482 Address = N.getOperand(0);
3483 return true;
3484 }
3485 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
3486 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
3487 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
3488 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
3489 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
3490 }
3491 return false;
3492}
3493
3494// symbol+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00003495bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3496 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003497 if (Addr.getOpcode() == ISD::ADD) {
3498 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00003499 SDValue base = Addr.getOperand(0);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003500 if (SelectDirectAddr(base, Base)) {
3501 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
3502 return true;
3503 }
3504 }
3505 }
3506 return false;
3507}
3508
3509// symbol+offset
3510bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3511 SDValue &Base, SDValue &Offset) {
3512 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3513}
3514
3515// symbol+offset
3516bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3517 SDValue &Base, SDValue &Offset) {
3518 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3519}
3520
3521// register+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00003522bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3523 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003524 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3525 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3526 Offset = CurDAG->getTargetConstant(0, mvt);
3527 return true;
3528 }
3529 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3530 Addr.getOpcode() == ISD::TargetGlobalAddress)
Justin Holewinski0497ab12013-03-30 14:29:21 +00003531 return false; // direct calls.
Justin Holewinskiae556d32012-05-04 20:18:50 +00003532
3533 if (Addr.getOpcode() == ISD::ADD) {
3534 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3535 return false;
3536 }
3537 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3538 if (FrameIndexSDNode *FIN =
Justin Holewinski0497ab12013-03-30 14:29:21 +00003539 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
Justin Holewinskiae556d32012-05-04 20:18:50 +00003540 // Constant offset from frame ref.
3541 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3542 else
3543 Base = Addr.getOperand(0);
3544 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), mvt);
3545 return true;
3546 }
3547 }
3548 return false;
3549}
3550
3551// register+offset
3552bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3553 SDValue &Base, SDValue &Offset) {
3554 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3555}
3556
3557// register+offset
3558bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3559 SDValue &Base, SDValue &Offset) {
3560 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3561}
3562
3563bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3564 unsigned int spN) const {
Craig Topper062a2ba2014-04-25 05:30:21 +00003565 const Value *Src = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00003566 // Even though MemIntrinsicSDNode is a subclas of MemSDNode,
3567 // the classof() for MemSDNode does not include MemIntrinsicSDNode
3568 // (See SelectionDAGNodes.h). So we need to check for both.
3569 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +00003570 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3571 return true;
3572 Src = mN->getMemOperand()->getValue();
Justin Holewinski0497ab12013-03-30 14:29:21 +00003573 } else if (MemSDNode *mN = dyn_cast<MemIntrinsicSDNode>(N)) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +00003574 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3575 return true;
3576 Src = mN->getMemOperand()->getValue();
Justin Holewinskiae556d32012-05-04 20:18:50 +00003577 }
3578 if (!Src)
3579 return false;
3580 if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
3581 return (PT->getAddressSpace() == spN);
3582 return false;
3583}
3584
3585/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3586/// inline asm expressions.
Justin Holewinski0497ab12013-03-30 14:29:21 +00003587bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
3588 const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003589 SDValue Op0, Op1;
3590 switch (ConstraintCode) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00003591 default:
3592 return true;
3593 case 'm': // memory
Justin Holewinskiae556d32012-05-04 20:18:50 +00003594 if (SelectDirectAddr(Op, Op0)) {
3595 OutOps.push_back(Op0);
3596 OutOps.push_back(CurDAG->getTargetConstant(0, MVT::i32));
3597 return false;
3598 }
3599 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3600 OutOps.push_back(Op0);
3601 OutOps.push_back(Op1);
3602 return false;
3603 }
3604 break;
3605 }
3606 return true;
3607}