blob: 32bb279f0e7bf30e4317a0d8fd3005fffd16987c [file] [log] [blame]
//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//
13
Justin Holewinskiae556d32012-05-04 20:18:50 +000014#include "NVPTXISelDAGToDAG.h"
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +000015#include "NVPTXUtilities.h"
Jingyue Wu48a9bdc2015-07-20 21:28:54 +000016#include "llvm/Analysis/ValueTracking.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000017#include "llvm/IR/GlobalValue.h"
18#include "llvm/IR/Instructions.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000019#include "llvm/Support/CommandLine.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000020#include "llvm/Support/Debug.h"
21#include "llvm/Support/ErrorHandling.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000022#include "llvm/Support/raw_ostream.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000023#include "llvm/Target/TargetIntrinsicInfo.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000024
Justin Holewinskiae556d32012-05-04 20:18:50 +000025using namespace llvm;
26
Chandler Carruth84e68b22014-04-22 02:41:26 +000027#define DEBUG_TYPE "nvptx-isel"
28
Justin Holewinski0497ab12013-03-30 14:29:21 +000029static cl::opt<int> UsePrecDivF32(
Nadav Rotem7f27e0b2013-10-18 23:38:13 +000030 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
Justin Holewinski0497ab12013-03-30 14:29:21 +000031 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
Sylvestre Ledru469de192014-08-11 18:04:46 +000032 " IEEE Compliant F32 div.rnd if available."),
Justin Holewinski0497ab12013-03-30 14:29:21 +000033 cl::init(2));
Justin Holewinskiae556d32012-05-04 20:18:50 +000034
Justin Holewinski48f4ad32013-05-21 16:51:30 +000035static cl::opt<bool>
Nadav Rotem7f27e0b2013-10-18 23:38:13 +000036UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
Justin Holewinski48f4ad32013-05-21 16:51:30 +000037 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
38 cl::init(true));
39
Justin Holewinskicd069e62013-07-22 12:18:04 +000040static cl::opt<bool>
Nadav Rotem7f27e0b2013-10-18 23:38:13 +000041FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
Justin Holewinskicd069e62013-07-22 12:18:04 +000042 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
43 cl::init(false));
44
45
Justin Holewinskiae556d32012-05-04 20:18:50 +000046/// createNVPTXISelDag - This pass converts a legalized DAG into a
47/// NVPTX-specific DAG, ready for instruction scheduling.
48FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
49 llvm::CodeGenOpt::Level OptLevel) {
50 return new NVPTXDAGToDAGISel(TM, OptLevel);
51}
52
Justin Holewinskiae556d32012-05-04 20:18:50 +000053NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
54 CodeGenOpt::Level OptLevel)
Eric Christopher02389e32015-02-19 00:08:27 +000055 : SelectionDAGISel(tm, OptLevel), TM(tm) {
Justin Holewinskiae556d32012-05-04 20:18:50 +000056 doMulWide = (OptLevel > 0);
Justin Holewinskicd069e62013-07-22 12:18:04 +000057}
Justin Holewinskiae556d32012-05-04 20:18:50 +000058
Eric Christopher147bba22015-01-30 01:40:59 +000059bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
60 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
61 return SelectionDAGISel::runOnMachineFunction(MF);
62}
63
Justin Holewinskicd069e62013-07-22 12:18:04 +000064int NVPTXDAGToDAGISel::getDivF32Level() const {
65 if (UsePrecDivF32.getNumOccurrences() > 0) {
66 // If nvptx-prec-div32=N is used on the command-line, always honor it
67 return UsePrecDivF32;
68 } else {
69 // Otherwise, use div.approx if fast math is enabled
70 if (TM.Options.UnsafeFPMath)
71 return 0;
72 else
73 return 2;
74 }
75}
Justin Holewinskiae556d32012-05-04 20:18:50 +000076
Justin Holewinskicd069e62013-07-22 12:18:04 +000077bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
78 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
79 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
80 return UsePrecSqrtF32;
81 } else {
82 // Otherwise, use sqrt.approx if fast math is enabled
Eli Bendersky3e840192015-03-23 16:26:23 +000083 return !TM.Options.UnsafeFPMath;
Justin Holewinskicd069e62013-07-22 12:18:04 +000084 }
85}
86
87bool NVPTXDAGToDAGISel::useF32FTZ() const {
88 if (FtzEnabled.getNumOccurrences() > 0) {
89 // If nvptx-f32ftz is used on the command-line, always honor it
90 return FtzEnabled;
91 } else {
92 const Function *F = MF->getFunction();
93 // Otherwise, check for an nvptx-f32ftz attribute on the function
94 if (F->hasFnAttribute("nvptx-f32ftz"))
Duncan P. N. Exon Smithb5054332015-02-14 15:35:43 +000095 return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
Justin Holewinskicd069e62013-07-22 12:18:04 +000096 else
97 return false;
98 }
Justin Holewinskiae556d32012-05-04 20:18:50 +000099}
100
Justin Holewinski428cf0e2014-07-17 18:10:09 +0000101bool NVPTXDAGToDAGISel::allowFMA() const {
Eric Christopher147bba22015-01-30 01:40:59 +0000102 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
Justin Holewinski428cf0e2014-07-17 18:10:09 +0000103 return TL->allowFMA(*MF, OptLevel);
104}
105
Justin Holewinskiae556d32012-05-04 20:18:50 +0000106/// Select - Select instructions not customized! Used for
107/// expanded, promoted and normal instructions.
Justin Holewinski0497ab12013-03-30 14:29:21 +0000108SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000109
Tim Northover31d093c2013-09-22 08:21:56 +0000110 if (N->isMachineOpcode()) {
111 N->setNodeId(-1);
Craig Topper062a2ba2014-04-25 05:30:21 +0000112 return nullptr; // Already selected.
Tim Northover31d093c2013-09-22 08:21:56 +0000113 }
Justin Holewinskiae556d32012-05-04 20:18:50 +0000114
Craig Topper062a2ba2014-04-25 05:30:21 +0000115 SDNode *ResNode = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000116 switch (N->getOpcode()) {
117 case ISD::LOAD:
118 ResNode = SelectLoad(N);
119 break;
120 case ISD::STORE:
121 ResNode = SelectStore(N);
122 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000123 case NVPTXISD::LoadV2:
124 case NVPTXISD::LoadV4:
125 ResNode = SelectLoadVector(N);
126 break;
127 case NVPTXISD::LDGV2:
128 case NVPTXISD::LDGV4:
129 case NVPTXISD::LDUV2:
130 case NVPTXISD::LDUV4:
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000131 ResNode = SelectLDGLDU(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000132 break;
133 case NVPTXISD::StoreV2:
134 case NVPTXISD::StoreV4:
135 ResNode = SelectStoreVector(N);
136 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000137 case NVPTXISD::LoadParam:
138 case NVPTXISD::LoadParamV2:
139 case NVPTXISD::LoadParamV4:
140 ResNode = SelectLoadParam(N);
141 break;
142 case NVPTXISD::StoreRetval:
143 case NVPTXISD::StoreRetvalV2:
144 case NVPTXISD::StoreRetvalV4:
145 ResNode = SelectStoreRetval(N);
146 break;
147 case NVPTXISD::StoreParam:
148 case NVPTXISD::StoreParamV2:
149 case NVPTXISD::StoreParamV4:
150 case NVPTXISD::StoreParamS32:
151 case NVPTXISD::StoreParamU32:
152 ResNode = SelectStoreParam(N);
153 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000154 case ISD::INTRINSIC_WO_CHAIN:
155 ResNode = SelectIntrinsicNoChain(N);
156 break;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000157 case ISD::INTRINSIC_W_CHAIN:
158 ResNode = SelectIntrinsicChain(N);
159 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000160 case NVPTXISD::Tex1DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000161 case NVPTXISD::Tex1DFloatFloat:
162 case NVPTXISD::Tex1DFloatFloatLevel:
163 case NVPTXISD::Tex1DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000164 case NVPTXISD::Tex1DS32S32:
165 case NVPTXISD::Tex1DS32Float:
166 case NVPTXISD::Tex1DS32FloatLevel:
167 case NVPTXISD::Tex1DS32FloatGrad:
168 case NVPTXISD::Tex1DU32S32:
169 case NVPTXISD::Tex1DU32Float:
170 case NVPTXISD::Tex1DU32FloatLevel:
171 case NVPTXISD::Tex1DU32FloatGrad:
172 case NVPTXISD::Tex1DArrayFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000173 case NVPTXISD::Tex1DArrayFloatFloat:
174 case NVPTXISD::Tex1DArrayFloatFloatLevel:
175 case NVPTXISD::Tex1DArrayFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000176 case NVPTXISD::Tex1DArrayS32S32:
177 case NVPTXISD::Tex1DArrayS32Float:
178 case NVPTXISD::Tex1DArrayS32FloatLevel:
179 case NVPTXISD::Tex1DArrayS32FloatGrad:
180 case NVPTXISD::Tex1DArrayU32S32:
181 case NVPTXISD::Tex1DArrayU32Float:
182 case NVPTXISD::Tex1DArrayU32FloatLevel:
183 case NVPTXISD::Tex1DArrayU32FloatGrad:
184 case NVPTXISD::Tex2DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000185 case NVPTXISD::Tex2DFloatFloat:
186 case NVPTXISD::Tex2DFloatFloatLevel:
187 case NVPTXISD::Tex2DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000188 case NVPTXISD::Tex2DS32S32:
189 case NVPTXISD::Tex2DS32Float:
190 case NVPTXISD::Tex2DS32FloatLevel:
191 case NVPTXISD::Tex2DS32FloatGrad:
192 case NVPTXISD::Tex2DU32S32:
193 case NVPTXISD::Tex2DU32Float:
194 case NVPTXISD::Tex2DU32FloatLevel:
195 case NVPTXISD::Tex2DU32FloatGrad:
196 case NVPTXISD::Tex2DArrayFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000197 case NVPTXISD::Tex2DArrayFloatFloat:
198 case NVPTXISD::Tex2DArrayFloatFloatLevel:
199 case NVPTXISD::Tex2DArrayFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000200 case NVPTXISD::Tex2DArrayS32S32:
201 case NVPTXISD::Tex2DArrayS32Float:
202 case NVPTXISD::Tex2DArrayS32FloatLevel:
203 case NVPTXISD::Tex2DArrayS32FloatGrad:
204 case NVPTXISD::Tex2DArrayU32S32:
205 case NVPTXISD::Tex2DArrayU32Float:
206 case NVPTXISD::Tex2DArrayU32FloatLevel:
207 case NVPTXISD::Tex2DArrayU32FloatGrad:
208 case NVPTXISD::Tex3DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000209 case NVPTXISD::Tex3DFloatFloat:
210 case NVPTXISD::Tex3DFloatFloatLevel:
211 case NVPTXISD::Tex3DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000212 case NVPTXISD::Tex3DS32S32:
213 case NVPTXISD::Tex3DS32Float:
214 case NVPTXISD::Tex3DS32FloatLevel:
215 case NVPTXISD::Tex3DS32FloatGrad:
216 case NVPTXISD::Tex3DU32S32:
217 case NVPTXISD::Tex3DU32Float:
218 case NVPTXISD::Tex3DU32FloatLevel:
219 case NVPTXISD::Tex3DU32FloatGrad:
220 case NVPTXISD::TexCubeFloatFloat:
221 case NVPTXISD::TexCubeFloatFloatLevel:
222 case NVPTXISD::TexCubeS32Float:
223 case NVPTXISD::TexCubeS32FloatLevel:
224 case NVPTXISD::TexCubeU32Float:
225 case NVPTXISD::TexCubeU32FloatLevel:
226 case NVPTXISD::TexCubeArrayFloatFloat:
227 case NVPTXISD::TexCubeArrayFloatFloatLevel:
228 case NVPTXISD::TexCubeArrayS32Float:
229 case NVPTXISD::TexCubeArrayS32FloatLevel:
230 case NVPTXISD::TexCubeArrayU32Float:
231 case NVPTXISD::TexCubeArrayU32FloatLevel:
232 case NVPTXISD::Tld4R2DFloatFloat:
233 case NVPTXISD::Tld4G2DFloatFloat:
234 case NVPTXISD::Tld4B2DFloatFloat:
235 case NVPTXISD::Tld4A2DFloatFloat:
236 case NVPTXISD::Tld4R2DS64Float:
237 case NVPTXISD::Tld4G2DS64Float:
238 case NVPTXISD::Tld4B2DS64Float:
239 case NVPTXISD::Tld4A2DS64Float:
240 case NVPTXISD::Tld4R2DU64Float:
241 case NVPTXISD::Tld4G2DU64Float:
242 case NVPTXISD::Tld4B2DU64Float:
243 case NVPTXISD::Tld4A2DU64Float:
244 case NVPTXISD::TexUnified1DFloatS32:
245 case NVPTXISD::TexUnified1DFloatFloat:
246 case NVPTXISD::TexUnified1DFloatFloatLevel:
247 case NVPTXISD::TexUnified1DFloatFloatGrad:
248 case NVPTXISD::TexUnified1DS32S32:
249 case NVPTXISD::TexUnified1DS32Float:
250 case NVPTXISD::TexUnified1DS32FloatLevel:
251 case NVPTXISD::TexUnified1DS32FloatGrad:
252 case NVPTXISD::TexUnified1DU32S32:
253 case NVPTXISD::TexUnified1DU32Float:
254 case NVPTXISD::TexUnified1DU32FloatLevel:
255 case NVPTXISD::TexUnified1DU32FloatGrad:
256 case NVPTXISD::TexUnified1DArrayFloatS32:
257 case NVPTXISD::TexUnified1DArrayFloatFloat:
258 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
259 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
260 case NVPTXISD::TexUnified1DArrayS32S32:
261 case NVPTXISD::TexUnified1DArrayS32Float:
262 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
263 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
264 case NVPTXISD::TexUnified1DArrayU32S32:
265 case NVPTXISD::TexUnified1DArrayU32Float:
266 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
267 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
268 case NVPTXISD::TexUnified2DFloatS32:
269 case NVPTXISD::TexUnified2DFloatFloat:
270 case NVPTXISD::TexUnified2DFloatFloatLevel:
271 case NVPTXISD::TexUnified2DFloatFloatGrad:
272 case NVPTXISD::TexUnified2DS32S32:
273 case NVPTXISD::TexUnified2DS32Float:
274 case NVPTXISD::TexUnified2DS32FloatLevel:
275 case NVPTXISD::TexUnified2DS32FloatGrad:
276 case NVPTXISD::TexUnified2DU32S32:
277 case NVPTXISD::TexUnified2DU32Float:
278 case NVPTXISD::TexUnified2DU32FloatLevel:
279 case NVPTXISD::TexUnified2DU32FloatGrad:
280 case NVPTXISD::TexUnified2DArrayFloatS32:
281 case NVPTXISD::TexUnified2DArrayFloatFloat:
282 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
283 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
284 case NVPTXISD::TexUnified2DArrayS32S32:
285 case NVPTXISD::TexUnified2DArrayS32Float:
286 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
287 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
288 case NVPTXISD::TexUnified2DArrayU32S32:
289 case NVPTXISD::TexUnified2DArrayU32Float:
290 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
291 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
292 case NVPTXISD::TexUnified3DFloatS32:
293 case NVPTXISD::TexUnified3DFloatFloat:
294 case NVPTXISD::TexUnified3DFloatFloatLevel:
295 case NVPTXISD::TexUnified3DFloatFloatGrad:
296 case NVPTXISD::TexUnified3DS32S32:
297 case NVPTXISD::TexUnified3DS32Float:
298 case NVPTXISD::TexUnified3DS32FloatLevel:
299 case NVPTXISD::TexUnified3DS32FloatGrad:
300 case NVPTXISD::TexUnified3DU32S32:
301 case NVPTXISD::TexUnified3DU32Float:
302 case NVPTXISD::TexUnified3DU32FloatLevel:
303 case NVPTXISD::TexUnified3DU32FloatGrad:
304 case NVPTXISD::TexUnifiedCubeFloatFloat:
305 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
306 case NVPTXISD::TexUnifiedCubeS32Float:
307 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
308 case NVPTXISD::TexUnifiedCubeU32Float:
309 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
310 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
311 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
312 case NVPTXISD::TexUnifiedCubeArrayS32Float:
313 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
314 case NVPTXISD::TexUnifiedCubeArrayU32Float:
315 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
316 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
317 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
318 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
319 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
320 case NVPTXISD::Tld4UnifiedR2DS64Float:
321 case NVPTXISD::Tld4UnifiedG2DS64Float:
322 case NVPTXISD::Tld4UnifiedB2DS64Float:
323 case NVPTXISD::Tld4UnifiedA2DS64Float:
324 case NVPTXISD::Tld4UnifiedR2DU64Float:
325 case NVPTXISD::Tld4UnifiedG2DU64Float:
326 case NVPTXISD::Tld4UnifiedB2DU64Float:
327 case NVPTXISD::Tld4UnifiedA2DU64Float:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000328 ResNode = SelectTextureIntrinsic(N);
329 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000330 case NVPTXISD::Suld1DI8Clamp:
331 case NVPTXISD::Suld1DI16Clamp:
332 case NVPTXISD::Suld1DI32Clamp:
333 case NVPTXISD::Suld1DI64Clamp:
334 case NVPTXISD::Suld1DV2I8Clamp:
335 case NVPTXISD::Suld1DV2I16Clamp:
336 case NVPTXISD::Suld1DV2I32Clamp:
337 case NVPTXISD::Suld1DV2I64Clamp:
338 case NVPTXISD::Suld1DV4I8Clamp:
339 case NVPTXISD::Suld1DV4I16Clamp:
340 case NVPTXISD::Suld1DV4I32Clamp:
341 case NVPTXISD::Suld1DArrayI8Clamp:
342 case NVPTXISD::Suld1DArrayI16Clamp:
343 case NVPTXISD::Suld1DArrayI32Clamp:
344 case NVPTXISD::Suld1DArrayI64Clamp:
345 case NVPTXISD::Suld1DArrayV2I8Clamp:
346 case NVPTXISD::Suld1DArrayV2I16Clamp:
347 case NVPTXISD::Suld1DArrayV2I32Clamp:
348 case NVPTXISD::Suld1DArrayV2I64Clamp:
349 case NVPTXISD::Suld1DArrayV4I8Clamp:
350 case NVPTXISD::Suld1DArrayV4I16Clamp:
351 case NVPTXISD::Suld1DArrayV4I32Clamp:
352 case NVPTXISD::Suld2DI8Clamp:
353 case NVPTXISD::Suld2DI16Clamp:
354 case NVPTXISD::Suld2DI32Clamp:
355 case NVPTXISD::Suld2DI64Clamp:
356 case NVPTXISD::Suld2DV2I8Clamp:
357 case NVPTXISD::Suld2DV2I16Clamp:
358 case NVPTXISD::Suld2DV2I32Clamp:
359 case NVPTXISD::Suld2DV2I64Clamp:
360 case NVPTXISD::Suld2DV4I8Clamp:
361 case NVPTXISD::Suld2DV4I16Clamp:
362 case NVPTXISD::Suld2DV4I32Clamp:
363 case NVPTXISD::Suld2DArrayI8Clamp:
364 case NVPTXISD::Suld2DArrayI16Clamp:
365 case NVPTXISD::Suld2DArrayI32Clamp:
366 case NVPTXISD::Suld2DArrayI64Clamp:
367 case NVPTXISD::Suld2DArrayV2I8Clamp:
368 case NVPTXISD::Suld2DArrayV2I16Clamp:
369 case NVPTXISD::Suld2DArrayV2I32Clamp:
370 case NVPTXISD::Suld2DArrayV2I64Clamp:
371 case NVPTXISD::Suld2DArrayV4I8Clamp:
372 case NVPTXISD::Suld2DArrayV4I16Clamp:
373 case NVPTXISD::Suld2DArrayV4I32Clamp:
374 case NVPTXISD::Suld3DI8Clamp:
375 case NVPTXISD::Suld3DI16Clamp:
376 case NVPTXISD::Suld3DI32Clamp:
377 case NVPTXISD::Suld3DI64Clamp:
378 case NVPTXISD::Suld3DV2I8Clamp:
379 case NVPTXISD::Suld3DV2I16Clamp:
380 case NVPTXISD::Suld3DV2I32Clamp:
381 case NVPTXISD::Suld3DV2I64Clamp:
382 case NVPTXISD::Suld3DV4I8Clamp:
383 case NVPTXISD::Suld3DV4I16Clamp:
384 case NVPTXISD::Suld3DV4I32Clamp:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000385 case NVPTXISD::Suld1DI8Trap:
386 case NVPTXISD::Suld1DI16Trap:
387 case NVPTXISD::Suld1DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000388 case NVPTXISD::Suld1DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000389 case NVPTXISD::Suld1DV2I8Trap:
390 case NVPTXISD::Suld1DV2I16Trap:
391 case NVPTXISD::Suld1DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000392 case NVPTXISD::Suld1DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000393 case NVPTXISD::Suld1DV4I8Trap:
394 case NVPTXISD::Suld1DV4I16Trap:
395 case NVPTXISD::Suld1DV4I32Trap:
396 case NVPTXISD::Suld1DArrayI8Trap:
397 case NVPTXISD::Suld1DArrayI16Trap:
398 case NVPTXISD::Suld1DArrayI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000399 case NVPTXISD::Suld1DArrayI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000400 case NVPTXISD::Suld1DArrayV2I8Trap:
401 case NVPTXISD::Suld1DArrayV2I16Trap:
402 case NVPTXISD::Suld1DArrayV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000403 case NVPTXISD::Suld1DArrayV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000404 case NVPTXISD::Suld1DArrayV4I8Trap:
405 case NVPTXISD::Suld1DArrayV4I16Trap:
406 case NVPTXISD::Suld1DArrayV4I32Trap:
407 case NVPTXISD::Suld2DI8Trap:
408 case NVPTXISD::Suld2DI16Trap:
409 case NVPTXISD::Suld2DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000410 case NVPTXISD::Suld2DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000411 case NVPTXISD::Suld2DV2I8Trap:
412 case NVPTXISD::Suld2DV2I16Trap:
413 case NVPTXISD::Suld2DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000414 case NVPTXISD::Suld2DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000415 case NVPTXISD::Suld2DV4I8Trap:
416 case NVPTXISD::Suld2DV4I16Trap:
417 case NVPTXISD::Suld2DV4I32Trap:
418 case NVPTXISD::Suld2DArrayI8Trap:
419 case NVPTXISD::Suld2DArrayI16Trap:
420 case NVPTXISD::Suld2DArrayI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000421 case NVPTXISD::Suld2DArrayI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000422 case NVPTXISD::Suld2DArrayV2I8Trap:
423 case NVPTXISD::Suld2DArrayV2I16Trap:
424 case NVPTXISD::Suld2DArrayV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000425 case NVPTXISD::Suld2DArrayV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000426 case NVPTXISD::Suld2DArrayV4I8Trap:
427 case NVPTXISD::Suld2DArrayV4I16Trap:
428 case NVPTXISD::Suld2DArrayV4I32Trap:
429 case NVPTXISD::Suld3DI8Trap:
430 case NVPTXISD::Suld3DI16Trap:
431 case NVPTXISD::Suld3DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000432 case NVPTXISD::Suld3DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000433 case NVPTXISD::Suld3DV2I8Trap:
434 case NVPTXISD::Suld3DV2I16Trap:
435 case NVPTXISD::Suld3DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000436 case NVPTXISD::Suld3DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000437 case NVPTXISD::Suld3DV4I8Trap:
438 case NVPTXISD::Suld3DV4I16Trap:
439 case NVPTXISD::Suld3DV4I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000440 case NVPTXISD::Suld1DI8Zero:
441 case NVPTXISD::Suld1DI16Zero:
442 case NVPTXISD::Suld1DI32Zero:
443 case NVPTXISD::Suld1DI64Zero:
444 case NVPTXISD::Suld1DV2I8Zero:
445 case NVPTXISD::Suld1DV2I16Zero:
446 case NVPTXISD::Suld1DV2I32Zero:
447 case NVPTXISD::Suld1DV2I64Zero:
448 case NVPTXISD::Suld1DV4I8Zero:
449 case NVPTXISD::Suld1DV4I16Zero:
450 case NVPTXISD::Suld1DV4I32Zero:
451 case NVPTXISD::Suld1DArrayI8Zero:
452 case NVPTXISD::Suld1DArrayI16Zero:
453 case NVPTXISD::Suld1DArrayI32Zero:
454 case NVPTXISD::Suld1DArrayI64Zero:
455 case NVPTXISD::Suld1DArrayV2I8Zero:
456 case NVPTXISD::Suld1DArrayV2I16Zero:
457 case NVPTXISD::Suld1DArrayV2I32Zero:
458 case NVPTXISD::Suld1DArrayV2I64Zero:
459 case NVPTXISD::Suld1DArrayV4I8Zero:
460 case NVPTXISD::Suld1DArrayV4I16Zero:
461 case NVPTXISD::Suld1DArrayV4I32Zero:
462 case NVPTXISD::Suld2DI8Zero:
463 case NVPTXISD::Suld2DI16Zero:
464 case NVPTXISD::Suld2DI32Zero:
465 case NVPTXISD::Suld2DI64Zero:
466 case NVPTXISD::Suld2DV2I8Zero:
467 case NVPTXISD::Suld2DV2I16Zero:
468 case NVPTXISD::Suld2DV2I32Zero:
469 case NVPTXISD::Suld2DV2I64Zero:
470 case NVPTXISD::Suld2DV4I8Zero:
471 case NVPTXISD::Suld2DV4I16Zero:
472 case NVPTXISD::Suld2DV4I32Zero:
473 case NVPTXISD::Suld2DArrayI8Zero:
474 case NVPTXISD::Suld2DArrayI16Zero:
475 case NVPTXISD::Suld2DArrayI32Zero:
476 case NVPTXISD::Suld2DArrayI64Zero:
477 case NVPTXISD::Suld2DArrayV2I8Zero:
478 case NVPTXISD::Suld2DArrayV2I16Zero:
479 case NVPTXISD::Suld2DArrayV2I32Zero:
480 case NVPTXISD::Suld2DArrayV2I64Zero:
481 case NVPTXISD::Suld2DArrayV4I8Zero:
482 case NVPTXISD::Suld2DArrayV4I16Zero:
483 case NVPTXISD::Suld2DArrayV4I32Zero:
484 case NVPTXISD::Suld3DI8Zero:
485 case NVPTXISD::Suld3DI16Zero:
486 case NVPTXISD::Suld3DI32Zero:
487 case NVPTXISD::Suld3DI64Zero:
488 case NVPTXISD::Suld3DV2I8Zero:
489 case NVPTXISD::Suld3DV2I16Zero:
490 case NVPTXISD::Suld3DV2I32Zero:
491 case NVPTXISD::Suld3DV2I64Zero:
492 case NVPTXISD::Suld3DV4I8Zero:
493 case NVPTXISD::Suld3DV4I16Zero:
494 case NVPTXISD::Suld3DV4I32Zero:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000495 ResNode = SelectSurfaceIntrinsic(N);
496 break;
Justin Holewinskica7a4f12014-06-27 18:35:27 +0000497 case ISD::AND:
498 case ISD::SRA:
499 case ISD::SRL:
500 // Try to select BFE
501 ResNode = SelectBFE(N);
502 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000503 case ISD::ADDRSPACECAST:
504 ResNode = SelectAddrSpaceCast(N);
505 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000506 default:
507 break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000508 }
509 if (ResNode)
510 return ResNode;
511 return SelectCode(N);
512}
513
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000514SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
515 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
516 switch (IID) {
517 default:
518 return NULL;
519 case Intrinsic::nvvm_ldg_global_f:
520 case Intrinsic::nvvm_ldg_global_i:
521 case Intrinsic::nvvm_ldg_global_p:
522 case Intrinsic::nvvm_ldu_global_f:
523 case Intrinsic::nvvm_ldu_global_i:
524 case Intrinsic::nvvm_ldu_global_p:
525 return SelectLDGLDU(N);
526 }
527}
528
Eric Christopher9745b3a2015-01-30 01:41:01 +0000529static unsigned int getCodeAddrSpace(MemSDNode *N) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +0000530 const Value *Src = N->getMemOperand()->getValue();
Justin Holewinskib96d1392013-06-10 13:29:47 +0000531
Justin Holewinskiae556d32012-05-04 20:18:50 +0000532 if (!Src)
Justin Holewinskib96d1392013-06-10 13:29:47 +0000533 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000534
Craig Toppere3dcce92015-08-01 22:20:21 +0000535 if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000536 switch (PT->getAddressSpace()) {
Justin Holewinskib96d1392013-06-10 13:29:47 +0000537 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
538 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
539 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
540 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
541 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
542 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
543 default: break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000544 }
545 }
Justin Holewinskib96d1392013-06-10 13:29:47 +0000546 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000547}
548
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000549static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000550 unsigned CodeAddrSpace, MachineFunction *F) {
551 // To use non-coherent caching, the load has to be from global
552 // memory and we have to prove that the memory area is not written
553 // to anywhere for the duration of the kernel call, not even after
554 // the load.
555 //
556 // To ensure that there are no writes to the memory, we require the
557 // underlying pointer to be a noalias (__restrict) kernel parameter
558 // that is never used for a write. We can only do this for kernel
559 // functions since from within a device function, we cannot know if
560 // there were or will be writes to the memory from the caller - or we
561 // could, but then we would have to do inter-procedural analysis.
562 if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
563 !isKernelFunction(*F->getFunction())) {
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000564 return false;
565 }
566
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000567 // We use GetUnderlyingObjects() here instead of
568 // GetUnderlyingObject() mainly because the former looks through phi
569 // nodes while the latter does not. We need to look through phi
570 // nodes to handle pointer induction variables.
571 SmallVector<Value *, 8> Objs;
572 GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
573 Objs, F->getDataLayout());
574 for (Value *Obj : Objs) {
575 auto *A = dyn_cast<const Argument>(Obj);
576 if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
577 }
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000578
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000579 return true;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000580}
581
Justin Holewinski30d56a72014-04-09 15:39:15 +0000582SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
583 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
584 switch (IID) {
585 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000586 return nullptr;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000587 case Intrinsic::nvvm_texsurf_handle_internal:
588 return SelectTexSurfHandle(N);
589 }
590}
591
592SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
593 // Op 0 is the intrinsic ID
594 SDValue Wrapper = N->getOperand(1);
595 SDValue GlobalVal = Wrapper.getOperand(0);
596 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
597 GlobalVal);
598}
599
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000600SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
601 SDValue Src = N->getOperand(0);
602 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
603 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
604 unsigned DstAddrSpace = CastN->getDestAddressSpace();
605
606 assert(SrcAddrSpace != DstAddrSpace &&
607 "addrspacecast must be between different address spaces");
608
609 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
610 // Specific to generic
611 unsigned Opc;
612 switch (SrcAddrSpace) {
613 default: report_fatal_error("Bad address space in addrspacecast");
614 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000615 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000616 break;
617 case ADDRESS_SPACE_SHARED:
Eric Christopher02389e32015-02-19 00:08:27 +0000618 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000619 break;
620 case ADDRESS_SPACE_CONST:
Eric Christopher02389e32015-02-19 00:08:27 +0000621 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000622 break;
623 case ADDRESS_SPACE_LOCAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000624 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000625 break;
626 }
627 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
628 } else {
629 // Generic to specific
630 if (SrcAddrSpace != 0)
631 report_fatal_error("Cannot cast between two non-generic address spaces");
632 unsigned Opc;
633 switch (DstAddrSpace) {
634 default: report_fatal_error("Bad address space in addrspacecast");
635 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000636 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
637 : NVPTX::cvta_to_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000638 break;
639 case ADDRESS_SPACE_SHARED:
Eric Christopher02389e32015-02-19 00:08:27 +0000640 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
641 : NVPTX::cvta_to_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000642 break;
643 case ADDRESS_SPACE_CONST:
Eric Christopher02389e32015-02-19 00:08:27 +0000644 Opc =
645 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000646 break;
647 case ADDRESS_SPACE_LOCAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000648 Opc =
649 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000650 break;
Jingyue Wua2f60272015-06-04 21:28:26 +0000651 case ADDRESS_SPACE_PARAM:
652 Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
653 : NVPTX::nvvm_ptr_gen_to_param;
654 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000655 }
656 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
657 }
658}
659
Justin Holewinski0497ab12013-03-30 14:29:21 +0000660SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000661 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000662 LoadSDNode *LD = cast<LoadSDNode>(N);
663 EVT LoadedVT = LD->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +0000664 SDNode *NVPTXLD = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000665
666 // do not support pre/post inc/dec
667 if (LD->isIndexed())
Craig Topper062a2ba2014-04-25 05:30:21 +0000668 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000669
670 if (!LoadedVT.isSimple())
Craig Topper062a2ba2014-04-25 05:30:21 +0000671 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000672
673 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +0000674 unsigned int codeAddrSpace = getCodeAddrSpace(LD);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000675
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000676 if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000677 return SelectLDGLDU(N);
678 }
679
Justin Holewinskiae556d32012-05-04 20:18:50 +0000680 // Volatile Setting
681 // - .volatile is only availalble for .global and .shared
682 bool isVolatile = LD->isVolatile();
683 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
684 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
685 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
686 isVolatile = false;
687
688 // Vector Setting
689 MVT SimpleVT = LoadedVT.getSimpleVT();
690 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
691 if (SimpleVT.isVector()) {
692 unsigned num = SimpleVT.getVectorNumElements();
693 if (num == 2)
694 vecType = NVPTX::PTXLdStInstCode::V2;
695 else if (num == 4)
696 vecType = NVPTX::PTXLdStInstCode::V4;
697 else
Craig Topper062a2ba2014-04-25 05:30:21 +0000698 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000699 }
700
701 // Type Setting: fromType + fromTypeWidth
702 //
703 // Sign : ISD::SEXTLOAD
704 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
705 // type is integer
706 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
707 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +0000708 // Read at least 8 bits (predicates are stored as 8-bit values)
709 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskiae556d32012-05-04 20:18:50 +0000710 unsigned int fromType;
711 if ((LD->getExtensionType() == ISD::SEXTLOAD))
712 fromType = NVPTX::PTXLdStInstCode::Signed;
713 else if (ScalarVT.isFloatingPoint())
714 fromType = NVPTX::PTXLdStInstCode::Float;
715 else
716 fromType = NVPTX::PTXLdStInstCode::Unsigned;
717
718 // Create the machine instruction DAG
719 SDValue Chain = N->getOperand(0);
720 SDValue N1 = N->getOperand(1);
721 SDValue Addr;
722 SDValue Offset, Base;
723 unsigned Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +0000724 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000725
726 if (SelectDirectAddr(N1, Addr)) {
727 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000728 case MVT::i8:
729 Opcode = NVPTX::LD_i8_avar;
730 break;
731 case MVT::i16:
732 Opcode = NVPTX::LD_i16_avar;
733 break;
734 case MVT::i32:
735 Opcode = NVPTX::LD_i32_avar;
736 break;
737 case MVT::i64:
738 Opcode = NVPTX::LD_i64_avar;
739 break;
740 case MVT::f32:
741 Opcode = NVPTX::LD_f32_avar;
742 break;
743 case MVT::f64:
744 Opcode = NVPTX::LD_f64_avar;
745 break;
746 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000747 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000748 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000749 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
750 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
751 getI32Imm(fromTypeWidth, dl), Addr, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000752 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +0000753 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
754 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000755 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000756 case MVT::i8:
757 Opcode = NVPTX::LD_i8_asi;
758 break;
759 case MVT::i16:
760 Opcode = NVPTX::LD_i16_asi;
761 break;
762 case MVT::i32:
763 Opcode = NVPTX::LD_i32_asi;
764 break;
765 case MVT::i64:
766 Opcode = NVPTX::LD_i64_asi;
767 break;
768 case MVT::f32:
769 Opcode = NVPTX::LD_f32_asi;
770 break;
771 case MVT::f64:
772 Opcode = NVPTX::LD_f64_asi;
773 break;
774 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000775 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000776 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000777 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
778 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
779 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000780 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +0000781 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
782 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
783 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000784 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000785 case MVT::i8:
786 Opcode = NVPTX::LD_i8_ari_64;
787 break;
788 case MVT::i16:
789 Opcode = NVPTX::LD_i16_ari_64;
790 break;
791 case MVT::i32:
792 Opcode = NVPTX::LD_i32_ari_64;
793 break;
794 case MVT::i64:
795 Opcode = NVPTX::LD_i64_ari_64;
796 break;
797 case MVT::f32:
798 Opcode = NVPTX::LD_f32_ari_64;
799 break;
800 case MVT::f64:
801 Opcode = NVPTX::LD_f64_ari_64;
802 break;
803 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000804 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000805 }
806 } else {
807 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000808 case MVT::i8:
809 Opcode = NVPTX::LD_i8_ari;
810 break;
811 case MVT::i16:
812 Opcode = NVPTX::LD_i16_ari;
813 break;
814 case MVT::i32:
815 Opcode = NVPTX::LD_i32_ari;
816 break;
817 case MVT::i64:
818 Opcode = NVPTX::LD_i64_ari;
819 break;
820 case MVT::f32:
821 Opcode = NVPTX::LD_f32_ari;
822 break;
823 case MVT::f64:
824 Opcode = NVPTX::LD_f64_ari;
825 break;
826 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000827 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000828 }
Justin Holewinskiae556d32012-05-04 20:18:50 +0000829 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000830 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
831 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
832 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000833 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000834 } else {
Eric Christopher02389e32015-02-19 00:08:27 +0000835 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000836 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000837 case MVT::i8:
838 Opcode = NVPTX::LD_i8_areg_64;
839 break;
840 case MVT::i16:
841 Opcode = NVPTX::LD_i16_areg_64;
842 break;
843 case MVT::i32:
844 Opcode = NVPTX::LD_i32_areg_64;
845 break;
846 case MVT::i64:
847 Opcode = NVPTX::LD_i64_areg_64;
848 break;
849 case MVT::f32:
850 Opcode = NVPTX::LD_f32_areg_64;
851 break;
852 case MVT::f64:
853 Opcode = NVPTX::LD_f64_areg_64;
854 break;
855 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000856 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000857 }
858 } else {
859 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000860 case MVT::i8:
861 Opcode = NVPTX::LD_i8_areg;
862 break;
863 case MVT::i16:
864 Opcode = NVPTX::LD_i16_areg;
865 break;
866 case MVT::i32:
867 Opcode = NVPTX::LD_i32_areg;
868 break;
869 case MVT::i64:
870 Opcode = NVPTX::LD_i64_areg;
871 break;
872 case MVT::f32:
873 Opcode = NVPTX::LD_f32_areg;
874 break;
875 case MVT::f64:
876 Opcode = NVPTX::LD_f64_areg;
877 break;
878 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000879 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000880 }
Justin Holewinskiae556d32012-05-04 20:18:50 +0000881 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000882 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
883 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
884 getI32Imm(fromTypeWidth, dl), N1, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000885 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000886 }
887
Craig Topper062a2ba2014-04-25 05:30:21 +0000888 if (NVPTXLD) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000889 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
890 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
891 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
892 }
893
894 return NVPTXLD;
895}
896
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000897SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
898
899 SDValue Chain = N->getOperand(0);
900 SDValue Op1 = N->getOperand(1);
901 SDValue Addr, Offset, Base;
902 unsigned Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +0000903 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000904 SDNode *LD;
905 MemSDNode *MemSD = cast<MemSDNode>(N);
906 EVT LoadedVT = MemSD->getMemoryVT();
907
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000908 if (!LoadedVT.isSimple())
Craig Topper062a2ba2014-04-25 05:30:21 +0000909 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000910
911 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +0000912 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000913
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000914 if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000915 return SelectLDGLDU(N);
916 }
917
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000918 // Volatile Setting
919 // - .volatile is only availalble for .global and .shared
920 bool IsVolatile = MemSD->isVolatile();
921 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
922 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
923 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
924 IsVolatile = false;
925
926 // Vector Setting
927 MVT SimpleVT = LoadedVT.getSimpleVT();
928
929 // Type Setting: fromType + fromTypeWidth
930 //
931 // Sign : ISD::SEXTLOAD
932 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
933 // type is integer
934 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
935 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +0000936 // Read at least 8 bits (predicates are stored as 8-bit values)
937 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000938 unsigned int FromType;
939 // The last operand holds the original LoadSDNode::getExtensionType() value
Justin Holewinski0497ab12013-03-30 14:29:21 +0000940 unsigned ExtensionType = cast<ConstantSDNode>(
941 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000942 if (ExtensionType == ISD::SEXTLOAD)
943 FromType = NVPTX::PTXLdStInstCode::Signed;
944 else if (ScalarVT.isFloatingPoint())
945 FromType = NVPTX::PTXLdStInstCode::Float;
946 else
947 FromType = NVPTX::PTXLdStInstCode::Unsigned;
948
949 unsigned VecType;
950
951 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000952 case NVPTXISD::LoadV2:
953 VecType = NVPTX::PTXLdStInstCode::V2;
954 break;
955 case NVPTXISD::LoadV4:
956 VecType = NVPTX::PTXLdStInstCode::V4;
957 break;
958 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000959 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000960 }
961
962 EVT EltVT = N->getValueType(0);
963
964 if (SelectDirectAddr(Op1, Addr)) {
965 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000966 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000967 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000968 case NVPTXISD::LoadV2:
969 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000970 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000971 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000972 case MVT::i8:
973 Opcode = NVPTX::LDV_i8_v2_avar;
974 break;
975 case MVT::i16:
976 Opcode = NVPTX::LDV_i16_v2_avar;
977 break;
978 case MVT::i32:
979 Opcode = NVPTX::LDV_i32_v2_avar;
980 break;
981 case MVT::i64:
982 Opcode = NVPTX::LDV_i64_v2_avar;
983 break;
984 case MVT::f32:
985 Opcode = NVPTX::LDV_f32_v2_avar;
986 break;
987 case MVT::f64:
988 Opcode = NVPTX::LDV_f64_v2_avar;
989 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000990 }
991 break;
992 case NVPTXISD::LoadV4:
993 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000994 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000995 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000996 case MVT::i8:
997 Opcode = NVPTX::LDV_i8_v4_avar;
998 break;
999 case MVT::i16:
1000 Opcode = NVPTX::LDV_i16_v4_avar;
1001 break;
1002 case MVT::i32:
1003 Opcode = NVPTX::LDV_i32_v4_avar;
1004 break;
1005 case MVT::f32:
1006 Opcode = NVPTX::LDV_f32_v4_avar;
1007 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001008 }
1009 break;
1010 }
1011
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001012 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1013 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1014 getI32Imm(FromTypeWidth, DL), Addr, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00001015 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001016 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1017 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001018 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001019 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001020 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001021 case NVPTXISD::LoadV2:
1022 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001023 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001024 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001025 case MVT::i8:
1026 Opcode = NVPTX::LDV_i8_v2_asi;
1027 break;
1028 case MVT::i16:
1029 Opcode = NVPTX::LDV_i16_v2_asi;
1030 break;
1031 case MVT::i32:
1032 Opcode = NVPTX::LDV_i32_v2_asi;
1033 break;
1034 case MVT::i64:
1035 Opcode = NVPTX::LDV_i64_v2_asi;
1036 break;
1037 case MVT::f32:
1038 Opcode = NVPTX::LDV_f32_v2_asi;
1039 break;
1040 case MVT::f64:
1041 Opcode = NVPTX::LDV_f64_v2_asi;
1042 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001043 }
1044 break;
1045 case NVPTXISD::LoadV4:
1046 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001047 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001048 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001049 case MVT::i8:
1050 Opcode = NVPTX::LDV_i8_v4_asi;
1051 break;
1052 case MVT::i16:
1053 Opcode = NVPTX::LDV_i16_v4_asi;
1054 break;
1055 case MVT::i32:
1056 Opcode = NVPTX::LDV_i32_v4_asi;
1057 break;
1058 case MVT::f32:
1059 Opcode = NVPTX::LDV_f32_v4_asi;
1060 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001061 }
1062 break;
1063 }
1064
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001065 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1066 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1067 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00001068 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001069 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1070 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1071 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001072 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001073 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001074 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001075 case NVPTXISD::LoadV2:
1076 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001077 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001078 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001079 case MVT::i8:
1080 Opcode = NVPTX::LDV_i8_v2_ari_64;
1081 break;
1082 case MVT::i16:
1083 Opcode = NVPTX::LDV_i16_v2_ari_64;
1084 break;
1085 case MVT::i32:
1086 Opcode = NVPTX::LDV_i32_v2_ari_64;
1087 break;
1088 case MVT::i64:
1089 Opcode = NVPTX::LDV_i64_v2_ari_64;
1090 break;
1091 case MVT::f32:
1092 Opcode = NVPTX::LDV_f32_v2_ari_64;
1093 break;
1094 case MVT::f64:
1095 Opcode = NVPTX::LDV_f64_v2_ari_64;
1096 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001097 }
1098 break;
1099 case NVPTXISD::LoadV4:
1100 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001101 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001102 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001103 case MVT::i8:
1104 Opcode = NVPTX::LDV_i8_v4_ari_64;
1105 break;
1106 case MVT::i16:
1107 Opcode = NVPTX::LDV_i16_v4_ari_64;
1108 break;
1109 case MVT::i32:
1110 Opcode = NVPTX::LDV_i32_v4_ari_64;
1111 break;
1112 case MVT::f32:
1113 Opcode = NVPTX::LDV_f32_v4_ari_64;
1114 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001115 }
1116 break;
1117 }
1118 } else {
1119 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001120 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001121 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001122 case NVPTXISD::LoadV2:
1123 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001124 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001125 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001126 case MVT::i8:
1127 Opcode = NVPTX::LDV_i8_v2_ari;
1128 break;
1129 case MVT::i16:
1130 Opcode = NVPTX::LDV_i16_v2_ari;
1131 break;
1132 case MVT::i32:
1133 Opcode = NVPTX::LDV_i32_v2_ari;
1134 break;
1135 case MVT::i64:
1136 Opcode = NVPTX::LDV_i64_v2_ari;
1137 break;
1138 case MVT::f32:
1139 Opcode = NVPTX::LDV_f32_v2_ari;
1140 break;
1141 case MVT::f64:
1142 Opcode = NVPTX::LDV_f64_v2_ari;
1143 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001144 }
1145 break;
1146 case NVPTXISD::LoadV4:
1147 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001148 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001149 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001150 case MVT::i8:
1151 Opcode = NVPTX::LDV_i8_v4_ari;
1152 break;
1153 case MVT::i16:
1154 Opcode = NVPTX::LDV_i16_v4_ari;
1155 break;
1156 case MVT::i32:
1157 Opcode = NVPTX::LDV_i32_v4_ari;
1158 break;
1159 case MVT::f32:
1160 Opcode = NVPTX::LDV_f32_v4_ari;
1161 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001162 }
1163 break;
1164 }
1165 }
1166
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001167 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1168 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1169 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001170
Michael Liaob53d8962013-04-19 22:22:57 +00001171 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001172 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001173 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001174 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001175 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001176 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001177 case NVPTXISD::LoadV2:
1178 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001179 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001180 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001181 case MVT::i8:
1182 Opcode = NVPTX::LDV_i8_v2_areg_64;
1183 break;
1184 case MVT::i16:
1185 Opcode = NVPTX::LDV_i16_v2_areg_64;
1186 break;
1187 case MVT::i32:
1188 Opcode = NVPTX::LDV_i32_v2_areg_64;
1189 break;
1190 case MVT::i64:
1191 Opcode = NVPTX::LDV_i64_v2_areg_64;
1192 break;
1193 case MVT::f32:
1194 Opcode = NVPTX::LDV_f32_v2_areg_64;
1195 break;
1196 case MVT::f64:
1197 Opcode = NVPTX::LDV_f64_v2_areg_64;
1198 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001199 }
1200 break;
1201 case NVPTXISD::LoadV4:
1202 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001203 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001204 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001205 case MVT::i8:
1206 Opcode = NVPTX::LDV_i8_v4_areg_64;
1207 break;
1208 case MVT::i16:
1209 Opcode = NVPTX::LDV_i16_v4_areg_64;
1210 break;
1211 case MVT::i32:
1212 Opcode = NVPTX::LDV_i32_v4_areg_64;
1213 break;
1214 case MVT::f32:
1215 Opcode = NVPTX::LDV_f32_v4_areg_64;
1216 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001217 }
1218 break;
1219 }
1220 } else {
1221 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001222 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001223 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001224 case NVPTXISD::LoadV2:
1225 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001226 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001227 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001228 case MVT::i8:
1229 Opcode = NVPTX::LDV_i8_v2_areg;
1230 break;
1231 case MVT::i16:
1232 Opcode = NVPTX::LDV_i16_v2_areg;
1233 break;
1234 case MVT::i32:
1235 Opcode = NVPTX::LDV_i32_v2_areg;
1236 break;
1237 case MVT::i64:
1238 Opcode = NVPTX::LDV_i64_v2_areg;
1239 break;
1240 case MVT::f32:
1241 Opcode = NVPTX::LDV_f32_v2_areg;
1242 break;
1243 case MVT::f64:
1244 Opcode = NVPTX::LDV_f64_v2_areg;
1245 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001246 }
1247 break;
1248 case NVPTXISD::LoadV4:
1249 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001250 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001251 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001252 case MVT::i8:
1253 Opcode = NVPTX::LDV_i8_v4_areg;
1254 break;
1255 case MVT::i16:
1256 Opcode = NVPTX::LDV_i16_v4_areg;
1257 break;
1258 case MVT::i32:
1259 Opcode = NVPTX::LDV_i32_v4_areg;
1260 break;
1261 case MVT::f32:
1262 Opcode = NVPTX::LDV_f32_v4_areg;
1263 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001264 }
1265 break;
1266 }
1267 }
1268
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001269 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1270 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1271 getI32Imm(FromTypeWidth, DL), Op1, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00001272 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001273 }
1274
1275 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1276 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1277 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1278
1279 return LD;
1280}
1281
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001282SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001283
1284 SDValue Chain = N->getOperand(0);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001285 SDValue Op1;
1286 MemSDNode *Mem;
1287 bool IsLDG = true;
1288
Justin Holewinskic7997922016-04-05 12:38:01 +00001289 // If this is an LDG intrinsic, the address is the third operand. If its an
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001290 // LDG/LDU SD node (from custom vector handling), then its the second operand
1291 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1292 Op1 = N->getOperand(2);
1293 Mem = cast<MemIntrinsicSDNode>(N);
1294 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1295 switch (IID) {
1296 default:
1297 return NULL;
1298 case Intrinsic::nvvm_ldg_global_f:
1299 case Intrinsic::nvvm_ldg_global_i:
1300 case Intrinsic::nvvm_ldg_global_p:
1301 IsLDG = true;
1302 break;
1303 case Intrinsic::nvvm_ldu_global_f:
1304 case Intrinsic::nvvm_ldu_global_i:
1305 case Intrinsic::nvvm_ldu_global_p:
1306 IsLDG = false;
1307 break;
1308 }
1309 } else {
1310 Op1 = N->getOperand(1);
1311 Mem = cast<MemSDNode>(N);
1312 }
1313
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001314 unsigned Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001315 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001316 SDNode *LD;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001317 SDValue Base, Offset, Addr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00001318
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001319 EVT EltVT = Mem->getMemoryVT();
Justin Holewinskic7997922016-04-05 12:38:01 +00001320 unsigned NumElts = 1;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001321 if (EltVT.isVector()) {
Justin Holewinskic7997922016-04-05 12:38:01 +00001322 NumElts = EltVT.getVectorNumElements();
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001323 EltVT = EltVT.getVectorElementType();
1324 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001325
Justin Holewinskic7997922016-04-05 12:38:01 +00001326 // Build the "promoted" result VTList for the load. If we are really loading
1327 // i8s, then the return type will be promoted to i16 since we do not expose
1328 // 8-bit registers in NVPTX.
1329 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1330 SmallVector<EVT, 5> InstVTs;
1331 for (unsigned i = 0; i != NumElts; ++i) {
1332 InstVTs.push_back(NodeVT);
1333 }
1334 InstVTs.push_back(MVT::Other);
1335 SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1336
Justin Holewinskie40e9292013-07-01 12:58:52 +00001337 if (SelectDirectAddr(Op1, Addr)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001338 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001339 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001340 return nullptr;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001341 case ISD::INTRINSIC_W_CHAIN:
1342 if (IsLDG) {
1343 switch (EltVT.getSimpleVT().SimpleTy) {
1344 default:
1345 return nullptr;
1346 case MVT::i8:
1347 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1348 break;
1349 case MVT::i16:
1350 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1351 break;
1352 case MVT::i32:
1353 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1354 break;
1355 case MVT::i64:
1356 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1357 break;
1358 case MVT::f32:
1359 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1360 break;
1361 case MVT::f64:
1362 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1363 break;
1364 }
1365 } else {
1366 switch (EltVT.getSimpleVT().SimpleTy) {
1367 default:
1368 return nullptr;
1369 case MVT::i8:
1370 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1371 break;
1372 case MVT::i16:
1373 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1374 break;
1375 case MVT::i32:
1376 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1377 break;
1378 case MVT::i64:
1379 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1380 break;
1381 case MVT::f32:
1382 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1383 break;
1384 case MVT::f64:
1385 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1386 break;
1387 }
1388 }
1389 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001390 case NVPTXISD::LDGV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001391 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001392 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001393 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001394 case MVT::i8:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001395 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001396 break;
1397 case MVT::i16:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001398 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001399 break;
1400 case MVT::i32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001401 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001402 break;
1403 case MVT::i64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001404 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001405 break;
1406 case MVT::f32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001407 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001408 break;
1409 case MVT::f64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001410 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001411 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001412 }
1413 break;
1414 case NVPTXISD::LDUV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001415 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001416 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001417 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001418 case MVT::i8:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001419 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001420 break;
1421 case MVT::i16:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001422 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001423 break;
1424 case MVT::i32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001425 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001426 break;
1427 case MVT::i64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001428 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001429 break;
1430 case MVT::f32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001431 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001432 break;
1433 case MVT::f64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001434 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1435 break;
1436 }
1437 break;
1438 case NVPTXISD::LDGV4:
1439 switch (EltVT.getSimpleVT().SimpleTy) {
1440 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001441 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001442 case MVT::i8:
1443 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1444 break;
1445 case MVT::i16:
1446 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1447 break;
1448 case MVT::i32:
1449 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1450 break;
1451 case MVT::f32:
1452 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001453 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001454 }
1455 break;
1456 case NVPTXISD::LDUV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001457 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001458 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001459 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001460 case MVT::i8:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001461 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001462 break;
1463 case MVT::i16:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001464 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001465 break;
1466 case MVT::i32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001467 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001468 break;
1469 case MVT::f32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001470 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001471 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001472 }
1473 break;
1474 }
Justin Holewinskie40e9292013-07-01 12:58:52 +00001475
1476 SDValue Ops[] = { Addr, Chain };
Justin Holewinskic7997922016-04-05 12:38:01 +00001477 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001478 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1479 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1480 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001481 switch (N->getOpcode()) {
1482 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001483 return nullptr;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001484 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001485 case ISD::INTRINSIC_W_CHAIN:
1486 if (IsLDG) {
1487 switch (EltVT.getSimpleVT().SimpleTy) {
1488 default:
1489 return nullptr;
1490 case MVT::i8:
1491 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1492 break;
1493 case MVT::i16:
1494 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1495 break;
1496 case MVT::i32:
1497 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1498 break;
1499 case MVT::i64:
1500 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1501 break;
1502 case MVT::f32:
1503 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1504 break;
1505 case MVT::f64:
1506 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1507 break;
1508 }
1509 } else {
1510 switch (EltVT.getSimpleVT().SimpleTy) {
1511 default:
1512 return nullptr;
1513 case MVT::i8:
1514 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1515 break;
1516 case MVT::i16:
1517 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1518 break;
1519 case MVT::i32:
1520 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1521 break;
1522 case MVT::i64:
1523 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1524 break;
1525 case MVT::f32:
1526 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1527 break;
1528 case MVT::f64:
1529 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1530 break;
1531 }
1532 }
1533 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001534 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001535 case NVPTXISD::LDGV2:
1536 switch (EltVT.getSimpleVT().SimpleTy) {
1537 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001538 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001539 case MVT::i8:
1540 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1541 break;
1542 case MVT::i16:
1543 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1544 break;
1545 case MVT::i32:
1546 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1547 break;
1548 case MVT::i64:
1549 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1550 break;
1551 case MVT::f32:
1552 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1553 break;
1554 case MVT::f64:
1555 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1556 break;
1557 }
1558 break;
1559 case NVPTXISD::LDUV2:
1560 switch (EltVT.getSimpleVT().SimpleTy) {
1561 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001562 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001563 case MVT::i8:
1564 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1565 break;
1566 case MVT::i16:
1567 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1568 break;
1569 case MVT::i32:
1570 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1571 break;
1572 case MVT::i64:
1573 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1574 break;
1575 case MVT::f32:
1576 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1577 break;
1578 case MVT::f64:
1579 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1580 break;
1581 }
1582 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001583 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001584 case NVPTXISD::LDGV4:
1585 switch (EltVT.getSimpleVT().SimpleTy) {
1586 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001587 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001588 case MVT::i8:
1589 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1590 break;
1591 case MVT::i16:
1592 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1593 break;
1594 case MVT::i32:
1595 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1596 break;
1597 case MVT::f32:
1598 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1599 break;
1600 }
1601 break;
1602 case NVPTXISD::LDUV4:
1603 switch (EltVT.getSimpleVT().SimpleTy) {
1604 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001605 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001606 case MVT::i8:
1607 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1608 break;
1609 case MVT::i16:
1610 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1611 break;
1612 case MVT::i32:
1613 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1614 break;
1615 case MVT::f32:
1616 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1617 break;
1618 }
1619 break;
1620 }
1621 } else {
1622 switch (N->getOpcode()) {
1623 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001624 return nullptr;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001625 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001626 case ISD::INTRINSIC_W_CHAIN:
1627 if (IsLDG) {
1628 switch (EltVT.getSimpleVT().SimpleTy) {
1629 default:
1630 return nullptr;
1631 case MVT::i8:
1632 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1633 break;
1634 case MVT::i16:
1635 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1636 break;
1637 case MVT::i32:
1638 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1639 break;
1640 case MVT::i64:
1641 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1642 break;
1643 case MVT::f32:
1644 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1645 break;
1646 case MVT::f64:
1647 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1648 break;
1649 }
1650 } else {
1651 switch (EltVT.getSimpleVT().SimpleTy) {
1652 default:
1653 return nullptr;
1654 case MVT::i8:
1655 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1656 break;
1657 case MVT::i16:
1658 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1659 break;
1660 case MVT::i32:
1661 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1662 break;
1663 case MVT::i64:
1664 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1665 break;
1666 case MVT::f32:
1667 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1668 break;
1669 case MVT::f64:
1670 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1671 break;
1672 }
1673 }
1674 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001675 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001676 case NVPTXISD::LDGV2:
1677 switch (EltVT.getSimpleVT().SimpleTy) {
1678 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001679 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001680 case MVT::i8:
1681 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1682 break;
1683 case MVT::i16:
1684 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1685 break;
1686 case MVT::i32:
1687 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1688 break;
1689 case MVT::i64:
1690 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1691 break;
1692 case MVT::f32:
1693 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1694 break;
1695 case MVT::f64:
1696 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1697 break;
1698 }
1699 break;
1700 case NVPTXISD::LDUV2:
1701 switch (EltVT.getSimpleVT().SimpleTy) {
1702 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001703 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001704 case MVT::i8:
1705 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1706 break;
1707 case MVT::i16:
1708 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1709 break;
1710 case MVT::i32:
1711 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1712 break;
1713 case MVT::i64:
1714 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1715 break;
1716 case MVT::f32:
1717 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1718 break;
1719 case MVT::f64:
1720 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1721 break;
1722 }
1723 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001724 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001725 case NVPTXISD::LDGV4:
1726 switch (EltVT.getSimpleVT().SimpleTy) {
1727 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001728 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001729 case MVT::i8:
1730 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1731 break;
1732 case MVT::i16:
1733 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1734 break;
1735 case MVT::i32:
1736 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1737 break;
1738 case MVT::f32:
1739 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1740 break;
1741 }
1742 break;
1743 case NVPTXISD::LDUV4:
1744 switch (EltVT.getSimpleVT().SimpleTy) {
1745 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001746 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001747 case MVT::i8:
1748 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1749 break;
1750 case MVT::i16:
1751 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1752 break;
1753 case MVT::i32:
1754 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1755 break;
1756 case MVT::f32:
1757 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1758 break;
1759 }
1760 break;
1761 }
1762 }
1763
1764 SDValue Ops[] = { Base, Offset, Chain };
1765
Justin Holewinskic7997922016-04-05 12:38:01 +00001766 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001767 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001768 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001769 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001770 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001771 return nullptr;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001772 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001773 case ISD::INTRINSIC_W_CHAIN:
1774 if (IsLDG) {
1775 switch (EltVT.getSimpleVT().SimpleTy) {
1776 default:
1777 return nullptr;
1778 case MVT::i8:
1779 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1780 break;
1781 case MVT::i16:
1782 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1783 break;
1784 case MVT::i32:
1785 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1786 break;
1787 case MVT::i64:
1788 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1789 break;
1790 case MVT::f32:
1791 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1792 break;
1793 case MVT::f64:
1794 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1795 break;
1796 }
1797 } else {
1798 switch (EltVT.getSimpleVT().SimpleTy) {
1799 default:
1800 return nullptr;
1801 case MVT::i8:
1802 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1803 break;
1804 case MVT::i16:
1805 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1806 break;
1807 case MVT::i32:
1808 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1809 break;
1810 case MVT::i64:
1811 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1812 break;
1813 case MVT::f32:
1814 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1815 break;
1816 case MVT::f64:
1817 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1818 break;
1819 }
1820 }
1821 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001822 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001823 case NVPTXISD::LDGV2:
1824 switch (EltVT.getSimpleVT().SimpleTy) {
1825 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001826 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001827 case MVT::i8:
1828 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1829 break;
1830 case MVT::i16:
1831 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1832 break;
1833 case MVT::i32:
1834 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1835 break;
1836 case MVT::i64:
1837 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1838 break;
1839 case MVT::f32:
1840 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1841 break;
1842 case MVT::f64:
1843 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1844 break;
1845 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001846 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001847 case NVPTXISD::LDUV2:
1848 switch (EltVT.getSimpleVT().SimpleTy) {
1849 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001850 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001851 case MVT::i8:
1852 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1853 break;
1854 case MVT::i16:
1855 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1856 break;
1857 case MVT::i32:
1858 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1859 break;
1860 case MVT::i64:
1861 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1862 break;
1863 case MVT::f32:
1864 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1865 break;
1866 case MVT::f64:
1867 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1868 break;
1869 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001870 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001871 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001872 case NVPTXISD::LDGV4:
1873 switch (EltVT.getSimpleVT().SimpleTy) {
1874 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001875 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001876 case MVT::i8:
1877 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1878 break;
1879 case MVT::i16:
1880 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1881 break;
1882 case MVT::i32:
1883 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1884 break;
1885 case MVT::f32:
1886 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1887 break;
1888 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001889 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001890 case NVPTXISD::LDUV4:
1891 switch (EltVT.getSimpleVT().SimpleTy) {
1892 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001893 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001894 case MVT::i8:
1895 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1896 break;
1897 case MVT::i16:
1898 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1899 break;
1900 case MVT::i32:
1901 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1902 break;
1903 case MVT::f32:
1904 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1905 break;
1906 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001907 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001908 }
Justin Holewinskie40e9292013-07-01 12:58:52 +00001909 } else {
1910 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001911 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001912 return nullptr;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001913 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001914 case ISD::INTRINSIC_W_CHAIN:
1915 if (IsLDG) {
1916 switch (EltVT.getSimpleVT().SimpleTy) {
1917 default:
1918 return nullptr;
1919 case MVT::i8:
1920 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1921 break;
1922 case MVT::i16:
1923 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1924 break;
1925 case MVT::i32:
1926 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1927 break;
1928 case MVT::i64:
1929 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1930 break;
1931 case MVT::f32:
1932 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1933 break;
1934 case MVT::f64:
1935 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1936 break;
1937 }
1938 } else {
1939 switch (EltVT.getSimpleVT().SimpleTy) {
1940 default:
1941 return nullptr;
1942 case MVT::i8:
1943 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1944 break;
1945 case MVT::i16:
1946 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1947 break;
1948 case MVT::i32:
1949 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1950 break;
1951 case MVT::i64:
1952 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1953 break;
1954 case MVT::f32:
1955 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1956 break;
1957 case MVT::f64:
1958 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1959 break;
1960 }
1961 }
1962 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001963 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001964 case NVPTXISD::LDGV2:
1965 switch (EltVT.getSimpleVT().SimpleTy) {
1966 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001967 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001968 case MVT::i8:
1969 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1970 break;
1971 case MVT::i16:
1972 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1973 break;
1974 case MVT::i32:
1975 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1976 break;
1977 case MVT::i64:
1978 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1979 break;
1980 case MVT::f32:
1981 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1982 break;
1983 case MVT::f64:
1984 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1985 break;
1986 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001987 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001988 case NVPTXISD::LDUV2:
1989 switch (EltVT.getSimpleVT().SimpleTy) {
1990 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001991 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001992 case MVT::i8:
1993 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1994 break;
1995 case MVT::i16:
1996 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1997 break;
1998 case MVT::i32:
1999 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
2000 break;
2001 case MVT::i64:
2002 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
2003 break;
2004 case MVT::f32:
2005 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
2006 break;
2007 case MVT::f64:
2008 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
2009 break;
2010 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00002011 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00002012 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00002013 case NVPTXISD::LDGV4:
2014 switch (EltVT.getSimpleVT().SimpleTy) {
2015 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002016 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002017 case MVT::i8:
2018 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
2019 break;
2020 case MVT::i16:
2021 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
2022 break;
2023 case MVT::i32:
2024 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
2025 break;
2026 case MVT::f32:
2027 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
2028 break;
2029 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00002030 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002031 case NVPTXISD::LDUV4:
2032 switch (EltVT.getSimpleVT().SimpleTy) {
2033 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002034 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002035 case MVT::i8:
2036 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
2037 break;
2038 case MVT::i16:
2039 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
2040 break;
2041 case MVT::i32:
2042 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
2043 break;
2044 case MVT::f32:
2045 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
2046 break;
2047 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00002048 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002049 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002050 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002051
Justin Holewinskie40e9292013-07-01 12:58:52 +00002052 SDValue Ops[] = { Op1, Chain };
Justin Holewinskic7997922016-04-05 12:38:01 +00002053 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
Justin Holewinskie40e9292013-07-01 12:58:52 +00002054 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002055
2056 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00002057 MemRefs0[0] = Mem->getMemOperand();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002058 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
2059
Justin Holewinskic7997922016-04-05 12:38:01 +00002060 // For automatic generation of LDG (through SelectLoad[Vector], not the
2061 // intrinsics), we may have an extending load like:
2062 //
2063 // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
2064 //
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00002065 // In this case, the matching logic above will select a load for the original
2066 // memory type (in this case, i8) and our types will not match (the node needs
2067 // to return an i32 in this case). Our LDG/LDU nodes do not support the
2068 // concept of sign-/zero-extension, so emulate it here by adding an explicit
2069 // CVT instruction. Ptxas should clean up any redundancies here.
2070
Justin Holewinskic7997922016-04-05 12:38:01 +00002071 EVT OrigType = N->getValueType(0);
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00002072 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
Justin Holewinskic7997922016-04-05 12:38:01 +00002073
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00002074 if (OrigType != EltVT && LdNode) {
2075 // We have an extending-load. The instruction we selected operates on the
2076 // smaller type, but the SDNode we are replacing has the larger type. We
2077 // need to emit a CVT to make the types match.
2078 bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
2079 unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
2080 EltVT.getSimpleVT(), IsSigned);
Justin Holewinskic7997922016-04-05 12:38:01 +00002081
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00002082 // For each output value, apply the manual sign/zero-extension and make sure
2083 // all users of the load go through that CVT.
Justin Holewinskic7997922016-04-05 12:38:01 +00002084 for (unsigned i = 0; i != NumElts; ++i) {
2085 SDValue Res(LD, i);
2086 SDValue OrigVal(N, i);
2087
2088 SDNode *CvtNode =
2089 CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00002090 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
2091 DL, MVT::i32));
Justin Holewinskic7997922016-04-05 12:38:01 +00002092 ReplaceUses(OrigVal, SDValue(CvtNode, 0));
2093 }
2094 }
2095
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002096 return LD;
2097}
2098
Justin Holewinski0497ab12013-03-30 14:29:21 +00002099SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00002100 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002101 StoreSDNode *ST = cast<StoreSDNode>(N);
2102 EVT StoreVT = ST->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +00002103 SDNode *NVPTXST = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002104
2105 // do not support pre/post inc/dec
2106 if (ST->isIndexed())
Craig Topper062a2ba2014-04-25 05:30:21 +00002107 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002108
2109 if (!StoreVT.isSimple())
Craig Topper062a2ba2014-04-25 05:30:21 +00002110 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002111
2112 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00002113 unsigned int codeAddrSpace = getCodeAddrSpace(ST);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002114
2115 // Volatile Setting
2116 // - .volatile is only availalble for .global and .shared
2117 bool isVolatile = ST->isVolatile();
2118 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2119 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2120 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2121 isVolatile = false;
2122
2123 // Vector Setting
2124 MVT SimpleVT = StoreVT.getSimpleVT();
2125 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2126 if (SimpleVT.isVector()) {
2127 unsigned num = SimpleVT.getVectorNumElements();
2128 if (num == 2)
2129 vecType = NVPTX::PTXLdStInstCode::V2;
2130 else if (num == 4)
2131 vecType = NVPTX::PTXLdStInstCode::V4;
2132 else
Craig Topper062a2ba2014-04-25 05:30:21 +00002133 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002134 }
2135
2136 // Type Setting: toType + toTypeWidth
2137 // - for integer type, always use 'u'
2138 //
2139 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00002140 unsigned toTypeWidth = ScalarVT.getSizeInBits();
Justin Holewinskiae556d32012-05-04 20:18:50 +00002141 unsigned int toType;
2142 if (ScalarVT.isFloatingPoint())
2143 toType = NVPTX::PTXLdStInstCode::Float;
2144 else
2145 toType = NVPTX::PTXLdStInstCode::Unsigned;
2146
2147 // Create the machine instruction DAG
2148 SDValue Chain = N->getOperand(0);
2149 SDValue N1 = N->getOperand(1);
2150 SDValue N2 = N->getOperand(2);
2151 SDValue Addr;
2152 SDValue Offset, Base;
2153 unsigned Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +00002154 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002155
2156 if (SelectDirectAddr(N2, Addr)) {
2157 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002158 case MVT::i8:
2159 Opcode = NVPTX::ST_i8_avar;
2160 break;
2161 case MVT::i16:
2162 Opcode = NVPTX::ST_i16_avar;
2163 break;
2164 case MVT::i32:
2165 Opcode = NVPTX::ST_i32_avar;
2166 break;
2167 case MVT::i64:
2168 Opcode = NVPTX::ST_i64_avar;
2169 break;
2170 case MVT::f32:
2171 Opcode = NVPTX::ST_f32_avar;
2172 break;
2173 case MVT::f64:
2174 Opcode = NVPTX::ST_f64_avar;
2175 break;
2176 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002177 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002178 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002179 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2180 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2181 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
2182 Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00002183 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00002184 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2185 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00002186 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002187 case MVT::i8:
2188 Opcode = NVPTX::ST_i8_asi;
2189 break;
2190 case MVT::i16:
2191 Opcode = NVPTX::ST_i16_asi;
2192 break;
2193 case MVT::i32:
2194 Opcode = NVPTX::ST_i32_asi;
2195 break;
2196 case MVT::i64:
2197 Opcode = NVPTX::ST_i64_asi;
2198 break;
2199 case MVT::f32:
2200 Opcode = NVPTX::ST_f32_asi;
2201 break;
2202 case MVT::f64:
2203 Opcode = NVPTX::ST_f64_asi;
2204 break;
2205 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002206 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002207 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002208 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2209 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2210 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2211 Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00002212 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00002213 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2214 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2215 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002216 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002217 case MVT::i8:
2218 Opcode = NVPTX::ST_i8_ari_64;
2219 break;
2220 case MVT::i16:
2221 Opcode = NVPTX::ST_i16_ari_64;
2222 break;
2223 case MVT::i32:
2224 Opcode = NVPTX::ST_i32_ari_64;
2225 break;
2226 case MVT::i64:
2227 Opcode = NVPTX::ST_i64_ari_64;
2228 break;
2229 case MVT::f32:
2230 Opcode = NVPTX::ST_f32_ari_64;
2231 break;
2232 case MVT::f64:
2233 Opcode = NVPTX::ST_f64_ari_64;
2234 break;
2235 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002236 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002237 }
2238 } else {
2239 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002240 case MVT::i8:
2241 Opcode = NVPTX::ST_i8_ari;
2242 break;
2243 case MVT::i16:
2244 Opcode = NVPTX::ST_i16_ari;
2245 break;
2246 case MVT::i32:
2247 Opcode = NVPTX::ST_i32_ari;
2248 break;
2249 case MVT::i64:
2250 Opcode = NVPTX::ST_i64_ari;
2251 break;
2252 case MVT::f32:
2253 Opcode = NVPTX::ST_f32_ari;
2254 break;
2255 case MVT::f64:
2256 Opcode = NVPTX::ST_f64_ari;
2257 break;
2258 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002259 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002260 }
Justin Holewinskiae556d32012-05-04 20:18:50 +00002261 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002262 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2263 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2264 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2265 Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00002266 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002267 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00002268 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002269 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002270 case MVT::i8:
2271 Opcode = NVPTX::ST_i8_areg_64;
2272 break;
2273 case MVT::i16:
2274 Opcode = NVPTX::ST_i16_areg_64;
2275 break;
2276 case MVT::i32:
2277 Opcode = NVPTX::ST_i32_areg_64;
2278 break;
2279 case MVT::i64:
2280 Opcode = NVPTX::ST_i64_areg_64;
2281 break;
2282 case MVT::f32:
2283 Opcode = NVPTX::ST_f32_areg_64;
2284 break;
2285 case MVT::f64:
2286 Opcode = NVPTX::ST_f64_areg_64;
2287 break;
2288 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002289 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002290 }
2291 } else {
2292 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002293 case MVT::i8:
2294 Opcode = NVPTX::ST_i8_areg;
2295 break;
2296 case MVT::i16:
2297 Opcode = NVPTX::ST_i16_areg;
2298 break;
2299 case MVT::i32:
2300 Opcode = NVPTX::ST_i32_areg;
2301 break;
2302 case MVT::i64:
2303 Opcode = NVPTX::ST_i64_areg;
2304 break;
2305 case MVT::f32:
2306 Opcode = NVPTX::ST_f32_areg;
2307 break;
2308 case MVT::f64:
2309 Opcode = NVPTX::ST_f64_areg;
2310 break;
2311 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002312 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002313 }
Justin Holewinskiae556d32012-05-04 20:18:50 +00002314 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002315 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2316 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2317 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
2318 Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00002319 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002320 }
2321
Craig Topper062a2ba2014-04-25 05:30:21 +00002322 if (NVPTXST) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00002323 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2324 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2325 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2326 }
2327
2328 return NVPTXST;
2329}
2330
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002331SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
2332 SDValue Chain = N->getOperand(0);
2333 SDValue Op1 = N->getOperand(1);
2334 SDValue Addr, Offset, Base;
2335 unsigned Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00002336 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002337 SDNode *ST;
2338 EVT EltVT = Op1.getValueType();
2339 MemSDNode *MemSD = cast<MemSDNode>(N);
2340 EVT StoreVT = MemSD->getMemoryVT();
2341
2342 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00002343 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002344
2345 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2346 report_fatal_error("Cannot store to pointer that points to constant "
2347 "memory space");
2348 }
2349
2350 // Volatile Setting
2351 // - .volatile is only available for .global and .shared
2352 bool IsVolatile = MemSD->isVolatile();
2353 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2354 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2355 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2356 IsVolatile = false;
2357
2358 // Type Setting: toType + toTypeWidth
2359 // - for integer type, always use 'u'
2360 assert(StoreVT.isSimple() && "Store value is not simple");
2361 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00002362 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002363 unsigned ToType;
2364 if (ScalarVT.isFloatingPoint())
2365 ToType = NVPTX::PTXLdStInstCode::Float;
2366 else
2367 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2368
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002369 SmallVector<SDValue, 12> StOps;
2370 SDValue N2;
2371 unsigned VecType;
2372
2373 switch (N->getOpcode()) {
2374 case NVPTXISD::StoreV2:
2375 VecType = NVPTX::PTXLdStInstCode::V2;
2376 StOps.push_back(N->getOperand(1));
2377 StOps.push_back(N->getOperand(2));
2378 N2 = N->getOperand(3);
2379 break;
2380 case NVPTXISD::StoreV4:
2381 VecType = NVPTX::PTXLdStInstCode::V4;
2382 StOps.push_back(N->getOperand(1));
2383 StOps.push_back(N->getOperand(2));
2384 StOps.push_back(N->getOperand(3));
2385 StOps.push_back(N->getOperand(4));
2386 N2 = N->getOperand(5);
2387 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002388 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002389 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002390 }
2391
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002392 StOps.push_back(getI32Imm(IsVolatile, DL));
2393 StOps.push_back(getI32Imm(CodeAddrSpace, DL));
2394 StOps.push_back(getI32Imm(VecType, DL));
2395 StOps.push_back(getI32Imm(ToType, DL));
2396 StOps.push_back(getI32Imm(ToTypeWidth, DL));
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002397
2398 if (SelectDirectAddr(N2, Addr)) {
2399 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002400 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002401 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002402 case NVPTXISD::StoreV2:
2403 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002404 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002405 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002406 case MVT::i8:
2407 Opcode = NVPTX::STV_i8_v2_avar;
2408 break;
2409 case MVT::i16:
2410 Opcode = NVPTX::STV_i16_v2_avar;
2411 break;
2412 case MVT::i32:
2413 Opcode = NVPTX::STV_i32_v2_avar;
2414 break;
2415 case MVT::i64:
2416 Opcode = NVPTX::STV_i64_v2_avar;
2417 break;
2418 case MVT::f32:
2419 Opcode = NVPTX::STV_f32_v2_avar;
2420 break;
2421 case MVT::f64:
2422 Opcode = NVPTX::STV_f64_v2_avar;
2423 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002424 }
2425 break;
2426 case NVPTXISD::StoreV4:
2427 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002428 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002429 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002430 case MVT::i8:
2431 Opcode = NVPTX::STV_i8_v4_avar;
2432 break;
2433 case MVT::i16:
2434 Opcode = NVPTX::STV_i16_v4_avar;
2435 break;
2436 case MVT::i32:
2437 Opcode = NVPTX::STV_i32_v4_avar;
2438 break;
2439 case MVT::f32:
2440 Opcode = NVPTX::STV_f32_v4_avar;
2441 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002442 }
2443 break;
2444 }
2445 StOps.push_back(Addr);
Eric Christopher02389e32015-02-19 00:08:27 +00002446 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2447 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002448 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002449 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002450 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002451 case NVPTXISD::StoreV2:
2452 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002453 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002454 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002455 case MVT::i8:
2456 Opcode = NVPTX::STV_i8_v2_asi;
2457 break;
2458 case MVT::i16:
2459 Opcode = NVPTX::STV_i16_v2_asi;
2460 break;
2461 case MVT::i32:
2462 Opcode = NVPTX::STV_i32_v2_asi;
2463 break;
2464 case MVT::i64:
2465 Opcode = NVPTX::STV_i64_v2_asi;
2466 break;
2467 case MVT::f32:
2468 Opcode = NVPTX::STV_f32_v2_asi;
2469 break;
2470 case MVT::f64:
2471 Opcode = NVPTX::STV_f64_v2_asi;
2472 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002473 }
2474 break;
2475 case NVPTXISD::StoreV4:
2476 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002477 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002478 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002479 case MVT::i8:
2480 Opcode = NVPTX::STV_i8_v4_asi;
2481 break;
2482 case MVT::i16:
2483 Opcode = NVPTX::STV_i16_v4_asi;
2484 break;
2485 case MVT::i32:
2486 Opcode = NVPTX::STV_i32_v4_asi;
2487 break;
2488 case MVT::f32:
2489 Opcode = NVPTX::STV_f32_v4_asi;
2490 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002491 }
2492 break;
2493 }
2494 StOps.push_back(Base);
2495 StOps.push_back(Offset);
Eric Christopher02389e32015-02-19 00:08:27 +00002496 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2497 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2498 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002499 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002500 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002501 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002502 case NVPTXISD::StoreV2:
2503 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002504 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002505 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002506 case MVT::i8:
2507 Opcode = NVPTX::STV_i8_v2_ari_64;
2508 break;
2509 case MVT::i16:
2510 Opcode = NVPTX::STV_i16_v2_ari_64;
2511 break;
2512 case MVT::i32:
2513 Opcode = NVPTX::STV_i32_v2_ari_64;
2514 break;
2515 case MVT::i64:
2516 Opcode = NVPTX::STV_i64_v2_ari_64;
2517 break;
2518 case MVT::f32:
2519 Opcode = NVPTX::STV_f32_v2_ari_64;
2520 break;
2521 case MVT::f64:
2522 Opcode = NVPTX::STV_f64_v2_ari_64;
2523 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002524 }
2525 break;
2526 case NVPTXISD::StoreV4:
2527 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002528 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002529 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002530 case MVT::i8:
2531 Opcode = NVPTX::STV_i8_v4_ari_64;
2532 break;
2533 case MVT::i16:
2534 Opcode = NVPTX::STV_i16_v4_ari_64;
2535 break;
2536 case MVT::i32:
2537 Opcode = NVPTX::STV_i32_v4_ari_64;
2538 break;
2539 case MVT::f32:
2540 Opcode = NVPTX::STV_f32_v4_ari_64;
2541 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002542 }
2543 break;
2544 }
2545 } else {
2546 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002547 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002548 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002549 case NVPTXISD::StoreV2:
2550 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002551 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002552 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002553 case MVT::i8:
2554 Opcode = NVPTX::STV_i8_v2_ari;
2555 break;
2556 case MVT::i16:
2557 Opcode = NVPTX::STV_i16_v2_ari;
2558 break;
2559 case MVT::i32:
2560 Opcode = NVPTX::STV_i32_v2_ari;
2561 break;
2562 case MVT::i64:
2563 Opcode = NVPTX::STV_i64_v2_ari;
2564 break;
2565 case MVT::f32:
2566 Opcode = NVPTX::STV_f32_v2_ari;
2567 break;
2568 case MVT::f64:
2569 Opcode = NVPTX::STV_f64_v2_ari;
2570 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002571 }
2572 break;
2573 case NVPTXISD::StoreV4:
2574 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002575 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002576 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002577 case MVT::i8:
2578 Opcode = NVPTX::STV_i8_v4_ari;
2579 break;
2580 case MVT::i16:
2581 Opcode = NVPTX::STV_i16_v4_ari;
2582 break;
2583 case MVT::i32:
2584 Opcode = NVPTX::STV_i32_v4_ari;
2585 break;
2586 case MVT::f32:
2587 Opcode = NVPTX::STV_f32_v4_ari;
2588 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002589 }
2590 break;
2591 }
2592 }
2593 StOps.push_back(Base);
2594 StOps.push_back(Offset);
2595 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00002596 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002597 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002598 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002599 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002600 case NVPTXISD::StoreV2:
2601 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002602 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002603 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002604 case MVT::i8:
2605 Opcode = NVPTX::STV_i8_v2_areg_64;
2606 break;
2607 case MVT::i16:
2608 Opcode = NVPTX::STV_i16_v2_areg_64;
2609 break;
2610 case MVT::i32:
2611 Opcode = NVPTX::STV_i32_v2_areg_64;
2612 break;
2613 case MVT::i64:
2614 Opcode = NVPTX::STV_i64_v2_areg_64;
2615 break;
2616 case MVT::f32:
2617 Opcode = NVPTX::STV_f32_v2_areg_64;
2618 break;
2619 case MVT::f64:
2620 Opcode = NVPTX::STV_f64_v2_areg_64;
2621 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002622 }
2623 break;
2624 case NVPTXISD::StoreV4:
2625 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002626 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002627 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002628 case MVT::i8:
2629 Opcode = NVPTX::STV_i8_v4_areg_64;
2630 break;
2631 case MVT::i16:
2632 Opcode = NVPTX::STV_i16_v4_areg_64;
2633 break;
2634 case MVT::i32:
2635 Opcode = NVPTX::STV_i32_v4_areg_64;
2636 break;
2637 case MVT::f32:
2638 Opcode = NVPTX::STV_f32_v4_areg_64;
2639 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002640 }
2641 break;
2642 }
2643 } else {
2644 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002645 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002646 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002647 case NVPTXISD::StoreV2:
2648 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002649 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002650 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002651 case MVT::i8:
2652 Opcode = NVPTX::STV_i8_v2_areg;
2653 break;
2654 case MVT::i16:
2655 Opcode = NVPTX::STV_i16_v2_areg;
2656 break;
2657 case MVT::i32:
2658 Opcode = NVPTX::STV_i32_v2_areg;
2659 break;
2660 case MVT::i64:
2661 Opcode = NVPTX::STV_i64_v2_areg;
2662 break;
2663 case MVT::f32:
2664 Opcode = NVPTX::STV_f32_v2_areg;
2665 break;
2666 case MVT::f64:
2667 Opcode = NVPTX::STV_f64_v2_areg;
2668 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002669 }
2670 break;
2671 case NVPTXISD::StoreV4:
2672 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002673 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002674 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002675 case MVT::i8:
2676 Opcode = NVPTX::STV_i8_v4_areg;
2677 break;
2678 case MVT::i16:
2679 Opcode = NVPTX::STV_i16_v4_areg;
2680 break;
2681 case MVT::i32:
2682 Opcode = NVPTX::STV_i32_v4_areg;
2683 break;
2684 case MVT::f32:
2685 Opcode = NVPTX::STV_f32_v4_areg;
2686 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002687 }
2688 break;
2689 }
2690 }
2691 StOps.push_back(N2);
2692 }
2693
2694 StOps.push_back(Chain);
2695
Michael Liaob53d8962013-04-19 22:22:57 +00002696 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002697
2698 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2699 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2700 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2701
2702 return ST;
2703}
2704
Justin Holewinskif8f70912013-06-28 17:57:59 +00002705SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2706 SDValue Chain = Node->getOperand(0);
2707 SDValue Offset = Node->getOperand(2);
2708 SDValue Flag = Node->getOperand(3);
2709 SDLoc DL(Node);
2710 MemSDNode *Mem = cast<MemSDNode>(Node);
2711
2712 unsigned VecSize;
2713 switch (Node->getOpcode()) {
2714 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002715 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002716 case NVPTXISD::LoadParam:
2717 VecSize = 1;
2718 break;
2719 case NVPTXISD::LoadParamV2:
2720 VecSize = 2;
2721 break;
2722 case NVPTXISD::LoadParamV4:
2723 VecSize = 4;
2724 break;
2725 }
2726
2727 EVT EltVT = Node->getValueType(0);
2728 EVT MemVT = Mem->getMemoryVT();
2729
2730 unsigned Opc = 0;
2731
2732 switch (VecSize) {
2733 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002734 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002735 case 1:
2736 switch (MemVT.getSimpleVT().SimpleTy) {
2737 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002738 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002739 case MVT::i1:
2740 Opc = NVPTX::LoadParamMemI8;
2741 break;
2742 case MVT::i8:
2743 Opc = NVPTX::LoadParamMemI8;
2744 break;
2745 case MVT::i16:
2746 Opc = NVPTX::LoadParamMemI16;
2747 break;
2748 case MVT::i32:
2749 Opc = NVPTX::LoadParamMemI32;
2750 break;
2751 case MVT::i64:
2752 Opc = NVPTX::LoadParamMemI64;
2753 break;
2754 case MVT::f32:
2755 Opc = NVPTX::LoadParamMemF32;
2756 break;
2757 case MVT::f64:
2758 Opc = NVPTX::LoadParamMemF64;
2759 break;
2760 }
2761 break;
2762 case 2:
2763 switch (MemVT.getSimpleVT().SimpleTy) {
2764 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002765 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002766 case MVT::i1:
2767 Opc = NVPTX::LoadParamMemV2I8;
2768 break;
2769 case MVT::i8:
2770 Opc = NVPTX::LoadParamMemV2I8;
2771 break;
2772 case MVT::i16:
2773 Opc = NVPTX::LoadParamMemV2I16;
2774 break;
2775 case MVT::i32:
2776 Opc = NVPTX::LoadParamMemV2I32;
2777 break;
2778 case MVT::i64:
2779 Opc = NVPTX::LoadParamMemV2I64;
2780 break;
2781 case MVT::f32:
2782 Opc = NVPTX::LoadParamMemV2F32;
2783 break;
2784 case MVT::f64:
2785 Opc = NVPTX::LoadParamMemV2F64;
2786 break;
2787 }
2788 break;
2789 case 4:
2790 switch (MemVT.getSimpleVT().SimpleTy) {
2791 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002792 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002793 case MVT::i1:
2794 Opc = NVPTX::LoadParamMemV4I8;
2795 break;
2796 case MVT::i8:
2797 Opc = NVPTX::LoadParamMemV4I8;
2798 break;
2799 case MVT::i16:
2800 Opc = NVPTX::LoadParamMemV4I16;
2801 break;
2802 case MVT::i32:
2803 Opc = NVPTX::LoadParamMemV4I32;
2804 break;
2805 case MVT::f32:
2806 Opc = NVPTX::LoadParamMemV4F32;
2807 break;
2808 }
2809 break;
2810 }
2811
2812 SDVTList VTs;
2813 if (VecSize == 1) {
2814 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2815 } else if (VecSize == 2) {
2816 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2817 } else {
2818 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
Craig Topperabb4ac72014-04-16 06:10:51 +00002819 VTs = CurDAG->getVTList(EVTs);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002820 }
2821
2822 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2823
2824 SmallVector<SDValue, 2> Ops;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002825 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002826 Ops.push_back(Chain);
2827 Ops.push_back(Flag);
2828
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002829 return CurDAG->getMachineNode(Opc, DL, VTs, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002830}
2831
2832SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2833 SDLoc DL(N);
2834 SDValue Chain = N->getOperand(0);
2835 SDValue Offset = N->getOperand(1);
2836 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2837 MemSDNode *Mem = cast<MemSDNode>(N);
2838
2839 // How many elements do we have?
2840 unsigned NumElts = 1;
2841 switch (N->getOpcode()) {
2842 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002843 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002844 case NVPTXISD::StoreRetval:
2845 NumElts = 1;
2846 break;
2847 case NVPTXISD::StoreRetvalV2:
2848 NumElts = 2;
2849 break;
2850 case NVPTXISD::StoreRetvalV4:
2851 NumElts = 4;
2852 break;
2853 }
2854
2855 // Build vector of operands
2856 SmallVector<SDValue, 6> Ops;
2857 for (unsigned i = 0; i < NumElts; ++i)
2858 Ops.push_back(N->getOperand(i + 2));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002859 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002860 Ops.push_back(Chain);
2861
2862 // Determine target opcode
2863 // If we have an i1, use an 8-bit store. The lowering code in
2864 // NVPTXISelLowering will have already emitted an upcast.
2865 unsigned Opcode = 0;
2866 switch (NumElts) {
2867 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002868 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002869 case 1:
2870 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2871 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002872 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002873 case MVT::i1:
2874 Opcode = NVPTX::StoreRetvalI8;
2875 break;
2876 case MVT::i8:
2877 Opcode = NVPTX::StoreRetvalI8;
2878 break;
2879 case MVT::i16:
2880 Opcode = NVPTX::StoreRetvalI16;
2881 break;
2882 case MVT::i32:
2883 Opcode = NVPTX::StoreRetvalI32;
2884 break;
2885 case MVT::i64:
2886 Opcode = NVPTX::StoreRetvalI64;
2887 break;
2888 case MVT::f32:
2889 Opcode = NVPTX::StoreRetvalF32;
2890 break;
2891 case MVT::f64:
2892 Opcode = NVPTX::StoreRetvalF64;
2893 break;
2894 }
2895 break;
2896 case 2:
2897 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2898 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002899 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002900 case MVT::i1:
2901 Opcode = NVPTX::StoreRetvalV2I8;
2902 break;
2903 case MVT::i8:
2904 Opcode = NVPTX::StoreRetvalV2I8;
2905 break;
2906 case MVT::i16:
2907 Opcode = NVPTX::StoreRetvalV2I16;
2908 break;
2909 case MVT::i32:
2910 Opcode = NVPTX::StoreRetvalV2I32;
2911 break;
2912 case MVT::i64:
2913 Opcode = NVPTX::StoreRetvalV2I64;
2914 break;
2915 case MVT::f32:
2916 Opcode = NVPTX::StoreRetvalV2F32;
2917 break;
2918 case MVT::f64:
2919 Opcode = NVPTX::StoreRetvalV2F64;
2920 break;
2921 }
2922 break;
2923 case 4:
2924 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2925 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002926 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002927 case MVT::i1:
2928 Opcode = NVPTX::StoreRetvalV4I8;
2929 break;
2930 case MVT::i8:
2931 Opcode = NVPTX::StoreRetvalV4I8;
2932 break;
2933 case MVT::i16:
2934 Opcode = NVPTX::StoreRetvalV4I16;
2935 break;
2936 case MVT::i32:
2937 Opcode = NVPTX::StoreRetvalV4I32;
2938 break;
2939 case MVT::f32:
2940 Opcode = NVPTX::StoreRetvalV4F32;
2941 break;
2942 }
2943 break;
2944 }
2945
2946 SDNode *Ret =
2947 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2948 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2949 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2950 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2951
2952 return Ret;
2953}
2954
2955SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2956 SDLoc DL(N);
2957 SDValue Chain = N->getOperand(0);
2958 SDValue Param = N->getOperand(1);
2959 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2960 SDValue Offset = N->getOperand(2);
2961 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2962 MemSDNode *Mem = cast<MemSDNode>(N);
2963 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2964
2965 // How many elements do we have?
2966 unsigned NumElts = 1;
2967 switch (N->getOpcode()) {
2968 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002969 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002970 case NVPTXISD::StoreParamU32:
2971 case NVPTXISD::StoreParamS32:
2972 case NVPTXISD::StoreParam:
2973 NumElts = 1;
2974 break;
2975 case NVPTXISD::StoreParamV2:
2976 NumElts = 2;
2977 break;
2978 case NVPTXISD::StoreParamV4:
2979 NumElts = 4;
2980 break;
2981 }
2982
2983 // Build vector of operands
2984 SmallVector<SDValue, 8> Ops;
2985 for (unsigned i = 0; i < NumElts; ++i)
2986 Ops.push_back(N->getOperand(i + 3));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002987 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2988 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002989 Ops.push_back(Chain);
2990 Ops.push_back(Flag);
2991
2992 // Determine target opcode
2993 // If we have an i1, use an 8-bit store. The lowering code in
2994 // NVPTXISelLowering will have already emitted an upcast.
2995 unsigned Opcode = 0;
2996 switch (N->getOpcode()) {
2997 default:
2998 switch (NumElts) {
2999 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00003000 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003001 case 1:
3002 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3003 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00003004 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003005 case MVT::i1:
3006 Opcode = NVPTX::StoreParamI8;
3007 break;
3008 case MVT::i8:
3009 Opcode = NVPTX::StoreParamI8;
3010 break;
3011 case MVT::i16:
3012 Opcode = NVPTX::StoreParamI16;
3013 break;
3014 case MVT::i32:
3015 Opcode = NVPTX::StoreParamI32;
3016 break;
3017 case MVT::i64:
3018 Opcode = NVPTX::StoreParamI64;
3019 break;
3020 case MVT::f32:
3021 Opcode = NVPTX::StoreParamF32;
3022 break;
3023 case MVT::f64:
3024 Opcode = NVPTX::StoreParamF64;
3025 break;
3026 }
3027 break;
3028 case 2:
3029 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3030 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00003031 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003032 case MVT::i1:
3033 Opcode = NVPTX::StoreParamV2I8;
3034 break;
3035 case MVT::i8:
3036 Opcode = NVPTX::StoreParamV2I8;
3037 break;
3038 case MVT::i16:
3039 Opcode = NVPTX::StoreParamV2I16;
3040 break;
3041 case MVT::i32:
3042 Opcode = NVPTX::StoreParamV2I32;
3043 break;
3044 case MVT::i64:
3045 Opcode = NVPTX::StoreParamV2I64;
3046 break;
3047 case MVT::f32:
3048 Opcode = NVPTX::StoreParamV2F32;
3049 break;
3050 case MVT::f64:
3051 Opcode = NVPTX::StoreParamV2F64;
3052 break;
3053 }
3054 break;
3055 case 4:
3056 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3057 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00003058 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003059 case MVT::i1:
3060 Opcode = NVPTX::StoreParamV4I8;
3061 break;
3062 case MVT::i8:
3063 Opcode = NVPTX::StoreParamV4I8;
3064 break;
3065 case MVT::i16:
3066 Opcode = NVPTX::StoreParamV4I16;
3067 break;
3068 case MVT::i32:
3069 Opcode = NVPTX::StoreParamV4I32;
3070 break;
3071 case MVT::f32:
3072 Opcode = NVPTX::StoreParamV4F32;
3073 break;
3074 }
3075 break;
3076 }
3077 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00003078 // Special case: if we have a sign-extend/zero-extend node, insert the
3079 // conversion instruction first, and use that as the value operand to
3080 // the selected StoreParam node.
3081 case NVPTXISD::StoreParamU32: {
3082 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003083 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00003084 MVT::i32);
3085 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
3086 MVT::i32, Ops[0], CvtNone);
3087 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00003088 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00003089 }
3090 case NVPTXISD::StoreParamS32: {
3091 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003092 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00003093 MVT::i32);
3094 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3095 MVT::i32, Ops[0], CvtNone);
3096 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00003097 break;
3098 }
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00003099 }
Justin Holewinskif8f70912013-06-28 17:57:59 +00003100
Justin Holewinskidff28d22013-07-01 12:59:01 +00003101 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
Justin Holewinskif8f70912013-06-28 17:57:59 +00003102 SDNode *Ret =
Justin Holewinskidff28d22013-07-01 12:59:01 +00003103 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00003104 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3105 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3106 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3107
3108 return Ret;
3109}
3110
Justin Holewinski30d56a72014-04-09 15:39:15 +00003111SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
3112 SDValue Chain = N->getOperand(0);
Craig Topper062a2ba2014-04-25 05:30:21 +00003113 SDNode *Ret = nullptr;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003114 unsigned Opc = 0;
3115 SmallVector<SDValue, 8> Ops;
3116
3117 switch (N->getOpcode()) {
Craig Topper062a2ba2014-04-25 05:30:21 +00003118 default: return nullptr;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003119 case NVPTXISD::Tex1DFloatS32:
3120 Opc = NVPTX::TEX_1D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003121 break;
3122 case NVPTXISD::Tex1DFloatFloat:
3123 Opc = NVPTX::TEX_1D_F32_F32;
3124 break;
3125 case NVPTXISD::Tex1DFloatFloatLevel:
3126 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3127 break;
3128 case NVPTXISD::Tex1DFloatFloatGrad:
3129 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3130 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003131 case NVPTXISD::Tex1DS32S32:
3132 Opc = NVPTX::TEX_1D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003133 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003134 case NVPTXISD::Tex1DS32Float:
3135 Opc = NVPTX::TEX_1D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003136 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003137 case NVPTXISD::Tex1DS32FloatLevel:
3138 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003139 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003140 case NVPTXISD::Tex1DS32FloatGrad:
3141 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003142 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003143 case NVPTXISD::Tex1DU32S32:
3144 Opc = NVPTX::TEX_1D_U32_S32;
3145 break;
3146 case NVPTXISD::Tex1DU32Float:
3147 Opc = NVPTX::TEX_1D_U32_F32;
3148 break;
3149 case NVPTXISD::Tex1DU32FloatLevel:
3150 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3151 break;
3152 case NVPTXISD::Tex1DU32FloatGrad:
3153 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3154 break;
3155 case NVPTXISD::Tex1DArrayFloatS32:
3156 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003157 break;
3158 case NVPTXISD::Tex1DArrayFloatFloat:
3159 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3160 break;
3161 case NVPTXISD::Tex1DArrayFloatFloatLevel:
3162 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3163 break;
3164 case NVPTXISD::Tex1DArrayFloatFloatGrad:
3165 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3166 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003167 case NVPTXISD::Tex1DArrayS32S32:
3168 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003169 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003170 case NVPTXISD::Tex1DArrayS32Float:
3171 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003172 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003173 case NVPTXISD::Tex1DArrayS32FloatLevel:
3174 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003175 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003176 case NVPTXISD::Tex1DArrayS32FloatGrad:
3177 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003178 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003179 case NVPTXISD::Tex1DArrayU32S32:
3180 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3181 break;
3182 case NVPTXISD::Tex1DArrayU32Float:
3183 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3184 break;
3185 case NVPTXISD::Tex1DArrayU32FloatLevel:
3186 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3187 break;
3188 case NVPTXISD::Tex1DArrayU32FloatGrad:
3189 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3190 break;
3191 case NVPTXISD::Tex2DFloatS32:
3192 Opc = NVPTX::TEX_2D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003193 break;
3194 case NVPTXISD::Tex2DFloatFloat:
3195 Opc = NVPTX::TEX_2D_F32_F32;
3196 break;
3197 case NVPTXISD::Tex2DFloatFloatLevel:
3198 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3199 break;
3200 case NVPTXISD::Tex2DFloatFloatGrad:
3201 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3202 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003203 case NVPTXISD::Tex2DS32S32:
3204 Opc = NVPTX::TEX_2D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003205 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003206 case NVPTXISD::Tex2DS32Float:
3207 Opc = NVPTX::TEX_2D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003208 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003209 case NVPTXISD::Tex2DS32FloatLevel:
3210 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003211 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003212 case NVPTXISD::Tex2DS32FloatGrad:
3213 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003214 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003215 case NVPTXISD::Tex2DU32S32:
3216 Opc = NVPTX::TEX_2D_U32_S32;
3217 break;
3218 case NVPTXISD::Tex2DU32Float:
3219 Opc = NVPTX::TEX_2D_U32_F32;
3220 break;
3221 case NVPTXISD::Tex2DU32FloatLevel:
3222 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3223 break;
3224 case NVPTXISD::Tex2DU32FloatGrad:
3225 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3226 break;
3227 case NVPTXISD::Tex2DArrayFloatS32:
3228 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003229 break;
3230 case NVPTXISD::Tex2DArrayFloatFloat:
3231 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3232 break;
3233 case NVPTXISD::Tex2DArrayFloatFloatLevel:
3234 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3235 break;
3236 case NVPTXISD::Tex2DArrayFloatFloatGrad:
3237 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3238 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003239 case NVPTXISD::Tex2DArrayS32S32:
3240 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003241 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003242 case NVPTXISD::Tex2DArrayS32Float:
3243 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003244 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003245 case NVPTXISD::Tex2DArrayS32FloatLevel:
3246 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003247 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003248 case NVPTXISD::Tex2DArrayS32FloatGrad:
3249 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003250 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003251 case NVPTXISD::Tex2DArrayU32S32:
3252 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3253 break;
3254 case NVPTXISD::Tex2DArrayU32Float:
3255 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3256 break;
3257 case NVPTXISD::Tex2DArrayU32FloatLevel:
3258 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3259 break;
3260 case NVPTXISD::Tex2DArrayU32FloatGrad:
3261 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3262 break;
3263 case NVPTXISD::Tex3DFloatS32:
3264 Opc = NVPTX::TEX_3D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003265 break;
3266 case NVPTXISD::Tex3DFloatFloat:
3267 Opc = NVPTX::TEX_3D_F32_F32;
3268 break;
3269 case NVPTXISD::Tex3DFloatFloatLevel:
3270 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3271 break;
3272 case NVPTXISD::Tex3DFloatFloatGrad:
3273 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3274 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003275 case NVPTXISD::Tex3DS32S32:
3276 Opc = NVPTX::TEX_3D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003277 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003278 case NVPTXISD::Tex3DS32Float:
3279 Opc = NVPTX::TEX_3D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003280 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003281 case NVPTXISD::Tex3DS32FloatLevel:
3282 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003283 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003284 case NVPTXISD::Tex3DS32FloatGrad:
3285 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3286 break;
3287 case NVPTXISD::Tex3DU32S32:
3288 Opc = NVPTX::TEX_3D_U32_S32;
3289 break;
3290 case NVPTXISD::Tex3DU32Float:
3291 Opc = NVPTX::TEX_3D_U32_F32;
3292 break;
3293 case NVPTXISD::Tex3DU32FloatLevel:
3294 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3295 break;
3296 case NVPTXISD::Tex3DU32FloatGrad:
3297 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3298 break;
3299 case NVPTXISD::TexCubeFloatFloat:
3300 Opc = NVPTX::TEX_CUBE_F32_F32;
3301 break;
3302 case NVPTXISD::TexCubeFloatFloatLevel:
3303 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3304 break;
3305 case NVPTXISD::TexCubeS32Float:
3306 Opc = NVPTX::TEX_CUBE_S32_F32;
3307 break;
3308 case NVPTXISD::TexCubeS32FloatLevel:
3309 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3310 break;
3311 case NVPTXISD::TexCubeU32Float:
3312 Opc = NVPTX::TEX_CUBE_U32_F32;
3313 break;
3314 case NVPTXISD::TexCubeU32FloatLevel:
3315 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3316 break;
3317 case NVPTXISD::TexCubeArrayFloatFloat:
3318 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3319 break;
3320 case NVPTXISD::TexCubeArrayFloatFloatLevel:
3321 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3322 break;
3323 case NVPTXISD::TexCubeArrayS32Float:
3324 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3325 break;
3326 case NVPTXISD::TexCubeArrayS32FloatLevel:
3327 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3328 break;
3329 case NVPTXISD::TexCubeArrayU32Float:
3330 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3331 break;
3332 case NVPTXISD::TexCubeArrayU32FloatLevel:
3333 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3334 break;
3335 case NVPTXISD::Tld4R2DFloatFloat:
3336 Opc = NVPTX::TLD4_R_2D_F32_F32;
3337 break;
3338 case NVPTXISD::Tld4G2DFloatFloat:
3339 Opc = NVPTX::TLD4_G_2D_F32_F32;
3340 break;
3341 case NVPTXISD::Tld4B2DFloatFloat:
3342 Opc = NVPTX::TLD4_B_2D_F32_F32;
3343 break;
3344 case NVPTXISD::Tld4A2DFloatFloat:
3345 Opc = NVPTX::TLD4_A_2D_F32_F32;
3346 break;
3347 case NVPTXISD::Tld4R2DS64Float:
3348 Opc = NVPTX::TLD4_R_2D_S32_F32;
3349 break;
3350 case NVPTXISD::Tld4G2DS64Float:
3351 Opc = NVPTX::TLD4_G_2D_S32_F32;
3352 break;
3353 case NVPTXISD::Tld4B2DS64Float:
3354 Opc = NVPTX::TLD4_B_2D_S32_F32;
3355 break;
3356 case NVPTXISD::Tld4A2DS64Float:
3357 Opc = NVPTX::TLD4_A_2D_S32_F32;
3358 break;
3359 case NVPTXISD::Tld4R2DU64Float:
3360 Opc = NVPTX::TLD4_R_2D_U32_F32;
3361 break;
3362 case NVPTXISD::Tld4G2DU64Float:
3363 Opc = NVPTX::TLD4_G_2D_U32_F32;
3364 break;
3365 case NVPTXISD::Tld4B2DU64Float:
3366 Opc = NVPTX::TLD4_B_2D_U32_F32;
3367 break;
3368 case NVPTXISD::Tld4A2DU64Float:
3369 Opc = NVPTX::TLD4_A_2D_U32_F32;
3370 break;
3371 case NVPTXISD::TexUnified1DFloatS32:
3372 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3373 break;
3374 case NVPTXISD::TexUnified1DFloatFloat:
3375 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3376 break;
3377 case NVPTXISD::TexUnified1DFloatFloatLevel:
3378 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3379 break;
3380 case NVPTXISD::TexUnified1DFloatFloatGrad:
3381 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3382 break;
3383 case NVPTXISD::TexUnified1DS32S32:
3384 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3385 break;
3386 case NVPTXISD::TexUnified1DS32Float:
3387 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3388 break;
3389 case NVPTXISD::TexUnified1DS32FloatLevel:
3390 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3391 break;
3392 case NVPTXISD::TexUnified1DS32FloatGrad:
3393 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3394 break;
3395 case NVPTXISD::TexUnified1DU32S32:
3396 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3397 break;
3398 case NVPTXISD::TexUnified1DU32Float:
3399 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3400 break;
3401 case NVPTXISD::TexUnified1DU32FloatLevel:
3402 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3403 break;
3404 case NVPTXISD::TexUnified1DU32FloatGrad:
3405 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3406 break;
3407 case NVPTXISD::TexUnified1DArrayFloatS32:
3408 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3409 break;
3410 case NVPTXISD::TexUnified1DArrayFloatFloat:
3411 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3412 break;
3413 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3414 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3415 break;
3416 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3417 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3418 break;
3419 case NVPTXISD::TexUnified1DArrayS32S32:
3420 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3421 break;
3422 case NVPTXISD::TexUnified1DArrayS32Float:
3423 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3424 break;
3425 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3426 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3427 break;
3428 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3429 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3430 break;
3431 case NVPTXISD::TexUnified1DArrayU32S32:
3432 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3433 break;
3434 case NVPTXISD::TexUnified1DArrayU32Float:
3435 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3436 break;
3437 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3438 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3439 break;
3440 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3441 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3442 break;
3443 case NVPTXISD::TexUnified2DFloatS32:
3444 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3445 break;
3446 case NVPTXISD::TexUnified2DFloatFloat:
3447 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3448 break;
3449 case NVPTXISD::TexUnified2DFloatFloatLevel:
3450 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3451 break;
3452 case NVPTXISD::TexUnified2DFloatFloatGrad:
3453 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3454 break;
3455 case NVPTXISD::TexUnified2DS32S32:
3456 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3457 break;
3458 case NVPTXISD::TexUnified2DS32Float:
3459 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3460 break;
3461 case NVPTXISD::TexUnified2DS32FloatLevel:
3462 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3463 break;
3464 case NVPTXISD::TexUnified2DS32FloatGrad:
3465 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3466 break;
3467 case NVPTXISD::TexUnified2DU32S32:
3468 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3469 break;
3470 case NVPTXISD::TexUnified2DU32Float:
3471 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3472 break;
3473 case NVPTXISD::TexUnified2DU32FloatLevel:
3474 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3475 break;
3476 case NVPTXISD::TexUnified2DU32FloatGrad:
3477 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3478 break;
3479 case NVPTXISD::TexUnified2DArrayFloatS32:
3480 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3481 break;
3482 case NVPTXISD::TexUnified2DArrayFloatFloat:
3483 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3484 break;
3485 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3486 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3487 break;
3488 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3489 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3490 break;
3491 case NVPTXISD::TexUnified2DArrayS32S32:
3492 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3493 break;
3494 case NVPTXISD::TexUnified2DArrayS32Float:
3495 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3496 break;
3497 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3498 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3499 break;
3500 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3501 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3502 break;
3503 case NVPTXISD::TexUnified2DArrayU32S32:
3504 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3505 break;
3506 case NVPTXISD::TexUnified2DArrayU32Float:
3507 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3508 break;
3509 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3510 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3511 break;
3512 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3513 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3514 break;
3515 case NVPTXISD::TexUnified3DFloatS32:
3516 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3517 break;
3518 case NVPTXISD::TexUnified3DFloatFloat:
3519 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3520 break;
3521 case NVPTXISD::TexUnified3DFloatFloatLevel:
3522 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3523 break;
3524 case NVPTXISD::TexUnified3DFloatFloatGrad:
3525 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3526 break;
3527 case NVPTXISD::TexUnified3DS32S32:
3528 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3529 break;
3530 case NVPTXISD::TexUnified3DS32Float:
3531 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3532 break;
3533 case NVPTXISD::TexUnified3DS32FloatLevel:
3534 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3535 break;
3536 case NVPTXISD::TexUnified3DS32FloatGrad:
3537 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3538 break;
3539 case NVPTXISD::TexUnified3DU32S32:
3540 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3541 break;
3542 case NVPTXISD::TexUnified3DU32Float:
3543 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3544 break;
3545 case NVPTXISD::TexUnified3DU32FloatLevel:
3546 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3547 break;
3548 case NVPTXISD::TexUnified3DU32FloatGrad:
3549 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3550 break;
3551 case NVPTXISD::TexUnifiedCubeFloatFloat:
3552 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3553 break;
3554 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3555 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3556 break;
3557 case NVPTXISD::TexUnifiedCubeS32Float:
3558 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3559 break;
3560 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3561 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3562 break;
3563 case NVPTXISD::TexUnifiedCubeU32Float:
3564 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3565 break;
3566 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3567 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3568 break;
3569 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3570 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3571 break;
3572 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3573 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3574 break;
3575 case NVPTXISD::TexUnifiedCubeArrayS32Float:
3576 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3577 break;
3578 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3579 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3580 break;
3581 case NVPTXISD::TexUnifiedCubeArrayU32Float:
3582 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3583 break;
3584 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3585 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3586 break;
3587 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3588 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3589 break;
3590 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3591 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3592 break;
3593 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3594 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3595 break;
3596 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3597 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3598 break;
3599 case NVPTXISD::Tld4UnifiedR2DS64Float:
3600 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3601 break;
3602 case NVPTXISD::Tld4UnifiedG2DS64Float:
3603 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3604 break;
3605 case NVPTXISD::Tld4UnifiedB2DS64Float:
3606 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3607 break;
3608 case NVPTXISD::Tld4UnifiedA2DS64Float:
3609 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3610 break;
3611 case NVPTXISD::Tld4UnifiedR2DU64Float:
3612 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3613 break;
3614 case NVPTXISD::Tld4UnifiedG2DU64Float:
3615 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3616 break;
3617 case NVPTXISD::Tld4UnifiedB2DU64Float:
3618 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3619 break;
3620 case NVPTXISD::Tld4UnifiedA2DU64Float:
3621 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003622 break;
3623 }
3624
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003625 // Copy over operands
3626 for (unsigned i = 1; i < N->getNumOperands(); ++i) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00003627 Ops.push_back(N->getOperand(i));
3628 }
3629
3630 Ops.push_back(Chain);
3631 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3632 return Ret;
3633}
3634
3635SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
3636 SDValue Chain = N->getOperand(0);
3637 SDValue TexHandle = N->getOperand(1);
Craig Topper062a2ba2014-04-25 05:30:21 +00003638 SDNode *Ret = nullptr;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003639 unsigned Opc = 0;
3640 SmallVector<SDValue, 8> Ops;
3641 switch (N->getOpcode()) {
Craig Topper062a2ba2014-04-25 05:30:21 +00003642 default: return nullptr;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003643 case NVPTXISD::Suld1DI8Clamp:
3644 Opc = NVPTX::SULD_1D_I8_CLAMP;
3645 Ops.push_back(TexHandle);
3646 Ops.push_back(N->getOperand(2));
3647 Ops.push_back(Chain);
3648 break;
3649 case NVPTXISD::Suld1DI16Clamp:
3650 Opc = NVPTX::SULD_1D_I16_CLAMP;
3651 Ops.push_back(TexHandle);
3652 Ops.push_back(N->getOperand(2));
3653 Ops.push_back(Chain);
3654 break;
3655 case NVPTXISD::Suld1DI32Clamp:
3656 Opc = NVPTX::SULD_1D_I32_CLAMP;
3657 Ops.push_back(TexHandle);
3658 Ops.push_back(N->getOperand(2));
3659 Ops.push_back(Chain);
3660 break;
3661 case NVPTXISD::Suld1DI64Clamp:
3662 Opc = NVPTX::SULD_1D_I64_CLAMP;
3663 Ops.push_back(TexHandle);
3664 Ops.push_back(N->getOperand(2));
3665 Ops.push_back(Chain);
3666 break;
3667 case NVPTXISD::Suld1DV2I8Clamp:
3668 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3669 Ops.push_back(TexHandle);
3670 Ops.push_back(N->getOperand(2));
3671 Ops.push_back(Chain);
3672 break;
3673 case NVPTXISD::Suld1DV2I16Clamp:
3674 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3675 Ops.push_back(TexHandle);
3676 Ops.push_back(N->getOperand(2));
3677 Ops.push_back(Chain);
3678 break;
3679 case NVPTXISD::Suld1DV2I32Clamp:
3680 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3681 Ops.push_back(TexHandle);
3682 Ops.push_back(N->getOperand(2));
3683 Ops.push_back(Chain);
3684 break;
3685 case NVPTXISD::Suld1DV2I64Clamp:
3686 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3687 Ops.push_back(TexHandle);
3688 Ops.push_back(N->getOperand(2));
3689 Ops.push_back(Chain);
3690 break;
3691 case NVPTXISD::Suld1DV4I8Clamp:
3692 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3693 Ops.push_back(TexHandle);
3694 Ops.push_back(N->getOperand(2));
3695 Ops.push_back(Chain);
3696 break;
3697 case NVPTXISD::Suld1DV4I16Clamp:
3698 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3699 Ops.push_back(TexHandle);
3700 Ops.push_back(N->getOperand(2));
3701 Ops.push_back(Chain);
3702 break;
3703 case NVPTXISD::Suld1DV4I32Clamp:
3704 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3705 Ops.push_back(TexHandle);
3706 Ops.push_back(N->getOperand(2));
3707 Ops.push_back(Chain);
3708 break;
3709 case NVPTXISD::Suld1DArrayI8Clamp:
3710 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3711 Ops.push_back(TexHandle);
3712 Ops.push_back(N->getOperand(2));
3713 Ops.push_back(N->getOperand(3));
3714 Ops.push_back(Chain);
3715 break;
3716 case NVPTXISD::Suld1DArrayI16Clamp:
3717 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3718 Ops.push_back(TexHandle);
3719 Ops.push_back(N->getOperand(2));
3720 Ops.push_back(N->getOperand(3));
3721 Ops.push_back(Chain);
3722 break;
3723 case NVPTXISD::Suld1DArrayI32Clamp:
3724 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3725 Ops.push_back(TexHandle);
3726 Ops.push_back(N->getOperand(2));
3727 Ops.push_back(N->getOperand(3));
3728 Ops.push_back(Chain);
3729 break;
3730 case NVPTXISD::Suld1DArrayI64Clamp:
3731 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3732 Ops.push_back(TexHandle);
3733 Ops.push_back(N->getOperand(2));
3734 Ops.push_back(N->getOperand(3));
3735 Ops.push_back(Chain);
3736 break;
3737 case NVPTXISD::Suld1DArrayV2I8Clamp:
3738 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3739 Ops.push_back(TexHandle);
3740 Ops.push_back(N->getOperand(2));
3741 Ops.push_back(N->getOperand(3));
3742 Ops.push_back(Chain);
3743 break;
3744 case NVPTXISD::Suld1DArrayV2I16Clamp:
3745 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3746 Ops.push_back(TexHandle);
3747 Ops.push_back(N->getOperand(2));
3748 Ops.push_back(N->getOperand(3));
3749 Ops.push_back(Chain);
3750 break;
3751 case NVPTXISD::Suld1DArrayV2I32Clamp:
3752 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3753 Ops.push_back(TexHandle);
3754 Ops.push_back(N->getOperand(2));
3755 Ops.push_back(N->getOperand(3));
3756 Ops.push_back(Chain);
3757 break;
3758 case NVPTXISD::Suld1DArrayV2I64Clamp:
3759 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3760 Ops.push_back(TexHandle);
3761 Ops.push_back(N->getOperand(2));
3762 Ops.push_back(N->getOperand(3));
3763 Ops.push_back(Chain);
3764 break;
3765 case NVPTXISD::Suld1DArrayV4I8Clamp:
3766 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3767 Ops.push_back(TexHandle);
3768 Ops.push_back(N->getOperand(2));
3769 Ops.push_back(N->getOperand(3));
3770 Ops.push_back(Chain);
3771 break;
3772 case NVPTXISD::Suld1DArrayV4I16Clamp:
3773 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3774 Ops.push_back(TexHandle);
3775 Ops.push_back(N->getOperand(2));
3776 Ops.push_back(N->getOperand(3));
3777 Ops.push_back(Chain);
3778 break;
3779 case NVPTXISD::Suld1DArrayV4I32Clamp:
3780 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3781 Ops.push_back(TexHandle);
3782 Ops.push_back(N->getOperand(2));
3783 Ops.push_back(N->getOperand(3));
3784 Ops.push_back(Chain);
3785 break;
3786 case NVPTXISD::Suld2DI8Clamp:
3787 Opc = NVPTX::SULD_2D_I8_CLAMP;
3788 Ops.push_back(TexHandle);
3789 Ops.push_back(N->getOperand(2));
3790 Ops.push_back(N->getOperand(3));
3791 Ops.push_back(Chain);
3792 break;
3793 case NVPTXISD::Suld2DI16Clamp:
3794 Opc = NVPTX::SULD_2D_I16_CLAMP;
3795 Ops.push_back(TexHandle);
3796 Ops.push_back(N->getOperand(2));
3797 Ops.push_back(N->getOperand(3));
3798 Ops.push_back(Chain);
3799 break;
3800 case NVPTXISD::Suld2DI32Clamp:
3801 Opc = NVPTX::SULD_2D_I32_CLAMP;
3802 Ops.push_back(TexHandle);
3803 Ops.push_back(N->getOperand(2));
3804 Ops.push_back(N->getOperand(3));
3805 Ops.push_back(Chain);
3806 break;
3807 case NVPTXISD::Suld2DI64Clamp:
3808 Opc = NVPTX::SULD_2D_I64_CLAMP;
3809 Ops.push_back(TexHandle);
3810 Ops.push_back(N->getOperand(2));
3811 Ops.push_back(N->getOperand(3));
3812 Ops.push_back(Chain);
3813 break;
3814 case NVPTXISD::Suld2DV2I8Clamp:
3815 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3816 Ops.push_back(TexHandle);
3817 Ops.push_back(N->getOperand(2));
3818 Ops.push_back(N->getOperand(3));
3819 Ops.push_back(Chain);
3820 break;
3821 case NVPTXISD::Suld2DV2I16Clamp:
3822 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3823 Ops.push_back(TexHandle);
3824 Ops.push_back(N->getOperand(2));
3825 Ops.push_back(N->getOperand(3));
3826 Ops.push_back(Chain);
3827 break;
3828 case NVPTXISD::Suld2DV2I32Clamp:
3829 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3830 Ops.push_back(TexHandle);
3831 Ops.push_back(N->getOperand(2));
3832 Ops.push_back(N->getOperand(3));
3833 Ops.push_back(Chain);
3834 break;
3835 case NVPTXISD::Suld2DV2I64Clamp:
3836 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3837 Ops.push_back(TexHandle);
3838 Ops.push_back(N->getOperand(2));
3839 Ops.push_back(N->getOperand(3));
3840 Ops.push_back(Chain);
3841 break;
3842 case NVPTXISD::Suld2DV4I8Clamp:
3843 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3844 Ops.push_back(TexHandle);
3845 Ops.push_back(N->getOperand(2));
3846 Ops.push_back(N->getOperand(3));
3847 Ops.push_back(Chain);
3848 break;
3849 case NVPTXISD::Suld2DV4I16Clamp:
3850 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3851 Ops.push_back(TexHandle);
3852 Ops.push_back(N->getOperand(2));
3853 Ops.push_back(N->getOperand(3));
3854 Ops.push_back(Chain);
3855 break;
3856 case NVPTXISD::Suld2DV4I32Clamp:
3857 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3858 Ops.push_back(TexHandle);
3859 Ops.push_back(N->getOperand(2));
3860 Ops.push_back(N->getOperand(3));
3861 Ops.push_back(Chain);
3862 break;
3863 case NVPTXISD::Suld2DArrayI8Clamp:
3864 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3865 Ops.push_back(TexHandle);
3866 Ops.push_back(N->getOperand(2));
3867 Ops.push_back(N->getOperand(3));
3868 Ops.push_back(N->getOperand(4));
3869 Ops.push_back(Chain);
3870 break;
3871 case NVPTXISD::Suld2DArrayI16Clamp:
3872 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3873 Ops.push_back(TexHandle);
3874 Ops.push_back(N->getOperand(2));
3875 Ops.push_back(N->getOperand(3));
3876 Ops.push_back(N->getOperand(4));
3877 Ops.push_back(Chain);
3878 break;
3879 case NVPTXISD::Suld2DArrayI32Clamp:
3880 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3881 Ops.push_back(TexHandle);
3882 Ops.push_back(N->getOperand(2));
3883 Ops.push_back(N->getOperand(3));
3884 Ops.push_back(N->getOperand(4));
3885 Ops.push_back(Chain);
3886 break;
3887 case NVPTXISD::Suld2DArrayI64Clamp:
3888 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3889 Ops.push_back(TexHandle);
3890 Ops.push_back(N->getOperand(2));
3891 Ops.push_back(N->getOperand(3));
3892 Ops.push_back(N->getOperand(4));
3893 Ops.push_back(Chain);
3894 break;
3895 case NVPTXISD::Suld2DArrayV2I8Clamp:
3896 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3897 Ops.push_back(TexHandle);
3898 Ops.push_back(N->getOperand(2));
3899 Ops.push_back(N->getOperand(3));
3900 Ops.push_back(N->getOperand(4));
3901 Ops.push_back(Chain);
3902 break;
3903 case NVPTXISD::Suld2DArrayV2I16Clamp:
3904 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3905 Ops.push_back(TexHandle);
3906 Ops.push_back(N->getOperand(2));
3907 Ops.push_back(N->getOperand(3));
3908 Ops.push_back(N->getOperand(4));
3909 Ops.push_back(Chain);
3910 break;
3911 case NVPTXISD::Suld2DArrayV2I32Clamp:
3912 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3913 Ops.push_back(TexHandle);
3914 Ops.push_back(N->getOperand(2));
3915 Ops.push_back(N->getOperand(3));
3916 Ops.push_back(N->getOperand(4));
3917 Ops.push_back(Chain);
3918 break;
3919 case NVPTXISD::Suld2DArrayV2I64Clamp:
3920 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3921 Ops.push_back(TexHandle);
3922 Ops.push_back(N->getOperand(2));
3923 Ops.push_back(N->getOperand(3));
3924 Ops.push_back(N->getOperand(4));
3925 Ops.push_back(Chain);
3926 break;
3927 case NVPTXISD::Suld2DArrayV4I8Clamp:
3928 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3929 Ops.push_back(TexHandle);
3930 Ops.push_back(N->getOperand(2));
3931 Ops.push_back(N->getOperand(3));
3932 Ops.push_back(N->getOperand(4));
3933 Ops.push_back(Chain);
3934 break;
3935 case NVPTXISD::Suld2DArrayV4I16Clamp:
3936 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3937 Ops.push_back(TexHandle);
3938 Ops.push_back(N->getOperand(2));
3939 Ops.push_back(N->getOperand(3));
3940 Ops.push_back(N->getOperand(4));
3941 Ops.push_back(Chain);
3942 break;
3943 case NVPTXISD::Suld2DArrayV4I32Clamp:
3944 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3945 Ops.push_back(TexHandle);
3946 Ops.push_back(N->getOperand(2));
3947 Ops.push_back(N->getOperand(3));
3948 Ops.push_back(N->getOperand(4));
3949 Ops.push_back(Chain);
3950 break;
3951 case NVPTXISD::Suld3DI8Clamp:
3952 Opc = NVPTX::SULD_3D_I8_CLAMP;
3953 Ops.push_back(TexHandle);
3954 Ops.push_back(N->getOperand(2));
3955 Ops.push_back(N->getOperand(3));
3956 Ops.push_back(N->getOperand(4));
3957 Ops.push_back(Chain);
3958 break;
3959 case NVPTXISD::Suld3DI16Clamp:
3960 Opc = NVPTX::SULD_3D_I16_CLAMP;
3961 Ops.push_back(TexHandle);
3962 Ops.push_back(N->getOperand(2));
3963 Ops.push_back(N->getOperand(3));
3964 Ops.push_back(N->getOperand(4));
3965 Ops.push_back(Chain);
3966 break;
3967 case NVPTXISD::Suld3DI32Clamp:
3968 Opc = NVPTX::SULD_3D_I32_CLAMP;
3969 Ops.push_back(TexHandle);
3970 Ops.push_back(N->getOperand(2));
3971 Ops.push_back(N->getOperand(3));
3972 Ops.push_back(N->getOperand(4));
3973 Ops.push_back(Chain);
3974 break;
3975 case NVPTXISD::Suld3DI64Clamp:
3976 Opc = NVPTX::SULD_3D_I64_CLAMP;
3977 Ops.push_back(TexHandle);
3978 Ops.push_back(N->getOperand(2));
3979 Ops.push_back(N->getOperand(3));
3980 Ops.push_back(N->getOperand(4));
3981 Ops.push_back(Chain);
3982 break;
3983 case NVPTXISD::Suld3DV2I8Clamp:
3984 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3985 Ops.push_back(TexHandle);
3986 Ops.push_back(N->getOperand(2));
3987 Ops.push_back(N->getOperand(3));
3988 Ops.push_back(N->getOperand(4));
3989 Ops.push_back(Chain);
3990 break;
3991 case NVPTXISD::Suld3DV2I16Clamp:
3992 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3993 Ops.push_back(TexHandle);
3994 Ops.push_back(N->getOperand(2));
3995 Ops.push_back(N->getOperand(3));
3996 Ops.push_back(N->getOperand(4));
3997 Ops.push_back(Chain);
3998 break;
3999 case NVPTXISD::Suld3DV2I32Clamp:
4000 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
4001 Ops.push_back(TexHandle);
4002 Ops.push_back(N->getOperand(2));
4003 Ops.push_back(N->getOperand(3));
4004 Ops.push_back(N->getOperand(4));
4005 Ops.push_back(Chain);
4006 break;
4007 case NVPTXISD::Suld3DV2I64Clamp:
4008 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
4009 Ops.push_back(TexHandle);
4010 Ops.push_back(N->getOperand(2));
4011 Ops.push_back(N->getOperand(3));
4012 Ops.push_back(N->getOperand(4));
4013 Ops.push_back(Chain);
4014 break;
4015 case NVPTXISD::Suld3DV4I8Clamp:
4016 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
4017 Ops.push_back(TexHandle);
4018 Ops.push_back(N->getOperand(2));
4019 Ops.push_back(N->getOperand(3));
4020 Ops.push_back(N->getOperand(4));
4021 Ops.push_back(Chain);
4022 break;
4023 case NVPTXISD::Suld3DV4I16Clamp:
4024 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
4025 Ops.push_back(TexHandle);
4026 Ops.push_back(N->getOperand(2));
4027 Ops.push_back(N->getOperand(3));
4028 Ops.push_back(N->getOperand(4));
4029 Ops.push_back(Chain);
4030 break;
4031 case NVPTXISD::Suld3DV4I32Clamp:
4032 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
4033 Ops.push_back(TexHandle);
4034 Ops.push_back(N->getOperand(2));
4035 Ops.push_back(N->getOperand(3));
4036 Ops.push_back(N->getOperand(4));
4037 Ops.push_back(Chain);
4038 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004039 case NVPTXISD::Suld1DI8Trap:
4040 Opc = NVPTX::SULD_1D_I8_TRAP;
4041 Ops.push_back(TexHandle);
4042 Ops.push_back(N->getOperand(2));
4043 Ops.push_back(Chain);
4044 break;
4045 case NVPTXISD::Suld1DI16Trap:
4046 Opc = NVPTX::SULD_1D_I16_TRAP;
4047 Ops.push_back(TexHandle);
4048 Ops.push_back(N->getOperand(2));
4049 Ops.push_back(Chain);
4050 break;
4051 case NVPTXISD::Suld1DI32Trap:
4052 Opc = NVPTX::SULD_1D_I32_TRAP;
4053 Ops.push_back(TexHandle);
4054 Ops.push_back(N->getOperand(2));
4055 Ops.push_back(Chain);
4056 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004057 case NVPTXISD::Suld1DI64Trap:
4058 Opc = NVPTX::SULD_1D_I64_TRAP;
4059 Ops.push_back(TexHandle);
4060 Ops.push_back(N->getOperand(2));
4061 Ops.push_back(Chain);
4062 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004063 case NVPTXISD::Suld1DV2I8Trap:
4064 Opc = NVPTX::SULD_1D_V2I8_TRAP;
4065 Ops.push_back(TexHandle);
4066 Ops.push_back(N->getOperand(2));
4067 Ops.push_back(Chain);
4068 break;
4069 case NVPTXISD::Suld1DV2I16Trap:
4070 Opc = NVPTX::SULD_1D_V2I16_TRAP;
4071 Ops.push_back(TexHandle);
4072 Ops.push_back(N->getOperand(2));
4073 Ops.push_back(Chain);
4074 break;
4075 case NVPTXISD::Suld1DV2I32Trap:
4076 Opc = NVPTX::SULD_1D_V2I32_TRAP;
4077 Ops.push_back(TexHandle);
4078 Ops.push_back(N->getOperand(2));
4079 Ops.push_back(Chain);
4080 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004081 case NVPTXISD::Suld1DV2I64Trap:
4082 Opc = NVPTX::SULD_1D_V2I64_TRAP;
4083 Ops.push_back(TexHandle);
4084 Ops.push_back(N->getOperand(2));
4085 Ops.push_back(Chain);
4086 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004087 case NVPTXISD::Suld1DV4I8Trap:
4088 Opc = NVPTX::SULD_1D_V4I8_TRAP;
4089 Ops.push_back(TexHandle);
4090 Ops.push_back(N->getOperand(2));
4091 Ops.push_back(Chain);
4092 break;
4093 case NVPTXISD::Suld1DV4I16Trap:
4094 Opc = NVPTX::SULD_1D_V4I16_TRAP;
4095 Ops.push_back(TexHandle);
4096 Ops.push_back(N->getOperand(2));
4097 Ops.push_back(Chain);
4098 break;
4099 case NVPTXISD::Suld1DV4I32Trap:
4100 Opc = NVPTX::SULD_1D_V4I32_TRAP;
4101 Ops.push_back(TexHandle);
4102 Ops.push_back(N->getOperand(2));
4103 Ops.push_back(Chain);
4104 break;
4105 case NVPTXISD::Suld1DArrayI8Trap:
4106 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4107 Ops.push_back(TexHandle);
4108 Ops.push_back(N->getOperand(2));
4109 Ops.push_back(N->getOperand(3));
4110 Ops.push_back(Chain);
4111 break;
4112 case NVPTXISD::Suld1DArrayI16Trap:
4113 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4114 Ops.push_back(TexHandle);
4115 Ops.push_back(N->getOperand(2));
4116 Ops.push_back(N->getOperand(3));
4117 Ops.push_back(Chain);
4118 break;
4119 case NVPTXISD::Suld1DArrayI32Trap:
4120 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4121 Ops.push_back(TexHandle);
4122 Ops.push_back(N->getOperand(2));
4123 Ops.push_back(N->getOperand(3));
4124 Ops.push_back(Chain);
4125 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004126 case NVPTXISD::Suld1DArrayI64Trap:
4127 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4128 Ops.push_back(TexHandle);
4129 Ops.push_back(N->getOperand(2));
4130 Ops.push_back(N->getOperand(3));
4131 Ops.push_back(Chain);
4132 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004133 case NVPTXISD::Suld1DArrayV2I8Trap:
4134 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4135 Ops.push_back(TexHandle);
4136 Ops.push_back(N->getOperand(2));
4137 Ops.push_back(N->getOperand(3));
4138 Ops.push_back(Chain);
4139 break;
4140 case NVPTXISD::Suld1DArrayV2I16Trap:
4141 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4142 Ops.push_back(TexHandle);
4143 Ops.push_back(N->getOperand(2));
4144 Ops.push_back(N->getOperand(3));
4145 Ops.push_back(Chain);
4146 break;
4147 case NVPTXISD::Suld1DArrayV2I32Trap:
4148 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4149 Ops.push_back(TexHandle);
4150 Ops.push_back(N->getOperand(2));
4151 Ops.push_back(N->getOperand(3));
4152 Ops.push_back(Chain);
4153 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004154 case NVPTXISD::Suld1DArrayV2I64Trap:
4155 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4156 Ops.push_back(TexHandle);
4157 Ops.push_back(N->getOperand(2));
4158 Ops.push_back(N->getOperand(3));
4159 Ops.push_back(Chain);
4160 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004161 case NVPTXISD::Suld1DArrayV4I8Trap:
4162 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4163 Ops.push_back(TexHandle);
4164 Ops.push_back(N->getOperand(2));
4165 Ops.push_back(N->getOperand(3));
4166 Ops.push_back(Chain);
4167 break;
4168 case NVPTXISD::Suld1DArrayV4I16Trap:
4169 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4170 Ops.push_back(TexHandle);
4171 Ops.push_back(N->getOperand(2));
4172 Ops.push_back(N->getOperand(3));
4173 Ops.push_back(Chain);
4174 break;
4175 case NVPTXISD::Suld1DArrayV4I32Trap:
4176 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4177 Ops.push_back(TexHandle);
4178 Ops.push_back(N->getOperand(2));
4179 Ops.push_back(N->getOperand(3));
4180 Ops.push_back(Chain);
4181 break;
4182 case NVPTXISD::Suld2DI8Trap:
4183 Opc = NVPTX::SULD_2D_I8_TRAP;
4184 Ops.push_back(TexHandle);
4185 Ops.push_back(N->getOperand(2));
4186 Ops.push_back(N->getOperand(3));
4187 Ops.push_back(Chain);
4188 break;
4189 case NVPTXISD::Suld2DI16Trap:
4190 Opc = NVPTX::SULD_2D_I16_TRAP;
4191 Ops.push_back(TexHandle);
4192 Ops.push_back(N->getOperand(2));
4193 Ops.push_back(N->getOperand(3));
4194 Ops.push_back(Chain);
4195 break;
4196 case NVPTXISD::Suld2DI32Trap:
4197 Opc = NVPTX::SULD_2D_I32_TRAP;
4198 Ops.push_back(TexHandle);
4199 Ops.push_back(N->getOperand(2));
4200 Ops.push_back(N->getOperand(3));
4201 Ops.push_back(Chain);
4202 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004203 case NVPTXISD::Suld2DI64Trap:
4204 Opc = NVPTX::SULD_2D_I64_TRAP;
4205 Ops.push_back(TexHandle);
4206 Ops.push_back(N->getOperand(2));
4207 Ops.push_back(N->getOperand(3));
4208 Ops.push_back(Chain);
4209 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004210 case NVPTXISD::Suld2DV2I8Trap:
4211 Opc = NVPTX::SULD_2D_V2I8_TRAP;
4212 Ops.push_back(TexHandle);
4213 Ops.push_back(N->getOperand(2));
4214 Ops.push_back(N->getOperand(3));
4215 Ops.push_back(Chain);
4216 break;
4217 case NVPTXISD::Suld2DV2I16Trap:
4218 Opc = NVPTX::SULD_2D_V2I16_TRAP;
4219 Ops.push_back(TexHandle);
4220 Ops.push_back(N->getOperand(2));
4221 Ops.push_back(N->getOperand(3));
4222 Ops.push_back(Chain);
4223 break;
4224 case NVPTXISD::Suld2DV2I32Trap:
4225 Opc = NVPTX::SULD_2D_V2I32_TRAP;
4226 Ops.push_back(TexHandle);
4227 Ops.push_back(N->getOperand(2));
4228 Ops.push_back(N->getOperand(3));
4229 Ops.push_back(Chain);
4230 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004231 case NVPTXISD::Suld2DV2I64Trap:
4232 Opc = NVPTX::SULD_2D_V2I64_TRAP;
4233 Ops.push_back(TexHandle);
4234 Ops.push_back(N->getOperand(2));
4235 Ops.push_back(N->getOperand(3));
4236 Ops.push_back(Chain);
4237 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004238 case NVPTXISD::Suld2DV4I8Trap:
4239 Opc = NVPTX::SULD_2D_V4I8_TRAP;
4240 Ops.push_back(TexHandle);
4241 Ops.push_back(N->getOperand(2));
4242 Ops.push_back(N->getOperand(3));
4243 Ops.push_back(Chain);
4244 break;
4245 case NVPTXISD::Suld2DV4I16Trap:
4246 Opc = NVPTX::SULD_2D_V4I16_TRAP;
4247 Ops.push_back(TexHandle);
4248 Ops.push_back(N->getOperand(2));
4249 Ops.push_back(N->getOperand(3));
4250 Ops.push_back(Chain);
4251 break;
4252 case NVPTXISD::Suld2DV4I32Trap:
4253 Opc = NVPTX::SULD_2D_V4I32_TRAP;
4254 Ops.push_back(TexHandle);
4255 Ops.push_back(N->getOperand(2));
4256 Ops.push_back(N->getOperand(3));
4257 Ops.push_back(Chain);
4258 break;
4259 case NVPTXISD::Suld2DArrayI8Trap:
4260 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4261 Ops.push_back(TexHandle);
4262 Ops.push_back(N->getOperand(2));
4263 Ops.push_back(N->getOperand(3));
4264 Ops.push_back(N->getOperand(4));
4265 Ops.push_back(Chain);
4266 break;
4267 case NVPTXISD::Suld2DArrayI16Trap:
4268 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4269 Ops.push_back(TexHandle);
4270 Ops.push_back(N->getOperand(2));
4271 Ops.push_back(N->getOperand(3));
4272 Ops.push_back(N->getOperand(4));
4273 Ops.push_back(Chain);
4274 break;
4275 case NVPTXISD::Suld2DArrayI32Trap:
4276 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4277 Ops.push_back(TexHandle);
4278 Ops.push_back(N->getOperand(2));
4279 Ops.push_back(N->getOperand(3));
4280 Ops.push_back(N->getOperand(4));
4281 Ops.push_back(Chain);
4282 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004283 case NVPTXISD::Suld2DArrayI64Trap:
4284 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4285 Ops.push_back(TexHandle);
4286 Ops.push_back(N->getOperand(2));
4287 Ops.push_back(N->getOperand(3));
4288 Ops.push_back(N->getOperand(4));
4289 Ops.push_back(Chain);
4290 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004291 case NVPTXISD::Suld2DArrayV2I8Trap:
4292 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4293 Ops.push_back(TexHandle);
4294 Ops.push_back(N->getOperand(2));
4295 Ops.push_back(N->getOperand(3));
4296 Ops.push_back(N->getOperand(4));
4297 Ops.push_back(Chain);
4298 break;
4299 case NVPTXISD::Suld2DArrayV2I16Trap:
4300 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4301 Ops.push_back(TexHandle);
4302 Ops.push_back(N->getOperand(2));
4303 Ops.push_back(N->getOperand(3));
4304 Ops.push_back(N->getOperand(4));
4305 Ops.push_back(Chain);
4306 break;
4307 case NVPTXISD::Suld2DArrayV2I32Trap:
4308 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4309 Ops.push_back(TexHandle);
4310 Ops.push_back(N->getOperand(2));
4311 Ops.push_back(N->getOperand(3));
4312 Ops.push_back(N->getOperand(4));
4313 Ops.push_back(Chain);
4314 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004315 case NVPTXISD::Suld2DArrayV2I64Trap:
4316 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4317 Ops.push_back(TexHandle);
4318 Ops.push_back(N->getOperand(2));
4319 Ops.push_back(N->getOperand(3));
4320 Ops.push_back(N->getOperand(4));
4321 Ops.push_back(Chain);
4322 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004323 case NVPTXISD::Suld2DArrayV4I8Trap:
4324 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4325 Ops.push_back(TexHandle);
4326 Ops.push_back(N->getOperand(2));
4327 Ops.push_back(N->getOperand(3));
4328 Ops.push_back(N->getOperand(4));
4329 Ops.push_back(Chain);
4330 break;
4331 case NVPTXISD::Suld2DArrayV4I16Trap:
4332 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4333 Ops.push_back(TexHandle);
4334 Ops.push_back(N->getOperand(2));
4335 Ops.push_back(N->getOperand(3));
4336 Ops.push_back(N->getOperand(4));
4337 Ops.push_back(Chain);
4338 break;
4339 case NVPTXISD::Suld2DArrayV4I32Trap:
4340 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4341 Ops.push_back(TexHandle);
4342 Ops.push_back(N->getOperand(2));
4343 Ops.push_back(N->getOperand(3));
4344 Ops.push_back(N->getOperand(4));
4345 Ops.push_back(Chain);
4346 break;
4347 case NVPTXISD::Suld3DI8Trap:
4348 Opc = NVPTX::SULD_3D_I8_TRAP;
4349 Ops.push_back(TexHandle);
4350 Ops.push_back(N->getOperand(2));
4351 Ops.push_back(N->getOperand(3));
4352 Ops.push_back(N->getOperand(4));
4353 Ops.push_back(Chain);
4354 break;
4355 case NVPTXISD::Suld3DI16Trap:
4356 Opc = NVPTX::SULD_3D_I16_TRAP;
4357 Ops.push_back(TexHandle);
4358 Ops.push_back(N->getOperand(2));
4359 Ops.push_back(N->getOperand(3));
4360 Ops.push_back(N->getOperand(4));
4361 Ops.push_back(Chain);
4362 break;
4363 case NVPTXISD::Suld3DI32Trap:
4364 Opc = NVPTX::SULD_3D_I32_TRAP;
4365 Ops.push_back(TexHandle);
4366 Ops.push_back(N->getOperand(2));
4367 Ops.push_back(N->getOperand(3));
4368 Ops.push_back(N->getOperand(4));
4369 Ops.push_back(Chain);
4370 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004371 case NVPTXISD::Suld3DI64Trap:
4372 Opc = NVPTX::SULD_3D_I64_TRAP;
4373 Ops.push_back(TexHandle);
4374 Ops.push_back(N->getOperand(2));
4375 Ops.push_back(N->getOperand(3));
4376 Ops.push_back(N->getOperand(4));
4377 Ops.push_back(Chain);
4378 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004379 case NVPTXISD::Suld3DV2I8Trap:
4380 Opc = NVPTX::SULD_3D_V2I8_TRAP;
4381 Ops.push_back(TexHandle);
4382 Ops.push_back(N->getOperand(2));
4383 Ops.push_back(N->getOperand(3));
4384 Ops.push_back(N->getOperand(4));
4385 Ops.push_back(Chain);
4386 break;
4387 case NVPTXISD::Suld3DV2I16Trap:
4388 Opc = NVPTX::SULD_3D_V2I16_TRAP;
4389 Ops.push_back(TexHandle);
4390 Ops.push_back(N->getOperand(2));
4391 Ops.push_back(N->getOperand(3));
4392 Ops.push_back(N->getOperand(4));
4393 Ops.push_back(Chain);
4394 break;
4395 case NVPTXISD::Suld3DV2I32Trap:
4396 Opc = NVPTX::SULD_3D_V2I32_TRAP;
4397 Ops.push_back(TexHandle);
4398 Ops.push_back(N->getOperand(2));
4399 Ops.push_back(N->getOperand(3));
4400 Ops.push_back(N->getOperand(4));
4401 Ops.push_back(Chain);
4402 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004403 case NVPTXISD::Suld3DV2I64Trap:
4404 Opc = NVPTX::SULD_3D_V2I64_TRAP;
4405 Ops.push_back(TexHandle);
4406 Ops.push_back(N->getOperand(2));
4407 Ops.push_back(N->getOperand(3));
4408 Ops.push_back(N->getOperand(4));
4409 Ops.push_back(Chain);
4410 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004411 case NVPTXISD::Suld3DV4I8Trap:
4412 Opc = NVPTX::SULD_3D_V4I8_TRAP;
4413 Ops.push_back(TexHandle);
4414 Ops.push_back(N->getOperand(2));
4415 Ops.push_back(N->getOperand(3));
4416 Ops.push_back(N->getOperand(4));
4417 Ops.push_back(Chain);
4418 break;
4419 case NVPTXISD::Suld3DV4I16Trap:
4420 Opc = NVPTX::SULD_3D_V4I16_TRAP;
4421 Ops.push_back(TexHandle);
4422 Ops.push_back(N->getOperand(2));
4423 Ops.push_back(N->getOperand(3));
4424 Ops.push_back(N->getOperand(4));
4425 Ops.push_back(Chain);
4426 break;
4427 case NVPTXISD::Suld3DV4I32Trap:
4428 Opc = NVPTX::SULD_3D_V4I32_TRAP;
4429 Ops.push_back(TexHandle);
4430 Ops.push_back(N->getOperand(2));
4431 Ops.push_back(N->getOperand(3));
4432 Ops.push_back(N->getOperand(4));
4433 Ops.push_back(Chain);
4434 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004435 case NVPTXISD::Suld1DI8Zero:
4436 Opc = NVPTX::SULD_1D_I8_ZERO;
4437 Ops.push_back(TexHandle);
4438 Ops.push_back(N->getOperand(2));
4439 Ops.push_back(Chain);
4440 break;
4441 case NVPTXISD::Suld1DI16Zero:
4442 Opc = NVPTX::SULD_1D_I16_ZERO;
4443 Ops.push_back(TexHandle);
4444 Ops.push_back(N->getOperand(2));
4445 Ops.push_back(Chain);
4446 break;
4447 case NVPTXISD::Suld1DI32Zero:
4448 Opc = NVPTX::SULD_1D_I32_ZERO;
4449 Ops.push_back(TexHandle);
4450 Ops.push_back(N->getOperand(2));
4451 Ops.push_back(Chain);
4452 break;
4453 case NVPTXISD::Suld1DI64Zero:
4454 Opc = NVPTX::SULD_1D_I64_ZERO;
4455 Ops.push_back(TexHandle);
4456 Ops.push_back(N->getOperand(2));
4457 Ops.push_back(Chain);
4458 break;
4459 case NVPTXISD::Suld1DV2I8Zero:
4460 Opc = NVPTX::SULD_1D_V2I8_ZERO;
4461 Ops.push_back(TexHandle);
4462 Ops.push_back(N->getOperand(2));
4463 Ops.push_back(Chain);
4464 break;
4465 case NVPTXISD::Suld1DV2I16Zero:
4466 Opc = NVPTX::SULD_1D_V2I16_ZERO;
4467 Ops.push_back(TexHandle);
4468 Ops.push_back(N->getOperand(2));
4469 Ops.push_back(Chain);
4470 break;
4471 case NVPTXISD::Suld1DV2I32Zero:
4472 Opc = NVPTX::SULD_1D_V2I32_ZERO;
4473 Ops.push_back(TexHandle);
4474 Ops.push_back(N->getOperand(2));
4475 Ops.push_back(Chain);
4476 break;
4477 case NVPTXISD::Suld1DV2I64Zero:
4478 Opc = NVPTX::SULD_1D_V2I64_ZERO;
4479 Ops.push_back(TexHandle);
4480 Ops.push_back(N->getOperand(2));
4481 Ops.push_back(Chain);
4482 break;
4483 case NVPTXISD::Suld1DV4I8Zero:
4484 Opc = NVPTX::SULD_1D_V4I8_ZERO;
4485 Ops.push_back(TexHandle);
4486 Ops.push_back(N->getOperand(2));
4487 Ops.push_back(Chain);
4488 break;
4489 case NVPTXISD::Suld1DV4I16Zero:
4490 Opc = NVPTX::SULD_1D_V4I16_ZERO;
4491 Ops.push_back(TexHandle);
4492 Ops.push_back(N->getOperand(2));
4493 Ops.push_back(Chain);
4494 break;
4495 case NVPTXISD::Suld1DV4I32Zero:
4496 Opc = NVPTX::SULD_1D_V4I32_ZERO;
4497 Ops.push_back(TexHandle);
4498 Ops.push_back(N->getOperand(2));
4499 Ops.push_back(Chain);
4500 break;
4501 case NVPTXISD::Suld1DArrayI8Zero:
4502 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4503 Ops.push_back(TexHandle);
4504 Ops.push_back(N->getOperand(2));
4505 Ops.push_back(N->getOperand(3));
4506 Ops.push_back(Chain);
4507 break;
4508 case NVPTXISD::Suld1DArrayI16Zero:
4509 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4510 Ops.push_back(TexHandle);
4511 Ops.push_back(N->getOperand(2));
4512 Ops.push_back(N->getOperand(3));
4513 Ops.push_back(Chain);
4514 break;
4515 case NVPTXISD::Suld1DArrayI32Zero:
4516 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4517 Ops.push_back(TexHandle);
4518 Ops.push_back(N->getOperand(2));
4519 Ops.push_back(N->getOperand(3));
4520 Ops.push_back(Chain);
4521 break;
4522 case NVPTXISD::Suld1DArrayI64Zero:
4523 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4524 Ops.push_back(TexHandle);
4525 Ops.push_back(N->getOperand(2));
4526 Ops.push_back(N->getOperand(3));
4527 Ops.push_back(Chain);
4528 break;
4529 case NVPTXISD::Suld1DArrayV2I8Zero:
4530 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4531 Ops.push_back(TexHandle);
4532 Ops.push_back(N->getOperand(2));
4533 Ops.push_back(N->getOperand(3));
4534 Ops.push_back(Chain);
4535 break;
4536 case NVPTXISD::Suld1DArrayV2I16Zero:
4537 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4538 Ops.push_back(TexHandle);
4539 Ops.push_back(N->getOperand(2));
4540 Ops.push_back(N->getOperand(3));
4541 Ops.push_back(Chain);
4542 break;
4543 case NVPTXISD::Suld1DArrayV2I32Zero:
4544 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4545 Ops.push_back(TexHandle);
4546 Ops.push_back(N->getOperand(2));
4547 Ops.push_back(N->getOperand(3));
4548 Ops.push_back(Chain);
4549 break;
4550 case NVPTXISD::Suld1DArrayV2I64Zero:
4551 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4552 Ops.push_back(TexHandle);
4553 Ops.push_back(N->getOperand(2));
4554 Ops.push_back(N->getOperand(3));
4555 Ops.push_back(Chain);
4556 break;
4557 case NVPTXISD::Suld1DArrayV4I8Zero:
4558 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4559 Ops.push_back(TexHandle);
4560 Ops.push_back(N->getOperand(2));
4561 Ops.push_back(N->getOperand(3));
4562 Ops.push_back(Chain);
4563 break;
4564 case NVPTXISD::Suld1DArrayV4I16Zero:
4565 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4566 Ops.push_back(TexHandle);
4567 Ops.push_back(N->getOperand(2));
4568 Ops.push_back(N->getOperand(3));
4569 Ops.push_back(Chain);
4570 break;
4571 case NVPTXISD::Suld1DArrayV4I32Zero:
4572 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4573 Ops.push_back(TexHandle);
4574 Ops.push_back(N->getOperand(2));
4575 Ops.push_back(N->getOperand(3));
4576 Ops.push_back(Chain);
4577 break;
4578 case NVPTXISD::Suld2DI8Zero:
4579 Opc = NVPTX::SULD_2D_I8_ZERO;
4580 Ops.push_back(TexHandle);
4581 Ops.push_back(N->getOperand(2));
4582 Ops.push_back(N->getOperand(3));
4583 Ops.push_back(Chain);
4584 break;
4585 case NVPTXISD::Suld2DI16Zero:
4586 Opc = NVPTX::SULD_2D_I16_ZERO;
4587 Ops.push_back(TexHandle);
4588 Ops.push_back(N->getOperand(2));
4589 Ops.push_back(N->getOperand(3));
4590 Ops.push_back(Chain);
4591 break;
4592 case NVPTXISD::Suld2DI32Zero:
4593 Opc = NVPTX::SULD_2D_I32_ZERO;
4594 Ops.push_back(TexHandle);
4595 Ops.push_back(N->getOperand(2));
4596 Ops.push_back(N->getOperand(3));
4597 Ops.push_back(Chain);
4598 break;
4599 case NVPTXISD::Suld2DI64Zero:
4600 Opc = NVPTX::SULD_2D_I64_ZERO;
4601 Ops.push_back(TexHandle);
4602 Ops.push_back(N->getOperand(2));
4603 Ops.push_back(N->getOperand(3));
4604 Ops.push_back(Chain);
4605 break;
4606 case NVPTXISD::Suld2DV2I8Zero:
4607 Opc = NVPTX::SULD_2D_V2I8_ZERO;
4608 Ops.push_back(TexHandle);
4609 Ops.push_back(N->getOperand(2));
4610 Ops.push_back(N->getOperand(3));
4611 Ops.push_back(Chain);
4612 break;
4613 case NVPTXISD::Suld2DV2I16Zero:
4614 Opc = NVPTX::SULD_2D_V2I16_ZERO;
4615 Ops.push_back(TexHandle);
4616 Ops.push_back(N->getOperand(2));
4617 Ops.push_back(N->getOperand(3));
4618 Ops.push_back(Chain);
4619 break;
4620 case NVPTXISD::Suld2DV2I32Zero:
4621 Opc = NVPTX::SULD_2D_V2I32_ZERO;
4622 Ops.push_back(TexHandle);
4623 Ops.push_back(N->getOperand(2));
4624 Ops.push_back(N->getOperand(3));
4625 Ops.push_back(Chain);
4626 break;
4627 case NVPTXISD::Suld2DV2I64Zero:
4628 Opc = NVPTX::SULD_2D_V2I64_ZERO;
4629 Ops.push_back(TexHandle);
4630 Ops.push_back(N->getOperand(2));
4631 Ops.push_back(N->getOperand(3));
4632 Ops.push_back(Chain);
4633 break;
4634 case NVPTXISD::Suld2DV4I8Zero:
4635 Opc = NVPTX::SULD_2D_V4I8_ZERO;
4636 Ops.push_back(TexHandle);
4637 Ops.push_back(N->getOperand(2));
4638 Ops.push_back(N->getOperand(3));
4639 Ops.push_back(Chain);
4640 break;
4641 case NVPTXISD::Suld2DV4I16Zero:
4642 Opc = NVPTX::SULD_2D_V4I16_ZERO;
4643 Ops.push_back(TexHandle);
4644 Ops.push_back(N->getOperand(2));
4645 Ops.push_back(N->getOperand(3));
4646 Ops.push_back(Chain);
4647 break;
4648 case NVPTXISD::Suld2DV4I32Zero:
4649 Opc = NVPTX::SULD_2D_V4I32_ZERO;
4650 Ops.push_back(TexHandle);
4651 Ops.push_back(N->getOperand(2));
4652 Ops.push_back(N->getOperand(3));
4653 Ops.push_back(Chain);
4654 break;
4655 case NVPTXISD::Suld2DArrayI8Zero:
4656 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4657 Ops.push_back(TexHandle);
4658 Ops.push_back(N->getOperand(2));
4659 Ops.push_back(N->getOperand(3));
4660 Ops.push_back(N->getOperand(4));
4661 Ops.push_back(Chain);
4662 break;
4663 case NVPTXISD::Suld2DArrayI16Zero:
4664 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4665 Ops.push_back(TexHandle);
4666 Ops.push_back(N->getOperand(2));
4667 Ops.push_back(N->getOperand(3));
4668 Ops.push_back(N->getOperand(4));
4669 Ops.push_back(Chain);
4670 break;
4671 case NVPTXISD::Suld2DArrayI32Zero:
4672 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4673 Ops.push_back(TexHandle);
4674 Ops.push_back(N->getOperand(2));
4675 Ops.push_back(N->getOperand(3));
4676 Ops.push_back(N->getOperand(4));
4677 Ops.push_back(Chain);
4678 break;
4679 case NVPTXISD::Suld2DArrayI64Zero:
4680 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4681 Ops.push_back(TexHandle);
4682 Ops.push_back(N->getOperand(2));
4683 Ops.push_back(N->getOperand(3));
4684 Ops.push_back(N->getOperand(4));
4685 Ops.push_back(Chain);
4686 break;
4687 case NVPTXISD::Suld2DArrayV2I8Zero:
4688 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4689 Ops.push_back(TexHandle);
4690 Ops.push_back(N->getOperand(2));
4691 Ops.push_back(N->getOperand(3));
4692 Ops.push_back(N->getOperand(4));
4693 Ops.push_back(Chain);
4694 break;
4695 case NVPTXISD::Suld2DArrayV2I16Zero:
4696 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4697 Ops.push_back(TexHandle);
4698 Ops.push_back(N->getOperand(2));
4699 Ops.push_back(N->getOperand(3));
4700 Ops.push_back(N->getOperand(4));
4701 Ops.push_back(Chain);
4702 break;
4703 case NVPTXISD::Suld2DArrayV2I32Zero:
4704 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4705 Ops.push_back(TexHandle);
4706 Ops.push_back(N->getOperand(2));
4707 Ops.push_back(N->getOperand(3));
4708 Ops.push_back(N->getOperand(4));
4709 Ops.push_back(Chain);
4710 break;
4711 case NVPTXISD::Suld2DArrayV2I64Zero:
4712 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4713 Ops.push_back(TexHandle);
4714 Ops.push_back(N->getOperand(2));
4715 Ops.push_back(N->getOperand(3));
4716 Ops.push_back(N->getOperand(4));
4717 Ops.push_back(Chain);
4718 break;
4719 case NVPTXISD::Suld2DArrayV4I8Zero:
4720 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4721 Ops.push_back(TexHandle);
4722 Ops.push_back(N->getOperand(2));
4723 Ops.push_back(N->getOperand(3));
4724 Ops.push_back(N->getOperand(4));
4725 Ops.push_back(Chain);
4726 break;
4727 case NVPTXISD::Suld2DArrayV4I16Zero:
4728 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4729 Ops.push_back(TexHandle);
4730 Ops.push_back(N->getOperand(2));
4731 Ops.push_back(N->getOperand(3));
4732 Ops.push_back(N->getOperand(4));
4733 Ops.push_back(Chain);
4734 break;
4735 case NVPTXISD::Suld2DArrayV4I32Zero:
4736 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4737 Ops.push_back(TexHandle);
4738 Ops.push_back(N->getOperand(2));
4739 Ops.push_back(N->getOperand(3));
4740 Ops.push_back(N->getOperand(4));
4741 Ops.push_back(Chain);
4742 break;
4743 case NVPTXISD::Suld3DI8Zero:
4744 Opc = NVPTX::SULD_3D_I8_ZERO;
4745 Ops.push_back(TexHandle);
4746 Ops.push_back(N->getOperand(2));
4747 Ops.push_back(N->getOperand(3));
4748 Ops.push_back(N->getOperand(4));
4749 Ops.push_back(Chain);
4750 break;
4751 case NVPTXISD::Suld3DI16Zero:
4752 Opc = NVPTX::SULD_3D_I16_ZERO;
4753 Ops.push_back(TexHandle);
4754 Ops.push_back(N->getOperand(2));
4755 Ops.push_back(N->getOperand(3));
4756 Ops.push_back(N->getOperand(4));
4757 Ops.push_back(Chain);
4758 break;
4759 case NVPTXISD::Suld3DI32Zero:
4760 Opc = NVPTX::SULD_3D_I32_ZERO;
4761 Ops.push_back(TexHandle);
4762 Ops.push_back(N->getOperand(2));
4763 Ops.push_back(N->getOperand(3));
4764 Ops.push_back(N->getOperand(4));
4765 Ops.push_back(Chain);
4766 break;
4767 case NVPTXISD::Suld3DI64Zero:
4768 Opc = NVPTX::SULD_3D_I64_ZERO;
4769 Ops.push_back(TexHandle);
4770 Ops.push_back(N->getOperand(2));
4771 Ops.push_back(N->getOperand(3));
4772 Ops.push_back(N->getOperand(4));
4773 Ops.push_back(Chain);
4774 break;
4775 case NVPTXISD::Suld3DV2I8Zero:
4776 Opc = NVPTX::SULD_3D_V2I8_ZERO;
4777 Ops.push_back(TexHandle);
4778 Ops.push_back(N->getOperand(2));
4779 Ops.push_back(N->getOperand(3));
4780 Ops.push_back(N->getOperand(4));
4781 Ops.push_back(Chain);
4782 break;
4783 case NVPTXISD::Suld3DV2I16Zero:
4784 Opc = NVPTX::SULD_3D_V2I16_ZERO;
4785 Ops.push_back(TexHandle);
4786 Ops.push_back(N->getOperand(2));
4787 Ops.push_back(N->getOperand(3));
4788 Ops.push_back(N->getOperand(4));
4789 Ops.push_back(Chain);
4790 break;
4791 case NVPTXISD::Suld3DV2I32Zero:
4792 Opc = NVPTX::SULD_3D_V2I32_ZERO;
4793 Ops.push_back(TexHandle);
4794 Ops.push_back(N->getOperand(2));
4795 Ops.push_back(N->getOperand(3));
4796 Ops.push_back(N->getOperand(4));
4797 Ops.push_back(Chain);
4798 break;
4799 case NVPTXISD::Suld3DV2I64Zero:
4800 Opc = NVPTX::SULD_3D_V2I64_ZERO;
4801 Ops.push_back(TexHandle);
4802 Ops.push_back(N->getOperand(2));
4803 Ops.push_back(N->getOperand(3));
4804 Ops.push_back(N->getOperand(4));
4805 Ops.push_back(Chain);
4806 break;
4807 case NVPTXISD::Suld3DV4I8Zero:
4808 Opc = NVPTX::SULD_3D_V4I8_ZERO;
4809 Ops.push_back(TexHandle);
4810 Ops.push_back(N->getOperand(2));
4811 Ops.push_back(N->getOperand(3));
4812 Ops.push_back(N->getOperand(4));
4813 Ops.push_back(Chain);
4814 break;
4815 case NVPTXISD::Suld3DV4I16Zero:
4816 Opc = NVPTX::SULD_3D_V4I16_ZERO;
4817 Ops.push_back(TexHandle);
4818 Ops.push_back(N->getOperand(2));
4819 Ops.push_back(N->getOperand(3));
4820 Ops.push_back(N->getOperand(4));
4821 Ops.push_back(Chain);
4822 break;
4823 case NVPTXISD::Suld3DV4I32Zero:
4824 Opc = NVPTX::SULD_3D_V4I32_ZERO;
4825 Ops.push_back(TexHandle);
4826 Ops.push_back(N->getOperand(2));
4827 Ops.push_back(N->getOperand(3));
4828 Ops.push_back(N->getOperand(4));
4829 Ops.push_back(Chain);
4830 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004831 }
4832 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4833 return Ret;
4834}
4835
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004836
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004837/// SelectBFE - Look for instruction sequences that can be made more efficient
4838/// by using the 'bfe' (bit-field extract) PTX instruction
4839SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004840 SDLoc DL(N);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004841 SDValue LHS = N->getOperand(0);
4842 SDValue RHS = N->getOperand(1);
4843 SDValue Len;
4844 SDValue Start;
4845 SDValue Val;
4846 bool IsSigned = false;
4847
4848 if (N->getOpcode() == ISD::AND) {
4849 // Canonicalize the operands
4850 // We want 'and %val, %mask'
4851 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4852 std::swap(LHS, RHS);
4853 }
4854
4855 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4856 if (!Mask) {
4857 // We need a constant mask on the RHS of the AND
4858 return NULL;
4859 }
4860
4861 // Extract the mask bits
4862 uint64_t MaskVal = Mask->getZExtValue();
4863 if (!isMask_64(MaskVal)) {
4864 // We *could* handle shifted masks here, but doing so would require an
4865 // 'and' operation to fix up the low-order bits so we would trade
4866 // shr+and for bfe+and, which has the same throughput
4867 return NULL;
4868 }
4869
4870 // How many bits are in our mask?
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00004871 uint64_t NumBits = countTrailingOnes(MaskVal);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004872 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004873
4874 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4875 // We have a 'srl/and' pair, extract the effective start bit and length
4876 Val = LHS.getNode()->getOperand(0);
4877 Start = LHS.getNode()->getOperand(1);
4878 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4879 if (StartConst) {
4880 uint64_t StartVal = StartConst->getZExtValue();
4881 // How many "good" bits do we have left? "good" is defined here as bits
4882 // that exist in the original value, not shifted in.
4883 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4884 if (NumBits > GoodBits) {
4885 // Do not handle the case where bits have been shifted in. In theory
4886 // we could handle this, but the cost is likely higher than just
4887 // emitting the srl/and pair.
4888 return NULL;
4889 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004890 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004891 } else {
4892 // Do not handle the case where the shift amount (can be zero if no srl
4893 // was found) is not constant. We could handle this case, but it would
4894 // require run-time logic that would be more expensive than just
4895 // emitting the srl/and pair.
4896 return NULL;
4897 }
4898 } else {
4899 // Do not handle the case where the LHS of the and is not a shift. While
4900 // it would be trivial to handle this case, it would just transform
4901 // 'and' -> 'bfe', but 'and' has higher-throughput.
4902 return NULL;
4903 }
4904 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4905 if (LHS->getOpcode() == ISD::AND) {
4906 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4907 if (!ShiftCnst) {
4908 // Shift amount must be constant
4909 return NULL;
4910 }
4911
4912 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4913
4914 SDValue AndLHS = LHS->getOperand(0);
4915 SDValue AndRHS = LHS->getOperand(1);
4916
4917 // Canonicalize the AND to have the mask on the RHS
4918 if (isa<ConstantSDNode>(AndLHS)) {
4919 std::swap(AndLHS, AndRHS);
4920 }
4921
4922 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4923 if (!MaskCnst) {
4924 // Mask must be constant
4925 return NULL;
4926 }
4927
4928 uint64_t MaskVal = MaskCnst->getZExtValue();
4929 uint64_t NumZeros;
4930 uint64_t NumBits;
4931 if (isMask_64(MaskVal)) {
4932 NumZeros = 0;
4933 // The number of bits in the result bitfield will be the number of
4934 // trailing ones (the AND) minus the number of bits we shift off
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00004935 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004936 } else if (isShiftedMask_64(MaskVal)) {
4937 NumZeros = countTrailingZeros(MaskVal);
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00004938 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004939 // The number of bits in the result bitfield will be the number of
4940 // trailing zeros plus the number of set bits in the mask minus the
4941 // number of bits we shift off
4942 NumBits = NumZeros + NumOnes - ShiftAmt;
4943 } else {
4944 // This is not a mask we can handle
4945 return NULL;
4946 }
4947
4948 if (ShiftAmt < NumZeros) {
4949 // Handling this case would require extra logic that would make this
4950 // transformation non-profitable
4951 return NULL;
4952 }
4953
4954 Val = AndLHS;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004955 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
4956 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004957 } else if (LHS->getOpcode() == ISD::SHL) {
4958 // Here, we have a pattern like:
4959 //
4960 // (sra (shl val, NN), MM)
4961 // or
4962 // (srl (shl val, NN), MM)
4963 //
4964 // If MM >= NN, we can efficiently optimize this with bfe
4965 Val = LHS->getOperand(0);
4966
4967 SDValue ShlRHS = LHS->getOperand(1);
4968 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4969 if (!ShlCnst) {
4970 // Shift amount must be constant
4971 return NULL;
4972 }
4973 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4974
4975 SDValue ShrRHS = RHS;
4976 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4977 if (!ShrCnst) {
4978 // Shift amount must be constant
4979 return NULL;
4980 }
4981 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4982
4983 // To avoid extra codegen and be profitable, we need Outer >= Inner
4984 if (OuterShiftAmt < InnerShiftAmt) {
4985 return NULL;
4986 }
4987
4988 // If the outer shift is more than the type size, we have no bitfield to
4989 // extract (since we also check that the inner shift is <= the outer shift
4990 // then this also implies that the inner shift is < the type size)
4991 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
4992 return NULL;
4993 }
4994
4995 Start =
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004996 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004997 Len =
4998 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004999 OuterShiftAmt, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005000
5001 if (N->getOpcode() == ISD::SRA) {
5002 // If we have a arithmetic right shift, we need to use the signed bfe
5003 // variant
5004 IsSigned = true;
5005 }
5006 } else {
5007 // No can do...
5008 return NULL;
5009 }
5010 } else {
5011 // No can do...
5012 return NULL;
5013 }
5014
5015
5016 unsigned Opc;
5017 // For the BFE operations we form here from "and" and "srl", always use the
5018 // unsigned variants.
5019 if (Val.getValueType() == MVT::i32) {
5020 if (IsSigned) {
5021 Opc = NVPTX::BFE_S32rii;
5022 } else {
5023 Opc = NVPTX::BFE_U32rii;
5024 }
5025 } else if (Val.getValueType() == MVT::i64) {
5026 if (IsSigned) {
5027 Opc = NVPTX::BFE_S64rii;
5028 } else {
5029 Opc = NVPTX::BFE_U64rii;
5030 }
5031 } else {
5032 // We cannot handle this type
5033 return NULL;
5034 }
5035
5036 SDValue Ops[] = {
5037 Val, Start, Len
5038 };
5039
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005040 return CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005041}
5042
Justin Holewinskiae556d32012-05-04 20:18:50 +00005043// SelectDirectAddr - Match a direct address for DAG.
5044// A direct address could be a globaladdress or externalsymbol.
5045bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
5046 // Return true if TGA or ES.
Justin Holewinski0497ab12013-03-30 14:29:21 +00005047 if (N.getOpcode() == ISD::TargetGlobalAddress ||
5048 N.getOpcode() == ISD::TargetExternalSymbol) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00005049 Address = N;
5050 return true;
5051 }
5052 if (N.getOpcode() == NVPTXISD::Wrapper) {
5053 Address = N.getOperand(0);
5054 return true;
5055 }
5056 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
5057 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
5058 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
5059 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
5060 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
5061 }
5062 return false;
5063}
5064
5065// symbol+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00005066bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
5067 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00005068 if (Addr.getOpcode() == ISD::ADD) {
5069 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00005070 SDValue base = Addr.getOperand(0);
Justin Holewinskiae556d32012-05-04 20:18:50 +00005071 if (SelectDirectAddr(base, Base)) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005072 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5073 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00005074 return true;
5075 }
5076 }
5077 }
5078 return false;
5079}
5080
5081// symbol+offset
5082bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
5083 SDValue &Base, SDValue &Offset) {
5084 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
5085}
5086
5087// symbol+offset
5088bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
5089 SDValue &Base, SDValue &Offset) {
5090 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
5091}
5092
5093// register+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00005094bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5095 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00005096 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5097 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005098 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00005099 return true;
5100 }
5101 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5102 Addr.getOpcode() == ISD::TargetGlobalAddress)
Justin Holewinski0497ab12013-03-30 14:29:21 +00005103 return false; // direct calls.
Justin Holewinskiae556d32012-05-04 20:18:50 +00005104
5105 if (Addr.getOpcode() == ISD::ADD) {
5106 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5107 return false;
5108 }
5109 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5110 if (FrameIndexSDNode *FIN =
Justin Holewinski0497ab12013-03-30 14:29:21 +00005111 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
Justin Holewinskiae556d32012-05-04 20:18:50 +00005112 // Constant offset from frame ref.
5113 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5114 else
5115 Base = Addr.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005116 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5117 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00005118 return true;
5119 }
5120 }
5121 return false;
5122}
5123
5124// register+offset
5125bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5126 SDValue &Base, SDValue &Offset) {
5127 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
5128}
5129
5130// register+offset
5131bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5132 SDValue &Base, SDValue &Offset) {
5133 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
5134}
5135
5136bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5137 unsigned int spN) const {
Craig Topper062a2ba2014-04-25 05:30:21 +00005138 const Value *Src = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00005139 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +00005140 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5141 return true;
5142 Src = mN->getMemOperand()->getValue();
Justin Holewinskiae556d32012-05-04 20:18:50 +00005143 }
5144 if (!Src)
5145 return false;
Craig Toppere3dcce92015-08-01 22:20:21 +00005146 if (auto *PT = dyn_cast<PointerType>(Src->getType()))
Justin Holewinskiae556d32012-05-04 20:18:50 +00005147 return (PT->getAddressSpace() == spN);
5148 return false;
5149}
5150
5151/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5152/// inline asm expressions.
Justin Holewinski0497ab12013-03-30 14:29:21 +00005153bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
Daniel Sanders60f1db02015-03-13 12:45:09 +00005154 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00005155 SDValue Op0, Op1;
Daniel Sanders60f1db02015-03-13 12:45:09 +00005156 switch (ConstraintID) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00005157 default:
5158 return true;
Daniel Sanders60f1db02015-03-13 12:45:09 +00005159 case InlineAsm::Constraint_m: // memory
Justin Holewinskiae556d32012-05-04 20:18:50 +00005160 if (SelectDirectAddr(Op, Op0)) {
5161 OutOps.push_back(Op0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005162 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
Justin Holewinskiae556d32012-05-04 20:18:50 +00005163 return false;
5164 }
5165 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5166 OutOps.push_back(Op0);
5167 OutOps.push_back(Op1);
5168 return false;
5169 }
5170 break;
5171 }
5172 return true;
5173}
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00005174
5175/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
5176/// conversion from \p SrcTy to \p DestTy.
5177unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
5178 bool IsSigned) {
5179 switch (SrcTy.SimpleTy) {
5180 default:
5181 llvm_unreachable("Unhandled source type");
5182 case MVT::i8:
5183 switch (DestTy.SimpleTy) {
5184 default:
5185 llvm_unreachable("Unhandled dest type");
5186 case MVT::i16:
5187 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
5188 case MVT::i32:
5189 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
5190 case MVT::i64:
5191 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
5192 }
5193 case MVT::i16:
5194 switch (DestTy.SimpleTy) {
5195 default:
5196 llvm_unreachable("Unhandled dest type");
5197 case MVT::i8:
5198 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
5199 case MVT::i32:
5200 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
5201 case MVT::i64:
5202 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
5203 }
5204 case MVT::i32:
5205 switch (DestTy.SimpleTy) {
5206 default:
5207 llvm_unreachable("Unhandled dest type");
5208 case MVT::i8:
5209 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
5210 case MVT::i16:
5211 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
5212 case MVT::i64:
5213 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
5214 }
5215 case MVT::i64:
5216 switch (DestTy.SimpleTy) {
5217 default:
5218 llvm_unreachable("Unhandled dest type");
5219 case MVT::i8:
5220 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
5221 case MVT::i16:
5222 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
5223 case MVT::i32:
5224 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
5225 }
5226 }
5227}