blob: b1ed2df7a17f28cb01f0ecc5d7448a57e8ad96f1 [file] [log] [blame]
Justin Holewinskiae556d32012-05-04 20:18:50 +00001//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the NVPTX target.
11//
12//===----------------------------------------------------------------------===//
13
Justin Holewinskiae556d32012-05-04 20:18:50 +000014#include "NVPTXISelDAGToDAG.h"
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +000015#include "NVPTXUtilities.h"
Jingyue Wu48a9bdc2015-07-20 21:28:54 +000016#include "llvm/Analysis/ValueTracking.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000017#include "llvm/IR/GlobalValue.h"
18#include "llvm/IR/Instructions.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000019#include "llvm/Support/CommandLine.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000020#include "llvm/Support/Debug.h"
21#include "llvm/Support/ErrorHandling.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000022#include "llvm/Support/raw_ostream.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000023#include "llvm/Target/TargetIntrinsicInfo.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000024
Justin Holewinskiae556d32012-05-04 20:18:50 +000025using namespace llvm;
26
Chandler Carruth84e68b22014-04-22 02:41:26 +000027#define DEBUG_TYPE "nvptx-isel"
28
Justin Holewinski0497ab12013-03-30 14:29:21 +000029static cl::opt<int> UsePrecDivF32(
Nadav Rotem7f27e0b2013-10-18 23:38:13 +000030 "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden,
Justin Holewinski0497ab12013-03-30 14:29:21 +000031 cl::desc("NVPTX Specifies: 0 use div.approx, 1 use div.full, 2 use"
Sylvestre Ledru469de192014-08-11 18:04:46 +000032 " IEEE Compliant F32 div.rnd if available."),
Justin Holewinski0497ab12013-03-30 14:29:21 +000033 cl::init(2));
Justin Holewinskiae556d32012-05-04 20:18:50 +000034
Justin Holewinski48f4ad32013-05-21 16:51:30 +000035static cl::opt<bool>
Nadav Rotem7f27e0b2013-10-18 23:38:13 +000036UsePrecSqrtF32("nvptx-prec-sqrtf32", cl::Hidden,
Justin Holewinski48f4ad32013-05-21 16:51:30 +000037 cl::desc("NVPTX Specific: 0 use sqrt.approx, 1 use sqrt.rn."),
38 cl::init(true));
39
Justin Holewinskicd069e62013-07-22 12:18:04 +000040static cl::opt<bool>
Nadav Rotem7f27e0b2013-10-18 23:38:13 +000041FtzEnabled("nvptx-f32ftz", cl::ZeroOrMore, cl::Hidden,
Justin Holewinskicd069e62013-07-22 12:18:04 +000042 cl::desc("NVPTX Specific: Flush f32 subnormals to sign-preserving zero."),
43 cl::init(false));
44
45
Justin Holewinskiae556d32012-05-04 20:18:50 +000046/// createNVPTXISelDag - This pass converts a legalized DAG into a
47/// NVPTX-specific DAG, ready for instruction scheduling.
48FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
49 llvm::CodeGenOpt::Level OptLevel) {
50 return new NVPTXDAGToDAGISel(TM, OptLevel);
51}
52
Justin Holewinskiae556d32012-05-04 20:18:50 +000053NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
54 CodeGenOpt::Level OptLevel)
Eric Christopher02389e32015-02-19 00:08:27 +000055 : SelectionDAGISel(tm, OptLevel), TM(tm) {
Justin Holewinskiae556d32012-05-04 20:18:50 +000056 doMulWide = (OptLevel > 0);
Justin Holewinskicd069e62013-07-22 12:18:04 +000057}
Justin Holewinskiae556d32012-05-04 20:18:50 +000058
Eric Christopher147bba22015-01-30 01:40:59 +000059bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
60 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
61 return SelectionDAGISel::runOnMachineFunction(MF);
62}
63
Justin Holewinskicd069e62013-07-22 12:18:04 +000064int NVPTXDAGToDAGISel::getDivF32Level() const {
65 if (UsePrecDivF32.getNumOccurrences() > 0) {
66 // If nvptx-prec-div32=N is used on the command-line, always honor it
67 return UsePrecDivF32;
68 } else {
69 // Otherwise, use div.approx if fast math is enabled
70 if (TM.Options.UnsafeFPMath)
71 return 0;
72 else
73 return 2;
74 }
75}
Justin Holewinskiae556d32012-05-04 20:18:50 +000076
Justin Holewinskicd069e62013-07-22 12:18:04 +000077bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
78 if (UsePrecSqrtF32.getNumOccurrences() > 0) {
79 // If nvptx-prec-sqrtf32 is used on the command-line, always honor it
80 return UsePrecSqrtF32;
81 } else {
82 // Otherwise, use sqrt.approx if fast math is enabled
Eli Bendersky3e840192015-03-23 16:26:23 +000083 return !TM.Options.UnsafeFPMath;
Justin Holewinskicd069e62013-07-22 12:18:04 +000084 }
85}
86
87bool NVPTXDAGToDAGISel::useF32FTZ() const {
88 if (FtzEnabled.getNumOccurrences() > 0) {
89 // If nvptx-f32ftz is used on the command-line, always honor it
90 return FtzEnabled;
91 } else {
92 const Function *F = MF->getFunction();
93 // Otherwise, check for an nvptx-f32ftz attribute on the function
94 if (F->hasFnAttribute("nvptx-f32ftz"))
Duncan P. N. Exon Smithb5054332015-02-14 15:35:43 +000095 return F->getFnAttribute("nvptx-f32ftz").getValueAsString() == "true";
Justin Holewinskicd069e62013-07-22 12:18:04 +000096 else
97 return false;
98 }
Justin Holewinskiae556d32012-05-04 20:18:50 +000099}
100
Justin Holewinski428cf0e2014-07-17 18:10:09 +0000101bool NVPTXDAGToDAGISel::allowFMA() const {
Eric Christopher147bba22015-01-30 01:40:59 +0000102 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
Justin Holewinski428cf0e2014-07-17 18:10:09 +0000103 return TL->allowFMA(*MF, OptLevel);
104}
105
Justin Holewinskiae556d32012-05-04 20:18:50 +0000106/// Select - Select instructions not customized! Used for
107/// expanded, promoted and normal instructions.
Justin Holewinski0497ab12013-03-30 14:29:21 +0000108SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000109
Tim Northover31d093c2013-09-22 08:21:56 +0000110 if (N->isMachineOpcode()) {
111 N->setNodeId(-1);
Craig Topper062a2ba2014-04-25 05:30:21 +0000112 return nullptr; // Already selected.
Tim Northover31d093c2013-09-22 08:21:56 +0000113 }
Justin Holewinskiae556d32012-05-04 20:18:50 +0000114
Craig Topper062a2ba2014-04-25 05:30:21 +0000115 SDNode *ResNode = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000116 switch (N->getOpcode()) {
117 case ISD::LOAD:
118 ResNode = SelectLoad(N);
119 break;
120 case ISD::STORE:
121 ResNode = SelectStore(N);
122 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000123 case NVPTXISD::LoadV2:
124 case NVPTXISD::LoadV4:
125 ResNode = SelectLoadVector(N);
126 break;
127 case NVPTXISD::LDGV2:
128 case NVPTXISD::LDGV4:
129 case NVPTXISD::LDUV2:
130 case NVPTXISD::LDUV4:
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000131 ResNode = SelectLDGLDU(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000132 break;
133 case NVPTXISD::StoreV2:
134 case NVPTXISD::StoreV4:
135 ResNode = SelectStoreVector(N);
136 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000137 case NVPTXISD::LoadParam:
138 case NVPTXISD::LoadParamV2:
139 case NVPTXISD::LoadParamV4:
140 ResNode = SelectLoadParam(N);
141 break;
142 case NVPTXISD::StoreRetval:
143 case NVPTXISD::StoreRetvalV2:
144 case NVPTXISD::StoreRetvalV4:
145 ResNode = SelectStoreRetval(N);
146 break;
147 case NVPTXISD::StoreParam:
148 case NVPTXISD::StoreParamV2:
149 case NVPTXISD::StoreParamV4:
150 case NVPTXISD::StoreParamS32:
151 case NVPTXISD::StoreParamU32:
152 ResNode = SelectStoreParam(N);
153 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000154 case ISD::INTRINSIC_WO_CHAIN:
155 ResNode = SelectIntrinsicNoChain(N);
156 break;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000157 case ISD::INTRINSIC_W_CHAIN:
158 ResNode = SelectIntrinsicChain(N);
159 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000160 case NVPTXISD::Tex1DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000161 case NVPTXISD::Tex1DFloatFloat:
162 case NVPTXISD::Tex1DFloatFloatLevel:
163 case NVPTXISD::Tex1DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000164 case NVPTXISD::Tex1DS32S32:
165 case NVPTXISD::Tex1DS32Float:
166 case NVPTXISD::Tex1DS32FloatLevel:
167 case NVPTXISD::Tex1DS32FloatGrad:
168 case NVPTXISD::Tex1DU32S32:
169 case NVPTXISD::Tex1DU32Float:
170 case NVPTXISD::Tex1DU32FloatLevel:
171 case NVPTXISD::Tex1DU32FloatGrad:
172 case NVPTXISD::Tex1DArrayFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000173 case NVPTXISD::Tex1DArrayFloatFloat:
174 case NVPTXISD::Tex1DArrayFloatFloatLevel:
175 case NVPTXISD::Tex1DArrayFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000176 case NVPTXISD::Tex1DArrayS32S32:
177 case NVPTXISD::Tex1DArrayS32Float:
178 case NVPTXISD::Tex1DArrayS32FloatLevel:
179 case NVPTXISD::Tex1DArrayS32FloatGrad:
180 case NVPTXISD::Tex1DArrayU32S32:
181 case NVPTXISD::Tex1DArrayU32Float:
182 case NVPTXISD::Tex1DArrayU32FloatLevel:
183 case NVPTXISD::Tex1DArrayU32FloatGrad:
184 case NVPTXISD::Tex2DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000185 case NVPTXISD::Tex2DFloatFloat:
186 case NVPTXISD::Tex2DFloatFloatLevel:
187 case NVPTXISD::Tex2DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000188 case NVPTXISD::Tex2DS32S32:
189 case NVPTXISD::Tex2DS32Float:
190 case NVPTXISD::Tex2DS32FloatLevel:
191 case NVPTXISD::Tex2DS32FloatGrad:
192 case NVPTXISD::Tex2DU32S32:
193 case NVPTXISD::Tex2DU32Float:
194 case NVPTXISD::Tex2DU32FloatLevel:
195 case NVPTXISD::Tex2DU32FloatGrad:
196 case NVPTXISD::Tex2DArrayFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000197 case NVPTXISD::Tex2DArrayFloatFloat:
198 case NVPTXISD::Tex2DArrayFloatFloatLevel:
199 case NVPTXISD::Tex2DArrayFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000200 case NVPTXISD::Tex2DArrayS32S32:
201 case NVPTXISD::Tex2DArrayS32Float:
202 case NVPTXISD::Tex2DArrayS32FloatLevel:
203 case NVPTXISD::Tex2DArrayS32FloatGrad:
204 case NVPTXISD::Tex2DArrayU32S32:
205 case NVPTXISD::Tex2DArrayU32Float:
206 case NVPTXISD::Tex2DArrayU32FloatLevel:
207 case NVPTXISD::Tex2DArrayU32FloatGrad:
208 case NVPTXISD::Tex3DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000209 case NVPTXISD::Tex3DFloatFloat:
210 case NVPTXISD::Tex3DFloatFloatLevel:
211 case NVPTXISD::Tex3DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000212 case NVPTXISD::Tex3DS32S32:
213 case NVPTXISD::Tex3DS32Float:
214 case NVPTXISD::Tex3DS32FloatLevel:
215 case NVPTXISD::Tex3DS32FloatGrad:
216 case NVPTXISD::Tex3DU32S32:
217 case NVPTXISD::Tex3DU32Float:
218 case NVPTXISD::Tex3DU32FloatLevel:
219 case NVPTXISD::Tex3DU32FloatGrad:
220 case NVPTXISD::TexCubeFloatFloat:
221 case NVPTXISD::TexCubeFloatFloatLevel:
222 case NVPTXISD::TexCubeS32Float:
223 case NVPTXISD::TexCubeS32FloatLevel:
224 case NVPTXISD::TexCubeU32Float:
225 case NVPTXISD::TexCubeU32FloatLevel:
226 case NVPTXISD::TexCubeArrayFloatFloat:
227 case NVPTXISD::TexCubeArrayFloatFloatLevel:
228 case NVPTXISD::TexCubeArrayS32Float:
229 case NVPTXISD::TexCubeArrayS32FloatLevel:
230 case NVPTXISD::TexCubeArrayU32Float:
231 case NVPTXISD::TexCubeArrayU32FloatLevel:
232 case NVPTXISD::Tld4R2DFloatFloat:
233 case NVPTXISD::Tld4G2DFloatFloat:
234 case NVPTXISD::Tld4B2DFloatFloat:
235 case NVPTXISD::Tld4A2DFloatFloat:
236 case NVPTXISD::Tld4R2DS64Float:
237 case NVPTXISD::Tld4G2DS64Float:
238 case NVPTXISD::Tld4B2DS64Float:
239 case NVPTXISD::Tld4A2DS64Float:
240 case NVPTXISD::Tld4R2DU64Float:
241 case NVPTXISD::Tld4G2DU64Float:
242 case NVPTXISD::Tld4B2DU64Float:
243 case NVPTXISD::Tld4A2DU64Float:
244 case NVPTXISD::TexUnified1DFloatS32:
245 case NVPTXISD::TexUnified1DFloatFloat:
246 case NVPTXISD::TexUnified1DFloatFloatLevel:
247 case NVPTXISD::TexUnified1DFloatFloatGrad:
248 case NVPTXISD::TexUnified1DS32S32:
249 case NVPTXISD::TexUnified1DS32Float:
250 case NVPTXISD::TexUnified1DS32FloatLevel:
251 case NVPTXISD::TexUnified1DS32FloatGrad:
252 case NVPTXISD::TexUnified1DU32S32:
253 case NVPTXISD::TexUnified1DU32Float:
254 case NVPTXISD::TexUnified1DU32FloatLevel:
255 case NVPTXISD::TexUnified1DU32FloatGrad:
256 case NVPTXISD::TexUnified1DArrayFloatS32:
257 case NVPTXISD::TexUnified1DArrayFloatFloat:
258 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
259 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
260 case NVPTXISD::TexUnified1DArrayS32S32:
261 case NVPTXISD::TexUnified1DArrayS32Float:
262 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
263 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
264 case NVPTXISD::TexUnified1DArrayU32S32:
265 case NVPTXISD::TexUnified1DArrayU32Float:
266 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
267 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
268 case NVPTXISD::TexUnified2DFloatS32:
269 case NVPTXISD::TexUnified2DFloatFloat:
270 case NVPTXISD::TexUnified2DFloatFloatLevel:
271 case NVPTXISD::TexUnified2DFloatFloatGrad:
272 case NVPTXISD::TexUnified2DS32S32:
273 case NVPTXISD::TexUnified2DS32Float:
274 case NVPTXISD::TexUnified2DS32FloatLevel:
275 case NVPTXISD::TexUnified2DS32FloatGrad:
276 case NVPTXISD::TexUnified2DU32S32:
277 case NVPTXISD::TexUnified2DU32Float:
278 case NVPTXISD::TexUnified2DU32FloatLevel:
279 case NVPTXISD::TexUnified2DU32FloatGrad:
280 case NVPTXISD::TexUnified2DArrayFloatS32:
281 case NVPTXISD::TexUnified2DArrayFloatFloat:
282 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
283 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
284 case NVPTXISD::TexUnified2DArrayS32S32:
285 case NVPTXISD::TexUnified2DArrayS32Float:
286 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
287 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
288 case NVPTXISD::TexUnified2DArrayU32S32:
289 case NVPTXISD::TexUnified2DArrayU32Float:
290 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
291 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
292 case NVPTXISD::TexUnified3DFloatS32:
293 case NVPTXISD::TexUnified3DFloatFloat:
294 case NVPTXISD::TexUnified3DFloatFloatLevel:
295 case NVPTXISD::TexUnified3DFloatFloatGrad:
296 case NVPTXISD::TexUnified3DS32S32:
297 case NVPTXISD::TexUnified3DS32Float:
298 case NVPTXISD::TexUnified3DS32FloatLevel:
299 case NVPTXISD::TexUnified3DS32FloatGrad:
300 case NVPTXISD::TexUnified3DU32S32:
301 case NVPTXISD::TexUnified3DU32Float:
302 case NVPTXISD::TexUnified3DU32FloatLevel:
303 case NVPTXISD::TexUnified3DU32FloatGrad:
304 case NVPTXISD::TexUnifiedCubeFloatFloat:
305 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
306 case NVPTXISD::TexUnifiedCubeS32Float:
307 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
308 case NVPTXISD::TexUnifiedCubeU32Float:
309 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
310 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
311 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
312 case NVPTXISD::TexUnifiedCubeArrayS32Float:
313 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
314 case NVPTXISD::TexUnifiedCubeArrayU32Float:
315 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
316 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
317 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
318 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
319 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
320 case NVPTXISD::Tld4UnifiedR2DS64Float:
321 case NVPTXISD::Tld4UnifiedG2DS64Float:
322 case NVPTXISD::Tld4UnifiedB2DS64Float:
323 case NVPTXISD::Tld4UnifiedA2DS64Float:
324 case NVPTXISD::Tld4UnifiedR2DU64Float:
325 case NVPTXISD::Tld4UnifiedG2DU64Float:
326 case NVPTXISD::Tld4UnifiedB2DU64Float:
327 case NVPTXISD::Tld4UnifiedA2DU64Float:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000328 ResNode = SelectTextureIntrinsic(N);
329 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000330 case NVPTXISD::Suld1DI8Clamp:
331 case NVPTXISD::Suld1DI16Clamp:
332 case NVPTXISD::Suld1DI32Clamp:
333 case NVPTXISD::Suld1DI64Clamp:
334 case NVPTXISD::Suld1DV2I8Clamp:
335 case NVPTXISD::Suld1DV2I16Clamp:
336 case NVPTXISD::Suld1DV2I32Clamp:
337 case NVPTXISD::Suld1DV2I64Clamp:
338 case NVPTXISD::Suld1DV4I8Clamp:
339 case NVPTXISD::Suld1DV4I16Clamp:
340 case NVPTXISD::Suld1DV4I32Clamp:
341 case NVPTXISD::Suld1DArrayI8Clamp:
342 case NVPTXISD::Suld1DArrayI16Clamp:
343 case NVPTXISD::Suld1DArrayI32Clamp:
344 case NVPTXISD::Suld1DArrayI64Clamp:
345 case NVPTXISD::Suld1DArrayV2I8Clamp:
346 case NVPTXISD::Suld1DArrayV2I16Clamp:
347 case NVPTXISD::Suld1DArrayV2I32Clamp:
348 case NVPTXISD::Suld1DArrayV2I64Clamp:
349 case NVPTXISD::Suld1DArrayV4I8Clamp:
350 case NVPTXISD::Suld1DArrayV4I16Clamp:
351 case NVPTXISD::Suld1DArrayV4I32Clamp:
352 case NVPTXISD::Suld2DI8Clamp:
353 case NVPTXISD::Suld2DI16Clamp:
354 case NVPTXISD::Suld2DI32Clamp:
355 case NVPTXISD::Suld2DI64Clamp:
356 case NVPTXISD::Suld2DV2I8Clamp:
357 case NVPTXISD::Suld2DV2I16Clamp:
358 case NVPTXISD::Suld2DV2I32Clamp:
359 case NVPTXISD::Suld2DV2I64Clamp:
360 case NVPTXISD::Suld2DV4I8Clamp:
361 case NVPTXISD::Suld2DV4I16Clamp:
362 case NVPTXISD::Suld2DV4I32Clamp:
363 case NVPTXISD::Suld2DArrayI8Clamp:
364 case NVPTXISD::Suld2DArrayI16Clamp:
365 case NVPTXISD::Suld2DArrayI32Clamp:
366 case NVPTXISD::Suld2DArrayI64Clamp:
367 case NVPTXISD::Suld2DArrayV2I8Clamp:
368 case NVPTXISD::Suld2DArrayV2I16Clamp:
369 case NVPTXISD::Suld2DArrayV2I32Clamp:
370 case NVPTXISD::Suld2DArrayV2I64Clamp:
371 case NVPTXISD::Suld2DArrayV4I8Clamp:
372 case NVPTXISD::Suld2DArrayV4I16Clamp:
373 case NVPTXISD::Suld2DArrayV4I32Clamp:
374 case NVPTXISD::Suld3DI8Clamp:
375 case NVPTXISD::Suld3DI16Clamp:
376 case NVPTXISD::Suld3DI32Clamp:
377 case NVPTXISD::Suld3DI64Clamp:
378 case NVPTXISD::Suld3DV2I8Clamp:
379 case NVPTXISD::Suld3DV2I16Clamp:
380 case NVPTXISD::Suld3DV2I32Clamp:
381 case NVPTXISD::Suld3DV2I64Clamp:
382 case NVPTXISD::Suld3DV4I8Clamp:
383 case NVPTXISD::Suld3DV4I16Clamp:
384 case NVPTXISD::Suld3DV4I32Clamp:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000385 case NVPTXISD::Suld1DI8Trap:
386 case NVPTXISD::Suld1DI16Trap:
387 case NVPTXISD::Suld1DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000388 case NVPTXISD::Suld1DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000389 case NVPTXISD::Suld1DV2I8Trap:
390 case NVPTXISD::Suld1DV2I16Trap:
391 case NVPTXISD::Suld1DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000392 case NVPTXISD::Suld1DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000393 case NVPTXISD::Suld1DV4I8Trap:
394 case NVPTXISD::Suld1DV4I16Trap:
395 case NVPTXISD::Suld1DV4I32Trap:
396 case NVPTXISD::Suld1DArrayI8Trap:
397 case NVPTXISD::Suld1DArrayI16Trap:
398 case NVPTXISD::Suld1DArrayI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000399 case NVPTXISD::Suld1DArrayI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000400 case NVPTXISD::Suld1DArrayV2I8Trap:
401 case NVPTXISD::Suld1DArrayV2I16Trap:
402 case NVPTXISD::Suld1DArrayV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000403 case NVPTXISD::Suld1DArrayV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000404 case NVPTXISD::Suld1DArrayV4I8Trap:
405 case NVPTXISD::Suld1DArrayV4I16Trap:
406 case NVPTXISD::Suld1DArrayV4I32Trap:
407 case NVPTXISD::Suld2DI8Trap:
408 case NVPTXISD::Suld2DI16Trap:
409 case NVPTXISD::Suld2DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000410 case NVPTXISD::Suld2DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000411 case NVPTXISD::Suld2DV2I8Trap:
412 case NVPTXISD::Suld2DV2I16Trap:
413 case NVPTXISD::Suld2DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000414 case NVPTXISD::Suld2DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000415 case NVPTXISD::Suld2DV4I8Trap:
416 case NVPTXISD::Suld2DV4I16Trap:
417 case NVPTXISD::Suld2DV4I32Trap:
418 case NVPTXISD::Suld2DArrayI8Trap:
419 case NVPTXISD::Suld2DArrayI16Trap:
420 case NVPTXISD::Suld2DArrayI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000421 case NVPTXISD::Suld2DArrayI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000422 case NVPTXISD::Suld2DArrayV2I8Trap:
423 case NVPTXISD::Suld2DArrayV2I16Trap:
424 case NVPTXISD::Suld2DArrayV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000425 case NVPTXISD::Suld2DArrayV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000426 case NVPTXISD::Suld2DArrayV4I8Trap:
427 case NVPTXISD::Suld2DArrayV4I16Trap:
428 case NVPTXISD::Suld2DArrayV4I32Trap:
429 case NVPTXISD::Suld3DI8Trap:
430 case NVPTXISD::Suld3DI16Trap:
431 case NVPTXISD::Suld3DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000432 case NVPTXISD::Suld3DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000433 case NVPTXISD::Suld3DV2I8Trap:
434 case NVPTXISD::Suld3DV2I16Trap:
435 case NVPTXISD::Suld3DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000436 case NVPTXISD::Suld3DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000437 case NVPTXISD::Suld3DV4I8Trap:
438 case NVPTXISD::Suld3DV4I16Trap:
439 case NVPTXISD::Suld3DV4I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000440 case NVPTXISD::Suld1DI8Zero:
441 case NVPTXISD::Suld1DI16Zero:
442 case NVPTXISD::Suld1DI32Zero:
443 case NVPTXISD::Suld1DI64Zero:
444 case NVPTXISD::Suld1DV2I8Zero:
445 case NVPTXISD::Suld1DV2I16Zero:
446 case NVPTXISD::Suld1DV2I32Zero:
447 case NVPTXISD::Suld1DV2I64Zero:
448 case NVPTXISD::Suld1DV4I8Zero:
449 case NVPTXISD::Suld1DV4I16Zero:
450 case NVPTXISD::Suld1DV4I32Zero:
451 case NVPTXISD::Suld1DArrayI8Zero:
452 case NVPTXISD::Suld1DArrayI16Zero:
453 case NVPTXISD::Suld1DArrayI32Zero:
454 case NVPTXISD::Suld1DArrayI64Zero:
455 case NVPTXISD::Suld1DArrayV2I8Zero:
456 case NVPTXISD::Suld1DArrayV2I16Zero:
457 case NVPTXISD::Suld1DArrayV2I32Zero:
458 case NVPTXISD::Suld1DArrayV2I64Zero:
459 case NVPTXISD::Suld1DArrayV4I8Zero:
460 case NVPTXISD::Suld1DArrayV4I16Zero:
461 case NVPTXISD::Suld1DArrayV4I32Zero:
462 case NVPTXISD::Suld2DI8Zero:
463 case NVPTXISD::Suld2DI16Zero:
464 case NVPTXISD::Suld2DI32Zero:
465 case NVPTXISD::Suld2DI64Zero:
466 case NVPTXISD::Suld2DV2I8Zero:
467 case NVPTXISD::Suld2DV2I16Zero:
468 case NVPTXISD::Suld2DV2I32Zero:
469 case NVPTXISD::Suld2DV2I64Zero:
470 case NVPTXISD::Suld2DV4I8Zero:
471 case NVPTXISD::Suld2DV4I16Zero:
472 case NVPTXISD::Suld2DV4I32Zero:
473 case NVPTXISD::Suld2DArrayI8Zero:
474 case NVPTXISD::Suld2DArrayI16Zero:
475 case NVPTXISD::Suld2DArrayI32Zero:
476 case NVPTXISD::Suld2DArrayI64Zero:
477 case NVPTXISD::Suld2DArrayV2I8Zero:
478 case NVPTXISD::Suld2DArrayV2I16Zero:
479 case NVPTXISD::Suld2DArrayV2I32Zero:
480 case NVPTXISD::Suld2DArrayV2I64Zero:
481 case NVPTXISD::Suld2DArrayV4I8Zero:
482 case NVPTXISD::Suld2DArrayV4I16Zero:
483 case NVPTXISD::Suld2DArrayV4I32Zero:
484 case NVPTXISD::Suld3DI8Zero:
485 case NVPTXISD::Suld3DI16Zero:
486 case NVPTXISD::Suld3DI32Zero:
487 case NVPTXISD::Suld3DI64Zero:
488 case NVPTXISD::Suld3DV2I8Zero:
489 case NVPTXISD::Suld3DV2I16Zero:
490 case NVPTXISD::Suld3DV2I32Zero:
491 case NVPTXISD::Suld3DV2I64Zero:
492 case NVPTXISD::Suld3DV4I8Zero:
493 case NVPTXISD::Suld3DV4I16Zero:
494 case NVPTXISD::Suld3DV4I32Zero:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000495 ResNode = SelectSurfaceIntrinsic(N);
496 break;
Justin Holewinskica7a4f12014-06-27 18:35:27 +0000497 case ISD::AND:
498 case ISD::SRA:
499 case ISD::SRL:
500 // Try to select BFE
501 ResNode = SelectBFE(N);
502 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000503 case ISD::ADDRSPACECAST:
504 ResNode = SelectAddrSpaceCast(N);
505 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000506 default:
507 break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000508 }
509 if (ResNode)
510 return ResNode;
511 return SelectCode(N);
512}
513
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000514SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) {
515 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
516 switch (IID) {
517 default:
518 return NULL;
519 case Intrinsic::nvvm_ldg_global_f:
520 case Intrinsic::nvvm_ldg_global_i:
521 case Intrinsic::nvvm_ldg_global_p:
522 case Intrinsic::nvvm_ldu_global_f:
523 case Intrinsic::nvvm_ldu_global_i:
524 case Intrinsic::nvvm_ldu_global_p:
525 return SelectLDGLDU(N);
526 }
527}
528
Eric Christopher9745b3a2015-01-30 01:41:01 +0000529static unsigned int getCodeAddrSpace(MemSDNode *N) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +0000530 const Value *Src = N->getMemOperand()->getValue();
Justin Holewinskib96d1392013-06-10 13:29:47 +0000531
Justin Holewinskiae556d32012-05-04 20:18:50 +0000532 if (!Src)
Justin Holewinskib96d1392013-06-10 13:29:47 +0000533 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000534
Craig Toppere3dcce92015-08-01 22:20:21 +0000535 if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000536 switch (PT->getAddressSpace()) {
Justin Holewinskib96d1392013-06-10 13:29:47 +0000537 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
538 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
539 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
540 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
541 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
542 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
543 default: break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000544 }
545 }
Justin Holewinskib96d1392013-06-10 13:29:47 +0000546 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000547}
548
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000549static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000550 unsigned CodeAddrSpace, MachineFunction *F) {
551 // To use non-coherent caching, the load has to be from global
552 // memory and we have to prove that the memory area is not written
553 // to anywhere for the duration of the kernel call, not even after
554 // the load.
555 //
556 // To ensure that there are no writes to the memory, we require the
557 // underlying pointer to be a noalias (__restrict) kernel parameter
558 // that is never used for a write. We can only do this for kernel
559 // functions since from within a device function, we cannot know if
560 // there were or will be writes to the memory from the caller - or we
561 // could, but then we would have to do inter-procedural analysis.
562 if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
563 !isKernelFunction(*F->getFunction())) {
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000564 return false;
565 }
566
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000567 // We use GetUnderlyingObjects() here instead of
568 // GetUnderlyingObject() mainly because the former looks through phi
569 // nodes while the latter does not. We need to look through phi
570 // nodes to handle pointer induction variables.
571 SmallVector<Value *, 8> Objs;
572 GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
573 Objs, F->getDataLayout());
574 for (Value *Obj : Objs) {
575 auto *A = dyn_cast<const Argument>(Obj);
576 if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
577 }
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000578
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000579 return true;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000580}
581
Justin Holewinski30d56a72014-04-09 15:39:15 +0000582SDNode *NVPTXDAGToDAGISel::SelectIntrinsicNoChain(SDNode *N) {
583 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
584 switch (IID) {
585 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000586 return nullptr;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000587 case Intrinsic::nvvm_texsurf_handle_internal:
588 return SelectTexSurfHandle(N);
589 }
590}
591
592SDNode *NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
593 // Op 0 is the intrinsic ID
594 SDValue Wrapper = N->getOperand(1);
595 SDValue GlobalVal = Wrapper.getOperand(0);
596 return CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N), MVT::i64,
597 GlobalVal);
598}
599
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000600SDNode *NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
601 SDValue Src = N->getOperand(0);
602 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
603 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
604 unsigned DstAddrSpace = CastN->getDestAddressSpace();
605
606 assert(SrcAddrSpace != DstAddrSpace &&
607 "addrspacecast must be between different address spaces");
608
609 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
610 // Specific to generic
611 unsigned Opc;
612 switch (SrcAddrSpace) {
613 default: report_fatal_error("Bad address space in addrspacecast");
614 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000615 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000616 break;
617 case ADDRESS_SPACE_SHARED:
Eric Christopher02389e32015-02-19 00:08:27 +0000618 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000619 break;
620 case ADDRESS_SPACE_CONST:
Eric Christopher02389e32015-02-19 00:08:27 +0000621 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000622 break;
623 case ADDRESS_SPACE_LOCAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000624 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000625 break;
626 }
627 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
628 } else {
629 // Generic to specific
630 if (SrcAddrSpace != 0)
631 report_fatal_error("Cannot cast between two non-generic address spaces");
632 unsigned Opc;
633 switch (DstAddrSpace) {
634 default: report_fatal_error("Bad address space in addrspacecast");
635 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000636 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
637 : NVPTX::cvta_to_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000638 break;
639 case ADDRESS_SPACE_SHARED:
Eric Christopher02389e32015-02-19 00:08:27 +0000640 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
641 : NVPTX::cvta_to_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000642 break;
643 case ADDRESS_SPACE_CONST:
Eric Christopher02389e32015-02-19 00:08:27 +0000644 Opc =
645 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000646 break;
647 case ADDRESS_SPACE_LOCAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000648 Opc =
649 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000650 break;
Jingyue Wua2f60272015-06-04 21:28:26 +0000651 case ADDRESS_SPACE_PARAM:
652 Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
653 : NVPTX::nvvm_ptr_gen_to_param;
654 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000655 }
656 return CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0), Src);
657 }
658}
659
Justin Holewinski0497ab12013-03-30 14:29:21 +0000660SDNode *NVPTXDAGToDAGISel::SelectLoad(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000661 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000662 LoadSDNode *LD = cast<LoadSDNode>(N);
663 EVT LoadedVT = LD->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +0000664 SDNode *NVPTXLD = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000665
666 // do not support pre/post inc/dec
667 if (LD->isIndexed())
Craig Topper062a2ba2014-04-25 05:30:21 +0000668 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000669
670 if (!LoadedVT.isSimple())
Craig Topper062a2ba2014-04-25 05:30:21 +0000671 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000672
673 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +0000674 unsigned int codeAddrSpace = getCodeAddrSpace(LD);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000675
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000676 if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000677 return SelectLDGLDU(N);
678 }
679
Justin Holewinskiae556d32012-05-04 20:18:50 +0000680 // Volatile Setting
681 // - .volatile is only availalble for .global and .shared
682 bool isVolatile = LD->isVolatile();
683 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
684 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
685 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
686 isVolatile = false;
687
688 // Vector Setting
689 MVT SimpleVT = LoadedVT.getSimpleVT();
690 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
691 if (SimpleVT.isVector()) {
692 unsigned num = SimpleVT.getVectorNumElements();
693 if (num == 2)
694 vecType = NVPTX::PTXLdStInstCode::V2;
695 else if (num == 4)
696 vecType = NVPTX::PTXLdStInstCode::V4;
697 else
Craig Topper062a2ba2014-04-25 05:30:21 +0000698 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000699 }
700
701 // Type Setting: fromType + fromTypeWidth
702 //
703 // Sign : ISD::SEXTLOAD
704 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
705 // type is integer
706 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
707 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +0000708 // Read at least 8 bits (predicates are stored as 8-bit values)
709 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskiae556d32012-05-04 20:18:50 +0000710 unsigned int fromType;
711 if ((LD->getExtensionType() == ISD::SEXTLOAD))
712 fromType = NVPTX::PTXLdStInstCode::Signed;
713 else if (ScalarVT.isFloatingPoint())
714 fromType = NVPTX::PTXLdStInstCode::Float;
715 else
716 fromType = NVPTX::PTXLdStInstCode::Unsigned;
717
718 // Create the machine instruction DAG
719 SDValue Chain = N->getOperand(0);
720 SDValue N1 = N->getOperand(1);
721 SDValue Addr;
722 SDValue Offset, Base;
723 unsigned Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +0000724 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000725
726 if (SelectDirectAddr(N1, Addr)) {
727 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000728 case MVT::i8:
729 Opcode = NVPTX::LD_i8_avar;
730 break;
731 case MVT::i16:
732 Opcode = NVPTX::LD_i16_avar;
733 break;
734 case MVT::i32:
735 Opcode = NVPTX::LD_i32_avar;
736 break;
737 case MVT::i64:
738 Opcode = NVPTX::LD_i64_avar;
739 break;
740 case MVT::f32:
741 Opcode = NVPTX::LD_f32_avar;
742 break;
743 case MVT::f64:
744 Opcode = NVPTX::LD_f64_avar;
745 break;
746 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000747 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000748 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000749 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
750 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
751 getI32Imm(fromTypeWidth, dl), Addr, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000752 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +0000753 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
754 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000755 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000756 case MVT::i8:
757 Opcode = NVPTX::LD_i8_asi;
758 break;
759 case MVT::i16:
760 Opcode = NVPTX::LD_i16_asi;
761 break;
762 case MVT::i32:
763 Opcode = NVPTX::LD_i32_asi;
764 break;
765 case MVT::i64:
766 Opcode = NVPTX::LD_i64_asi;
767 break;
768 case MVT::f32:
769 Opcode = NVPTX::LD_f32_asi;
770 break;
771 case MVT::f64:
772 Opcode = NVPTX::LD_f64_asi;
773 break;
774 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000775 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000776 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000777 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
778 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
779 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000780 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +0000781 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
782 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
783 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000784 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000785 case MVT::i8:
786 Opcode = NVPTX::LD_i8_ari_64;
787 break;
788 case MVT::i16:
789 Opcode = NVPTX::LD_i16_ari_64;
790 break;
791 case MVT::i32:
792 Opcode = NVPTX::LD_i32_ari_64;
793 break;
794 case MVT::i64:
795 Opcode = NVPTX::LD_i64_ari_64;
796 break;
797 case MVT::f32:
798 Opcode = NVPTX::LD_f32_ari_64;
799 break;
800 case MVT::f64:
801 Opcode = NVPTX::LD_f64_ari_64;
802 break;
803 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000804 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000805 }
806 } else {
807 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000808 case MVT::i8:
809 Opcode = NVPTX::LD_i8_ari;
810 break;
811 case MVT::i16:
812 Opcode = NVPTX::LD_i16_ari;
813 break;
814 case MVT::i32:
815 Opcode = NVPTX::LD_i32_ari;
816 break;
817 case MVT::i64:
818 Opcode = NVPTX::LD_i64_ari;
819 break;
820 case MVT::f32:
821 Opcode = NVPTX::LD_f32_ari;
822 break;
823 case MVT::f64:
824 Opcode = NVPTX::LD_f64_ari;
825 break;
826 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000827 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000828 }
Justin Holewinskiae556d32012-05-04 20:18:50 +0000829 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000830 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
831 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
832 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000833 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000834 } else {
Eric Christopher02389e32015-02-19 00:08:27 +0000835 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000836 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000837 case MVT::i8:
838 Opcode = NVPTX::LD_i8_areg_64;
839 break;
840 case MVT::i16:
841 Opcode = NVPTX::LD_i16_areg_64;
842 break;
843 case MVT::i32:
844 Opcode = NVPTX::LD_i32_areg_64;
845 break;
846 case MVT::i64:
847 Opcode = NVPTX::LD_i64_areg_64;
848 break;
849 case MVT::f32:
850 Opcode = NVPTX::LD_f32_areg_64;
851 break;
852 case MVT::f64:
853 Opcode = NVPTX::LD_f64_areg_64;
854 break;
855 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000856 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000857 }
858 } else {
859 switch (TargetVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000860 case MVT::i8:
861 Opcode = NVPTX::LD_i8_areg;
862 break;
863 case MVT::i16:
864 Opcode = NVPTX::LD_i16_areg;
865 break;
866 case MVT::i32:
867 Opcode = NVPTX::LD_i32_areg;
868 break;
869 case MVT::i64:
870 Opcode = NVPTX::LD_i64_areg;
871 break;
872 case MVT::f32:
873 Opcode = NVPTX::LD_f32_areg;
874 break;
875 case MVT::f64:
876 Opcode = NVPTX::LD_f64_areg;
877 break;
878 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000879 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000880 }
Justin Holewinskiae556d32012-05-04 20:18:50 +0000881 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000882 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
883 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
884 getI32Imm(fromTypeWidth, dl), N1, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +0000885 NVPTXLD = CurDAG->getMachineNode(Opcode, dl, TargetVT, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000886 }
887
Craig Topper062a2ba2014-04-25 05:30:21 +0000888 if (NVPTXLD) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000889 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
890 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
891 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
892 }
893
894 return NVPTXLD;
895}
896
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000897SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) {
898
899 SDValue Chain = N->getOperand(0);
900 SDValue Op1 = N->getOperand(1);
901 SDValue Addr, Offset, Base;
902 unsigned Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +0000903 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000904 SDNode *LD;
905 MemSDNode *MemSD = cast<MemSDNode>(N);
906 EVT LoadedVT = MemSD->getMemoryVT();
907
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000908 if (!LoadedVT.isSimple())
Craig Topper062a2ba2014-04-25 05:30:21 +0000909 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000910
911 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +0000912 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000913
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000914 if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000915 return SelectLDGLDU(N);
916 }
917
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000918 // Volatile Setting
919 // - .volatile is only availalble for .global and .shared
920 bool IsVolatile = MemSD->isVolatile();
921 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
922 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
923 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
924 IsVolatile = false;
925
926 // Vector Setting
927 MVT SimpleVT = LoadedVT.getSimpleVT();
928
929 // Type Setting: fromType + fromTypeWidth
930 //
931 // Sign : ISD::SEXTLOAD
932 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
933 // type is integer
934 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
935 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +0000936 // Read at least 8 bits (predicates are stored as 8-bit values)
937 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000938 unsigned int FromType;
939 // The last operand holds the original LoadSDNode::getExtensionType() value
Justin Holewinski0497ab12013-03-30 14:29:21 +0000940 unsigned ExtensionType = cast<ConstantSDNode>(
941 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000942 if (ExtensionType == ISD::SEXTLOAD)
943 FromType = NVPTX::PTXLdStInstCode::Signed;
944 else if (ScalarVT.isFloatingPoint())
945 FromType = NVPTX::PTXLdStInstCode::Float;
946 else
947 FromType = NVPTX::PTXLdStInstCode::Unsigned;
948
949 unsigned VecType;
950
951 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000952 case NVPTXISD::LoadV2:
953 VecType = NVPTX::PTXLdStInstCode::V2;
954 break;
955 case NVPTXISD::LoadV4:
956 VecType = NVPTX::PTXLdStInstCode::V4;
957 break;
958 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000959 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000960 }
961
962 EVT EltVT = N->getValueType(0);
963
964 if (SelectDirectAddr(Op1, Addr)) {
965 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000966 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000967 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000968 case NVPTXISD::LoadV2:
969 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000970 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000971 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000972 case MVT::i8:
973 Opcode = NVPTX::LDV_i8_v2_avar;
974 break;
975 case MVT::i16:
976 Opcode = NVPTX::LDV_i16_v2_avar;
977 break;
978 case MVT::i32:
979 Opcode = NVPTX::LDV_i32_v2_avar;
980 break;
981 case MVT::i64:
982 Opcode = NVPTX::LDV_i64_v2_avar;
983 break;
984 case MVT::f32:
985 Opcode = NVPTX::LDV_f32_v2_avar;
986 break;
987 case MVT::f64:
988 Opcode = NVPTX::LDV_f64_v2_avar;
989 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000990 }
991 break;
992 case NVPTXISD::LoadV4:
993 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +0000994 default:
Craig Topper062a2ba2014-04-25 05:30:21 +0000995 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000996 case MVT::i8:
997 Opcode = NVPTX::LDV_i8_v4_avar;
998 break;
999 case MVT::i16:
1000 Opcode = NVPTX::LDV_i16_v4_avar;
1001 break;
1002 case MVT::i32:
1003 Opcode = NVPTX::LDV_i32_v4_avar;
1004 break;
1005 case MVT::f32:
1006 Opcode = NVPTX::LDV_f32_v4_avar;
1007 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001008 }
1009 break;
1010 }
1011
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001012 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1013 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1014 getI32Imm(FromTypeWidth, DL), Addr, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00001015 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001016 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1017 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001018 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001019 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001020 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001021 case NVPTXISD::LoadV2:
1022 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001023 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001024 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001025 case MVT::i8:
1026 Opcode = NVPTX::LDV_i8_v2_asi;
1027 break;
1028 case MVT::i16:
1029 Opcode = NVPTX::LDV_i16_v2_asi;
1030 break;
1031 case MVT::i32:
1032 Opcode = NVPTX::LDV_i32_v2_asi;
1033 break;
1034 case MVT::i64:
1035 Opcode = NVPTX::LDV_i64_v2_asi;
1036 break;
1037 case MVT::f32:
1038 Opcode = NVPTX::LDV_f32_v2_asi;
1039 break;
1040 case MVT::f64:
1041 Opcode = NVPTX::LDV_f64_v2_asi;
1042 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001043 }
1044 break;
1045 case NVPTXISD::LoadV4:
1046 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001047 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001048 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001049 case MVT::i8:
1050 Opcode = NVPTX::LDV_i8_v4_asi;
1051 break;
1052 case MVT::i16:
1053 Opcode = NVPTX::LDV_i16_v4_asi;
1054 break;
1055 case MVT::i32:
1056 Opcode = NVPTX::LDV_i32_v4_asi;
1057 break;
1058 case MVT::f32:
1059 Opcode = NVPTX::LDV_f32_v4_asi;
1060 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001061 }
1062 break;
1063 }
1064
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001065 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1066 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1067 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00001068 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001069 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1070 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1071 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001072 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001073 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001074 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001075 case NVPTXISD::LoadV2:
1076 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001077 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001078 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001079 case MVT::i8:
1080 Opcode = NVPTX::LDV_i8_v2_ari_64;
1081 break;
1082 case MVT::i16:
1083 Opcode = NVPTX::LDV_i16_v2_ari_64;
1084 break;
1085 case MVT::i32:
1086 Opcode = NVPTX::LDV_i32_v2_ari_64;
1087 break;
1088 case MVT::i64:
1089 Opcode = NVPTX::LDV_i64_v2_ari_64;
1090 break;
1091 case MVT::f32:
1092 Opcode = NVPTX::LDV_f32_v2_ari_64;
1093 break;
1094 case MVT::f64:
1095 Opcode = NVPTX::LDV_f64_v2_ari_64;
1096 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001097 }
1098 break;
1099 case NVPTXISD::LoadV4:
1100 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001101 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001102 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001103 case MVT::i8:
1104 Opcode = NVPTX::LDV_i8_v4_ari_64;
1105 break;
1106 case MVT::i16:
1107 Opcode = NVPTX::LDV_i16_v4_ari_64;
1108 break;
1109 case MVT::i32:
1110 Opcode = NVPTX::LDV_i32_v4_ari_64;
1111 break;
1112 case MVT::f32:
1113 Opcode = NVPTX::LDV_f32_v4_ari_64;
1114 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001115 }
1116 break;
1117 }
1118 } else {
1119 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001120 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001121 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001122 case NVPTXISD::LoadV2:
1123 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001124 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001125 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001126 case MVT::i8:
1127 Opcode = NVPTX::LDV_i8_v2_ari;
1128 break;
1129 case MVT::i16:
1130 Opcode = NVPTX::LDV_i16_v2_ari;
1131 break;
1132 case MVT::i32:
1133 Opcode = NVPTX::LDV_i32_v2_ari;
1134 break;
1135 case MVT::i64:
1136 Opcode = NVPTX::LDV_i64_v2_ari;
1137 break;
1138 case MVT::f32:
1139 Opcode = NVPTX::LDV_f32_v2_ari;
1140 break;
1141 case MVT::f64:
1142 Opcode = NVPTX::LDV_f64_v2_ari;
1143 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001144 }
1145 break;
1146 case NVPTXISD::LoadV4:
1147 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001148 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001149 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001150 case MVT::i8:
1151 Opcode = NVPTX::LDV_i8_v4_ari;
1152 break;
1153 case MVT::i16:
1154 Opcode = NVPTX::LDV_i16_v4_ari;
1155 break;
1156 case MVT::i32:
1157 Opcode = NVPTX::LDV_i32_v4_ari;
1158 break;
1159 case MVT::f32:
1160 Opcode = NVPTX::LDV_f32_v4_ari;
1161 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001162 }
1163 break;
1164 }
1165 }
1166
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001167 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1168 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1169 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001170
Michael Liaob53d8962013-04-19 22:22:57 +00001171 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001172 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001173 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001174 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001175 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001176 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001177 case NVPTXISD::LoadV2:
1178 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001179 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001180 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001181 case MVT::i8:
1182 Opcode = NVPTX::LDV_i8_v2_areg_64;
1183 break;
1184 case MVT::i16:
1185 Opcode = NVPTX::LDV_i16_v2_areg_64;
1186 break;
1187 case MVT::i32:
1188 Opcode = NVPTX::LDV_i32_v2_areg_64;
1189 break;
1190 case MVT::i64:
1191 Opcode = NVPTX::LDV_i64_v2_areg_64;
1192 break;
1193 case MVT::f32:
1194 Opcode = NVPTX::LDV_f32_v2_areg_64;
1195 break;
1196 case MVT::f64:
1197 Opcode = NVPTX::LDV_f64_v2_areg_64;
1198 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001199 }
1200 break;
1201 case NVPTXISD::LoadV4:
1202 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001203 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001204 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001205 case MVT::i8:
1206 Opcode = NVPTX::LDV_i8_v4_areg_64;
1207 break;
1208 case MVT::i16:
1209 Opcode = NVPTX::LDV_i16_v4_areg_64;
1210 break;
1211 case MVT::i32:
1212 Opcode = NVPTX::LDV_i32_v4_areg_64;
1213 break;
1214 case MVT::f32:
1215 Opcode = NVPTX::LDV_f32_v4_areg_64;
1216 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001217 }
1218 break;
1219 }
1220 } else {
1221 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001222 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001223 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001224 case NVPTXISD::LoadV2:
1225 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001226 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001227 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001228 case MVT::i8:
1229 Opcode = NVPTX::LDV_i8_v2_areg;
1230 break;
1231 case MVT::i16:
1232 Opcode = NVPTX::LDV_i16_v2_areg;
1233 break;
1234 case MVT::i32:
1235 Opcode = NVPTX::LDV_i32_v2_areg;
1236 break;
1237 case MVT::i64:
1238 Opcode = NVPTX::LDV_i64_v2_areg;
1239 break;
1240 case MVT::f32:
1241 Opcode = NVPTX::LDV_f32_v2_areg;
1242 break;
1243 case MVT::f64:
1244 Opcode = NVPTX::LDV_f64_v2_areg;
1245 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001246 }
1247 break;
1248 case NVPTXISD::LoadV4:
1249 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001250 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001251 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001252 case MVT::i8:
1253 Opcode = NVPTX::LDV_i8_v4_areg;
1254 break;
1255 case MVT::i16:
1256 Opcode = NVPTX::LDV_i16_v4_areg;
1257 break;
1258 case MVT::i32:
1259 Opcode = NVPTX::LDV_i32_v4_areg;
1260 break;
1261 case MVT::f32:
1262 Opcode = NVPTX::LDV_f32_v4_areg;
1263 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001264 }
1265 break;
1266 }
1267 }
1268
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001269 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1270 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1271 getI32Imm(FromTypeWidth, DL), Op1, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00001272 LD = CurDAG->getMachineNode(Opcode, DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001273 }
1274
1275 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1276 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1277 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1278
1279 return LD;
1280}
1281
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001282SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001283
1284 SDValue Chain = N->getOperand(0);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001285 SDValue Op1;
1286 MemSDNode *Mem;
1287 bool IsLDG = true;
1288
Justin Holewinskic7997922016-04-05 12:38:01 +00001289 // If this is an LDG intrinsic, the address is the third operand. If its an
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001290 // LDG/LDU SD node (from custom vector handling), then its the second operand
1291 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1292 Op1 = N->getOperand(2);
1293 Mem = cast<MemIntrinsicSDNode>(N);
1294 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1295 switch (IID) {
1296 default:
1297 return NULL;
1298 case Intrinsic::nvvm_ldg_global_f:
1299 case Intrinsic::nvvm_ldg_global_i:
1300 case Intrinsic::nvvm_ldg_global_p:
1301 IsLDG = true;
1302 break;
1303 case Intrinsic::nvvm_ldu_global_f:
1304 case Intrinsic::nvvm_ldu_global_i:
1305 case Intrinsic::nvvm_ldu_global_p:
1306 IsLDG = false;
1307 break;
1308 }
1309 } else {
1310 Op1 = N->getOperand(1);
1311 Mem = cast<MemSDNode>(N);
1312 }
1313
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001314 unsigned Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001315 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001316 SDNode *LD;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001317 SDValue Base, Offset, Addr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00001318
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001319 EVT EltVT = Mem->getMemoryVT();
Justin Holewinskic7997922016-04-05 12:38:01 +00001320 unsigned NumElts = 1;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001321 if (EltVT.isVector()) {
Justin Holewinskic7997922016-04-05 12:38:01 +00001322 NumElts = EltVT.getVectorNumElements();
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001323 EltVT = EltVT.getVectorElementType();
1324 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001325
Justin Holewinskic7997922016-04-05 12:38:01 +00001326 // Build the "promoted" result VTList for the load. If we are really loading
1327 // i8s, then the return type will be promoted to i16 since we do not expose
1328 // 8-bit registers in NVPTX.
1329 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1330 SmallVector<EVT, 5> InstVTs;
1331 for (unsigned i = 0; i != NumElts; ++i) {
1332 InstVTs.push_back(NodeVT);
1333 }
1334 InstVTs.push_back(MVT::Other);
1335 SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1336
Justin Holewinskie40e9292013-07-01 12:58:52 +00001337 if (SelectDirectAddr(Op1, Addr)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001338 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001339 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001340 return nullptr;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001341 case ISD::INTRINSIC_W_CHAIN:
1342 if (IsLDG) {
1343 switch (EltVT.getSimpleVT().SimpleTy) {
1344 default:
1345 return nullptr;
1346 case MVT::i8:
1347 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar;
1348 break;
1349 case MVT::i16:
1350 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar;
1351 break;
1352 case MVT::i32:
1353 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar;
1354 break;
1355 case MVT::i64:
1356 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar;
1357 break;
1358 case MVT::f32:
1359 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar;
1360 break;
1361 case MVT::f64:
1362 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar;
1363 break;
1364 }
1365 } else {
1366 switch (EltVT.getSimpleVT().SimpleTy) {
1367 default:
1368 return nullptr;
1369 case MVT::i8:
1370 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar;
1371 break;
1372 case MVT::i16:
1373 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar;
1374 break;
1375 case MVT::i32:
1376 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar;
1377 break;
1378 case MVT::i64:
1379 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar;
1380 break;
1381 case MVT::f32:
1382 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar;
1383 break;
1384 case MVT::f64:
1385 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar;
1386 break;
1387 }
1388 }
1389 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001390 case NVPTXISD::LDGV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001391 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001392 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001393 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001394 case MVT::i8:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001395 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001396 break;
1397 case MVT::i16:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001398 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001399 break;
1400 case MVT::i32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001401 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001402 break;
1403 case MVT::i64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001404 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001405 break;
1406 case MVT::f32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001407 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001408 break;
1409 case MVT::f64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001410 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001411 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001412 }
1413 break;
1414 case NVPTXISD::LDUV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001415 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001416 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001417 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001418 case MVT::i8:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001419 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001420 break;
1421 case MVT::i16:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001422 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001423 break;
1424 case MVT::i32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001425 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001426 break;
1427 case MVT::i64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001428 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001429 break;
1430 case MVT::f32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001431 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001432 break;
1433 case MVT::f64:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001434 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar;
1435 break;
1436 }
1437 break;
1438 case NVPTXISD::LDGV4:
1439 switch (EltVT.getSimpleVT().SimpleTy) {
1440 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001441 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001442 case MVT::i8:
1443 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar;
1444 break;
1445 case MVT::i16:
1446 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar;
1447 break;
1448 case MVT::i32:
1449 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar;
1450 break;
1451 case MVT::f32:
1452 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001453 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001454 }
1455 break;
1456 case NVPTXISD::LDUV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001457 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001458 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001459 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001460 case MVT::i8:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001461 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001462 break;
1463 case MVT::i16:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001464 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001465 break;
1466 case MVT::i32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001467 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001468 break;
1469 case MVT::f32:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001470 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001471 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001472 }
1473 break;
1474 }
Justin Holewinskie40e9292013-07-01 12:58:52 +00001475
1476 SDValue Ops[] = { Addr, Chain };
Justin Holewinskic7997922016-04-05 12:38:01 +00001477 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001478 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1479 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1480 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001481 switch (N->getOpcode()) {
1482 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001483 return nullptr;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001484 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001485 case ISD::INTRINSIC_W_CHAIN:
1486 if (IsLDG) {
1487 switch (EltVT.getSimpleVT().SimpleTy) {
1488 default:
1489 return nullptr;
1490 case MVT::i8:
1491 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64;
1492 break;
1493 case MVT::i16:
1494 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64;
1495 break;
1496 case MVT::i32:
1497 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64;
1498 break;
1499 case MVT::i64:
1500 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64;
1501 break;
1502 case MVT::f32:
1503 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64;
1504 break;
1505 case MVT::f64:
1506 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64;
1507 break;
1508 }
1509 } else {
1510 switch (EltVT.getSimpleVT().SimpleTy) {
1511 default:
1512 return nullptr;
1513 case MVT::i8:
1514 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64;
1515 break;
1516 case MVT::i16:
1517 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64;
1518 break;
1519 case MVT::i32:
1520 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64;
1521 break;
1522 case MVT::i64:
1523 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64;
1524 break;
1525 case MVT::f32:
1526 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64;
1527 break;
1528 case MVT::f64:
1529 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64;
1530 break;
1531 }
1532 }
1533 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001534 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001535 case NVPTXISD::LDGV2:
1536 switch (EltVT.getSimpleVT().SimpleTy) {
1537 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001538 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001539 case MVT::i8:
1540 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64;
1541 break;
1542 case MVT::i16:
1543 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64;
1544 break;
1545 case MVT::i32:
1546 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64;
1547 break;
1548 case MVT::i64:
1549 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64;
1550 break;
1551 case MVT::f32:
1552 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64;
1553 break;
1554 case MVT::f64:
1555 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64;
1556 break;
1557 }
1558 break;
1559 case NVPTXISD::LDUV2:
1560 switch (EltVT.getSimpleVT().SimpleTy) {
1561 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001562 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001563 case MVT::i8:
1564 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64;
1565 break;
1566 case MVT::i16:
1567 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64;
1568 break;
1569 case MVT::i32:
1570 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64;
1571 break;
1572 case MVT::i64:
1573 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64;
1574 break;
1575 case MVT::f32:
1576 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64;
1577 break;
1578 case MVT::f64:
1579 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64;
1580 break;
1581 }
1582 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001583 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001584 case NVPTXISD::LDGV4:
1585 switch (EltVT.getSimpleVT().SimpleTy) {
1586 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001587 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001588 case MVT::i8:
1589 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64;
1590 break;
1591 case MVT::i16:
1592 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64;
1593 break;
1594 case MVT::i32:
1595 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64;
1596 break;
1597 case MVT::f32:
1598 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64;
1599 break;
1600 }
1601 break;
1602 case NVPTXISD::LDUV4:
1603 switch (EltVT.getSimpleVT().SimpleTy) {
1604 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001605 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001606 case MVT::i8:
1607 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64;
1608 break;
1609 case MVT::i16:
1610 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64;
1611 break;
1612 case MVT::i32:
1613 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64;
1614 break;
1615 case MVT::f32:
1616 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64;
1617 break;
1618 }
1619 break;
1620 }
1621 } else {
1622 switch (N->getOpcode()) {
1623 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001624 return nullptr;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001625 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001626 case ISD::INTRINSIC_W_CHAIN:
1627 if (IsLDG) {
1628 switch (EltVT.getSimpleVT().SimpleTy) {
1629 default:
1630 return nullptr;
1631 case MVT::i8:
1632 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari;
1633 break;
1634 case MVT::i16:
1635 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari;
1636 break;
1637 case MVT::i32:
1638 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari;
1639 break;
1640 case MVT::i64:
1641 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari;
1642 break;
1643 case MVT::f32:
1644 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari;
1645 break;
1646 case MVT::f64:
1647 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari;
1648 break;
1649 }
1650 } else {
1651 switch (EltVT.getSimpleVT().SimpleTy) {
1652 default:
1653 return nullptr;
1654 case MVT::i8:
1655 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari;
1656 break;
1657 case MVT::i16:
1658 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari;
1659 break;
1660 case MVT::i32:
1661 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari;
1662 break;
1663 case MVT::i64:
1664 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari;
1665 break;
1666 case MVT::f32:
1667 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari;
1668 break;
1669 case MVT::f64:
1670 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari;
1671 break;
1672 }
1673 }
1674 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001675 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001676 case NVPTXISD::LDGV2:
1677 switch (EltVT.getSimpleVT().SimpleTy) {
1678 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001679 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001680 case MVT::i8:
1681 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32;
1682 break;
1683 case MVT::i16:
1684 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32;
1685 break;
1686 case MVT::i32:
1687 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32;
1688 break;
1689 case MVT::i64:
1690 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32;
1691 break;
1692 case MVT::f32:
1693 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32;
1694 break;
1695 case MVT::f64:
1696 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32;
1697 break;
1698 }
1699 break;
1700 case NVPTXISD::LDUV2:
1701 switch (EltVT.getSimpleVT().SimpleTy) {
1702 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001703 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001704 case MVT::i8:
1705 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32;
1706 break;
1707 case MVT::i16:
1708 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32;
1709 break;
1710 case MVT::i32:
1711 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32;
1712 break;
1713 case MVT::i64:
1714 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32;
1715 break;
1716 case MVT::f32:
1717 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32;
1718 break;
1719 case MVT::f64:
1720 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32;
1721 break;
1722 }
1723 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001724 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001725 case NVPTXISD::LDGV4:
1726 switch (EltVT.getSimpleVT().SimpleTy) {
1727 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001728 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001729 case MVT::i8:
1730 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32;
1731 break;
1732 case MVT::i16:
1733 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32;
1734 break;
1735 case MVT::i32:
1736 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32;
1737 break;
1738 case MVT::f32:
1739 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32;
1740 break;
1741 }
1742 break;
1743 case NVPTXISD::LDUV4:
1744 switch (EltVT.getSimpleVT().SimpleTy) {
1745 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001746 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001747 case MVT::i8:
1748 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32;
1749 break;
1750 case MVT::i16:
1751 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32;
1752 break;
1753 case MVT::i32:
1754 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32;
1755 break;
1756 case MVT::f32:
1757 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32;
1758 break;
1759 }
1760 break;
1761 }
1762 }
1763
1764 SDValue Ops[] = { Base, Offset, Chain };
1765
Justin Holewinskic7997922016-04-05 12:38:01 +00001766 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001767 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001768 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001769 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001770 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001771 return nullptr;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001772 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001773 case ISD::INTRINSIC_W_CHAIN:
1774 if (IsLDG) {
1775 switch (EltVT.getSimpleVT().SimpleTy) {
1776 default:
1777 return nullptr;
1778 case MVT::i8:
1779 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64;
1780 break;
1781 case MVT::i16:
1782 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64;
1783 break;
1784 case MVT::i32:
1785 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64;
1786 break;
1787 case MVT::i64:
1788 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64;
1789 break;
1790 case MVT::f32:
1791 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64;
1792 break;
1793 case MVT::f64:
1794 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64;
1795 break;
1796 }
1797 } else {
1798 switch (EltVT.getSimpleVT().SimpleTy) {
1799 default:
1800 return nullptr;
1801 case MVT::i8:
1802 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64;
1803 break;
1804 case MVT::i16:
1805 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64;
1806 break;
1807 case MVT::i32:
1808 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64;
1809 break;
1810 case MVT::i64:
1811 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64;
1812 break;
1813 case MVT::f32:
1814 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64;
1815 break;
1816 case MVT::f64:
1817 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64;
1818 break;
1819 }
1820 }
1821 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001822 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001823 case NVPTXISD::LDGV2:
1824 switch (EltVT.getSimpleVT().SimpleTy) {
1825 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001826 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001827 case MVT::i8:
1828 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64;
1829 break;
1830 case MVT::i16:
1831 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64;
1832 break;
1833 case MVT::i32:
1834 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64;
1835 break;
1836 case MVT::i64:
1837 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64;
1838 break;
1839 case MVT::f32:
1840 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64;
1841 break;
1842 case MVT::f64:
1843 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64;
1844 break;
1845 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001846 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001847 case NVPTXISD::LDUV2:
1848 switch (EltVT.getSimpleVT().SimpleTy) {
1849 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001850 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001851 case MVT::i8:
1852 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64;
1853 break;
1854 case MVT::i16:
1855 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64;
1856 break;
1857 case MVT::i32:
1858 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64;
1859 break;
1860 case MVT::i64:
1861 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64;
1862 break;
1863 case MVT::f32:
1864 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64;
1865 break;
1866 case MVT::f64:
1867 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64;
1868 break;
1869 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001870 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001871 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001872 case NVPTXISD::LDGV4:
1873 switch (EltVT.getSimpleVT().SimpleTy) {
1874 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001875 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001876 case MVT::i8:
1877 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64;
1878 break;
1879 case MVT::i16:
1880 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64;
1881 break;
1882 case MVT::i32:
1883 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64;
1884 break;
1885 case MVT::f32:
1886 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64;
1887 break;
1888 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001889 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001890 case NVPTXISD::LDUV4:
1891 switch (EltVT.getSimpleVT().SimpleTy) {
1892 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001893 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001894 case MVT::i8:
1895 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64;
1896 break;
1897 case MVT::i16:
1898 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64;
1899 break;
1900 case MVT::i32:
1901 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64;
1902 break;
1903 case MVT::f32:
1904 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64;
1905 break;
1906 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001907 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001908 }
Justin Holewinskie40e9292013-07-01 12:58:52 +00001909 } else {
1910 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001911 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001912 return nullptr;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001913 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001914 case ISD::INTRINSIC_W_CHAIN:
1915 if (IsLDG) {
1916 switch (EltVT.getSimpleVT().SimpleTy) {
1917 default:
1918 return nullptr;
1919 case MVT::i8:
1920 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg;
1921 break;
1922 case MVT::i16:
1923 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg;
1924 break;
1925 case MVT::i32:
1926 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg;
1927 break;
1928 case MVT::i64:
1929 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg;
1930 break;
1931 case MVT::f32:
1932 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg;
1933 break;
1934 case MVT::f64:
1935 Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg;
1936 break;
1937 }
1938 } else {
1939 switch (EltVT.getSimpleVT().SimpleTy) {
1940 default:
1941 return nullptr;
1942 case MVT::i8:
1943 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg;
1944 break;
1945 case MVT::i16:
1946 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg;
1947 break;
1948 case MVT::i32:
1949 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg;
1950 break;
1951 case MVT::i64:
1952 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg;
1953 break;
1954 case MVT::f32:
1955 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg;
1956 break;
1957 case MVT::f64:
1958 Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg;
1959 break;
1960 }
1961 }
1962 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001963 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001964 case NVPTXISD::LDGV2:
1965 switch (EltVT.getSimpleVT().SimpleTy) {
1966 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001967 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001968 case MVT::i8:
1969 Opcode = NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32;
1970 break;
1971 case MVT::i16:
1972 Opcode = NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32;
1973 break;
1974 case MVT::i32:
1975 Opcode = NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32;
1976 break;
1977 case MVT::i64:
1978 Opcode = NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32;
1979 break;
1980 case MVT::f32:
1981 Opcode = NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32;
1982 break;
1983 case MVT::f64:
1984 Opcode = NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32;
1985 break;
1986 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00001987 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001988 case NVPTXISD::LDUV2:
1989 switch (EltVT.getSimpleVT().SimpleTy) {
1990 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00001991 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001992 case MVT::i8:
1993 Opcode = NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32;
1994 break;
1995 case MVT::i16:
1996 Opcode = NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32;
1997 break;
1998 case MVT::i32:
1999 Opcode = NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32;
2000 break;
2001 case MVT::i64:
2002 Opcode = NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32;
2003 break;
2004 case MVT::f32:
2005 Opcode = NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32;
2006 break;
2007 case MVT::f64:
2008 Opcode = NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32;
2009 break;
2010 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00002011 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00002012 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00002013 case NVPTXISD::LDGV4:
2014 switch (EltVT.getSimpleVT().SimpleTy) {
2015 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002016 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002017 case MVT::i8:
2018 Opcode = NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32;
2019 break;
2020 case MVT::i16:
2021 Opcode = NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32;
2022 break;
2023 case MVT::i32:
2024 Opcode = NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32;
2025 break;
2026 case MVT::f32:
2027 Opcode = NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32;
2028 break;
2029 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00002030 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002031 case NVPTXISD::LDUV4:
2032 switch (EltVT.getSimpleVT().SimpleTy) {
2033 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002034 return nullptr;
Justin Holewinskie40e9292013-07-01 12:58:52 +00002035 case MVT::i8:
2036 Opcode = NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32;
2037 break;
2038 case MVT::i16:
2039 Opcode = NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32;
2040 break;
2041 case MVT::i32:
2042 Opcode = NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32;
2043 break;
2044 case MVT::f32:
2045 Opcode = NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32;
2046 break;
2047 }
Justin Holewinski0497ab12013-03-30 14:29:21 +00002048 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002049 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002050 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002051
Justin Holewinskie40e9292013-07-01 12:58:52 +00002052 SDValue Ops[] = { Op1, Chain };
Justin Holewinskic7997922016-04-05 12:38:01 +00002053 LD = CurDAG->getMachineNode(Opcode, DL, InstVTList, Ops);
Justin Holewinskie40e9292013-07-01 12:58:52 +00002054 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002055
2056 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00002057 MemRefs0[0] = Mem->getMemOperand();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002058 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
2059
Justin Holewinskic7997922016-04-05 12:38:01 +00002060 // For automatic generation of LDG (through SelectLoad[Vector], not the
2061 // intrinsics), we may have an extending load like:
2062 //
2063 // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
2064 //
2065 // Since we load an i8 value, the matching logic above will have selected an
2066 // LDG instruction that reads i8 and stores it in an i16 register (NVPTX does
2067 // not expose 8-bit registers):
2068 //
2069 // i16,ch = INT_PTX_LDG_GLOBAL_i8areg64 t7, t0
2070 //
2071 // To get the correct type in this case, truncate back to i8 and then extend
2072 // to the original load type.
2073 EVT OrigType = N->getValueType(0);
2074 LoadSDNode *LDSD = dyn_cast<LoadSDNode>(N);
2075 if (LDSD && EltVT == MVT::i8 && OrigType.getScalarSizeInBits() >= 32) {
2076 unsigned CvtOpc = 0;
2077
2078 switch (LDSD->getExtensionType()) {
2079 default:
2080 llvm_unreachable("An extension is required for i8 loads");
2081 break;
2082 case ISD::SEXTLOAD:
2083 switch (OrigType.getSimpleVT().SimpleTy) {
2084 default:
2085 llvm_unreachable("Unhandled integer load type");
2086 break;
2087 case MVT::i32:
2088 CvtOpc = NVPTX::CVT_s32_s8;
2089 break;
2090 case MVT::i64:
2091 CvtOpc = NVPTX::CVT_s64_s8;
2092 break;
2093 }
2094 break;
2095 case ISD::EXTLOAD:
2096 case ISD::ZEXTLOAD:
2097 switch (OrigType.getSimpleVT().SimpleTy) {
2098 default:
2099 llvm_unreachable("Unhandled integer load type");
2100 break;
2101 case MVT::i32:
2102 CvtOpc = NVPTX::CVT_u32_u8;
2103 break;
2104 case MVT::i64:
2105 CvtOpc = NVPTX::CVT_u64_u8;
2106 break;
2107 }
2108 break;
2109 }
2110
2111 // For each output value, truncate to i8 (since the upper 8 bits are
2112 // undefined) and then extend to the desired type.
2113 for (unsigned i = 0; i != NumElts; ++i) {
2114 SDValue Res(LD, i);
2115 SDValue OrigVal(N, i);
2116
2117 SDNode *CvtNode =
2118 CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
2119 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL, MVT::i32));
2120 ReplaceUses(OrigVal, SDValue(CvtNode, 0));
2121 }
2122 }
2123
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002124 return LD;
2125}
2126
Justin Holewinski0497ab12013-03-30 14:29:21 +00002127SDNode *NVPTXDAGToDAGISel::SelectStore(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00002128 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002129 StoreSDNode *ST = cast<StoreSDNode>(N);
2130 EVT StoreVT = ST->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +00002131 SDNode *NVPTXST = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002132
2133 // do not support pre/post inc/dec
2134 if (ST->isIndexed())
Craig Topper062a2ba2014-04-25 05:30:21 +00002135 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002136
2137 if (!StoreVT.isSimple())
Craig Topper062a2ba2014-04-25 05:30:21 +00002138 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002139
2140 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00002141 unsigned int codeAddrSpace = getCodeAddrSpace(ST);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002142
2143 // Volatile Setting
2144 // - .volatile is only availalble for .global and .shared
2145 bool isVolatile = ST->isVolatile();
2146 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2147 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2148 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2149 isVolatile = false;
2150
2151 // Vector Setting
2152 MVT SimpleVT = StoreVT.getSimpleVT();
2153 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
2154 if (SimpleVT.isVector()) {
2155 unsigned num = SimpleVT.getVectorNumElements();
2156 if (num == 2)
2157 vecType = NVPTX::PTXLdStInstCode::V2;
2158 else if (num == 4)
2159 vecType = NVPTX::PTXLdStInstCode::V4;
2160 else
Craig Topper062a2ba2014-04-25 05:30:21 +00002161 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002162 }
2163
2164 // Type Setting: toType + toTypeWidth
2165 // - for integer type, always use 'u'
2166 //
2167 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00002168 unsigned toTypeWidth = ScalarVT.getSizeInBits();
Justin Holewinskiae556d32012-05-04 20:18:50 +00002169 unsigned int toType;
2170 if (ScalarVT.isFloatingPoint())
2171 toType = NVPTX::PTXLdStInstCode::Float;
2172 else
2173 toType = NVPTX::PTXLdStInstCode::Unsigned;
2174
2175 // Create the machine instruction DAG
2176 SDValue Chain = N->getOperand(0);
2177 SDValue N1 = N->getOperand(1);
2178 SDValue N2 = N->getOperand(2);
2179 SDValue Addr;
2180 SDValue Offset, Base;
2181 unsigned Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +00002182 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002183
2184 if (SelectDirectAddr(N2, Addr)) {
2185 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002186 case MVT::i8:
2187 Opcode = NVPTX::ST_i8_avar;
2188 break;
2189 case MVT::i16:
2190 Opcode = NVPTX::ST_i16_avar;
2191 break;
2192 case MVT::i32:
2193 Opcode = NVPTX::ST_i32_avar;
2194 break;
2195 case MVT::i64:
2196 Opcode = NVPTX::ST_i64_avar;
2197 break;
2198 case MVT::f32:
2199 Opcode = NVPTX::ST_f32_avar;
2200 break;
2201 case MVT::f64:
2202 Opcode = NVPTX::ST_f64_avar;
2203 break;
2204 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002205 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002206 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002207 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2208 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2209 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
2210 Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00002211 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00002212 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2213 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00002214 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002215 case MVT::i8:
2216 Opcode = NVPTX::ST_i8_asi;
2217 break;
2218 case MVT::i16:
2219 Opcode = NVPTX::ST_i16_asi;
2220 break;
2221 case MVT::i32:
2222 Opcode = NVPTX::ST_i32_asi;
2223 break;
2224 case MVT::i64:
2225 Opcode = NVPTX::ST_i64_asi;
2226 break;
2227 case MVT::f32:
2228 Opcode = NVPTX::ST_f32_asi;
2229 break;
2230 case MVT::f64:
2231 Opcode = NVPTX::ST_f64_asi;
2232 break;
2233 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002234 return nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00002235 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002236 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2237 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2238 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2239 Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00002240 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00002241 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2242 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2243 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002244 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002245 case MVT::i8:
2246 Opcode = NVPTX::ST_i8_ari_64;
2247 break;
2248 case MVT::i16:
2249 Opcode = NVPTX::ST_i16_ari_64;
2250 break;
2251 case MVT::i32:
2252 Opcode = NVPTX::ST_i32_ari_64;
2253 break;
2254 case MVT::i64:
2255 Opcode = NVPTX::ST_i64_ari_64;
2256 break;
2257 case MVT::f32:
2258 Opcode = NVPTX::ST_f32_ari_64;
2259 break;
2260 case MVT::f64:
2261 Opcode = NVPTX::ST_f64_ari_64;
2262 break;
2263 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002264 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002265 }
2266 } else {
2267 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002268 case MVT::i8:
2269 Opcode = NVPTX::ST_i8_ari;
2270 break;
2271 case MVT::i16:
2272 Opcode = NVPTX::ST_i16_ari;
2273 break;
2274 case MVT::i32:
2275 Opcode = NVPTX::ST_i32_ari;
2276 break;
2277 case MVT::i64:
2278 Opcode = NVPTX::ST_i64_ari;
2279 break;
2280 case MVT::f32:
2281 Opcode = NVPTX::ST_f32_ari;
2282 break;
2283 case MVT::f64:
2284 Opcode = NVPTX::ST_f64_ari;
2285 break;
2286 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002287 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002288 }
Justin Holewinskiae556d32012-05-04 20:18:50 +00002289 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002290 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2291 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2292 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
2293 Offset, Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00002294 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002295 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00002296 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002297 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002298 case MVT::i8:
2299 Opcode = NVPTX::ST_i8_areg_64;
2300 break;
2301 case MVT::i16:
2302 Opcode = NVPTX::ST_i16_areg_64;
2303 break;
2304 case MVT::i32:
2305 Opcode = NVPTX::ST_i32_areg_64;
2306 break;
2307 case MVT::i64:
2308 Opcode = NVPTX::ST_i64_areg_64;
2309 break;
2310 case MVT::f32:
2311 Opcode = NVPTX::ST_f32_areg_64;
2312 break;
2313 case MVT::f64:
2314 Opcode = NVPTX::ST_f64_areg_64;
2315 break;
2316 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002317 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002318 }
2319 } else {
2320 switch (SourceVT) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002321 case MVT::i8:
2322 Opcode = NVPTX::ST_i8_areg;
2323 break;
2324 case MVT::i16:
2325 Opcode = NVPTX::ST_i16_areg;
2326 break;
2327 case MVT::i32:
2328 Opcode = NVPTX::ST_i32_areg;
2329 break;
2330 case MVT::i64:
2331 Opcode = NVPTX::ST_i64_areg;
2332 break;
2333 case MVT::f32:
2334 Opcode = NVPTX::ST_f32_areg;
2335 break;
2336 case MVT::f64:
2337 Opcode = NVPTX::ST_f64_areg;
2338 break;
2339 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002340 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002341 }
Justin Holewinskiae556d32012-05-04 20:18:50 +00002342 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002343 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
2344 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
2345 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
2346 Chain };
Michael Liaob53d8962013-04-19 22:22:57 +00002347 NVPTXST = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00002348 }
2349
Craig Topper062a2ba2014-04-25 05:30:21 +00002350 if (NVPTXST) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00002351 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2352 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2353 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2354 }
2355
2356 return NVPTXST;
2357}
2358
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002359SDNode *NVPTXDAGToDAGISel::SelectStoreVector(SDNode *N) {
2360 SDValue Chain = N->getOperand(0);
2361 SDValue Op1 = N->getOperand(1);
2362 SDValue Addr, Offset, Base;
2363 unsigned Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00002364 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002365 SDNode *ST;
2366 EVT EltVT = Op1.getValueType();
2367 MemSDNode *MemSD = cast<MemSDNode>(N);
2368 EVT StoreVT = MemSD->getMemoryVT();
2369
2370 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00002371 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002372
2373 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
2374 report_fatal_error("Cannot store to pointer that points to constant "
2375 "memory space");
2376 }
2377
2378 // Volatile Setting
2379 // - .volatile is only availalble for .global and .shared
2380 bool IsVolatile = MemSD->isVolatile();
2381 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
2382 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
2383 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
2384 IsVolatile = false;
2385
2386 // Type Setting: toType + toTypeWidth
2387 // - for integer type, always use 'u'
2388 assert(StoreVT.isSimple() && "Store value is not simple");
2389 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00002390 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002391 unsigned ToType;
2392 if (ScalarVT.isFloatingPoint())
2393 ToType = NVPTX::PTXLdStInstCode::Float;
2394 else
2395 ToType = NVPTX::PTXLdStInstCode::Unsigned;
2396
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002397 SmallVector<SDValue, 12> StOps;
2398 SDValue N2;
2399 unsigned VecType;
2400
2401 switch (N->getOpcode()) {
2402 case NVPTXISD::StoreV2:
2403 VecType = NVPTX::PTXLdStInstCode::V2;
2404 StOps.push_back(N->getOperand(1));
2405 StOps.push_back(N->getOperand(2));
2406 N2 = N->getOperand(3);
2407 break;
2408 case NVPTXISD::StoreV4:
2409 VecType = NVPTX::PTXLdStInstCode::V4;
2410 StOps.push_back(N->getOperand(1));
2411 StOps.push_back(N->getOperand(2));
2412 StOps.push_back(N->getOperand(3));
2413 StOps.push_back(N->getOperand(4));
2414 N2 = N->getOperand(5);
2415 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002416 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002417 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002418 }
2419
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002420 StOps.push_back(getI32Imm(IsVolatile, DL));
2421 StOps.push_back(getI32Imm(CodeAddrSpace, DL));
2422 StOps.push_back(getI32Imm(VecType, DL));
2423 StOps.push_back(getI32Imm(ToType, DL));
2424 StOps.push_back(getI32Imm(ToTypeWidth, DL));
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002425
2426 if (SelectDirectAddr(N2, Addr)) {
2427 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002428 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002429 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002430 case NVPTXISD::StoreV2:
2431 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002432 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002433 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002434 case MVT::i8:
2435 Opcode = NVPTX::STV_i8_v2_avar;
2436 break;
2437 case MVT::i16:
2438 Opcode = NVPTX::STV_i16_v2_avar;
2439 break;
2440 case MVT::i32:
2441 Opcode = NVPTX::STV_i32_v2_avar;
2442 break;
2443 case MVT::i64:
2444 Opcode = NVPTX::STV_i64_v2_avar;
2445 break;
2446 case MVT::f32:
2447 Opcode = NVPTX::STV_f32_v2_avar;
2448 break;
2449 case MVT::f64:
2450 Opcode = NVPTX::STV_f64_v2_avar;
2451 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002452 }
2453 break;
2454 case NVPTXISD::StoreV4:
2455 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002456 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002457 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002458 case MVT::i8:
2459 Opcode = NVPTX::STV_i8_v4_avar;
2460 break;
2461 case MVT::i16:
2462 Opcode = NVPTX::STV_i16_v4_avar;
2463 break;
2464 case MVT::i32:
2465 Opcode = NVPTX::STV_i32_v4_avar;
2466 break;
2467 case MVT::f32:
2468 Opcode = NVPTX::STV_f32_v4_avar;
2469 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002470 }
2471 break;
2472 }
2473 StOps.push_back(Addr);
Eric Christopher02389e32015-02-19 00:08:27 +00002474 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
2475 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002476 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002477 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002478 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002479 case NVPTXISD::StoreV2:
2480 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002481 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002482 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002483 case MVT::i8:
2484 Opcode = NVPTX::STV_i8_v2_asi;
2485 break;
2486 case MVT::i16:
2487 Opcode = NVPTX::STV_i16_v2_asi;
2488 break;
2489 case MVT::i32:
2490 Opcode = NVPTX::STV_i32_v2_asi;
2491 break;
2492 case MVT::i64:
2493 Opcode = NVPTX::STV_i64_v2_asi;
2494 break;
2495 case MVT::f32:
2496 Opcode = NVPTX::STV_f32_v2_asi;
2497 break;
2498 case MVT::f64:
2499 Opcode = NVPTX::STV_f64_v2_asi;
2500 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002501 }
2502 break;
2503 case NVPTXISD::StoreV4:
2504 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002505 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002506 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002507 case MVT::i8:
2508 Opcode = NVPTX::STV_i8_v4_asi;
2509 break;
2510 case MVT::i16:
2511 Opcode = NVPTX::STV_i16_v4_asi;
2512 break;
2513 case MVT::i32:
2514 Opcode = NVPTX::STV_i32_v4_asi;
2515 break;
2516 case MVT::f32:
2517 Opcode = NVPTX::STV_f32_v4_asi;
2518 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002519 }
2520 break;
2521 }
2522 StOps.push_back(Base);
2523 StOps.push_back(Offset);
Eric Christopher02389e32015-02-19 00:08:27 +00002524 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
2525 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
2526 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002527 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002528 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002529 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002530 case NVPTXISD::StoreV2:
2531 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002532 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002533 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002534 case MVT::i8:
2535 Opcode = NVPTX::STV_i8_v2_ari_64;
2536 break;
2537 case MVT::i16:
2538 Opcode = NVPTX::STV_i16_v2_ari_64;
2539 break;
2540 case MVT::i32:
2541 Opcode = NVPTX::STV_i32_v2_ari_64;
2542 break;
2543 case MVT::i64:
2544 Opcode = NVPTX::STV_i64_v2_ari_64;
2545 break;
2546 case MVT::f32:
2547 Opcode = NVPTX::STV_f32_v2_ari_64;
2548 break;
2549 case MVT::f64:
2550 Opcode = NVPTX::STV_f64_v2_ari_64;
2551 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002552 }
2553 break;
2554 case NVPTXISD::StoreV4:
2555 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002556 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002557 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002558 case MVT::i8:
2559 Opcode = NVPTX::STV_i8_v4_ari_64;
2560 break;
2561 case MVT::i16:
2562 Opcode = NVPTX::STV_i16_v4_ari_64;
2563 break;
2564 case MVT::i32:
2565 Opcode = NVPTX::STV_i32_v4_ari_64;
2566 break;
2567 case MVT::f32:
2568 Opcode = NVPTX::STV_f32_v4_ari_64;
2569 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002570 }
2571 break;
2572 }
2573 } else {
2574 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002575 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002576 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002577 case NVPTXISD::StoreV2:
2578 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002579 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002580 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002581 case MVT::i8:
2582 Opcode = NVPTX::STV_i8_v2_ari;
2583 break;
2584 case MVT::i16:
2585 Opcode = NVPTX::STV_i16_v2_ari;
2586 break;
2587 case MVT::i32:
2588 Opcode = NVPTX::STV_i32_v2_ari;
2589 break;
2590 case MVT::i64:
2591 Opcode = NVPTX::STV_i64_v2_ari;
2592 break;
2593 case MVT::f32:
2594 Opcode = NVPTX::STV_f32_v2_ari;
2595 break;
2596 case MVT::f64:
2597 Opcode = NVPTX::STV_f64_v2_ari;
2598 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002599 }
2600 break;
2601 case NVPTXISD::StoreV4:
2602 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002603 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002604 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002605 case MVT::i8:
2606 Opcode = NVPTX::STV_i8_v4_ari;
2607 break;
2608 case MVT::i16:
2609 Opcode = NVPTX::STV_i16_v4_ari;
2610 break;
2611 case MVT::i32:
2612 Opcode = NVPTX::STV_i32_v4_ari;
2613 break;
2614 case MVT::f32:
2615 Opcode = NVPTX::STV_f32_v4_ari;
2616 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002617 }
2618 break;
2619 }
2620 }
2621 StOps.push_back(Base);
2622 StOps.push_back(Offset);
2623 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00002624 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002625 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002626 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002627 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002628 case NVPTXISD::StoreV2:
2629 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002630 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002631 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002632 case MVT::i8:
2633 Opcode = NVPTX::STV_i8_v2_areg_64;
2634 break;
2635 case MVT::i16:
2636 Opcode = NVPTX::STV_i16_v2_areg_64;
2637 break;
2638 case MVT::i32:
2639 Opcode = NVPTX::STV_i32_v2_areg_64;
2640 break;
2641 case MVT::i64:
2642 Opcode = NVPTX::STV_i64_v2_areg_64;
2643 break;
2644 case MVT::f32:
2645 Opcode = NVPTX::STV_f32_v2_areg_64;
2646 break;
2647 case MVT::f64:
2648 Opcode = NVPTX::STV_f64_v2_areg_64;
2649 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002650 }
2651 break;
2652 case NVPTXISD::StoreV4:
2653 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002654 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002655 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002656 case MVT::i8:
2657 Opcode = NVPTX::STV_i8_v4_areg_64;
2658 break;
2659 case MVT::i16:
2660 Opcode = NVPTX::STV_i16_v4_areg_64;
2661 break;
2662 case MVT::i32:
2663 Opcode = NVPTX::STV_i32_v4_areg_64;
2664 break;
2665 case MVT::f32:
2666 Opcode = NVPTX::STV_f32_v4_areg_64;
2667 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002668 }
2669 break;
2670 }
2671 } else {
2672 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002673 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002674 return nullptr;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002675 case NVPTXISD::StoreV2:
2676 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002677 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002678 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002679 case MVT::i8:
2680 Opcode = NVPTX::STV_i8_v2_areg;
2681 break;
2682 case MVT::i16:
2683 Opcode = NVPTX::STV_i16_v2_areg;
2684 break;
2685 case MVT::i32:
2686 Opcode = NVPTX::STV_i32_v2_areg;
2687 break;
2688 case MVT::i64:
2689 Opcode = NVPTX::STV_i64_v2_areg;
2690 break;
2691 case MVT::f32:
2692 Opcode = NVPTX::STV_f32_v2_areg;
2693 break;
2694 case MVT::f64:
2695 Opcode = NVPTX::STV_f64_v2_areg;
2696 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002697 }
2698 break;
2699 case NVPTXISD::StoreV4:
2700 switch (EltVT.getSimpleVT().SimpleTy) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002701 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002702 return nullptr;
Justin Holewinski0497ab12013-03-30 14:29:21 +00002703 case MVT::i8:
2704 Opcode = NVPTX::STV_i8_v4_areg;
2705 break;
2706 case MVT::i16:
2707 Opcode = NVPTX::STV_i16_v4_areg;
2708 break;
2709 case MVT::i32:
2710 Opcode = NVPTX::STV_i32_v4_areg;
2711 break;
2712 case MVT::f32:
2713 Opcode = NVPTX::STV_f32_v4_areg;
2714 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002715 }
2716 break;
2717 }
2718 }
2719 StOps.push_back(N2);
2720 }
2721
2722 StOps.push_back(Chain);
2723
Michael Liaob53d8962013-04-19 22:22:57 +00002724 ST = CurDAG->getMachineNode(Opcode, DL, MVT::Other, StOps);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002725
2726 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2727 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2728 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2729
2730 return ST;
2731}
2732
Justin Holewinskif8f70912013-06-28 17:57:59 +00002733SDNode *NVPTXDAGToDAGISel::SelectLoadParam(SDNode *Node) {
2734 SDValue Chain = Node->getOperand(0);
2735 SDValue Offset = Node->getOperand(2);
2736 SDValue Flag = Node->getOperand(3);
2737 SDLoc DL(Node);
2738 MemSDNode *Mem = cast<MemSDNode>(Node);
2739
2740 unsigned VecSize;
2741 switch (Node->getOpcode()) {
2742 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002743 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002744 case NVPTXISD::LoadParam:
2745 VecSize = 1;
2746 break;
2747 case NVPTXISD::LoadParamV2:
2748 VecSize = 2;
2749 break;
2750 case NVPTXISD::LoadParamV4:
2751 VecSize = 4;
2752 break;
2753 }
2754
2755 EVT EltVT = Node->getValueType(0);
2756 EVT MemVT = Mem->getMemoryVT();
2757
2758 unsigned Opc = 0;
2759
2760 switch (VecSize) {
2761 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002762 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002763 case 1:
2764 switch (MemVT.getSimpleVT().SimpleTy) {
2765 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002766 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002767 case MVT::i1:
2768 Opc = NVPTX::LoadParamMemI8;
2769 break;
2770 case MVT::i8:
2771 Opc = NVPTX::LoadParamMemI8;
2772 break;
2773 case MVT::i16:
2774 Opc = NVPTX::LoadParamMemI16;
2775 break;
2776 case MVT::i32:
2777 Opc = NVPTX::LoadParamMemI32;
2778 break;
2779 case MVT::i64:
2780 Opc = NVPTX::LoadParamMemI64;
2781 break;
2782 case MVT::f32:
2783 Opc = NVPTX::LoadParamMemF32;
2784 break;
2785 case MVT::f64:
2786 Opc = NVPTX::LoadParamMemF64;
2787 break;
2788 }
2789 break;
2790 case 2:
2791 switch (MemVT.getSimpleVT().SimpleTy) {
2792 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002793 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002794 case MVT::i1:
2795 Opc = NVPTX::LoadParamMemV2I8;
2796 break;
2797 case MVT::i8:
2798 Opc = NVPTX::LoadParamMemV2I8;
2799 break;
2800 case MVT::i16:
2801 Opc = NVPTX::LoadParamMemV2I16;
2802 break;
2803 case MVT::i32:
2804 Opc = NVPTX::LoadParamMemV2I32;
2805 break;
2806 case MVT::i64:
2807 Opc = NVPTX::LoadParamMemV2I64;
2808 break;
2809 case MVT::f32:
2810 Opc = NVPTX::LoadParamMemV2F32;
2811 break;
2812 case MVT::f64:
2813 Opc = NVPTX::LoadParamMemV2F64;
2814 break;
2815 }
2816 break;
2817 case 4:
2818 switch (MemVT.getSimpleVT().SimpleTy) {
2819 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002820 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002821 case MVT::i1:
2822 Opc = NVPTX::LoadParamMemV4I8;
2823 break;
2824 case MVT::i8:
2825 Opc = NVPTX::LoadParamMemV4I8;
2826 break;
2827 case MVT::i16:
2828 Opc = NVPTX::LoadParamMemV4I16;
2829 break;
2830 case MVT::i32:
2831 Opc = NVPTX::LoadParamMemV4I32;
2832 break;
2833 case MVT::f32:
2834 Opc = NVPTX::LoadParamMemV4F32;
2835 break;
2836 }
2837 break;
2838 }
2839
2840 SDVTList VTs;
2841 if (VecSize == 1) {
2842 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2843 } else if (VecSize == 2) {
2844 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2845 } else {
2846 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
Craig Topperabb4ac72014-04-16 06:10:51 +00002847 VTs = CurDAG->getVTList(EVTs);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002848 }
2849
2850 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2851
2852 SmallVector<SDValue, 2> Ops;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002853 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002854 Ops.push_back(Chain);
2855 Ops.push_back(Flag);
2856
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002857 return CurDAG->getMachineNode(Opc, DL, VTs, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002858}
2859
2860SDNode *NVPTXDAGToDAGISel::SelectStoreRetval(SDNode *N) {
2861 SDLoc DL(N);
2862 SDValue Chain = N->getOperand(0);
2863 SDValue Offset = N->getOperand(1);
2864 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2865 MemSDNode *Mem = cast<MemSDNode>(N);
2866
2867 // How many elements do we have?
2868 unsigned NumElts = 1;
2869 switch (N->getOpcode()) {
2870 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002871 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002872 case NVPTXISD::StoreRetval:
2873 NumElts = 1;
2874 break;
2875 case NVPTXISD::StoreRetvalV2:
2876 NumElts = 2;
2877 break;
2878 case NVPTXISD::StoreRetvalV4:
2879 NumElts = 4;
2880 break;
2881 }
2882
2883 // Build vector of operands
2884 SmallVector<SDValue, 6> Ops;
2885 for (unsigned i = 0; i < NumElts; ++i)
2886 Ops.push_back(N->getOperand(i + 2));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002887 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002888 Ops.push_back(Chain);
2889
2890 // Determine target opcode
2891 // If we have an i1, use an 8-bit store. The lowering code in
2892 // NVPTXISelLowering will have already emitted an upcast.
2893 unsigned Opcode = 0;
2894 switch (NumElts) {
2895 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002896 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002897 case 1:
2898 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2899 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002900 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002901 case MVT::i1:
2902 Opcode = NVPTX::StoreRetvalI8;
2903 break;
2904 case MVT::i8:
2905 Opcode = NVPTX::StoreRetvalI8;
2906 break;
2907 case MVT::i16:
2908 Opcode = NVPTX::StoreRetvalI16;
2909 break;
2910 case MVT::i32:
2911 Opcode = NVPTX::StoreRetvalI32;
2912 break;
2913 case MVT::i64:
2914 Opcode = NVPTX::StoreRetvalI64;
2915 break;
2916 case MVT::f32:
2917 Opcode = NVPTX::StoreRetvalF32;
2918 break;
2919 case MVT::f64:
2920 Opcode = NVPTX::StoreRetvalF64;
2921 break;
2922 }
2923 break;
2924 case 2:
2925 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2926 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002927 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002928 case MVT::i1:
2929 Opcode = NVPTX::StoreRetvalV2I8;
2930 break;
2931 case MVT::i8:
2932 Opcode = NVPTX::StoreRetvalV2I8;
2933 break;
2934 case MVT::i16:
2935 Opcode = NVPTX::StoreRetvalV2I16;
2936 break;
2937 case MVT::i32:
2938 Opcode = NVPTX::StoreRetvalV2I32;
2939 break;
2940 case MVT::i64:
2941 Opcode = NVPTX::StoreRetvalV2I64;
2942 break;
2943 case MVT::f32:
2944 Opcode = NVPTX::StoreRetvalV2F32;
2945 break;
2946 case MVT::f64:
2947 Opcode = NVPTX::StoreRetvalV2F64;
2948 break;
2949 }
2950 break;
2951 case 4:
2952 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
2953 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002954 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002955 case MVT::i1:
2956 Opcode = NVPTX::StoreRetvalV4I8;
2957 break;
2958 case MVT::i8:
2959 Opcode = NVPTX::StoreRetvalV4I8;
2960 break;
2961 case MVT::i16:
2962 Opcode = NVPTX::StoreRetvalV4I16;
2963 break;
2964 case MVT::i32:
2965 Opcode = NVPTX::StoreRetvalV4I32;
2966 break;
2967 case MVT::f32:
2968 Opcode = NVPTX::StoreRetvalV4F32;
2969 break;
2970 }
2971 break;
2972 }
2973
2974 SDNode *Ret =
2975 CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
2976 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2977 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2978 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2979
2980 return Ret;
2981}
2982
2983SDNode *NVPTXDAGToDAGISel::SelectStoreParam(SDNode *N) {
2984 SDLoc DL(N);
2985 SDValue Chain = N->getOperand(0);
2986 SDValue Param = N->getOperand(1);
2987 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2988 SDValue Offset = N->getOperand(2);
2989 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2990 MemSDNode *Mem = cast<MemSDNode>(N);
2991 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2992
2993 // How many elements do we have?
2994 unsigned NumElts = 1;
2995 switch (N->getOpcode()) {
2996 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00002997 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002998 case NVPTXISD::StoreParamU32:
2999 case NVPTXISD::StoreParamS32:
3000 case NVPTXISD::StoreParam:
3001 NumElts = 1;
3002 break;
3003 case NVPTXISD::StoreParamV2:
3004 NumElts = 2;
3005 break;
3006 case NVPTXISD::StoreParamV4:
3007 NumElts = 4;
3008 break;
3009 }
3010
3011 // Build vector of operands
3012 SmallVector<SDValue, 8> Ops;
3013 for (unsigned i = 0; i < NumElts; ++i)
3014 Ops.push_back(N->getOperand(i + 3));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003015 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
3016 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00003017 Ops.push_back(Chain);
3018 Ops.push_back(Flag);
3019
3020 // Determine target opcode
3021 // If we have an i1, use an 8-bit store. The lowering code in
3022 // NVPTXISelLowering will have already emitted an upcast.
3023 unsigned Opcode = 0;
3024 switch (N->getOpcode()) {
3025 default:
3026 switch (NumElts) {
3027 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00003028 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003029 case 1:
3030 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3031 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00003032 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003033 case MVT::i1:
3034 Opcode = NVPTX::StoreParamI8;
3035 break;
3036 case MVT::i8:
3037 Opcode = NVPTX::StoreParamI8;
3038 break;
3039 case MVT::i16:
3040 Opcode = NVPTX::StoreParamI16;
3041 break;
3042 case MVT::i32:
3043 Opcode = NVPTX::StoreParamI32;
3044 break;
3045 case MVT::i64:
3046 Opcode = NVPTX::StoreParamI64;
3047 break;
3048 case MVT::f32:
3049 Opcode = NVPTX::StoreParamF32;
3050 break;
3051 case MVT::f64:
3052 Opcode = NVPTX::StoreParamF64;
3053 break;
3054 }
3055 break;
3056 case 2:
3057 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3058 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00003059 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003060 case MVT::i1:
3061 Opcode = NVPTX::StoreParamV2I8;
3062 break;
3063 case MVT::i8:
3064 Opcode = NVPTX::StoreParamV2I8;
3065 break;
3066 case MVT::i16:
3067 Opcode = NVPTX::StoreParamV2I16;
3068 break;
3069 case MVT::i32:
3070 Opcode = NVPTX::StoreParamV2I32;
3071 break;
3072 case MVT::i64:
3073 Opcode = NVPTX::StoreParamV2I64;
3074 break;
3075 case MVT::f32:
3076 Opcode = NVPTX::StoreParamV2F32;
3077 break;
3078 case MVT::f64:
3079 Opcode = NVPTX::StoreParamV2F64;
3080 break;
3081 }
3082 break;
3083 case 4:
3084 switch (Mem->getMemoryVT().getSimpleVT().SimpleTy) {
3085 default:
Craig Topper062a2ba2014-04-25 05:30:21 +00003086 return nullptr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00003087 case MVT::i1:
3088 Opcode = NVPTX::StoreParamV4I8;
3089 break;
3090 case MVT::i8:
3091 Opcode = NVPTX::StoreParamV4I8;
3092 break;
3093 case MVT::i16:
3094 Opcode = NVPTX::StoreParamV4I16;
3095 break;
3096 case MVT::i32:
3097 Opcode = NVPTX::StoreParamV4I32;
3098 break;
3099 case MVT::f32:
3100 Opcode = NVPTX::StoreParamV4F32;
3101 break;
3102 }
3103 break;
3104 }
3105 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00003106 // Special case: if we have a sign-extend/zero-extend node, insert the
3107 // conversion instruction first, and use that as the value operand to
3108 // the selected StoreParam node.
3109 case NVPTXISD::StoreParamU32: {
3110 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003111 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00003112 MVT::i32);
3113 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
3114 MVT::i32, Ops[0], CvtNone);
3115 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00003116 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00003117 }
3118 case NVPTXISD::StoreParamS32: {
3119 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003120 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00003121 MVT::i32);
3122 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
3123 MVT::i32, Ops[0], CvtNone);
3124 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00003125 break;
3126 }
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00003127 }
Justin Holewinskif8f70912013-06-28 17:57:59 +00003128
Justin Holewinskidff28d22013-07-01 12:59:01 +00003129 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
Justin Holewinskif8f70912013-06-28 17:57:59 +00003130 SDNode *Ret =
Justin Holewinskidff28d22013-07-01 12:59:01 +00003131 CurDAG->getMachineNode(Opcode, DL, RetVTs, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00003132 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
3133 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
3134 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
3135
3136 return Ret;
3137}
3138
Justin Holewinski30d56a72014-04-09 15:39:15 +00003139SDNode *NVPTXDAGToDAGISel::SelectTextureIntrinsic(SDNode *N) {
3140 SDValue Chain = N->getOperand(0);
Craig Topper062a2ba2014-04-25 05:30:21 +00003141 SDNode *Ret = nullptr;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003142 unsigned Opc = 0;
3143 SmallVector<SDValue, 8> Ops;
3144
3145 switch (N->getOpcode()) {
Craig Topper062a2ba2014-04-25 05:30:21 +00003146 default: return nullptr;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003147 case NVPTXISD::Tex1DFloatS32:
3148 Opc = NVPTX::TEX_1D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003149 break;
3150 case NVPTXISD::Tex1DFloatFloat:
3151 Opc = NVPTX::TEX_1D_F32_F32;
3152 break;
3153 case NVPTXISD::Tex1DFloatFloatLevel:
3154 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
3155 break;
3156 case NVPTXISD::Tex1DFloatFloatGrad:
3157 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
3158 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003159 case NVPTXISD::Tex1DS32S32:
3160 Opc = NVPTX::TEX_1D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003161 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003162 case NVPTXISD::Tex1DS32Float:
3163 Opc = NVPTX::TEX_1D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003164 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003165 case NVPTXISD::Tex1DS32FloatLevel:
3166 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003167 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003168 case NVPTXISD::Tex1DS32FloatGrad:
3169 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003170 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003171 case NVPTXISD::Tex1DU32S32:
3172 Opc = NVPTX::TEX_1D_U32_S32;
3173 break;
3174 case NVPTXISD::Tex1DU32Float:
3175 Opc = NVPTX::TEX_1D_U32_F32;
3176 break;
3177 case NVPTXISD::Tex1DU32FloatLevel:
3178 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
3179 break;
3180 case NVPTXISD::Tex1DU32FloatGrad:
3181 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
3182 break;
3183 case NVPTXISD::Tex1DArrayFloatS32:
3184 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003185 break;
3186 case NVPTXISD::Tex1DArrayFloatFloat:
3187 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
3188 break;
3189 case NVPTXISD::Tex1DArrayFloatFloatLevel:
3190 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
3191 break;
3192 case NVPTXISD::Tex1DArrayFloatFloatGrad:
3193 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
3194 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003195 case NVPTXISD::Tex1DArrayS32S32:
3196 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003197 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003198 case NVPTXISD::Tex1DArrayS32Float:
3199 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003200 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003201 case NVPTXISD::Tex1DArrayS32FloatLevel:
3202 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003203 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003204 case NVPTXISD::Tex1DArrayS32FloatGrad:
3205 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003206 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003207 case NVPTXISD::Tex1DArrayU32S32:
3208 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
3209 break;
3210 case NVPTXISD::Tex1DArrayU32Float:
3211 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
3212 break;
3213 case NVPTXISD::Tex1DArrayU32FloatLevel:
3214 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
3215 break;
3216 case NVPTXISD::Tex1DArrayU32FloatGrad:
3217 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
3218 break;
3219 case NVPTXISD::Tex2DFloatS32:
3220 Opc = NVPTX::TEX_2D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003221 break;
3222 case NVPTXISD::Tex2DFloatFloat:
3223 Opc = NVPTX::TEX_2D_F32_F32;
3224 break;
3225 case NVPTXISD::Tex2DFloatFloatLevel:
3226 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
3227 break;
3228 case NVPTXISD::Tex2DFloatFloatGrad:
3229 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
3230 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003231 case NVPTXISD::Tex2DS32S32:
3232 Opc = NVPTX::TEX_2D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003233 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003234 case NVPTXISD::Tex2DS32Float:
3235 Opc = NVPTX::TEX_2D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003236 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003237 case NVPTXISD::Tex2DS32FloatLevel:
3238 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003239 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003240 case NVPTXISD::Tex2DS32FloatGrad:
3241 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003242 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003243 case NVPTXISD::Tex2DU32S32:
3244 Opc = NVPTX::TEX_2D_U32_S32;
3245 break;
3246 case NVPTXISD::Tex2DU32Float:
3247 Opc = NVPTX::TEX_2D_U32_F32;
3248 break;
3249 case NVPTXISD::Tex2DU32FloatLevel:
3250 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
3251 break;
3252 case NVPTXISD::Tex2DU32FloatGrad:
3253 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
3254 break;
3255 case NVPTXISD::Tex2DArrayFloatS32:
3256 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003257 break;
3258 case NVPTXISD::Tex2DArrayFloatFloat:
3259 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
3260 break;
3261 case NVPTXISD::Tex2DArrayFloatFloatLevel:
3262 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
3263 break;
3264 case NVPTXISD::Tex2DArrayFloatFloatGrad:
3265 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
3266 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003267 case NVPTXISD::Tex2DArrayS32S32:
3268 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003269 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003270 case NVPTXISD::Tex2DArrayS32Float:
3271 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003272 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003273 case NVPTXISD::Tex2DArrayS32FloatLevel:
3274 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003275 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003276 case NVPTXISD::Tex2DArrayS32FloatGrad:
3277 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003278 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003279 case NVPTXISD::Tex2DArrayU32S32:
3280 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
3281 break;
3282 case NVPTXISD::Tex2DArrayU32Float:
3283 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
3284 break;
3285 case NVPTXISD::Tex2DArrayU32FloatLevel:
3286 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
3287 break;
3288 case NVPTXISD::Tex2DArrayU32FloatGrad:
3289 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
3290 break;
3291 case NVPTXISD::Tex3DFloatS32:
3292 Opc = NVPTX::TEX_3D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003293 break;
3294 case NVPTXISD::Tex3DFloatFloat:
3295 Opc = NVPTX::TEX_3D_F32_F32;
3296 break;
3297 case NVPTXISD::Tex3DFloatFloatLevel:
3298 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
3299 break;
3300 case NVPTXISD::Tex3DFloatFloatGrad:
3301 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
3302 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003303 case NVPTXISD::Tex3DS32S32:
3304 Opc = NVPTX::TEX_3D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003305 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003306 case NVPTXISD::Tex3DS32Float:
3307 Opc = NVPTX::TEX_3D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003308 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003309 case NVPTXISD::Tex3DS32FloatLevel:
3310 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003311 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003312 case NVPTXISD::Tex3DS32FloatGrad:
3313 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
3314 break;
3315 case NVPTXISD::Tex3DU32S32:
3316 Opc = NVPTX::TEX_3D_U32_S32;
3317 break;
3318 case NVPTXISD::Tex3DU32Float:
3319 Opc = NVPTX::TEX_3D_U32_F32;
3320 break;
3321 case NVPTXISD::Tex3DU32FloatLevel:
3322 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
3323 break;
3324 case NVPTXISD::Tex3DU32FloatGrad:
3325 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
3326 break;
3327 case NVPTXISD::TexCubeFloatFloat:
3328 Opc = NVPTX::TEX_CUBE_F32_F32;
3329 break;
3330 case NVPTXISD::TexCubeFloatFloatLevel:
3331 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
3332 break;
3333 case NVPTXISD::TexCubeS32Float:
3334 Opc = NVPTX::TEX_CUBE_S32_F32;
3335 break;
3336 case NVPTXISD::TexCubeS32FloatLevel:
3337 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
3338 break;
3339 case NVPTXISD::TexCubeU32Float:
3340 Opc = NVPTX::TEX_CUBE_U32_F32;
3341 break;
3342 case NVPTXISD::TexCubeU32FloatLevel:
3343 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
3344 break;
3345 case NVPTXISD::TexCubeArrayFloatFloat:
3346 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
3347 break;
3348 case NVPTXISD::TexCubeArrayFloatFloatLevel:
3349 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
3350 break;
3351 case NVPTXISD::TexCubeArrayS32Float:
3352 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
3353 break;
3354 case NVPTXISD::TexCubeArrayS32FloatLevel:
3355 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
3356 break;
3357 case NVPTXISD::TexCubeArrayU32Float:
3358 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
3359 break;
3360 case NVPTXISD::TexCubeArrayU32FloatLevel:
3361 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
3362 break;
3363 case NVPTXISD::Tld4R2DFloatFloat:
3364 Opc = NVPTX::TLD4_R_2D_F32_F32;
3365 break;
3366 case NVPTXISD::Tld4G2DFloatFloat:
3367 Opc = NVPTX::TLD4_G_2D_F32_F32;
3368 break;
3369 case NVPTXISD::Tld4B2DFloatFloat:
3370 Opc = NVPTX::TLD4_B_2D_F32_F32;
3371 break;
3372 case NVPTXISD::Tld4A2DFloatFloat:
3373 Opc = NVPTX::TLD4_A_2D_F32_F32;
3374 break;
3375 case NVPTXISD::Tld4R2DS64Float:
3376 Opc = NVPTX::TLD4_R_2D_S32_F32;
3377 break;
3378 case NVPTXISD::Tld4G2DS64Float:
3379 Opc = NVPTX::TLD4_G_2D_S32_F32;
3380 break;
3381 case NVPTXISD::Tld4B2DS64Float:
3382 Opc = NVPTX::TLD4_B_2D_S32_F32;
3383 break;
3384 case NVPTXISD::Tld4A2DS64Float:
3385 Opc = NVPTX::TLD4_A_2D_S32_F32;
3386 break;
3387 case NVPTXISD::Tld4R2DU64Float:
3388 Opc = NVPTX::TLD4_R_2D_U32_F32;
3389 break;
3390 case NVPTXISD::Tld4G2DU64Float:
3391 Opc = NVPTX::TLD4_G_2D_U32_F32;
3392 break;
3393 case NVPTXISD::Tld4B2DU64Float:
3394 Opc = NVPTX::TLD4_B_2D_U32_F32;
3395 break;
3396 case NVPTXISD::Tld4A2DU64Float:
3397 Opc = NVPTX::TLD4_A_2D_U32_F32;
3398 break;
3399 case NVPTXISD::TexUnified1DFloatS32:
3400 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
3401 break;
3402 case NVPTXISD::TexUnified1DFloatFloat:
3403 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
3404 break;
3405 case NVPTXISD::TexUnified1DFloatFloatLevel:
3406 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
3407 break;
3408 case NVPTXISD::TexUnified1DFloatFloatGrad:
3409 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
3410 break;
3411 case NVPTXISD::TexUnified1DS32S32:
3412 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
3413 break;
3414 case NVPTXISD::TexUnified1DS32Float:
3415 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
3416 break;
3417 case NVPTXISD::TexUnified1DS32FloatLevel:
3418 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
3419 break;
3420 case NVPTXISD::TexUnified1DS32FloatGrad:
3421 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
3422 break;
3423 case NVPTXISD::TexUnified1DU32S32:
3424 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
3425 break;
3426 case NVPTXISD::TexUnified1DU32Float:
3427 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
3428 break;
3429 case NVPTXISD::TexUnified1DU32FloatLevel:
3430 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
3431 break;
3432 case NVPTXISD::TexUnified1DU32FloatGrad:
3433 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
3434 break;
3435 case NVPTXISD::TexUnified1DArrayFloatS32:
3436 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
3437 break;
3438 case NVPTXISD::TexUnified1DArrayFloatFloat:
3439 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
3440 break;
3441 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
3442 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
3443 break;
3444 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
3445 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
3446 break;
3447 case NVPTXISD::TexUnified1DArrayS32S32:
3448 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
3449 break;
3450 case NVPTXISD::TexUnified1DArrayS32Float:
3451 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
3452 break;
3453 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
3454 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
3455 break;
3456 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
3457 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
3458 break;
3459 case NVPTXISD::TexUnified1DArrayU32S32:
3460 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
3461 break;
3462 case NVPTXISD::TexUnified1DArrayU32Float:
3463 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
3464 break;
3465 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
3466 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
3467 break;
3468 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
3469 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
3470 break;
3471 case NVPTXISD::TexUnified2DFloatS32:
3472 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
3473 break;
3474 case NVPTXISD::TexUnified2DFloatFloat:
3475 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
3476 break;
3477 case NVPTXISD::TexUnified2DFloatFloatLevel:
3478 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
3479 break;
3480 case NVPTXISD::TexUnified2DFloatFloatGrad:
3481 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
3482 break;
3483 case NVPTXISD::TexUnified2DS32S32:
3484 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
3485 break;
3486 case NVPTXISD::TexUnified2DS32Float:
3487 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
3488 break;
3489 case NVPTXISD::TexUnified2DS32FloatLevel:
3490 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
3491 break;
3492 case NVPTXISD::TexUnified2DS32FloatGrad:
3493 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
3494 break;
3495 case NVPTXISD::TexUnified2DU32S32:
3496 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
3497 break;
3498 case NVPTXISD::TexUnified2DU32Float:
3499 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
3500 break;
3501 case NVPTXISD::TexUnified2DU32FloatLevel:
3502 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
3503 break;
3504 case NVPTXISD::TexUnified2DU32FloatGrad:
3505 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
3506 break;
3507 case NVPTXISD::TexUnified2DArrayFloatS32:
3508 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
3509 break;
3510 case NVPTXISD::TexUnified2DArrayFloatFloat:
3511 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
3512 break;
3513 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
3514 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
3515 break;
3516 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
3517 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
3518 break;
3519 case NVPTXISD::TexUnified2DArrayS32S32:
3520 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
3521 break;
3522 case NVPTXISD::TexUnified2DArrayS32Float:
3523 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
3524 break;
3525 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
3526 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
3527 break;
3528 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
3529 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
3530 break;
3531 case NVPTXISD::TexUnified2DArrayU32S32:
3532 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
3533 break;
3534 case NVPTXISD::TexUnified2DArrayU32Float:
3535 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
3536 break;
3537 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
3538 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
3539 break;
3540 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
3541 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
3542 break;
3543 case NVPTXISD::TexUnified3DFloatS32:
3544 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
3545 break;
3546 case NVPTXISD::TexUnified3DFloatFloat:
3547 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
3548 break;
3549 case NVPTXISD::TexUnified3DFloatFloatLevel:
3550 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
3551 break;
3552 case NVPTXISD::TexUnified3DFloatFloatGrad:
3553 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
3554 break;
3555 case NVPTXISD::TexUnified3DS32S32:
3556 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
3557 break;
3558 case NVPTXISD::TexUnified3DS32Float:
3559 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
3560 break;
3561 case NVPTXISD::TexUnified3DS32FloatLevel:
3562 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
3563 break;
3564 case NVPTXISD::TexUnified3DS32FloatGrad:
3565 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
3566 break;
3567 case NVPTXISD::TexUnified3DU32S32:
3568 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
3569 break;
3570 case NVPTXISD::TexUnified3DU32Float:
3571 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
3572 break;
3573 case NVPTXISD::TexUnified3DU32FloatLevel:
3574 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
3575 break;
3576 case NVPTXISD::TexUnified3DU32FloatGrad:
3577 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
3578 break;
3579 case NVPTXISD::TexUnifiedCubeFloatFloat:
3580 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
3581 break;
3582 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
3583 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
3584 break;
3585 case NVPTXISD::TexUnifiedCubeS32Float:
3586 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
3587 break;
3588 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
3589 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
3590 break;
3591 case NVPTXISD::TexUnifiedCubeU32Float:
3592 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
3593 break;
3594 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
3595 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
3596 break;
3597 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
3598 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
3599 break;
3600 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
3601 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
3602 break;
3603 case NVPTXISD::TexUnifiedCubeArrayS32Float:
3604 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
3605 break;
3606 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
3607 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
3608 break;
3609 case NVPTXISD::TexUnifiedCubeArrayU32Float:
3610 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
3611 break;
3612 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
3613 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
3614 break;
3615 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
3616 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
3617 break;
3618 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
3619 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
3620 break;
3621 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
3622 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
3623 break;
3624 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
3625 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
3626 break;
3627 case NVPTXISD::Tld4UnifiedR2DS64Float:
3628 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
3629 break;
3630 case NVPTXISD::Tld4UnifiedG2DS64Float:
3631 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
3632 break;
3633 case NVPTXISD::Tld4UnifiedB2DS64Float:
3634 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
3635 break;
3636 case NVPTXISD::Tld4UnifiedA2DS64Float:
3637 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
3638 break;
3639 case NVPTXISD::Tld4UnifiedR2DU64Float:
3640 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
3641 break;
3642 case NVPTXISD::Tld4UnifiedG2DU64Float:
3643 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
3644 break;
3645 case NVPTXISD::Tld4UnifiedB2DU64Float:
3646 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
3647 break;
3648 case NVPTXISD::Tld4UnifiedA2DU64Float:
3649 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003650 break;
3651 }
3652
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003653 // Copy over operands
3654 for (unsigned i = 1; i < N->getNumOperands(); ++i) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00003655 Ops.push_back(N->getOperand(i));
3656 }
3657
3658 Ops.push_back(Chain);
3659 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
3660 return Ret;
3661}
3662
3663SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) {
3664 SDValue Chain = N->getOperand(0);
3665 SDValue TexHandle = N->getOperand(1);
Craig Topper062a2ba2014-04-25 05:30:21 +00003666 SDNode *Ret = nullptr;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003667 unsigned Opc = 0;
3668 SmallVector<SDValue, 8> Ops;
3669 switch (N->getOpcode()) {
Craig Topper062a2ba2014-04-25 05:30:21 +00003670 default: return nullptr;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003671 case NVPTXISD::Suld1DI8Clamp:
3672 Opc = NVPTX::SULD_1D_I8_CLAMP;
3673 Ops.push_back(TexHandle);
3674 Ops.push_back(N->getOperand(2));
3675 Ops.push_back(Chain);
3676 break;
3677 case NVPTXISD::Suld1DI16Clamp:
3678 Opc = NVPTX::SULD_1D_I16_CLAMP;
3679 Ops.push_back(TexHandle);
3680 Ops.push_back(N->getOperand(2));
3681 Ops.push_back(Chain);
3682 break;
3683 case NVPTXISD::Suld1DI32Clamp:
3684 Opc = NVPTX::SULD_1D_I32_CLAMP;
3685 Ops.push_back(TexHandle);
3686 Ops.push_back(N->getOperand(2));
3687 Ops.push_back(Chain);
3688 break;
3689 case NVPTXISD::Suld1DI64Clamp:
3690 Opc = NVPTX::SULD_1D_I64_CLAMP;
3691 Ops.push_back(TexHandle);
3692 Ops.push_back(N->getOperand(2));
3693 Ops.push_back(Chain);
3694 break;
3695 case NVPTXISD::Suld1DV2I8Clamp:
3696 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
3697 Ops.push_back(TexHandle);
3698 Ops.push_back(N->getOperand(2));
3699 Ops.push_back(Chain);
3700 break;
3701 case NVPTXISD::Suld1DV2I16Clamp:
3702 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
3703 Ops.push_back(TexHandle);
3704 Ops.push_back(N->getOperand(2));
3705 Ops.push_back(Chain);
3706 break;
3707 case NVPTXISD::Suld1DV2I32Clamp:
3708 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
3709 Ops.push_back(TexHandle);
3710 Ops.push_back(N->getOperand(2));
3711 Ops.push_back(Chain);
3712 break;
3713 case NVPTXISD::Suld1DV2I64Clamp:
3714 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
3715 Ops.push_back(TexHandle);
3716 Ops.push_back(N->getOperand(2));
3717 Ops.push_back(Chain);
3718 break;
3719 case NVPTXISD::Suld1DV4I8Clamp:
3720 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
3721 Ops.push_back(TexHandle);
3722 Ops.push_back(N->getOperand(2));
3723 Ops.push_back(Chain);
3724 break;
3725 case NVPTXISD::Suld1DV4I16Clamp:
3726 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
3727 Ops.push_back(TexHandle);
3728 Ops.push_back(N->getOperand(2));
3729 Ops.push_back(Chain);
3730 break;
3731 case NVPTXISD::Suld1DV4I32Clamp:
3732 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
3733 Ops.push_back(TexHandle);
3734 Ops.push_back(N->getOperand(2));
3735 Ops.push_back(Chain);
3736 break;
3737 case NVPTXISD::Suld1DArrayI8Clamp:
3738 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
3739 Ops.push_back(TexHandle);
3740 Ops.push_back(N->getOperand(2));
3741 Ops.push_back(N->getOperand(3));
3742 Ops.push_back(Chain);
3743 break;
3744 case NVPTXISD::Suld1DArrayI16Clamp:
3745 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
3746 Ops.push_back(TexHandle);
3747 Ops.push_back(N->getOperand(2));
3748 Ops.push_back(N->getOperand(3));
3749 Ops.push_back(Chain);
3750 break;
3751 case NVPTXISD::Suld1DArrayI32Clamp:
3752 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
3753 Ops.push_back(TexHandle);
3754 Ops.push_back(N->getOperand(2));
3755 Ops.push_back(N->getOperand(3));
3756 Ops.push_back(Chain);
3757 break;
3758 case NVPTXISD::Suld1DArrayI64Clamp:
3759 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
3760 Ops.push_back(TexHandle);
3761 Ops.push_back(N->getOperand(2));
3762 Ops.push_back(N->getOperand(3));
3763 Ops.push_back(Chain);
3764 break;
3765 case NVPTXISD::Suld1DArrayV2I8Clamp:
3766 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
3767 Ops.push_back(TexHandle);
3768 Ops.push_back(N->getOperand(2));
3769 Ops.push_back(N->getOperand(3));
3770 Ops.push_back(Chain);
3771 break;
3772 case NVPTXISD::Suld1DArrayV2I16Clamp:
3773 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
3774 Ops.push_back(TexHandle);
3775 Ops.push_back(N->getOperand(2));
3776 Ops.push_back(N->getOperand(3));
3777 Ops.push_back(Chain);
3778 break;
3779 case NVPTXISD::Suld1DArrayV2I32Clamp:
3780 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
3781 Ops.push_back(TexHandle);
3782 Ops.push_back(N->getOperand(2));
3783 Ops.push_back(N->getOperand(3));
3784 Ops.push_back(Chain);
3785 break;
3786 case NVPTXISD::Suld1DArrayV2I64Clamp:
3787 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
3788 Ops.push_back(TexHandle);
3789 Ops.push_back(N->getOperand(2));
3790 Ops.push_back(N->getOperand(3));
3791 Ops.push_back(Chain);
3792 break;
3793 case NVPTXISD::Suld1DArrayV4I8Clamp:
3794 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
3795 Ops.push_back(TexHandle);
3796 Ops.push_back(N->getOperand(2));
3797 Ops.push_back(N->getOperand(3));
3798 Ops.push_back(Chain);
3799 break;
3800 case NVPTXISD::Suld1DArrayV4I16Clamp:
3801 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
3802 Ops.push_back(TexHandle);
3803 Ops.push_back(N->getOperand(2));
3804 Ops.push_back(N->getOperand(3));
3805 Ops.push_back(Chain);
3806 break;
3807 case NVPTXISD::Suld1DArrayV4I32Clamp:
3808 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
3809 Ops.push_back(TexHandle);
3810 Ops.push_back(N->getOperand(2));
3811 Ops.push_back(N->getOperand(3));
3812 Ops.push_back(Chain);
3813 break;
3814 case NVPTXISD::Suld2DI8Clamp:
3815 Opc = NVPTX::SULD_2D_I8_CLAMP;
3816 Ops.push_back(TexHandle);
3817 Ops.push_back(N->getOperand(2));
3818 Ops.push_back(N->getOperand(3));
3819 Ops.push_back(Chain);
3820 break;
3821 case NVPTXISD::Suld2DI16Clamp:
3822 Opc = NVPTX::SULD_2D_I16_CLAMP;
3823 Ops.push_back(TexHandle);
3824 Ops.push_back(N->getOperand(2));
3825 Ops.push_back(N->getOperand(3));
3826 Ops.push_back(Chain);
3827 break;
3828 case NVPTXISD::Suld2DI32Clamp:
3829 Opc = NVPTX::SULD_2D_I32_CLAMP;
3830 Ops.push_back(TexHandle);
3831 Ops.push_back(N->getOperand(2));
3832 Ops.push_back(N->getOperand(3));
3833 Ops.push_back(Chain);
3834 break;
3835 case NVPTXISD::Suld2DI64Clamp:
3836 Opc = NVPTX::SULD_2D_I64_CLAMP;
3837 Ops.push_back(TexHandle);
3838 Ops.push_back(N->getOperand(2));
3839 Ops.push_back(N->getOperand(3));
3840 Ops.push_back(Chain);
3841 break;
3842 case NVPTXISD::Suld2DV2I8Clamp:
3843 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
3844 Ops.push_back(TexHandle);
3845 Ops.push_back(N->getOperand(2));
3846 Ops.push_back(N->getOperand(3));
3847 Ops.push_back(Chain);
3848 break;
3849 case NVPTXISD::Suld2DV2I16Clamp:
3850 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
3851 Ops.push_back(TexHandle);
3852 Ops.push_back(N->getOperand(2));
3853 Ops.push_back(N->getOperand(3));
3854 Ops.push_back(Chain);
3855 break;
3856 case NVPTXISD::Suld2DV2I32Clamp:
3857 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
3858 Ops.push_back(TexHandle);
3859 Ops.push_back(N->getOperand(2));
3860 Ops.push_back(N->getOperand(3));
3861 Ops.push_back(Chain);
3862 break;
3863 case NVPTXISD::Suld2DV2I64Clamp:
3864 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
3865 Ops.push_back(TexHandle);
3866 Ops.push_back(N->getOperand(2));
3867 Ops.push_back(N->getOperand(3));
3868 Ops.push_back(Chain);
3869 break;
3870 case NVPTXISD::Suld2DV4I8Clamp:
3871 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3872 Ops.push_back(TexHandle);
3873 Ops.push_back(N->getOperand(2));
3874 Ops.push_back(N->getOperand(3));
3875 Ops.push_back(Chain);
3876 break;
3877 case NVPTXISD::Suld2DV4I16Clamp:
3878 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3879 Ops.push_back(TexHandle);
3880 Ops.push_back(N->getOperand(2));
3881 Ops.push_back(N->getOperand(3));
3882 Ops.push_back(Chain);
3883 break;
3884 case NVPTXISD::Suld2DV4I32Clamp:
3885 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3886 Ops.push_back(TexHandle);
3887 Ops.push_back(N->getOperand(2));
3888 Ops.push_back(N->getOperand(3));
3889 Ops.push_back(Chain);
3890 break;
3891 case NVPTXISD::Suld2DArrayI8Clamp:
3892 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3893 Ops.push_back(TexHandle);
3894 Ops.push_back(N->getOperand(2));
3895 Ops.push_back(N->getOperand(3));
3896 Ops.push_back(N->getOperand(4));
3897 Ops.push_back(Chain);
3898 break;
3899 case NVPTXISD::Suld2DArrayI16Clamp:
3900 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3901 Ops.push_back(TexHandle);
3902 Ops.push_back(N->getOperand(2));
3903 Ops.push_back(N->getOperand(3));
3904 Ops.push_back(N->getOperand(4));
3905 Ops.push_back(Chain);
3906 break;
3907 case NVPTXISD::Suld2DArrayI32Clamp:
3908 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3909 Ops.push_back(TexHandle);
3910 Ops.push_back(N->getOperand(2));
3911 Ops.push_back(N->getOperand(3));
3912 Ops.push_back(N->getOperand(4));
3913 Ops.push_back(Chain);
3914 break;
3915 case NVPTXISD::Suld2DArrayI64Clamp:
3916 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3917 Ops.push_back(TexHandle);
3918 Ops.push_back(N->getOperand(2));
3919 Ops.push_back(N->getOperand(3));
3920 Ops.push_back(N->getOperand(4));
3921 Ops.push_back(Chain);
3922 break;
3923 case NVPTXISD::Suld2DArrayV2I8Clamp:
3924 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3925 Ops.push_back(TexHandle);
3926 Ops.push_back(N->getOperand(2));
3927 Ops.push_back(N->getOperand(3));
3928 Ops.push_back(N->getOperand(4));
3929 Ops.push_back(Chain);
3930 break;
3931 case NVPTXISD::Suld2DArrayV2I16Clamp:
3932 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3933 Ops.push_back(TexHandle);
3934 Ops.push_back(N->getOperand(2));
3935 Ops.push_back(N->getOperand(3));
3936 Ops.push_back(N->getOperand(4));
3937 Ops.push_back(Chain);
3938 break;
3939 case NVPTXISD::Suld2DArrayV2I32Clamp:
3940 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3941 Ops.push_back(TexHandle);
3942 Ops.push_back(N->getOperand(2));
3943 Ops.push_back(N->getOperand(3));
3944 Ops.push_back(N->getOperand(4));
3945 Ops.push_back(Chain);
3946 break;
3947 case NVPTXISD::Suld2DArrayV2I64Clamp:
3948 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3949 Ops.push_back(TexHandle);
3950 Ops.push_back(N->getOperand(2));
3951 Ops.push_back(N->getOperand(3));
3952 Ops.push_back(N->getOperand(4));
3953 Ops.push_back(Chain);
3954 break;
3955 case NVPTXISD::Suld2DArrayV4I8Clamp:
3956 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3957 Ops.push_back(TexHandle);
3958 Ops.push_back(N->getOperand(2));
3959 Ops.push_back(N->getOperand(3));
3960 Ops.push_back(N->getOperand(4));
3961 Ops.push_back(Chain);
3962 break;
3963 case NVPTXISD::Suld2DArrayV4I16Clamp:
3964 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3965 Ops.push_back(TexHandle);
3966 Ops.push_back(N->getOperand(2));
3967 Ops.push_back(N->getOperand(3));
3968 Ops.push_back(N->getOperand(4));
3969 Ops.push_back(Chain);
3970 break;
3971 case NVPTXISD::Suld2DArrayV4I32Clamp:
3972 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3973 Ops.push_back(TexHandle);
3974 Ops.push_back(N->getOperand(2));
3975 Ops.push_back(N->getOperand(3));
3976 Ops.push_back(N->getOperand(4));
3977 Ops.push_back(Chain);
3978 break;
3979 case NVPTXISD::Suld3DI8Clamp:
3980 Opc = NVPTX::SULD_3D_I8_CLAMP;
3981 Ops.push_back(TexHandle);
3982 Ops.push_back(N->getOperand(2));
3983 Ops.push_back(N->getOperand(3));
3984 Ops.push_back(N->getOperand(4));
3985 Ops.push_back(Chain);
3986 break;
3987 case NVPTXISD::Suld3DI16Clamp:
3988 Opc = NVPTX::SULD_3D_I16_CLAMP;
3989 Ops.push_back(TexHandle);
3990 Ops.push_back(N->getOperand(2));
3991 Ops.push_back(N->getOperand(3));
3992 Ops.push_back(N->getOperand(4));
3993 Ops.push_back(Chain);
3994 break;
3995 case NVPTXISD::Suld3DI32Clamp:
3996 Opc = NVPTX::SULD_3D_I32_CLAMP;
3997 Ops.push_back(TexHandle);
3998 Ops.push_back(N->getOperand(2));
3999 Ops.push_back(N->getOperand(3));
4000 Ops.push_back(N->getOperand(4));
4001 Ops.push_back(Chain);
4002 break;
4003 case NVPTXISD::Suld3DI64Clamp:
4004 Opc = NVPTX::SULD_3D_I64_CLAMP;
4005 Ops.push_back(TexHandle);
4006 Ops.push_back(N->getOperand(2));
4007 Ops.push_back(N->getOperand(3));
4008 Ops.push_back(N->getOperand(4));
4009 Ops.push_back(Chain);
4010 break;
4011 case NVPTXISD::Suld3DV2I8Clamp:
4012 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
4013 Ops.push_back(TexHandle);
4014 Ops.push_back(N->getOperand(2));
4015 Ops.push_back(N->getOperand(3));
4016 Ops.push_back(N->getOperand(4));
4017 Ops.push_back(Chain);
4018 break;
4019 case NVPTXISD::Suld3DV2I16Clamp:
4020 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
4021 Ops.push_back(TexHandle);
4022 Ops.push_back(N->getOperand(2));
4023 Ops.push_back(N->getOperand(3));
4024 Ops.push_back(N->getOperand(4));
4025 Ops.push_back(Chain);
4026 break;
4027 case NVPTXISD::Suld3DV2I32Clamp:
4028 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
4029 Ops.push_back(TexHandle);
4030 Ops.push_back(N->getOperand(2));
4031 Ops.push_back(N->getOperand(3));
4032 Ops.push_back(N->getOperand(4));
4033 Ops.push_back(Chain);
4034 break;
4035 case NVPTXISD::Suld3DV2I64Clamp:
4036 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
4037 Ops.push_back(TexHandle);
4038 Ops.push_back(N->getOperand(2));
4039 Ops.push_back(N->getOperand(3));
4040 Ops.push_back(N->getOperand(4));
4041 Ops.push_back(Chain);
4042 break;
4043 case NVPTXISD::Suld3DV4I8Clamp:
4044 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
4045 Ops.push_back(TexHandle);
4046 Ops.push_back(N->getOperand(2));
4047 Ops.push_back(N->getOperand(3));
4048 Ops.push_back(N->getOperand(4));
4049 Ops.push_back(Chain);
4050 break;
4051 case NVPTXISD::Suld3DV4I16Clamp:
4052 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
4053 Ops.push_back(TexHandle);
4054 Ops.push_back(N->getOperand(2));
4055 Ops.push_back(N->getOperand(3));
4056 Ops.push_back(N->getOperand(4));
4057 Ops.push_back(Chain);
4058 break;
4059 case NVPTXISD::Suld3DV4I32Clamp:
4060 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
4061 Ops.push_back(TexHandle);
4062 Ops.push_back(N->getOperand(2));
4063 Ops.push_back(N->getOperand(3));
4064 Ops.push_back(N->getOperand(4));
4065 Ops.push_back(Chain);
4066 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004067 case NVPTXISD::Suld1DI8Trap:
4068 Opc = NVPTX::SULD_1D_I8_TRAP;
4069 Ops.push_back(TexHandle);
4070 Ops.push_back(N->getOperand(2));
4071 Ops.push_back(Chain);
4072 break;
4073 case NVPTXISD::Suld1DI16Trap:
4074 Opc = NVPTX::SULD_1D_I16_TRAP;
4075 Ops.push_back(TexHandle);
4076 Ops.push_back(N->getOperand(2));
4077 Ops.push_back(Chain);
4078 break;
4079 case NVPTXISD::Suld1DI32Trap:
4080 Opc = NVPTX::SULD_1D_I32_TRAP;
4081 Ops.push_back(TexHandle);
4082 Ops.push_back(N->getOperand(2));
4083 Ops.push_back(Chain);
4084 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004085 case NVPTXISD::Suld1DI64Trap:
4086 Opc = NVPTX::SULD_1D_I64_TRAP;
4087 Ops.push_back(TexHandle);
4088 Ops.push_back(N->getOperand(2));
4089 Ops.push_back(Chain);
4090 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004091 case NVPTXISD::Suld1DV2I8Trap:
4092 Opc = NVPTX::SULD_1D_V2I8_TRAP;
4093 Ops.push_back(TexHandle);
4094 Ops.push_back(N->getOperand(2));
4095 Ops.push_back(Chain);
4096 break;
4097 case NVPTXISD::Suld1DV2I16Trap:
4098 Opc = NVPTX::SULD_1D_V2I16_TRAP;
4099 Ops.push_back(TexHandle);
4100 Ops.push_back(N->getOperand(2));
4101 Ops.push_back(Chain);
4102 break;
4103 case NVPTXISD::Suld1DV2I32Trap:
4104 Opc = NVPTX::SULD_1D_V2I32_TRAP;
4105 Ops.push_back(TexHandle);
4106 Ops.push_back(N->getOperand(2));
4107 Ops.push_back(Chain);
4108 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004109 case NVPTXISD::Suld1DV2I64Trap:
4110 Opc = NVPTX::SULD_1D_V2I64_TRAP;
4111 Ops.push_back(TexHandle);
4112 Ops.push_back(N->getOperand(2));
4113 Ops.push_back(Chain);
4114 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004115 case NVPTXISD::Suld1DV4I8Trap:
4116 Opc = NVPTX::SULD_1D_V4I8_TRAP;
4117 Ops.push_back(TexHandle);
4118 Ops.push_back(N->getOperand(2));
4119 Ops.push_back(Chain);
4120 break;
4121 case NVPTXISD::Suld1DV4I16Trap:
4122 Opc = NVPTX::SULD_1D_V4I16_TRAP;
4123 Ops.push_back(TexHandle);
4124 Ops.push_back(N->getOperand(2));
4125 Ops.push_back(Chain);
4126 break;
4127 case NVPTXISD::Suld1DV4I32Trap:
4128 Opc = NVPTX::SULD_1D_V4I32_TRAP;
4129 Ops.push_back(TexHandle);
4130 Ops.push_back(N->getOperand(2));
4131 Ops.push_back(Chain);
4132 break;
4133 case NVPTXISD::Suld1DArrayI8Trap:
4134 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
4135 Ops.push_back(TexHandle);
4136 Ops.push_back(N->getOperand(2));
4137 Ops.push_back(N->getOperand(3));
4138 Ops.push_back(Chain);
4139 break;
4140 case NVPTXISD::Suld1DArrayI16Trap:
4141 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
4142 Ops.push_back(TexHandle);
4143 Ops.push_back(N->getOperand(2));
4144 Ops.push_back(N->getOperand(3));
4145 Ops.push_back(Chain);
4146 break;
4147 case NVPTXISD::Suld1DArrayI32Trap:
4148 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
4149 Ops.push_back(TexHandle);
4150 Ops.push_back(N->getOperand(2));
4151 Ops.push_back(N->getOperand(3));
4152 Ops.push_back(Chain);
4153 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004154 case NVPTXISD::Suld1DArrayI64Trap:
4155 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
4156 Ops.push_back(TexHandle);
4157 Ops.push_back(N->getOperand(2));
4158 Ops.push_back(N->getOperand(3));
4159 Ops.push_back(Chain);
4160 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004161 case NVPTXISD::Suld1DArrayV2I8Trap:
4162 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
4163 Ops.push_back(TexHandle);
4164 Ops.push_back(N->getOperand(2));
4165 Ops.push_back(N->getOperand(3));
4166 Ops.push_back(Chain);
4167 break;
4168 case NVPTXISD::Suld1DArrayV2I16Trap:
4169 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
4170 Ops.push_back(TexHandle);
4171 Ops.push_back(N->getOperand(2));
4172 Ops.push_back(N->getOperand(3));
4173 Ops.push_back(Chain);
4174 break;
4175 case NVPTXISD::Suld1DArrayV2I32Trap:
4176 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
4177 Ops.push_back(TexHandle);
4178 Ops.push_back(N->getOperand(2));
4179 Ops.push_back(N->getOperand(3));
4180 Ops.push_back(Chain);
4181 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004182 case NVPTXISD::Suld1DArrayV2I64Trap:
4183 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
4184 Ops.push_back(TexHandle);
4185 Ops.push_back(N->getOperand(2));
4186 Ops.push_back(N->getOperand(3));
4187 Ops.push_back(Chain);
4188 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004189 case NVPTXISD::Suld1DArrayV4I8Trap:
4190 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
4191 Ops.push_back(TexHandle);
4192 Ops.push_back(N->getOperand(2));
4193 Ops.push_back(N->getOperand(3));
4194 Ops.push_back(Chain);
4195 break;
4196 case NVPTXISD::Suld1DArrayV4I16Trap:
4197 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
4198 Ops.push_back(TexHandle);
4199 Ops.push_back(N->getOperand(2));
4200 Ops.push_back(N->getOperand(3));
4201 Ops.push_back(Chain);
4202 break;
4203 case NVPTXISD::Suld1DArrayV4I32Trap:
4204 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
4205 Ops.push_back(TexHandle);
4206 Ops.push_back(N->getOperand(2));
4207 Ops.push_back(N->getOperand(3));
4208 Ops.push_back(Chain);
4209 break;
4210 case NVPTXISD::Suld2DI8Trap:
4211 Opc = NVPTX::SULD_2D_I8_TRAP;
4212 Ops.push_back(TexHandle);
4213 Ops.push_back(N->getOperand(2));
4214 Ops.push_back(N->getOperand(3));
4215 Ops.push_back(Chain);
4216 break;
4217 case NVPTXISD::Suld2DI16Trap:
4218 Opc = NVPTX::SULD_2D_I16_TRAP;
4219 Ops.push_back(TexHandle);
4220 Ops.push_back(N->getOperand(2));
4221 Ops.push_back(N->getOperand(3));
4222 Ops.push_back(Chain);
4223 break;
4224 case NVPTXISD::Suld2DI32Trap:
4225 Opc = NVPTX::SULD_2D_I32_TRAP;
4226 Ops.push_back(TexHandle);
4227 Ops.push_back(N->getOperand(2));
4228 Ops.push_back(N->getOperand(3));
4229 Ops.push_back(Chain);
4230 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004231 case NVPTXISD::Suld2DI64Trap:
4232 Opc = NVPTX::SULD_2D_I64_TRAP;
4233 Ops.push_back(TexHandle);
4234 Ops.push_back(N->getOperand(2));
4235 Ops.push_back(N->getOperand(3));
4236 Ops.push_back(Chain);
4237 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004238 case NVPTXISD::Suld2DV2I8Trap:
4239 Opc = NVPTX::SULD_2D_V2I8_TRAP;
4240 Ops.push_back(TexHandle);
4241 Ops.push_back(N->getOperand(2));
4242 Ops.push_back(N->getOperand(3));
4243 Ops.push_back(Chain);
4244 break;
4245 case NVPTXISD::Suld2DV2I16Trap:
4246 Opc = NVPTX::SULD_2D_V2I16_TRAP;
4247 Ops.push_back(TexHandle);
4248 Ops.push_back(N->getOperand(2));
4249 Ops.push_back(N->getOperand(3));
4250 Ops.push_back(Chain);
4251 break;
4252 case NVPTXISD::Suld2DV2I32Trap:
4253 Opc = NVPTX::SULD_2D_V2I32_TRAP;
4254 Ops.push_back(TexHandle);
4255 Ops.push_back(N->getOperand(2));
4256 Ops.push_back(N->getOperand(3));
4257 Ops.push_back(Chain);
4258 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004259 case NVPTXISD::Suld2DV2I64Trap:
4260 Opc = NVPTX::SULD_2D_V2I64_TRAP;
4261 Ops.push_back(TexHandle);
4262 Ops.push_back(N->getOperand(2));
4263 Ops.push_back(N->getOperand(3));
4264 Ops.push_back(Chain);
4265 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004266 case NVPTXISD::Suld2DV4I8Trap:
4267 Opc = NVPTX::SULD_2D_V4I8_TRAP;
4268 Ops.push_back(TexHandle);
4269 Ops.push_back(N->getOperand(2));
4270 Ops.push_back(N->getOperand(3));
4271 Ops.push_back(Chain);
4272 break;
4273 case NVPTXISD::Suld2DV4I16Trap:
4274 Opc = NVPTX::SULD_2D_V4I16_TRAP;
4275 Ops.push_back(TexHandle);
4276 Ops.push_back(N->getOperand(2));
4277 Ops.push_back(N->getOperand(3));
4278 Ops.push_back(Chain);
4279 break;
4280 case NVPTXISD::Suld2DV4I32Trap:
4281 Opc = NVPTX::SULD_2D_V4I32_TRAP;
4282 Ops.push_back(TexHandle);
4283 Ops.push_back(N->getOperand(2));
4284 Ops.push_back(N->getOperand(3));
4285 Ops.push_back(Chain);
4286 break;
4287 case NVPTXISD::Suld2DArrayI8Trap:
4288 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
4289 Ops.push_back(TexHandle);
4290 Ops.push_back(N->getOperand(2));
4291 Ops.push_back(N->getOperand(3));
4292 Ops.push_back(N->getOperand(4));
4293 Ops.push_back(Chain);
4294 break;
4295 case NVPTXISD::Suld2DArrayI16Trap:
4296 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
4297 Ops.push_back(TexHandle);
4298 Ops.push_back(N->getOperand(2));
4299 Ops.push_back(N->getOperand(3));
4300 Ops.push_back(N->getOperand(4));
4301 Ops.push_back(Chain);
4302 break;
4303 case NVPTXISD::Suld2DArrayI32Trap:
4304 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
4305 Ops.push_back(TexHandle);
4306 Ops.push_back(N->getOperand(2));
4307 Ops.push_back(N->getOperand(3));
4308 Ops.push_back(N->getOperand(4));
4309 Ops.push_back(Chain);
4310 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004311 case NVPTXISD::Suld2DArrayI64Trap:
4312 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
4313 Ops.push_back(TexHandle);
4314 Ops.push_back(N->getOperand(2));
4315 Ops.push_back(N->getOperand(3));
4316 Ops.push_back(N->getOperand(4));
4317 Ops.push_back(Chain);
4318 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004319 case NVPTXISD::Suld2DArrayV2I8Trap:
4320 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
4321 Ops.push_back(TexHandle);
4322 Ops.push_back(N->getOperand(2));
4323 Ops.push_back(N->getOperand(3));
4324 Ops.push_back(N->getOperand(4));
4325 Ops.push_back(Chain);
4326 break;
4327 case NVPTXISD::Suld2DArrayV2I16Trap:
4328 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
4329 Ops.push_back(TexHandle);
4330 Ops.push_back(N->getOperand(2));
4331 Ops.push_back(N->getOperand(3));
4332 Ops.push_back(N->getOperand(4));
4333 Ops.push_back(Chain);
4334 break;
4335 case NVPTXISD::Suld2DArrayV2I32Trap:
4336 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
4337 Ops.push_back(TexHandle);
4338 Ops.push_back(N->getOperand(2));
4339 Ops.push_back(N->getOperand(3));
4340 Ops.push_back(N->getOperand(4));
4341 Ops.push_back(Chain);
4342 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004343 case NVPTXISD::Suld2DArrayV2I64Trap:
4344 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
4345 Ops.push_back(TexHandle);
4346 Ops.push_back(N->getOperand(2));
4347 Ops.push_back(N->getOperand(3));
4348 Ops.push_back(N->getOperand(4));
4349 Ops.push_back(Chain);
4350 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004351 case NVPTXISD::Suld2DArrayV4I8Trap:
4352 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
4353 Ops.push_back(TexHandle);
4354 Ops.push_back(N->getOperand(2));
4355 Ops.push_back(N->getOperand(3));
4356 Ops.push_back(N->getOperand(4));
4357 Ops.push_back(Chain);
4358 break;
4359 case NVPTXISD::Suld2DArrayV4I16Trap:
4360 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
4361 Ops.push_back(TexHandle);
4362 Ops.push_back(N->getOperand(2));
4363 Ops.push_back(N->getOperand(3));
4364 Ops.push_back(N->getOperand(4));
4365 Ops.push_back(Chain);
4366 break;
4367 case NVPTXISD::Suld2DArrayV4I32Trap:
4368 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
4369 Ops.push_back(TexHandle);
4370 Ops.push_back(N->getOperand(2));
4371 Ops.push_back(N->getOperand(3));
4372 Ops.push_back(N->getOperand(4));
4373 Ops.push_back(Chain);
4374 break;
4375 case NVPTXISD::Suld3DI8Trap:
4376 Opc = NVPTX::SULD_3D_I8_TRAP;
4377 Ops.push_back(TexHandle);
4378 Ops.push_back(N->getOperand(2));
4379 Ops.push_back(N->getOperand(3));
4380 Ops.push_back(N->getOperand(4));
4381 Ops.push_back(Chain);
4382 break;
4383 case NVPTXISD::Suld3DI16Trap:
4384 Opc = NVPTX::SULD_3D_I16_TRAP;
4385 Ops.push_back(TexHandle);
4386 Ops.push_back(N->getOperand(2));
4387 Ops.push_back(N->getOperand(3));
4388 Ops.push_back(N->getOperand(4));
4389 Ops.push_back(Chain);
4390 break;
4391 case NVPTXISD::Suld3DI32Trap:
4392 Opc = NVPTX::SULD_3D_I32_TRAP;
4393 Ops.push_back(TexHandle);
4394 Ops.push_back(N->getOperand(2));
4395 Ops.push_back(N->getOperand(3));
4396 Ops.push_back(N->getOperand(4));
4397 Ops.push_back(Chain);
4398 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004399 case NVPTXISD::Suld3DI64Trap:
4400 Opc = NVPTX::SULD_3D_I64_TRAP;
4401 Ops.push_back(TexHandle);
4402 Ops.push_back(N->getOperand(2));
4403 Ops.push_back(N->getOperand(3));
4404 Ops.push_back(N->getOperand(4));
4405 Ops.push_back(Chain);
4406 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004407 case NVPTXISD::Suld3DV2I8Trap:
4408 Opc = NVPTX::SULD_3D_V2I8_TRAP;
4409 Ops.push_back(TexHandle);
4410 Ops.push_back(N->getOperand(2));
4411 Ops.push_back(N->getOperand(3));
4412 Ops.push_back(N->getOperand(4));
4413 Ops.push_back(Chain);
4414 break;
4415 case NVPTXISD::Suld3DV2I16Trap:
4416 Opc = NVPTX::SULD_3D_V2I16_TRAP;
4417 Ops.push_back(TexHandle);
4418 Ops.push_back(N->getOperand(2));
4419 Ops.push_back(N->getOperand(3));
4420 Ops.push_back(N->getOperand(4));
4421 Ops.push_back(Chain);
4422 break;
4423 case NVPTXISD::Suld3DV2I32Trap:
4424 Opc = NVPTX::SULD_3D_V2I32_TRAP;
4425 Ops.push_back(TexHandle);
4426 Ops.push_back(N->getOperand(2));
4427 Ops.push_back(N->getOperand(3));
4428 Ops.push_back(N->getOperand(4));
4429 Ops.push_back(Chain);
4430 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004431 case NVPTXISD::Suld3DV2I64Trap:
4432 Opc = NVPTX::SULD_3D_V2I64_TRAP;
4433 Ops.push_back(TexHandle);
4434 Ops.push_back(N->getOperand(2));
4435 Ops.push_back(N->getOperand(3));
4436 Ops.push_back(N->getOperand(4));
4437 Ops.push_back(Chain);
4438 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004439 case NVPTXISD::Suld3DV4I8Trap:
4440 Opc = NVPTX::SULD_3D_V4I8_TRAP;
4441 Ops.push_back(TexHandle);
4442 Ops.push_back(N->getOperand(2));
4443 Ops.push_back(N->getOperand(3));
4444 Ops.push_back(N->getOperand(4));
4445 Ops.push_back(Chain);
4446 break;
4447 case NVPTXISD::Suld3DV4I16Trap:
4448 Opc = NVPTX::SULD_3D_V4I16_TRAP;
4449 Ops.push_back(TexHandle);
4450 Ops.push_back(N->getOperand(2));
4451 Ops.push_back(N->getOperand(3));
4452 Ops.push_back(N->getOperand(4));
4453 Ops.push_back(Chain);
4454 break;
4455 case NVPTXISD::Suld3DV4I32Trap:
4456 Opc = NVPTX::SULD_3D_V4I32_TRAP;
4457 Ops.push_back(TexHandle);
4458 Ops.push_back(N->getOperand(2));
4459 Ops.push_back(N->getOperand(3));
4460 Ops.push_back(N->getOperand(4));
4461 Ops.push_back(Chain);
4462 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004463 case NVPTXISD::Suld1DI8Zero:
4464 Opc = NVPTX::SULD_1D_I8_ZERO;
4465 Ops.push_back(TexHandle);
4466 Ops.push_back(N->getOperand(2));
4467 Ops.push_back(Chain);
4468 break;
4469 case NVPTXISD::Suld1DI16Zero:
4470 Opc = NVPTX::SULD_1D_I16_ZERO;
4471 Ops.push_back(TexHandle);
4472 Ops.push_back(N->getOperand(2));
4473 Ops.push_back(Chain);
4474 break;
4475 case NVPTXISD::Suld1DI32Zero:
4476 Opc = NVPTX::SULD_1D_I32_ZERO;
4477 Ops.push_back(TexHandle);
4478 Ops.push_back(N->getOperand(2));
4479 Ops.push_back(Chain);
4480 break;
4481 case NVPTXISD::Suld1DI64Zero:
4482 Opc = NVPTX::SULD_1D_I64_ZERO;
4483 Ops.push_back(TexHandle);
4484 Ops.push_back(N->getOperand(2));
4485 Ops.push_back(Chain);
4486 break;
4487 case NVPTXISD::Suld1DV2I8Zero:
4488 Opc = NVPTX::SULD_1D_V2I8_ZERO;
4489 Ops.push_back(TexHandle);
4490 Ops.push_back(N->getOperand(2));
4491 Ops.push_back(Chain);
4492 break;
4493 case NVPTXISD::Suld1DV2I16Zero:
4494 Opc = NVPTX::SULD_1D_V2I16_ZERO;
4495 Ops.push_back(TexHandle);
4496 Ops.push_back(N->getOperand(2));
4497 Ops.push_back(Chain);
4498 break;
4499 case NVPTXISD::Suld1DV2I32Zero:
4500 Opc = NVPTX::SULD_1D_V2I32_ZERO;
4501 Ops.push_back(TexHandle);
4502 Ops.push_back(N->getOperand(2));
4503 Ops.push_back(Chain);
4504 break;
4505 case NVPTXISD::Suld1DV2I64Zero:
4506 Opc = NVPTX::SULD_1D_V2I64_ZERO;
4507 Ops.push_back(TexHandle);
4508 Ops.push_back(N->getOperand(2));
4509 Ops.push_back(Chain);
4510 break;
4511 case NVPTXISD::Suld1DV4I8Zero:
4512 Opc = NVPTX::SULD_1D_V4I8_ZERO;
4513 Ops.push_back(TexHandle);
4514 Ops.push_back(N->getOperand(2));
4515 Ops.push_back(Chain);
4516 break;
4517 case NVPTXISD::Suld1DV4I16Zero:
4518 Opc = NVPTX::SULD_1D_V4I16_ZERO;
4519 Ops.push_back(TexHandle);
4520 Ops.push_back(N->getOperand(2));
4521 Ops.push_back(Chain);
4522 break;
4523 case NVPTXISD::Suld1DV4I32Zero:
4524 Opc = NVPTX::SULD_1D_V4I32_ZERO;
4525 Ops.push_back(TexHandle);
4526 Ops.push_back(N->getOperand(2));
4527 Ops.push_back(Chain);
4528 break;
4529 case NVPTXISD::Suld1DArrayI8Zero:
4530 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
4531 Ops.push_back(TexHandle);
4532 Ops.push_back(N->getOperand(2));
4533 Ops.push_back(N->getOperand(3));
4534 Ops.push_back(Chain);
4535 break;
4536 case NVPTXISD::Suld1DArrayI16Zero:
4537 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
4538 Ops.push_back(TexHandle);
4539 Ops.push_back(N->getOperand(2));
4540 Ops.push_back(N->getOperand(3));
4541 Ops.push_back(Chain);
4542 break;
4543 case NVPTXISD::Suld1DArrayI32Zero:
4544 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
4545 Ops.push_back(TexHandle);
4546 Ops.push_back(N->getOperand(2));
4547 Ops.push_back(N->getOperand(3));
4548 Ops.push_back(Chain);
4549 break;
4550 case NVPTXISD::Suld1DArrayI64Zero:
4551 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
4552 Ops.push_back(TexHandle);
4553 Ops.push_back(N->getOperand(2));
4554 Ops.push_back(N->getOperand(3));
4555 Ops.push_back(Chain);
4556 break;
4557 case NVPTXISD::Suld1DArrayV2I8Zero:
4558 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
4559 Ops.push_back(TexHandle);
4560 Ops.push_back(N->getOperand(2));
4561 Ops.push_back(N->getOperand(3));
4562 Ops.push_back(Chain);
4563 break;
4564 case NVPTXISD::Suld1DArrayV2I16Zero:
4565 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
4566 Ops.push_back(TexHandle);
4567 Ops.push_back(N->getOperand(2));
4568 Ops.push_back(N->getOperand(3));
4569 Ops.push_back(Chain);
4570 break;
4571 case NVPTXISD::Suld1DArrayV2I32Zero:
4572 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
4573 Ops.push_back(TexHandle);
4574 Ops.push_back(N->getOperand(2));
4575 Ops.push_back(N->getOperand(3));
4576 Ops.push_back(Chain);
4577 break;
4578 case NVPTXISD::Suld1DArrayV2I64Zero:
4579 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
4580 Ops.push_back(TexHandle);
4581 Ops.push_back(N->getOperand(2));
4582 Ops.push_back(N->getOperand(3));
4583 Ops.push_back(Chain);
4584 break;
4585 case NVPTXISD::Suld1DArrayV4I8Zero:
4586 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
4587 Ops.push_back(TexHandle);
4588 Ops.push_back(N->getOperand(2));
4589 Ops.push_back(N->getOperand(3));
4590 Ops.push_back(Chain);
4591 break;
4592 case NVPTXISD::Suld1DArrayV4I16Zero:
4593 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
4594 Ops.push_back(TexHandle);
4595 Ops.push_back(N->getOperand(2));
4596 Ops.push_back(N->getOperand(3));
4597 Ops.push_back(Chain);
4598 break;
4599 case NVPTXISD::Suld1DArrayV4I32Zero:
4600 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
4601 Ops.push_back(TexHandle);
4602 Ops.push_back(N->getOperand(2));
4603 Ops.push_back(N->getOperand(3));
4604 Ops.push_back(Chain);
4605 break;
4606 case NVPTXISD::Suld2DI8Zero:
4607 Opc = NVPTX::SULD_2D_I8_ZERO;
4608 Ops.push_back(TexHandle);
4609 Ops.push_back(N->getOperand(2));
4610 Ops.push_back(N->getOperand(3));
4611 Ops.push_back(Chain);
4612 break;
4613 case NVPTXISD::Suld2DI16Zero:
4614 Opc = NVPTX::SULD_2D_I16_ZERO;
4615 Ops.push_back(TexHandle);
4616 Ops.push_back(N->getOperand(2));
4617 Ops.push_back(N->getOperand(3));
4618 Ops.push_back(Chain);
4619 break;
4620 case NVPTXISD::Suld2DI32Zero:
4621 Opc = NVPTX::SULD_2D_I32_ZERO;
4622 Ops.push_back(TexHandle);
4623 Ops.push_back(N->getOperand(2));
4624 Ops.push_back(N->getOperand(3));
4625 Ops.push_back(Chain);
4626 break;
4627 case NVPTXISD::Suld2DI64Zero:
4628 Opc = NVPTX::SULD_2D_I64_ZERO;
4629 Ops.push_back(TexHandle);
4630 Ops.push_back(N->getOperand(2));
4631 Ops.push_back(N->getOperand(3));
4632 Ops.push_back(Chain);
4633 break;
4634 case NVPTXISD::Suld2DV2I8Zero:
4635 Opc = NVPTX::SULD_2D_V2I8_ZERO;
4636 Ops.push_back(TexHandle);
4637 Ops.push_back(N->getOperand(2));
4638 Ops.push_back(N->getOperand(3));
4639 Ops.push_back(Chain);
4640 break;
4641 case NVPTXISD::Suld2DV2I16Zero:
4642 Opc = NVPTX::SULD_2D_V2I16_ZERO;
4643 Ops.push_back(TexHandle);
4644 Ops.push_back(N->getOperand(2));
4645 Ops.push_back(N->getOperand(3));
4646 Ops.push_back(Chain);
4647 break;
4648 case NVPTXISD::Suld2DV2I32Zero:
4649 Opc = NVPTX::SULD_2D_V2I32_ZERO;
4650 Ops.push_back(TexHandle);
4651 Ops.push_back(N->getOperand(2));
4652 Ops.push_back(N->getOperand(3));
4653 Ops.push_back(Chain);
4654 break;
4655 case NVPTXISD::Suld2DV2I64Zero:
4656 Opc = NVPTX::SULD_2D_V2I64_ZERO;
4657 Ops.push_back(TexHandle);
4658 Ops.push_back(N->getOperand(2));
4659 Ops.push_back(N->getOperand(3));
4660 Ops.push_back(Chain);
4661 break;
4662 case NVPTXISD::Suld2DV4I8Zero:
4663 Opc = NVPTX::SULD_2D_V4I8_ZERO;
4664 Ops.push_back(TexHandle);
4665 Ops.push_back(N->getOperand(2));
4666 Ops.push_back(N->getOperand(3));
4667 Ops.push_back(Chain);
4668 break;
4669 case NVPTXISD::Suld2DV4I16Zero:
4670 Opc = NVPTX::SULD_2D_V4I16_ZERO;
4671 Ops.push_back(TexHandle);
4672 Ops.push_back(N->getOperand(2));
4673 Ops.push_back(N->getOperand(3));
4674 Ops.push_back(Chain);
4675 break;
4676 case NVPTXISD::Suld2DV4I32Zero:
4677 Opc = NVPTX::SULD_2D_V4I32_ZERO;
4678 Ops.push_back(TexHandle);
4679 Ops.push_back(N->getOperand(2));
4680 Ops.push_back(N->getOperand(3));
4681 Ops.push_back(Chain);
4682 break;
4683 case NVPTXISD::Suld2DArrayI8Zero:
4684 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
4685 Ops.push_back(TexHandle);
4686 Ops.push_back(N->getOperand(2));
4687 Ops.push_back(N->getOperand(3));
4688 Ops.push_back(N->getOperand(4));
4689 Ops.push_back(Chain);
4690 break;
4691 case NVPTXISD::Suld2DArrayI16Zero:
4692 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
4693 Ops.push_back(TexHandle);
4694 Ops.push_back(N->getOperand(2));
4695 Ops.push_back(N->getOperand(3));
4696 Ops.push_back(N->getOperand(4));
4697 Ops.push_back(Chain);
4698 break;
4699 case NVPTXISD::Suld2DArrayI32Zero:
4700 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
4701 Ops.push_back(TexHandle);
4702 Ops.push_back(N->getOperand(2));
4703 Ops.push_back(N->getOperand(3));
4704 Ops.push_back(N->getOperand(4));
4705 Ops.push_back(Chain);
4706 break;
4707 case NVPTXISD::Suld2DArrayI64Zero:
4708 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
4709 Ops.push_back(TexHandle);
4710 Ops.push_back(N->getOperand(2));
4711 Ops.push_back(N->getOperand(3));
4712 Ops.push_back(N->getOperand(4));
4713 Ops.push_back(Chain);
4714 break;
4715 case NVPTXISD::Suld2DArrayV2I8Zero:
4716 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
4717 Ops.push_back(TexHandle);
4718 Ops.push_back(N->getOperand(2));
4719 Ops.push_back(N->getOperand(3));
4720 Ops.push_back(N->getOperand(4));
4721 Ops.push_back(Chain);
4722 break;
4723 case NVPTXISD::Suld2DArrayV2I16Zero:
4724 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
4725 Ops.push_back(TexHandle);
4726 Ops.push_back(N->getOperand(2));
4727 Ops.push_back(N->getOperand(3));
4728 Ops.push_back(N->getOperand(4));
4729 Ops.push_back(Chain);
4730 break;
4731 case NVPTXISD::Suld2DArrayV2I32Zero:
4732 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
4733 Ops.push_back(TexHandle);
4734 Ops.push_back(N->getOperand(2));
4735 Ops.push_back(N->getOperand(3));
4736 Ops.push_back(N->getOperand(4));
4737 Ops.push_back(Chain);
4738 break;
4739 case NVPTXISD::Suld2DArrayV2I64Zero:
4740 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
4741 Ops.push_back(TexHandle);
4742 Ops.push_back(N->getOperand(2));
4743 Ops.push_back(N->getOperand(3));
4744 Ops.push_back(N->getOperand(4));
4745 Ops.push_back(Chain);
4746 break;
4747 case NVPTXISD::Suld2DArrayV4I8Zero:
4748 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
4749 Ops.push_back(TexHandle);
4750 Ops.push_back(N->getOperand(2));
4751 Ops.push_back(N->getOperand(3));
4752 Ops.push_back(N->getOperand(4));
4753 Ops.push_back(Chain);
4754 break;
4755 case NVPTXISD::Suld2DArrayV4I16Zero:
4756 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
4757 Ops.push_back(TexHandle);
4758 Ops.push_back(N->getOperand(2));
4759 Ops.push_back(N->getOperand(3));
4760 Ops.push_back(N->getOperand(4));
4761 Ops.push_back(Chain);
4762 break;
4763 case NVPTXISD::Suld2DArrayV4I32Zero:
4764 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
4765 Ops.push_back(TexHandle);
4766 Ops.push_back(N->getOperand(2));
4767 Ops.push_back(N->getOperand(3));
4768 Ops.push_back(N->getOperand(4));
4769 Ops.push_back(Chain);
4770 break;
4771 case NVPTXISD::Suld3DI8Zero:
4772 Opc = NVPTX::SULD_3D_I8_ZERO;
4773 Ops.push_back(TexHandle);
4774 Ops.push_back(N->getOperand(2));
4775 Ops.push_back(N->getOperand(3));
4776 Ops.push_back(N->getOperand(4));
4777 Ops.push_back(Chain);
4778 break;
4779 case NVPTXISD::Suld3DI16Zero:
4780 Opc = NVPTX::SULD_3D_I16_ZERO;
4781 Ops.push_back(TexHandle);
4782 Ops.push_back(N->getOperand(2));
4783 Ops.push_back(N->getOperand(3));
4784 Ops.push_back(N->getOperand(4));
4785 Ops.push_back(Chain);
4786 break;
4787 case NVPTXISD::Suld3DI32Zero:
4788 Opc = NVPTX::SULD_3D_I32_ZERO;
4789 Ops.push_back(TexHandle);
4790 Ops.push_back(N->getOperand(2));
4791 Ops.push_back(N->getOperand(3));
4792 Ops.push_back(N->getOperand(4));
4793 Ops.push_back(Chain);
4794 break;
4795 case NVPTXISD::Suld3DI64Zero:
4796 Opc = NVPTX::SULD_3D_I64_ZERO;
4797 Ops.push_back(TexHandle);
4798 Ops.push_back(N->getOperand(2));
4799 Ops.push_back(N->getOperand(3));
4800 Ops.push_back(N->getOperand(4));
4801 Ops.push_back(Chain);
4802 break;
4803 case NVPTXISD::Suld3DV2I8Zero:
4804 Opc = NVPTX::SULD_3D_V2I8_ZERO;
4805 Ops.push_back(TexHandle);
4806 Ops.push_back(N->getOperand(2));
4807 Ops.push_back(N->getOperand(3));
4808 Ops.push_back(N->getOperand(4));
4809 Ops.push_back(Chain);
4810 break;
4811 case NVPTXISD::Suld3DV2I16Zero:
4812 Opc = NVPTX::SULD_3D_V2I16_ZERO;
4813 Ops.push_back(TexHandle);
4814 Ops.push_back(N->getOperand(2));
4815 Ops.push_back(N->getOperand(3));
4816 Ops.push_back(N->getOperand(4));
4817 Ops.push_back(Chain);
4818 break;
4819 case NVPTXISD::Suld3DV2I32Zero:
4820 Opc = NVPTX::SULD_3D_V2I32_ZERO;
4821 Ops.push_back(TexHandle);
4822 Ops.push_back(N->getOperand(2));
4823 Ops.push_back(N->getOperand(3));
4824 Ops.push_back(N->getOperand(4));
4825 Ops.push_back(Chain);
4826 break;
4827 case NVPTXISD::Suld3DV2I64Zero:
4828 Opc = NVPTX::SULD_3D_V2I64_ZERO;
4829 Ops.push_back(TexHandle);
4830 Ops.push_back(N->getOperand(2));
4831 Ops.push_back(N->getOperand(3));
4832 Ops.push_back(N->getOperand(4));
4833 Ops.push_back(Chain);
4834 break;
4835 case NVPTXISD::Suld3DV4I8Zero:
4836 Opc = NVPTX::SULD_3D_V4I8_ZERO;
4837 Ops.push_back(TexHandle);
4838 Ops.push_back(N->getOperand(2));
4839 Ops.push_back(N->getOperand(3));
4840 Ops.push_back(N->getOperand(4));
4841 Ops.push_back(Chain);
4842 break;
4843 case NVPTXISD::Suld3DV4I16Zero:
4844 Opc = NVPTX::SULD_3D_V4I16_ZERO;
4845 Ops.push_back(TexHandle);
4846 Ops.push_back(N->getOperand(2));
4847 Ops.push_back(N->getOperand(3));
4848 Ops.push_back(N->getOperand(4));
4849 Ops.push_back(Chain);
4850 break;
4851 case NVPTXISD::Suld3DV4I32Zero:
4852 Opc = NVPTX::SULD_3D_V4I32_ZERO;
4853 Ops.push_back(TexHandle);
4854 Ops.push_back(N->getOperand(2));
4855 Ops.push_back(N->getOperand(3));
4856 Ops.push_back(N->getOperand(4));
4857 Ops.push_back(Chain);
4858 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00004859 }
4860 Ret = CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops);
4861 return Ret;
4862}
4863
Justin Holewinski9a2350e2014-07-17 11:59:04 +00004864
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004865/// SelectBFE - Look for instruction sequences that can be made more efficient
4866/// by using the 'bfe' (bit-field extract) PTX instruction
4867SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004868 SDLoc DL(N);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004869 SDValue LHS = N->getOperand(0);
4870 SDValue RHS = N->getOperand(1);
4871 SDValue Len;
4872 SDValue Start;
4873 SDValue Val;
4874 bool IsSigned = false;
4875
4876 if (N->getOpcode() == ISD::AND) {
4877 // Canonicalize the operands
4878 // We want 'and %val, %mask'
4879 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4880 std::swap(LHS, RHS);
4881 }
4882
4883 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4884 if (!Mask) {
4885 // We need a constant mask on the RHS of the AND
4886 return NULL;
4887 }
4888
4889 // Extract the mask bits
4890 uint64_t MaskVal = Mask->getZExtValue();
4891 if (!isMask_64(MaskVal)) {
4892 // We *could* handle shifted masks here, but doing so would require an
4893 // 'and' operation to fix up the low-order bits so we would trade
4894 // shr+and for bfe+and, which has the same throughput
4895 return NULL;
4896 }
4897
4898 // How many bits are in our mask?
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00004899 uint64_t NumBits = countTrailingOnes(MaskVal);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004900 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004901
4902 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4903 // We have a 'srl/and' pair, extract the effective start bit and length
4904 Val = LHS.getNode()->getOperand(0);
4905 Start = LHS.getNode()->getOperand(1);
4906 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4907 if (StartConst) {
4908 uint64_t StartVal = StartConst->getZExtValue();
4909 // How many "good" bits do we have left? "good" is defined here as bits
4910 // that exist in the original value, not shifted in.
4911 uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
4912 if (NumBits > GoodBits) {
4913 // Do not handle the case where bits have been shifted in. In theory
4914 // we could handle this, but the cost is likely higher than just
4915 // emitting the srl/and pair.
4916 return NULL;
4917 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004918 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004919 } else {
4920 // Do not handle the case where the shift amount (can be zero if no srl
4921 // was found) is not constant. We could handle this case, but it would
4922 // require run-time logic that would be more expensive than just
4923 // emitting the srl/and pair.
4924 return NULL;
4925 }
4926 } else {
4927 // Do not handle the case where the LHS of the and is not a shift. While
4928 // it would be trivial to handle this case, it would just transform
4929 // 'and' -> 'bfe', but 'and' has higher-throughput.
4930 return NULL;
4931 }
4932 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4933 if (LHS->getOpcode() == ISD::AND) {
4934 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4935 if (!ShiftCnst) {
4936 // Shift amount must be constant
4937 return NULL;
4938 }
4939
4940 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4941
4942 SDValue AndLHS = LHS->getOperand(0);
4943 SDValue AndRHS = LHS->getOperand(1);
4944
4945 // Canonicalize the AND to have the mask on the RHS
4946 if (isa<ConstantSDNode>(AndLHS)) {
4947 std::swap(AndLHS, AndRHS);
4948 }
4949
4950 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4951 if (!MaskCnst) {
4952 // Mask must be constant
4953 return NULL;
4954 }
4955
4956 uint64_t MaskVal = MaskCnst->getZExtValue();
4957 uint64_t NumZeros;
4958 uint64_t NumBits;
4959 if (isMask_64(MaskVal)) {
4960 NumZeros = 0;
4961 // The number of bits in the result bitfield will be the number of
4962 // trailing ones (the AND) minus the number of bits we shift off
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00004963 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004964 } else if (isShiftedMask_64(MaskVal)) {
4965 NumZeros = countTrailingZeros(MaskVal);
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00004966 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004967 // The number of bits in the result bitfield will be the number of
4968 // trailing zeros plus the number of set bits in the mask minus the
4969 // number of bits we shift off
4970 NumBits = NumZeros + NumOnes - ShiftAmt;
4971 } else {
4972 // This is not a mask we can handle
4973 return NULL;
4974 }
4975
4976 if (ShiftAmt < NumZeros) {
4977 // Handling this case would require extra logic that would make this
4978 // transformation non-profitable
4979 return NULL;
4980 }
4981
4982 Val = AndLHS;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004983 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
4984 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004985 } else if (LHS->getOpcode() == ISD::SHL) {
4986 // Here, we have a pattern like:
4987 //
4988 // (sra (shl val, NN), MM)
4989 // or
4990 // (srl (shl val, NN), MM)
4991 //
4992 // If MM >= NN, we can efficiently optimize this with bfe
4993 Val = LHS->getOperand(0);
4994
4995 SDValue ShlRHS = LHS->getOperand(1);
4996 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4997 if (!ShlCnst) {
4998 // Shift amount must be constant
4999 return NULL;
5000 }
5001 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
5002
5003 SDValue ShrRHS = RHS;
5004 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
5005 if (!ShrCnst) {
5006 // Shift amount must be constant
5007 return NULL;
5008 }
5009 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
5010
5011 // To avoid extra codegen and be profitable, we need Outer >= Inner
5012 if (OuterShiftAmt < InnerShiftAmt) {
5013 return NULL;
5014 }
5015
5016 // If the outer shift is more than the type size, we have no bitfield to
5017 // extract (since we also check that the inner shift is <= the outer shift
5018 // then this also implies that the inner shift is < the type size)
5019 if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
5020 return NULL;
5021 }
5022
5023 Start =
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005024 CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005025 Len =
5026 CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005027 OuterShiftAmt, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005028
5029 if (N->getOpcode() == ISD::SRA) {
5030 // If we have a arithmetic right shift, we need to use the signed bfe
5031 // variant
5032 IsSigned = true;
5033 }
5034 } else {
5035 // No can do...
5036 return NULL;
5037 }
5038 } else {
5039 // No can do...
5040 return NULL;
5041 }
5042
5043
5044 unsigned Opc;
5045 // For the BFE operations we form here from "and" and "srl", always use the
5046 // unsigned variants.
5047 if (Val.getValueType() == MVT::i32) {
5048 if (IsSigned) {
5049 Opc = NVPTX::BFE_S32rii;
5050 } else {
5051 Opc = NVPTX::BFE_U32rii;
5052 }
5053 } else if (Val.getValueType() == MVT::i64) {
5054 if (IsSigned) {
5055 Opc = NVPTX::BFE_S64rii;
5056 } else {
5057 Opc = NVPTX::BFE_U64rii;
5058 }
5059 } else {
5060 // We cannot handle this type
5061 return NULL;
5062 }
5063
5064 SDValue Ops[] = {
5065 Val, Start, Len
5066 };
5067
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005068 return CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00005069}
5070
Justin Holewinskiae556d32012-05-04 20:18:50 +00005071// SelectDirectAddr - Match a direct address for DAG.
5072// A direct address could be a globaladdress or externalsymbol.
5073bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
5074 // Return true if TGA or ES.
Justin Holewinski0497ab12013-03-30 14:29:21 +00005075 if (N.getOpcode() == ISD::TargetGlobalAddress ||
5076 N.getOpcode() == ISD::TargetExternalSymbol) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00005077 Address = N;
5078 return true;
5079 }
5080 if (N.getOpcode() == NVPTXISD::Wrapper) {
5081 Address = N.getOperand(0);
5082 return true;
5083 }
5084 if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
5085 unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
5086 if (IID == Intrinsic::nvvm_ptr_gen_to_param)
5087 if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
5088 return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
5089 }
5090 return false;
5091}
5092
5093// symbol+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00005094bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
5095 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00005096 if (Addr.getOpcode() == ISD::ADD) {
5097 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00005098 SDValue base = Addr.getOperand(0);
Justin Holewinskiae556d32012-05-04 20:18:50 +00005099 if (SelectDirectAddr(base, Base)) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005100 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5101 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00005102 return true;
5103 }
5104 }
5105 }
5106 return false;
5107}
5108
5109// symbol+offset
5110bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
5111 SDValue &Base, SDValue &Offset) {
5112 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
5113}
5114
5115// symbol+offset
5116bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
5117 SDValue &Base, SDValue &Offset) {
5118 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
5119}
5120
5121// register+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00005122bool NVPTXDAGToDAGISel::SelectADDRri_imp(
5123 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00005124 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
5125 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005126 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00005127 return true;
5128 }
5129 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
5130 Addr.getOpcode() == ISD::TargetGlobalAddress)
Justin Holewinski0497ab12013-03-30 14:29:21 +00005131 return false; // direct calls.
Justin Holewinskiae556d32012-05-04 20:18:50 +00005132
5133 if (Addr.getOpcode() == ISD::ADD) {
5134 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
5135 return false;
5136 }
5137 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
5138 if (FrameIndexSDNode *FIN =
Justin Holewinski0497ab12013-03-30 14:29:21 +00005139 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
Justin Holewinskiae556d32012-05-04 20:18:50 +00005140 // Constant offset from frame ref.
5141 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
5142 else
5143 Base = Addr.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005144 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
5145 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00005146 return true;
5147 }
5148 }
5149 return false;
5150}
5151
5152// register+offset
5153bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
5154 SDValue &Base, SDValue &Offset) {
5155 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
5156}
5157
5158// register+offset
5159bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
5160 SDValue &Base, SDValue &Offset) {
5161 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
5162}
5163
5164bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
5165 unsigned int spN) const {
Craig Topper062a2ba2014-04-25 05:30:21 +00005166 const Value *Src = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00005167 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +00005168 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
5169 return true;
5170 Src = mN->getMemOperand()->getValue();
Justin Holewinskiae556d32012-05-04 20:18:50 +00005171 }
5172 if (!Src)
5173 return false;
Craig Toppere3dcce92015-08-01 22:20:21 +00005174 if (auto *PT = dyn_cast<PointerType>(Src->getType()))
Justin Holewinskiae556d32012-05-04 20:18:50 +00005175 return (PT->getAddressSpace() == spN);
5176 return false;
5177}
5178
5179/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
5180/// inline asm expressions.
Justin Holewinski0497ab12013-03-30 14:29:21 +00005181bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
Daniel Sanders60f1db02015-03-13 12:45:09 +00005182 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00005183 SDValue Op0, Op1;
Daniel Sanders60f1db02015-03-13 12:45:09 +00005184 switch (ConstraintID) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00005185 default:
5186 return true;
Daniel Sanders60f1db02015-03-13 12:45:09 +00005187 case InlineAsm::Constraint_m: // memory
Justin Holewinskiae556d32012-05-04 20:18:50 +00005188 if (SelectDirectAddr(Op, Op0)) {
5189 OutOps.push_back(Op0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00005190 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
Justin Holewinskiae556d32012-05-04 20:18:50 +00005191 return false;
5192 }
5193 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
5194 OutOps.push_back(Op0);
5195 OutOps.push_back(Op1);
5196 return false;
5197 }
5198 break;
5199 }
5200 return true;
5201}