blob: 2749772540464d2634694422b900eea3359220da [file] [log] [blame]
Justin Holewinskiae556d32012-05-04 20:18:50 +00001//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the NVPTX target.
11//
12//===----------------------------------------------------------------------===//
13
Justin Holewinskiae556d32012-05-04 20:18:50 +000014#include "NVPTXISelDAGToDAG.h"
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +000015#include "NVPTXUtilities.h"
Jingyue Wu48a9bdc2015-07-20 21:28:54 +000016#include "llvm/Analysis/ValueTracking.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000017#include "llvm/IR/GlobalValue.h"
18#include "llvm/IR/Instructions.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000019#include "llvm/Support/CommandLine.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000020#include "llvm/Support/Debug.h"
21#include "llvm/Support/ErrorHandling.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000022#include "llvm/Support/raw_ostream.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000023#include "llvm/Target/TargetIntrinsicInfo.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000024
Justin Holewinskiae556d32012-05-04 20:18:50 +000025using namespace llvm;
26
Chandler Carruth84e68b22014-04-22 02:41:26 +000027#define DEBUG_TYPE "nvptx-isel"
28
Justin Holewinskiae556d32012-05-04 20:18:50 +000029/// createNVPTXISelDag - This pass converts a legalized DAG into a
30/// NVPTX-specific DAG, ready for instruction scheduling.
31FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
32 llvm::CodeGenOpt::Level OptLevel) {
33 return new NVPTXDAGToDAGISel(TM, OptLevel);
34}
35
Justin Holewinskiae556d32012-05-04 20:18:50 +000036NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
37 CodeGenOpt::Level OptLevel)
Eric Christopher02389e32015-02-19 00:08:27 +000038 : SelectionDAGISel(tm, OptLevel), TM(tm) {
Justin Holewinskiae556d32012-05-04 20:18:50 +000039 doMulWide = (OptLevel > 0);
Justin Holewinskicd069e62013-07-22 12:18:04 +000040}
Justin Holewinskiae556d32012-05-04 20:18:50 +000041
Eric Christopher147bba22015-01-30 01:40:59 +000042bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
Justin Lebar077f8fb2017-01-21 01:00:14 +000043 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
44 return SelectionDAGISel::runOnMachineFunction(MF);
Eric Christopher147bba22015-01-30 01:40:59 +000045}
46
Justin Holewinskicd069e62013-07-22 12:18:04 +000047int NVPTXDAGToDAGISel::getDivF32Level() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000048 return Subtarget->getTargetLowering()->getDivF32Level();
Justin Holewinskicd069e62013-07-22 12:18:04 +000049}
Justin Holewinskiae556d32012-05-04 20:18:50 +000050
Justin Holewinskicd069e62013-07-22 12:18:04 +000051bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000052 return Subtarget->getTargetLowering()->usePrecSqrtF32();
Justin Holewinskicd069e62013-07-22 12:18:04 +000053}
54
55bool NVPTXDAGToDAGISel::useF32FTZ() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000056 return Subtarget->getTargetLowering()->useF32FTZ(*MF);
Justin Holewinskiae556d32012-05-04 20:18:50 +000057}
58
Justin Holewinski428cf0e2014-07-17 18:10:09 +000059bool NVPTXDAGToDAGISel::allowFMA() const {
Eric Christopher147bba22015-01-30 01:40:59 +000060 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
Justin Holewinski428cf0e2014-07-17 18:10:09 +000061 return TL->allowFMA(*MF, OptLevel);
62}
63
Artem Belevichd109f462017-01-13 18:48:13 +000064bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
65 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
66 return TL->allowUnsafeFPMath(*MF);
67}
68
Justin Holewinskiae556d32012-05-04 20:18:50 +000069/// Select - Select instructions not customized! Used for
70/// expanded, promoted and normal instructions.
Justin Bogner8d83fb62016-05-13 21:12:53 +000071void NVPTXDAGToDAGISel::Select(SDNode *N) {
Justin Holewinskiae556d32012-05-04 20:18:50 +000072
Tim Northover31d093c2013-09-22 08:21:56 +000073 if (N->isMachineOpcode()) {
74 N->setNodeId(-1);
Justin Bogner8d83fb62016-05-13 21:12:53 +000075 return; // Already selected.
Tim Northover31d093c2013-09-22 08:21:56 +000076 }
Justin Holewinskiae556d32012-05-04 20:18:50 +000077
Justin Holewinskiae556d32012-05-04 20:18:50 +000078 switch (N->getOpcode()) {
79 case ISD::LOAD:
Justin Bogner8d83fb62016-05-13 21:12:53 +000080 if (tryLoad(N))
81 return;
Justin Holewinskiae556d32012-05-04 20:18:50 +000082 break;
83 case ISD::STORE:
Justin Bogner8d83fb62016-05-13 21:12:53 +000084 if (tryStore(N))
85 return;
Justin Holewinskiae556d32012-05-04 20:18:50 +000086 break;
Artem Belevich620db1f2017-02-23 22:38:24 +000087 case ISD::EXTRACT_VECTOR_ELT:
88 if (tryEXTRACT_VECTOR_ELEMENT(N))
89 return;
90 break;
91 case NVPTXISD::SETP_F16X2:
92 SelectSETP_F16X2(N);
93 return;
94
Justin Holewinskibe8dc642013-02-12 14:18:49 +000095 case NVPTXISD::LoadV2:
96 case NVPTXISD::LoadV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +000097 if (tryLoadVector(N))
98 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +000099 break;
100 case NVPTXISD::LDGV2:
101 case NVPTXISD::LDGV4:
102 case NVPTXISD::LDUV2:
103 case NVPTXISD::LDUV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000104 if (tryLDGLDU(N))
105 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000106 break;
107 case NVPTXISD::StoreV2:
108 case NVPTXISD::StoreV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000109 if (tryStoreVector(N))
110 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000111 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000112 case NVPTXISD::LoadParam:
113 case NVPTXISD::LoadParamV2:
114 case NVPTXISD::LoadParamV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000115 if (tryLoadParam(N))
116 return;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000117 break;
118 case NVPTXISD::StoreRetval:
119 case NVPTXISD::StoreRetvalV2:
120 case NVPTXISD::StoreRetvalV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000121 if (tryStoreRetval(N))
122 return;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000123 break;
124 case NVPTXISD::StoreParam:
125 case NVPTXISD::StoreParamV2:
126 case NVPTXISD::StoreParamV4:
127 case NVPTXISD::StoreParamS32:
128 case NVPTXISD::StoreParamU32:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000129 if (tryStoreParam(N))
130 return;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000131 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000132 case ISD::INTRINSIC_WO_CHAIN:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000133 if (tryIntrinsicNoChain(N))
134 return;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000135 break;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000136 case ISD::INTRINSIC_W_CHAIN:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000137 if (tryIntrinsicChain(N))
138 return;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000139 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000140 case NVPTXISD::Tex1DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000141 case NVPTXISD::Tex1DFloatFloat:
142 case NVPTXISD::Tex1DFloatFloatLevel:
143 case NVPTXISD::Tex1DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000144 case NVPTXISD::Tex1DS32S32:
145 case NVPTXISD::Tex1DS32Float:
146 case NVPTXISD::Tex1DS32FloatLevel:
147 case NVPTXISD::Tex1DS32FloatGrad:
148 case NVPTXISD::Tex1DU32S32:
149 case NVPTXISD::Tex1DU32Float:
150 case NVPTXISD::Tex1DU32FloatLevel:
151 case NVPTXISD::Tex1DU32FloatGrad:
152 case NVPTXISD::Tex1DArrayFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000153 case NVPTXISD::Tex1DArrayFloatFloat:
154 case NVPTXISD::Tex1DArrayFloatFloatLevel:
155 case NVPTXISD::Tex1DArrayFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000156 case NVPTXISD::Tex1DArrayS32S32:
157 case NVPTXISD::Tex1DArrayS32Float:
158 case NVPTXISD::Tex1DArrayS32FloatLevel:
159 case NVPTXISD::Tex1DArrayS32FloatGrad:
160 case NVPTXISD::Tex1DArrayU32S32:
161 case NVPTXISD::Tex1DArrayU32Float:
162 case NVPTXISD::Tex1DArrayU32FloatLevel:
163 case NVPTXISD::Tex1DArrayU32FloatGrad:
164 case NVPTXISD::Tex2DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000165 case NVPTXISD::Tex2DFloatFloat:
166 case NVPTXISD::Tex2DFloatFloatLevel:
167 case NVPTXISD::Tex2DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000168 case NVPTXISD::Tex2DS32S32:
169 case NVPTXISD::Tex2DS32Float:
170 case NVPTXISD::Tex2DS32FloatLevel:
171 case NVPTXISD::Tex2DS32FloatGrad:
172 case NVPTXISD::Tex2DU32S32:
173 case NVPTXISD::Tex2DU32Float:
174 case NVPTXISD::Tex2DU32FloatLevel:
175 case NVPTXISD::Tex2DU32FloatGrad:
176 case NVPTXISD::Tex2DArrayFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000177 case NVPTXISD::Tex2DArrayFloatFloat:
178 case NVPTXISD::Tex2DArrayFloatFloatLevel:
179 case NVPTXISD::Tex2DArrayFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000180 case NVPTXISD::Tex2DArrayS32S32:
181 case NVPTXISD::Tex2DArrayS32Float:
182 case NVPTXISD::Tex2DArrayS32FloatLevel:
183 case NVPTXISD::Tex2DArrayS32FloatGrad:
184 case NVPTXISD::Tex2DArrayU32S32:
185 case NVPTXISD::Tex2DArrayU32Float:
186 case NVPTXISD::Tex2DArrayU32FloatLevel:
187 case NVPTXISD::Tex2DArrayU32FloatGrad:
188 case NVPTXISD::Tex3DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000189 case NVPTXISD::Tex3DFloatFloat:
190 case NVPTXISD::Tex3DFloatFloatLevel:
191 case NVPTXISD::Tex3DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000192 case NVPTXISD::Tex3DS32S32:
193 case NVPTXISD::Tex3DS32Float:
194 case NVPTXISD::Tex3DS32FloatLevel:
195 case NVPTXISD::Tex3DS32FloatGrad:
196 case NVPTXISD::Tex3DU32S32:
197 case NVPTXISD::Tex3DU32Float:
198 case NVPTXISD::Tex3DU32FloatLevel:
199 case NVPTXISD::Tex3DU32FloatGrad:
200 case NVPTXISD::TexCubeFloatFloat:
201 case NVPTXISD::TexCubeFloatFloatLevel:
202 case NVPTXISD::TexCubeS32Float:
203 case NVPTXISD::TexCubeS32FloatLevel:
204 case NVPTXISD::TexCubeU32Float:
205 case NVPTXISD::TexCubeU32FloatLevel:
206 case NVPTXISD::TexCubeArrayFloatFloat:
207 case NVPTXISD::TexCubeArrayFloatFloatLevel:
208 case NVPTXISD::TexCubeArrayS32Float:
209 case NVPTXISD::TexCubeArrayS32FloatLevel:
210 case NVPTXISD::TexCubeArrayU32Float:
211 case NVPTXISD::TexCubeArrayU32FloatLevel:
212 case NVPTXISD::Tld4R2DFloatFloat:
213 case NVPTXISD::Tld4G2DFloatFloat:
214 case NVPTXISD::Tld4B2DFloatFloat:
215 case NVPTXISD::Tld4A2DFloatFloat:
216 case NVPTXISD::Tld4R2DS64Float:
217 case NVPTXISD::Tld4G2DS64Float:
218 case NVPTXISD::Tld4B2DS64Float:
219 case NVPTXISD::Tld4A2DS64Float:
220 case NVPTXISD::Tld4R2DU64Float:
221 case NVPTXISD::Tld4G2DU64Float:
222 case NVPTXISD::Tld4B2DU64Float:
223 case NVPTXISD::Tld4A2DU64Float:
224 case NVPTXISD::TexUnified1DFloatS32:
225 case NVPTXISD::TexUnified1DFloatFloat:
226 case NVPTXISD::TexUnified1DFloatFloatLevel:
227 case NVPTXISD::TexUnified1DFloatFloatGrad:
228 case NVPTXISD::TexUnified1DS32S32:
229 case NVPTXISD::TexUnified1DS32Float:
230 case NVPTXISD::TexUnified1DS32FloatLevel:
231 case NVPTXISD::TexUnified1DS32FloatGrad:
232 case NVPTXISD::TexUnified1DU32S32:
233 case NVPTXISD::TexUnified1DU32Float:
234 case NVPTXISD::TexUnified1DU32FloatLevel:
235 case NVPTXISD::TexUnified1DU32FloatGrad:
236 case NVPTXISD::TexUnified1DArrayFloatS32:
237 case NVPTXISD::TexUnified1DArrayFloatFloat:
238 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
239 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
240 case NVPTXISD::TexUnified1DArrayS32S32:
241 case NVPTXISD::TexUnified1DArrayS32Float:
242 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
243 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
244 case NVPTXISD::TexUnified1DArrayU32S32:
245 case NVPTXISD::TexUnified1DArrayU32Float:
246 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
247 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
248 case NVPTXISD::TexUnified2DFloatS32:
249 case NVPTXISD::TexUnified2DFloatFloat:
250 case NVPTXISD::TexUnified2DFloatFloatLevel:
251 case NVPTXISD::TexUnified2DFloatFloatGrad:
252 case NVPTXISD::TexUnified2DS32S32:
253 case NVPTXISD::TexUnified2DS32Float:
254 case NVPTXISD::TexUnified2DS32FloatLevel:
255 case NVPTXISD::TexUnified2DS32FloatGrad:
256 case NVPTXISD::TexUnified2DU32S32:
257 case NVPTXISD::TexUnified2DU32Float:
258 case NVPTXISD::TexUnified2DU32FloatLevel:
259 case NVPTXISD::TexUnified2DU32FloatGrad:
260 case NVPTXISD::TexUnified2DArrayFloatS32:
261 case NVPTXISD::TexUnified2DArrayFloatFloat:
262 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
263 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
264 case NVPTXISD::TexUnified2DArrayS32S32:
265 case NVPTXISD::TexUnified2DArrayS32Float:
266 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
267 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
268 case NVPTXISD::TexUnified2DArrayU32S32:
269 case NVPTXISD::TexUnified2DArrayU32Float:
270 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
271 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
272 case NVPTXISD::TexUnified3DFloatS32:
273 case NVPTXISD::TexUnified3DFloatFloat:
274 case NVPTXISD::TexUnified3DFloatFloatLevel:
275 case NVPTXISD::TexUnified3DFloatFloatGrad:
276 case NVPTXISD::TexUnified3DS32S32:
277 case NVPTXISD::TexUnified3DS32Float:
278 case NVPTXISD::TexUnified3DS32FloatLevel:
279 case NVPTXISD::TexUnified3DS32FloatGrad:
280 case NVPTXISD::TexUnified3DU32S32:
281 case NVPTXISD::TexUnified3DU32Float:
282 case NVPTXISD::TexUnified3DU32FloatLevel:
283 case NVPTXISD::TexUnified3DU32FloatGrad:
284 case NVPTXISD::TexUnifiedCubeFloatFloat:
285 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
286 case NVPTXISD::TexUnifiedCubeS32Float:
287 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
288 case NVPTXISD::TexUnifiedCubeU32Float:
289 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
290 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
291 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
292 case NVPTXISD::TexUnifiedCubeArrayS32Float:
293 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
294 case NVPTXISD::TexUnifiedCubeArrayU32Float:
295 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
296 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
297 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
298 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
299 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
300 case NVPTXISD::Tld4UnifiedR2DS64Float:
301 case NVPTXISD::Tld4UnifiedG2DS64Float:
302 case NVPTXISD::Tld4UnifiedB2DS64Float:
303 case NVPTXISD::Tld4UnifiedA2DS64Float:
304 case NVPTXISD::Tld4UnifiedR2DU64Float:
305 case NVPTXISD::Tld4UnifiedG2DU64Float:
306 case NVPTXISD::Tld4UnifiedB2DU64Float:
307 case NVPTXISD::Tld4UnifiedA2DU64Float:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000308 if (tryTextureIntrinsic(N))
309 return;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000310 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000311 case NVPTXISD::Suld1DI8Clamp:
312 case NVPTXISD::Suld1DI16Clamp:
313 case NVPTXISD::Suld1DI32Clamp:
314 case NVPTXISD::Suld1DI64Clamp:
315 case NVPTXISD::Suld1DV2I8Clamp:
316 case NVPTXISD::Suld1DV2I16Clamp:
317 case NVPTXISD::Suld1DV2I32Clamp:
318 case NVPTXISD::Suld1DV2I64Clamp:
319 case NVPTXISD::Suld1DV4I8Clamp:
320 case NVPTXISD::Suld1DV4I16Clamp:
321 case NVPTXISD::Suld1DV4I32Clamp:
322 case NVPTXISD::Suld1DArrayI8Clamp:
323 case NVPTXISD::Suld1DArrayI16Clamp:
324 case NVPTXISD::Suld1DArrayI32Clamp:
325 case NVPTXISD::Suld1DArrayI64Clamp:
326 case NVPTXISD::Suld1DArrayV2I8Clamp:
327 case NVPTXISD::Suld1DArrayV2I16Clamp:
328 case NVPTXISD::Suld1DArrayV2I32Clamp:
329 case NVPTXISD::Suld1DArrayV2I64Clamp:
330 case NVPTXISD::Suld1DArrayV4I8Clamp:
331 case NVPTXISD::Suld1DArrayV4I16Clamp:
332 case NVPTXISD::Suld1DArrayV4I32Clamp:
333 case NVPTXISD::Suld2DI8Clamp:
334 case NVPTXISD::Suld2DI16Clamp:
335 case NVPTXISD::Suld2DI32Clamp:
336 case NVPTXISD::Suld2DI64Clamp:
337 case NVPTXISD::Suld2DV2I8Clamp:
338 case NVPTXISD::Suld2DV2I16Clamp:
339 case NVPTXISD::Suld2DV2I32Clamp:
340 case NVPTXISD::Suld2DV2I64Clamp:
341 case NVPTXISD::Suld2DV4I8Clamp:
342 case NVPTXISD::Suld2DV4I16Clamp:
343 case NVPTXISD::Suld2DV4I32Clamp:
344 case NVPTXISD::Suld2DArrayI8Clamp:
345 case NVPTXISD::Suld2DArrayI16Clamp:
346 case NVPTXISD::Suld2DArrayI32Clamp:
347 case NVPTXISD::Suld2DArrayI64Clamp:
348 case NVPTXISD::Suld2DArrayV2I8Clamp:
349 case NVPTXISD::Suld2DArrayV2I16Clamp:
350 case NVPTXISD::Suld2DArrayV2I32Clamp:
351 case NVPTXISD::Suld2DArrayV2I64Clamp:
352 case NVPTXISD::Suld2DArrayV4I8Clamp:
353 case NVPTXISD::Suld2DArrayV4I16Clamp:
354 case NVPTXISD::Suld2DArrayV4I32Clamp:
355 case NVPTXISD::Suld3DI8Clamp:
356 case NVPTXISD::Suld3DI16Clamp:
357 case NVPTXISD::Suld3DI32Clamp:
358 case NVPTXISD::Suld3DI64Clamp:
359 case NVPTXISD::Suld3DV2I8Clamp:
360 case NVPTXISD::Suld3DV2I16Clamp:
361 case NVPTXISD::Suld3DV2I32Clamp:
362 case NVPTXISD::Suld3DV2I64Clamp:
363 case NVPTXISD::Suld3DV4I8Clamp:
364 case NVPTXISD::Suld3DV4I16Clamp:
365 case NVPTXISD::Suld3DV4I32Clamp:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000366 case NVPTXISD::Suld1DI8Trap:
367 case NVPTXISD::Suld1DI16Trap:
368 case NVPTXISD::Suld1DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000369 case NVPTXISD::Suld1DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000370 case NVPTXISD::Suld1DV2I8Trap:
371 case NVPTXISD::Suld1DV2I16Trap:
372 case NVPTXISD::Suld1DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000373 case NVPTXISD::Suld1DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000374 case NVPTXISD::Suld1DV4I8Trap:
375 case NVPTXISD::Suld1DV4I16Trap:
376 case NVPTXISD::Suld1DV4I32Trap:
377 case NVPTXISD::Suld1DArrayI8Trap:
378 case NVPTXISD::Suld1DArrayI16Trap:
379 case NVPTXISD::Suld1DArrayI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000380 case NVPTXISD::Suld1DArrayI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000381 case NVPTXISD::Suld1DArrayV2I8Trap:
382 case NVPTXISD::Suld1DArrayV2I16Trap:
383 case NVPTXISD::Suld1DArrayV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000384 case NVPTXISD::Suld1DArrayV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000385 case NVPTXISD::Suld1DArrayV4I8Trap:
386 case NVPTXISD::Suld1DArrayV4I16Trap:
387 case NVPTXISD::Suld1DArrayV4I32Trap:
388 case NVPTXISD::Suld2DI8Trap:
389 case NVPTXISD::Suld2DI16Trap:
390 case NVPTXISD::Suld2DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000391 case NVPTXISD::Suld2DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000392 case NVPTXISD::Suld2DV2I8Trap:
393 case NVPTXISD::Suld2DV2I16Trap:
394 case NVPTXISD::Suld2DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000395 case NVPTXISD::Suld2DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000396 case NVPTXISD::Suld2DV4I8Trap:
397 case NVPTXISD::Suld2DV4I16Trap:
398 case NVPTXISD::Suld2DV4I32Trap:
399 case NVPTXISD::Suld2DArrayI8Trap:
400 case NVPTXISD::Suld2DArrayI16Trap:
401 case NVPTXISD::Suld2DArrayI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000402 case NVPTXISD::Suld2DArrayI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000403 case NVPTXISD::Suld2DArrayV2I8Trap:
404 case NVPTXISD::Suld2DArrayV2I16Trap:
405 case NVPTXISD::Suld2DArrayV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000406 case NVPTXISD::Suld2DArrayV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000407 case NVPTXISD::Suld2DArrayV4I8Trap:
408 case NVPTXISD::Suld2DArrayV4I16Trap:
409 case NVPTXISD::Suld2DArrayV4I32Trap:
410 case NVPTXISD::Suld3DI8Trap:
411 case NVPTXISD::Suld3DI16Trap:
412 case NVPTXISD::Suld3DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000413 case NVPTXISD::Suld3DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000414 case NVPTXISD::Suld3DV2I8Trap:
415 case NVPTXISD::Suld3DV2I16Trap:
416 case NVPTXISD::Suld3DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000417 case NVPTXISD::Suld3DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000418 case NVPTXISD::Suld3DV4I8Trap:
419 case NVPTXISD::Suld3DV4I16Trap:
420 case NVPTXISD::Suld3DV4I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000421 case NVPTXISD::Suld1DI8Zero:
422 case NVPTXISD::Suld1DI16Zero:
423 case NVPTXISD::Suld1DI32Zero:
424 case NVPTXISD::Suld1DI64Zero:
425 case NVPTXISD::Suld1DV2I8Zero:
426 case NVPTXISD::Suld1DV2I16Zero:
427 case NVPTXISD::Suld1DV2I32Zero:
428 case NVPTXISD::Suld1DV2I64Zero:
429 case NVPTXISD::Suld1DV4I8Zero:
430 case NVPTXISD::Suld1DV4I16Zero:
431 case NVPTXISD::Suld1DV4I32Zero:
432 case NVPTXISD::Suld1DArrayI8Zero:
433 case NVPTXISD::Suld1DArrayI16Zero:
434 case NVPTXISD::Suld1DArrayI32Zero:
435 case NVPTXISD::Suld1DArrayI64Zero:
436 case NVPTXISD::Suld1DArrayV2I8Zero:
437 case NVPTXISD::Suld1DArrayV2I16Zero:
438 case NVPTXISD::Suld1DArrayV2I32Zero:
439 case NVPTXISD::Suld1DArrayV2I64Zero:
440 case NVPTXISD::Suld1DArrayV4I8Zero:
441 case NVPTXISD::Suld1DArrayV4I16Zero:
442 case NVPTXISD::Suld1DArrayV4I32Zero:
443 case NVPTXISD::Suld2DI8Zero:
444 case NVPTXISD::Suld2DI16Zero:
445 case NVPTXISD::Suld2DI32Zero:
446 case NVPTXISD::Suld2DI64Zero:
447 case NVPTXISD::Suld2DV2I8Zero:
448 case NVPTXISD::Suld2DV2I16Zero:
449 case NVPTXISD::Suld2DV2I32Zero:
450 case NVPTXISD::Suld2DV2I64Zero:
451 case NVPTXISD::Suld2DV4I8Zero:
452 case NVPTXISD::Suld2DV4I16Zero:
453 case NVPTXISD::Suld2DV4I32Zero:
454 case NVPTXISD::Suld2DArrayI8Zero:
455 case NVPTXISD::Suld2DArrayI16Zero:
456 case NVPTXISD::Suld2DArrayI32Zero:
457 case NVPTXISD::Suld2DArrayI64Zero:
458 case NVPTXISD::Suld2DArrayV2I8Zero:
459 case NVPTXISD::Suld2DArrayV2I16Zero:
460 case NVPTXISD::Suld2DArrayV2I32Zero:
461 case NVPTXISD::Suld2DArrayV2I64Zero:
462 case NVPTXISD::Suld2DArrayV4I8Zero:
463 case NVPTXISD::Suld2DArrayV4I16Zero:
464 case NVPTXISD::Suld2DArrayV4I32Zero:
465 case NVPTXISD::Suld3DI8Zero:
466 case NVPTXISD::Suld3DI16Zero:
467 case NVPTXISD::Suld3DI32Zero:
468 case NVPTXISD::Suld3DI64Zero:
469 case NVPTXISD::Suld3DV2I8Zero:
470 case NVPTXISD::Suld3DV2I16Zero:
471 case NVPTXISD::Suld3DV2I32Zero:
472 case NVPTXISD::Suld3DV2I64Zero:
473 case NVPTXISD::Suld3DV4I8Zero:
474 case NVPTXISD::Suld3DV4I16Zero:
475 case NVPTXISD::Suld3DV4I32Zero:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000476 if (trySurfaceIntrinsic(N))
477 return;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000478 break;
Justin Holewinskica7a4f12014-06-27 18:35:27 +0000479 case ISD::AND:
480 case ISD::SRA:
481 case ISD::SRL:
482 // Try to select BFE
Justin Bogner8d83fb62016-05-13 21:12:53 +0000483 if (tryBFE(N))
484 return;
Justin Holewinskica7a4f12014-06-27 18:35:27 +0000485 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000486 case ISD::ADDRSPACECAST:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000487 SelectAddrSpaceCast(N);
488 return;
Artem Belevich64dc9be2017-01-13 20:56:17 +0000489 case ISD::ConstantFP:
490 if (tryConstantFP16(N))
491 return;
492 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000493 default:
494 break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000495 }
Justin Bogner8d83fb62016-05-13 21:12:53 +0000496 SelectCode(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000497}
498
Justin Bogner8d83fb62016-05-13 21:12:53 +0000499bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000500 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
501 switch (IID) {
502 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000503 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000504 case Intrinsic::nvvm_ldg_global_f:
505 case Intrinsic::nvvm_ldg_global_i:
506 case Intrinsic::nvvm_ldg_global_p:
507 case Intrinsic::nvvm_ldu_global_f:
508 case Intrinsic::nvvm_ldu_global_i:
509 case Intrinsic::nvvm_ldu_global_p:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000510 return tryLDGLDU(N);
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000511 }
512}
513
Artem Belevich64dc9be2017-01-13 20:56:17 +0000514// There's no way to specify FP16 immediates in .f16 ops, so we have to
515// load them into an .f16 register first.
516bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
517 if (N->getValueType(0) != MVT::f16)
518 return false;
519 SDValue Val = CurDAG->getTargetConstantFP(
520 cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
521 SDNode *LoadConstF16 =
522 CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
523 ReplaceNode(N, LoadConstF16);
524 return true;
525}
526
Artem Belevich620db1f2017-02-23 22:38:24 +0000527// Map ISD:CONDCODE value to appropriate CmpMode expected by
528// NVPTXInstPrinter::printCmpMode()
529static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
530 using NVPTX::PTXCmpMode::CmpMode;
531 unsigned PTXCmpMode = [](ISD::CondCode CC) {
532 switch (CC) {
533 default:
534 llvm_unreachable("Unexpected condition code.");
535 case ISD::SETOEQ:
536 return CmpMode::EQ;
537 case ISD::SETOGT:
538 return CmpMode::GT;
539 case ISD::SETOGE:
540 return CmpMode::GE;
541 case ISD::SETOLT:
542 return CmpMode::LT;
543 case ISD::SETOLE:
544 return CmpMode::LE;
545 case ISD::SETONE:
546 return CmpMode::NE;
547 case ISD::SETO:
548 return CmpMode::NUM;
549 case ISD::SETUO:
550 return CmpMode::NotANumber;
551 case ISD::SETUEQ:
552 return CmpMode::EQU;
553 case ISD::SETUGT:
554 return CmpMode::GTU;
555 case ISD::SETUGE:
556 return CmpMode::GEU;
557 case ISD::SETULT:
558 return CmpMode::LTU;
559 case ISD::SETULE:
560 return CmpMode::LEU;
561 case ISD::SETUNE:
562 return CmpMode::NEU;
563 case ISD::SETEQ:
564 return CmpMode::EQ;
565 case ISD::SETGT:
566 return CmpMode::GT;
567 case ISD::SETGE:
568 return CmpMode::GE;
569 case ISD::SETLT:
570 return CmpMode::LT;
571 case ISD::SETLE:
572 return CmpMode::LE;
573 case ISD::SETNE:
574 return CmpMode::NE;
575 }
576 }(CondCode.get());
577
578 if (FTZ)
579 PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
580
581 return PTXCmpMode;
582}
583
584bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
585 unsigned PTXCmpMode =
586 getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
587 SDLoc DL(N);
588 SDNode *SetP = CurDAG->getMachineNode(
589 NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
590 N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
591 ReplaceNode(N, SetP);
592 return true;
593}
594
595// Find all instances of extract_vector_elt that use this v2f16 vector
596// and coalesce them into a scattering move instruction.
597bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
598 SDValue Vector = N->getOperand(0);
599
600 // We only care about f16x2 as it's the only real vector type we
601 // need to deal with.
602 if (Vector.getSimpleValueType() != MVT::v2f16)
603 return false;
604
605 // Find and record all uses of this vector that extract element 0 or 1.
606 SmallVector<SDNode *, 4> E0, E1;
607 for (const auto &U : Vector.getNode()->uses()) {
608 if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
609 continue;
610 if (U->getOperand(0) != Vector)
611 continue;
612 if (const ConstantSDNode *IdxConst =
613 dyn_cast<ConstantSDNode>(U->getOperand(1))) {
614 if (IdxConst->getZExtValue() == 0)
615 E0.push_back(U);
616 else if (IdxConst->getZExtValue() == 1)
617 E1.push_back(U);
618 else
619 llvm_unreachable("Invalid vector index.");
620 }
621 }
622
623 // There's no point scattering f16x2 if we only ever access one
624 // element of it.
625 if (E0.empty() || E1.empty())
626 return false;
627
628 unsigned Op = NVPTX::SplitF16x2;
629 // If the vector has been BITCAST'ed from i32, we can use original
630 // value directly and avoid register-to-register move.
631 SDValue Source = Vector;
632 if (Vector->getOpcode() == ISD::BITCAST) {
633 Op = NVPTX::SplitI32toF16x2;
634 Source = Vector->getOperand(0);
635 }
636 // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
637 // into f16,f16 SplitF16x2(V)
638 SDNode *ScatterOp =
639 CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
640 for (auto *Node : E0)
641 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
642 for (auto *Node : E1)
643 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
644
645 return true;
646}
647
Eric Christopher9745b3a2015-01-30 01:41:01 +0000648static unsigned int getCodeAddrSpace(MemSDNode *N) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +0000649 const Value *Src = N->getMemOperand()->getValue();
Justin Holewinskib96d1392013-06-10 13:29:47 +0000650
Justin Holewinskiae556d32012-05-04 20:18:50 +0000651 if (!Src)
Justin Holewinskib96d1392013-06-10 13:29:47 +0000652 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000653
Craig Toppere3dcce92015-08-01 22:20:21 +0000654 if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000655 switch (PT->getAddressSpace()) {
Justin Holewinskib96d1392013-06-10 13:29:47 +0000656 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
657 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
658 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
659 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
660 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
661 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
662 default: break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000663 }
664 }
Justin Holewinskib96d1392013-06-10 13:29:47 +0000665 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000666}
667
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000668static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000669 unsigned CodeAddrSpace, MachineFunction *F) {
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000670 // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
671 // space.
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000672 //
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000673 // We have two ways of identifying invariant loads: Loads may be explicitly
674 // marked as invariant, or we may infer them to be invariant.
675 //
676 // We currently infer invariance only for kernel function pointer params that
677 // are noalias (i.e. __restrict) and never written to.
678 //
679 // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
680 // not during the SelectionDAG phase).
681 //
682 // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
683 // explicitly invariant loads because these are how clang tells us to use ldg
684 // when the user uses a builtin.
685 if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000686 return false;
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000687
688 if (N->isInvariant())
689 return true;
690
691 // Load wasn't explicitly invariant. Attempt to infer invariance.
692 if (!isKernelFunction(*F->getFunction()))
693 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000694
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000695 // We use GetUnderlyingObjects() here instead of
696 // GetUnderlyingObject() mainly because the former looks through phi
697 // nodes while the latter does not. We need to look through phi
698 // nodes to handle pointer induction variables.
699 SmallVector<Value *, 8> Objs;
700 GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
701 Objs, F->getDataLayout());
702 for (Value *Obj : Objs) {
703 auto *A = dyn_cast<const Argument>(Obj);
704 if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
705 }
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000706
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000707 return true;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000708}
709
Justin Bogner8d83fb62016-05-13 21:12:53 +0000710bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +0000711 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
712 switch (IID) {
713 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000714 return false;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000715 case Intrinsic::nvvm_texsurf_handle_internal:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000716 SelectTexSurfHandle(N);
717 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000718 }
719}
720
Justin Bogner8d83fb62016-05-13 21:12:53 +0000721void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +0000722 // Op 0 is the intrinsic ID
723 SDValue Wrapper = N->getOperand(1);
724 SDValue GlobalVal = Wrapper.getOperand(0);
Justin Bogner8d83fb62016-05-13 21:12:53 +0000725 ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
726 MVT::i64, GlobalVal));
Justin Holewinski30d56a72014-04-09 15:39:15 +0000727}
728
Justin Bogner8d83fb62016-05-13 21:12:53 +0000729void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000730 SDValue Src = N->getOperand(0);
731 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
732 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
733 unsigned DstAddrSpace = CastN->getDestAddressSpace();
734
735 assert(SrcAddrSpace != DstAddrSpace &&
736 "addrspacecast must be between different address spaces");
737
738 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
739 // Specific to generic
740 unsigned Opc;
741 switch (SrcAddrSpace) {
742 default: report_fatal_error("Bad address space in addrspacecast");
743 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000744 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000745 break;
746 case ADDRESS_SPACE_SHARED:
Eric Christopher02389e32015-02-19 00:08:27 +0000747 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000748 break;
749 case ADDRESS_SPACE_CONST:
Eric Christopher02389e32015-02-19 00:08:27 +0000750 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000751 break;
752 case ADDRESS_SPACE_LOCAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000753 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000754 break;
755 }
Justin Bogner8d83fb62016-05-13 21:12:53 +0000756 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
757 Src));
758 return;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000759 } else {
760 // Generic to specific
761 if (SrcAddrSpace != 0)
762 report_fatal_error("Cannot cast between two non-generic address spaces");
763 unsigned Opc;
764 switch (DstAddrSpace) {
765 default: report_fatal_error("Bad address space in addrspacecast");
766 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000767 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
768 : NVPTX::cvta_to_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000769 break;
770 case ADDRESS_SPACE_SHARED:
Eric Christopher02389e32015-02-19 00:08:27 +0000771 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
772 : NVPTX::cvta_to_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000773 break;
774 case ADDRESS_SPACE_CONST:
Eric Christopher02389e32015-02-19 00:08:27 +0000775 Opc =
776 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000777 break;
778 case ADDRESS_SPACE_LOCAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000779 Opc =
780 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000781 break;
Jingyue Wua2f60272015-06-04 21:28:26 +0000782 case ADDRESS_SPACE_PARAM:
783 Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
784 : NVPTX::nvvm_ptr_gen_to_param;
785 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000786 }
Justin Bogner8d83fb62016-05-13 21:12:53 +0000787 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
788 Src));
789 return;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000790 }
791}
792
Artem Belevichee7dd122017-03-02 19:14:14 +0000793// Helper function template to reduce amount of boilerplate code for
794// opcode selection.
795static Optional<unsigned> pickOpcodeForVT(
796 MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
797 unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
798 unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
799 switch (VT) {
800 case MVT::i1:
801 case MVT::i8:
802 return Opcode_i8;
803 case MVT::i16:
804 return Opcode_i16;
805 case MVT::i32:
806 return Opcode_i32;
807 case MVT::i64:
808 return Opcode_i64;
809 case MVT::f16:
810 return Opcode_f16;
811 case MVT::v2f16:
812 return Opcode_f16x2;
813 case MVT::f32:
814 return Opcode_f32;
815 case MVT::f64:
816 return Opcode_f64;
817 default:
818 return None;
819 }
820}
821
Justin Bogner8d83fb62016-05-13 21:12:53 +0000822bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000823 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000824 LoadSDNode *LD = cast<LoadSDNode>(N);
825 EVT LoadedVT = LD->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +0000826 SDNode *NVPTXLD = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000827
828 // do not support pre/post inc/dec
829 if (LD->isIndexed())
Justin Bogner8d83fb62016-05-13 21:12:53 +0000830 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000831
832 if (!LoadedVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +0000833 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000834
835 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +0000836 unsigned int codeAddrSpace = getCodeAddrSpace(LD);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000837
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000838 if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
Justin Bogner8d83fb62016-05-13 21:12:53 +0000839 return tryLDGLDU(N);
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000840 }
841
Justin Holewinskiae556d32012-05-04 20:18:50 +0000842 // Volatile Setting
843 // - .volatile is only availalble for .global and .shared
844 bool isVolatile = LD->isVolatile();
845 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
846 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
847 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
848 isVolatile = false;
849
Justin Holewinskiae556d32012-05-04 20:18:50 +0000850 // Type Setting: fromType + fromTypeWidth
851 //
852 // Sign : ISD::SEXTLOAD
853 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
854 // type is integer
855 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
Artem Belevich620db1f2017-02-23 22:38:24 +0000856 MVT SimpleVT = LoadedVT.getSimpleVT();
Justin Holewinskiae556d32012-05-04 20:18:50 +0000857 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +0000858 // Read at least 8 bits (predicates are stored as 8-bit values)
859 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskiae556d32012-05-04 20:18:50 +0000860 unsigned int fromType;
Artem Belevich620db1f2017-02-23 22:38:24 +0000861
862 // Vector Setting
863 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
864 if (SimpleVT.isVector()) {
865 assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
866 // v2f16 is loaded using ld.b32
867 fromTypeWidth = 32;
868 }
869
Justin Holewinskiae556d32012-05-04 20:18:50 +0000870 if ((LD->getExtensionType() == ISD::SEXTLOAD))
871 fromType = NVPTX::PTXLdStInstCode::Signed;
872 else if (ScalarVT.isFloatingPoint())
Artem Belevich64dc9be2017-01-13 20:56:17 +0000873 // f16 uses .b16 as its storage type.
874 fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
875 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000876 else
877 fromType = NVPTX::PTXLdStInstCode::Unsigned;
878
879 // Create the machine instruction DAG
880 SDValue Chain = N->getOperand(0);
881 SDValue N1 = N->getOperand(1);
882 SDValue Addr;
883 SDValue Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +0000884 Optional<unsigned> Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +0000885 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000886
887 if (SelectDirectAddr(N1, Addr)) {
Artem Belevichee7dd122017-03-02 19:14:14 +0000888 Opcode = pickOpcodeForVT(
889 TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
890 NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
891 NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
892 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +0000893 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000894 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
895 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
896 getI32Imm(fromTypeWidth, dl), Addr, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000897 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
898 MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +0000899 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
900 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +0000901 Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
902 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
903 NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
904 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
905 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +0000906 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000907 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
908 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
909 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000910 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
911 MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +0000912 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
913 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +0000914 if (TM.is64Bit())
915 Opcode = pickOpcodeForVT(
916 TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
917 NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
918 NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
919 else
920 Opcode = pickOpcodeForVT(
921 TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
922 NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
923 NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
924 if (!Opcode)
925 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000926 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
927 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
928 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000929 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
930 MVT::Other, Ops);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000931 } else {
Artem Belevichee7dd122017-03-02 19:14:14 +0000932 if (TM.is64Bit())
933 Opcode = pickOpcodeForVT(
934 TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
935 NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
936 NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
937 NVPTX::LD_f64_areg_64);
938 else
939 Opcode = pickOpcodeForVT(
940 TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
941 NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
942 NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
943 if (!Opcode)
944 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000945 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
946 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
947 getI32Imm(fromTypeWidth, dl), N1, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000948 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
949 MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000950 }
951
Justin Bogner8d83fb62016-05-13 21:12:53 +0000952 if (!NVPTXLD)
953 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000954
Justin Bogner8d83fb62016-05-13 21:12:53 +0000955 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
956 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
957 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
958
959 ReplaceNode(N, NVPTXLD);
960 return true;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000961}
962
Justin Bogner8d83fb62016-05-13 21:12:53 +0000963bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000964
965 SDValue Chain = N->getOperand(0);
966 SDValue Op1 = N->getOperand(1);
967 SDValue Addr, Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +0000968 Optional<unsigned> Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +0000969 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000970 SDNode *LD;
971 MemSDNode *MemSD = cast<MemSDNode>(N);
972 EVT LoadedVT = MemSD->getMemoryVT();
973
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000974 if (!LoadedVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +0000975 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000976
977 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +0000978 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000979
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000980 if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
Justin Bogner8d83fb62016-05-13 21:12:53 +0000981 return tryLDGLDU(N);
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000982 }
983
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000984 // Volatile Setting
985 // - .volatile is only availalble for .global and .shared
986 bool IsVolatile = MemSD->isVolatile();
987 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
988 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
989 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
990 IsVolatile = false;
991
992 // Vector Setting
993 MVT SimpleVT = LoadedVT.getSimpleVT();
994
995 // Type Setting: fromType + fromTypeWidth
996 //
997 // Sign : ISD::SEXTLOAD
998 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
999 // type is integer
1000 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
1001 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +00001002 // Read at least 8 bits (predicates are stored as 8-bit values)
1003 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001004 unsigned int FromType;
1005 // The last operand holds the original LoadSDNode::getExtensionType() value
Justin Holewinski0497ab12013-03-30 14:29:21 +00001006 unsigned ExtensionType = cast<ConstantSDNode>(
1007 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001008 if (ExtensionType == ISD::SEXTLOAD)
1009 FromType = NVPTX::PTXLdStInstCode::Signed;
1010 else if (ScalarVT.isFloatingPoint())
Artem Belevich620db1f2017-02-23 22:38:24 +00001011 FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1012 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001013 else
1014 FromType = NVPTX::PTXLdStInstCode::Unsigned;
1015
1016 unsigned VecType;
1017
1018 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001019 case NVPTXISD::LoadV2:
1020 VecType = NVPTX::PTXLdStInstCode::V2;
1021 break;
1022 case NVPTXISD::LoadV4:
1023 VecType = NVPTX::PTXLdStInstCode::V4;
1024 break;
1025 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001026 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001027 }
1028
1029 EVT EltVT = N->getValueType(0);
1030
Artem Belevich620db1f2017-02-23 22:38:24 +00001031 // v8f16 is a special case. PTX doesn't have ld.v8.f16
1032 // instruction. Instead, we split the vector into v2f16 chunks and
1033 // load them with ld.v4.b32.
1034 if (EltVT == MVT::v2f16) {
1035 assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
1036 EltVT = MVT::i32;
1037 FromType = NVPTX::PTXLdStInstCode::Untyped;
1038 FromTypeWidth = 32;
1039 }
1040
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001041 if (SelectDirectAddr(Op1, Addr)) {
1042 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001043 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001044 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001045 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001046 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1047 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1048 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1049 NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
1050 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001051 break;
1052 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001053 Opcode =
1054 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
1055 NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
1056 NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
1057 NVPTX::LDV_f32_v4_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001058 break;
1059 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001060 if (!Opcode)
1061 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001062 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1063 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1064 getI32Imm(FromTypeWidth, DL), Addr, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001065 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001066 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1067 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001068 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001069 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001070 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001071 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001072 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1073 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1074 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1075 NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
1076 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001077 break;
1078 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001079 Opcode =
1080 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
1081 NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
1082 NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
1083 NVPTX::LDV_f32_v4_asi, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001084 break;
1085 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001086 if (!Opcode)
1087 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001088 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1089 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1090 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001091 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001092 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1093 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1094 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001095 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001096 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001097 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001098 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001099 Opcode = pickOpcodeForVT(
1100 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
1101 NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
1102 NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
1103 NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
1104 NVPTX::LDV_f64_v2_ari_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001105 break;
1106 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001107 Opcode = pickOpcodeForVT(
1108 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
1109 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
1110 NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
1111 NVPTX::LDV_f32_v4_ari_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001112 break;
1113 }
1114 } else {
1115 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001116 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001117 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001118 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001119 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1120 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1121 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1122 NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
1123 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001124 break;
1125 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001126 Opcode =
1127 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
1128 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
1129 NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
1130 NVPTX::LDV_f32_v4_ari, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001131 break;
1132 }
1133 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001134 if (!Opcode)
1135 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001136 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1137 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1138 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001139
Artem Belevichee7dd122017-03-02 19:14:14 +00001140 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001141 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001142 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001143 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001144 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001145 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001146 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001147 Opcode = pickOpcodeForVT(
1148 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
1149 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1150 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
1151 NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1152 NVPTX::LDV_f64_v2_areg_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001153 break;
1154 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001155 Opcode = pickOpcodeForVT(
1156 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
1157 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
1158 NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
1159 NVPTX::LDV_f32_v4_areg_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001160 break;
1161 }
1162 } else {
1163 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001164 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001165 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001166 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001167 Opcode =
1168 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
1169 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1170 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
1171 NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
1172 NVPTX::LDV_f64_v2_areg);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001173 break;
1174 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001175 Opcode = pickOpcodeForVT(
1176 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
1177 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
1178 NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
1179 NVPTX::LDV_f32_v4_areg, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001180 break;
1181 }
1182 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001183 if (!Opcode)
1184 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001185 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1186 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1187 getI32Imm(FromTypeWidth, DL), Op1, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001188 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001189 }
1190
1191 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1192 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1193 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1194
Justin Bogner8d83fb62016-05-13 21:12:53 +00001195 ReplaceNode(N, LD);
1196 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001197}
1198
Justin Bogner8d83fb62016-05-13 21:12:53 +00001199bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001200
1201 SDValue Chain = N->getOperand(0);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001202 SDValue Op1;
1203 MemSDNode *Mem;
1204 bool IsLDG = true;
1205
Justin Holewinskic7997922016-04-05 12:38:01 +00001206 // If this is an LDG intrinsic, the address is the third operand. If its an
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001207 // LDG/LDU SD node (from custom vector handling), then its the second operand
1208 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1209 Op1 = N->getOperand(2);
1210 Mem = cast<MemIntrinsicSDNode>(N);
1211 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1212 switch (IID) {
1213 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001214 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001215 case Intrinsic::nvvm_ldg_global_f:
1216 case Intrinsic::nvvm_ldg_global_i:
1217 case Intrinsic::nvvm_ldg_global_p:
1218 IsLDG = true;
1219 break;
1220 case Intrinsic::nvvm_ldu_global_f:
1221 case Intrinsic::nvvm_ldu_global_i:
1222 case Intrinsic::nvvm_ldu_global_p:
1223 IsLDG = false;
1224 break;
1225 }
1226 } else {
1227 Op1 = N->getOperand(1);
1228 Mem = cast<MemSDNode>(N);
1229 }
1230
Artem Belevichee7dd122017-03-02 19:14:14 +00001231 Optional<unsigned> Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001232 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001233 SDNode *LD;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001234 SDValue Base, Offset, Addr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00001235
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001236 EVT EltVT = Mem->getMemoryVT();
Justin Holewinskic7997922016-04-05 12:38:01 +00001237 unsigned NumElts = 1;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001238 if (EltVT.isVector()) {
Justin Holewinskic7997922016-04-05 12:38:01 +00001239 NumElts = EltVT.getVectorNumElements();
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001240 EltVT = EltVT.getVectorElementType();
1241 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001242
Justin Holewinskic7997922016-04-05 12:38:01 +00001243 // Build the "promoted" result VTList for the load. If we are really loading
1244 // i8s, then the return type will be promoted to i16 since we do not expose
1245 // 8-bit registers in NVPTX.
1246 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1247 SmallVector<EVT, 5> InstVTs;
1248 for (unsigned i = 0; i != NumElts; ++i) {
1249 InstVTs.push_back(NodeVT);
1250 }
1251 InstVTs.push_back(MVT::Other);
1252 SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1253
Justin Holewinskie40e9292013-07-01 12:58:52 +00001254 if (SelectDirectAddr(Op1, Addr)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001255 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001256 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001257 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001258 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001259 if (IsLDG)
1260 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1261 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1262 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1263 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1264 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1265 NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
1266 NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
1267 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1268 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1269 else
1270 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1271 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1272 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1273 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1274 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1275 NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
1276 NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
1277 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1278 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001279 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001280 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001281 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1282 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1283 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1284 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1285 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1286 NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
1287 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
1288 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1289 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001290 break;
1291 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001292 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1293 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1294 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1295 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1296 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1297 NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
1298 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
1299 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1300 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001301 break;
1302 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001303 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1304 NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1305 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1306 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
1307 NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
1308 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
1309 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001310 break;
1311 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001312 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1313 NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1314 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1315 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
1316 NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
1317 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
1318 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001319 break;
1320 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001321 if (!Opcode)
1322 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001323 SDValue Ops[] = { Addr, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001324 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001325 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1326 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1327 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001328 switch (N->getOpcode()) {
1329 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001330 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001331 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001332 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001333 if (IsLDG)
1334 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1335 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1336 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1337 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1338 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1339 NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
1340 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
1341 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1342 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1343 else
1344 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1345 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1346 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1347 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1348 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1349 NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
1350 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
1351 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1352 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001353 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001354 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001355 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001356 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1357 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1358 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1359 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1360 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1361 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
1362 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
1363 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1364 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001365 break;
1366 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001367 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1368 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1369 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1370 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1371 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1372 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
1373 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
1374 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1375 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001376 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001377 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001378 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001379 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1380 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1381 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1382 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
1383 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
1384 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
1385 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001386 break;
1387 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001388 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1389 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1390 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1391 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
1392 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
1393 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
1394 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001395 break;
1396 }
1397 } else {
1398 switch (N->getOpcode()) {
1399 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001400 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001401 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001402 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001403 if (IsLDG)
1404 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1405 NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1406 NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1407 NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1408 NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1409 NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
1410 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
1411 NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1412 NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1413 else
1414 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1415 NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1416 NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1417 NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1418 NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1419 NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
1420 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
1421 NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1422 NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001423 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001424 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001425 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001426 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1427 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1428 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1429 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1430 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1431 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
1432 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
1433 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1434 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001435 break;
1436 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001437 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1438 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1439 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1440 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1441 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1442 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
1443 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
1444 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1445 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001446 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001447 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001448 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001449 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1450 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1451 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1452 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
1453 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
1454 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
1455 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001456 break;
1457 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001458 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1459 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1460 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1461 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
1462 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
1463 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
1464 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001465 break;
1466 }
1467 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001468 if (!Opcode)
1469 return false;
1470 SDValue Ops[] = {Base, Offset, Chain};
1471 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001472 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001473 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001474 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001475 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001476 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001477 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001478 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001479 if (IsLDG)
1480 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1481 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1482 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1483 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1484 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1485 NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
1486 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
1487 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1488 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1489 else
1490 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1491 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1492 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1493 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1494 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1495 NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
1496 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
1497 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1498 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001499 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001500 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001501 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001502 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1503 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1504 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1505 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1506 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1507 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
1508 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
1509 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1510 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001511 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001512 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001513 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1514 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1515 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1516 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1517 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1518 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
1519 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
1520 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1521 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001522 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001523 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001524 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001525 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1526 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1527 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1528 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
1529 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
1530 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
1531 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001532 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001533 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001534 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1535 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1536 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1537 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
1538 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
1539 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
1540 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001541 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001542 }
Justin Holewinskie40e9292013-07-01 12:58:52 +00001543 } else {
1544 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001545 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001546 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001547 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001548 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001549 if (IsLDG)
1550 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1551 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1552 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1553 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1554 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1555 NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
1556 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
1557 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1558 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1559 else
1560 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1561 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1562 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1563 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1564 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1565 NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
1566 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
1567 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1568 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001569 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001570 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001571 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001572 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1573 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1574 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1575 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1576 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1577 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
1578 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
1579 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1580 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001581 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001582 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001583 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1584 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1585 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1586 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1587 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1588 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
1589 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
1590 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1591 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001592 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001593 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001594 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001595 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1596 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1597 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1598 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
1599 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
1600 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
1601 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001602 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001603 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001604 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1605 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1606 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1607 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
1608 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
1609 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
1610 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001611 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001612 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001613 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001614 if (!Opcode)
1615 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001616 SDValue Ops[] = { Op1, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001617 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001618 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001619
1620 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001621 MemRefs0[0] = Mem->getMemOperand();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001622 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1623
Justin Holewinskic7997922016-04-05 12:38:01 +00001624 // For automatic generation of LDG (through SelectLoad[Vector], not the
1625 // intrinsics), we may have an extending load like:
1626 //
1627 // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
1628 //
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001629 // In this case, the matching logic above will select a load for the original
1630 // memory type (in this case, i8) and our types will not match (the node needs
1631 // to return an i32 in this case). Our LDG/LDU nodes do not support the
1632 // concept of sign-/zero-extension, so emulate it here by adding an explicit
1633 // CVT instruction. Ptxas should clean up any redundancies here.
1634
Justin Holewinskic7997922016-04-05 12:38:01 +00001635 EVT OrigType = N->getValueType(0);
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001636 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
Justin Holewinskic7997922016-04-05 12:38:01 +00001637
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001638 if (OrigType != EltVT && LdNode) {
1639 // We have an extending-load. The instruction we selected operates on the
1640 // smaller type, but the SDNode we are replacing has the larger type. We
1641 // need to emit a CVT to make the types match.
1642 bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
1643 unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
1644 EltVT.getSimpleVT(), IsSigned);
Justin Holewinskic7997922016-04-05 12:38:01 +00001645
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001646 // For each output value, apply the manual sign/zero-extension and make sure
1647 // all users of the load go through that CVT.
Justin Holewinskic7997922016-04-05 12:38:01 +00001648 for (unsigned i = 0; i != NumElts; ++i) {
1649 SDValue Res(LD, i);
1650 SDValue OrigVal(N, i);
1651
1652 SDNode *CvtNode =
1653 CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001654 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
1655 DL, MVT::i32));
Justin Holewinskic7997922016-04-05 12:38:01 +00001656 ReplaceUses(OrigVal, SDValue(CvtNode, 0));
1657 }
1658 }
1659
Justin Bogner8d83fb62016-05-13 21:12:53 +00001660 ReplaceNode(N, LD);
1661 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001662}
1663
Justin Bogner8d83fb62016-05-13 21:12:53 +00001664bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001665 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001666 StoreSDNode *ST = cast<StoreSDNode>(N);
1667 EVT StoreVT = ST->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +00001668 SDNode *NVPTXST = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001669
1670 // do not support pre/post inc/dec
1671 if (ST->isIndexed())
Justin Bogner8d83fb62016-05-13 21:12:53 +00001672 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001673
1674 if (!StoreVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +00001675 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001676
1677 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00001678 unsigned int codeAddrSpace = getCodeAddrSpace(ST);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001679
1680 // Volatile Setting
1681 // - .volatile is only availalble for .global and .shared
1682 bool isVolatile = ST->isVolatile();
1683 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1684 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1685 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1686 isVolatile = false;
1687
1688 // Vector Setting
1689 MVT SimpleVT = StoreVT.getSimpleVT();
1690 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001691
1692 // Type Setting: toType + toTypeWidth
1693 // - for integer type, always use 'u'
1694 //
1695 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00001696 unsigned toTypeWidth = ScalarVT.getSizeInBits();
Artem Belevich620db1f2017-02-23 22:38:24 +00001697 if (SimpleVT.isVector()) {
1698 assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
1699 // v2f16 is stored using st.b32
1700 toTypeWidth = 32;
1701 }
1702
Justin Holewinskiae556d32012-05-04 20:18:50 +00001703 unsigned int toType;
1704 if (ScalarVT.isFloatingPoint())
Artem Belevich64dc9be2017-01-13 20:56:17 +00001705 // f16 uses .b16 as its storage type.
1706 toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1707 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001708 else
1709 toType = NVPTX::PTXLdStInstCode::Unsigned;
1710
1711 // Create the machine instruction DAG
1712 SDValue Chain = N->getOperand(0);
1713 SDValue N1 = N->getOperand(1);
1714 SDValue N2 = N->getOperand(2);
1715 SDValue Addr;
1716 SDValue Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +00001717 Optional<unsigned> Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +00001718 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001719
1720 if (SelectDirectAddr(N2, Addr)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00001721 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1722 NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1723 NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
1724 NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1725 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +00001726 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001727 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1728 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1729 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
1730 Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001731 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001732 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1733 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00001734 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1735 NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1736 NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
1737 NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1738 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +00001739 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001740 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1741 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1742 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
1743 Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001744 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001745 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1746 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00001747 if (TM.is64Bit())
1748 Opcode = pickOpcodeForVT(
1749 SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
1750 NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
1751 NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1752 else
1753 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1754 NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1755 NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
1756 NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1757 if (!Opcode)
1758 return false;
1759
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001760 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1761 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1762 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
1763 Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001764 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001765 } else {
Artem Belevichee7dd122017-03-02 19:14:14 +00001766 if (TM.is64Bit())
1767 Opcode =
1768 pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1769 NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1770 NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
1771 NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1772 else
1773 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1774 NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1775 NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
1776 NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1777 if (!Opcode)
1778 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001779 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1780 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1781 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
1782 Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001783 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001784 }
1785
Justin Bogner8d83fb62016-05-13 21:12:53 +00001786 if (!NVPTXST)
1787 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001788
Justin Bogner8d83fb62016-05-13 21:12:53 +00001789 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1790 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1791 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1792 ReplaceNode(N, NVPTXST);
1793 return true;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001794}
1795
Justin Bogner8d83fb62016-05-13 21:12:53 +00001796bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001797 SDValue Chain = N->getOperand(0);
1798 SDValue Op1 = N->getOperand(1);
1799 SDValue Addr, Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +00001800 Optional<unsigned> Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001801 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001802 SDNode *ST;
1803 EVT EltVT = Op1.getValueType();
1804 MemSDNode *MemSD = cast<MemSDNode>(N);
1805 EVT StoreVT = MemSD->getMemoryVT();
1806
1807 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00001808 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001809
1810 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1811 report_fatal_error("Cannot store to pointer that points to constant "
1812 "memory space");
1813 }
1814
1815 // Volatile Setting
  // - .volatile is only available for the .global, .shared and generic
  //   address spaces
1817 bool IsVolatile = MemSD->isVolatile();
1818 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1819 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1820 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1821 IsVolatile = false;
1822
1823 // Type Setting: toType + toTypeWidth
1824 // - for integer type, always use 'u'
1825 assert(StoreVT.isSimple() && "Store value is not simple");
1826 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00001827 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001828 unsigned ToType;
1829 if (ScalarVT.isFloatingPoint())
Artem Belevich620db1f2017-02-23 22:38:24 +00001830 ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1831 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001832 else
1833 ToType = NVPTX::PTXLdStInstCode::Unsigned;
1834
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001835 SmallVector<SDValue, 12> StOps;
1836 SDValue N2;
1837 unsigned VecType;
1838
1839 switch (N->getOpcode()) {
1840 case NVPTXISD::StoreV2:
1841 VecType = NVPTX::PTXLdStInstCode::V2;
1842 StOps.push_back(N->getOperand(1));
1843 StOps.push_back(N->getOperand(2));
1844 N2 = N->getOperand(3);
1845 break;
1846 case NVPTXISD::StoreV4:
1847 VecType = NVPTX::PTXLdStInstCode::V4;
1848 StOps.push_back(N->getOperand(1));
1849 StOps.push_back(N->getOperand(2));
1850 StOps.push_back(N->getOperand(3));
1851 StOps.push_back(N->getOperand(4));
1852 N2 = N->getOperand(5);
1853 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001854 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001855 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001856 }
1857
Artem Belevich620db1f2017-02-23 22:38:24 +00001858 // v8f16 is a special case. PTX doesn't have st.v8.f16
1859 // instruction. Instead, we split the vector into v2f16 chunks and
1860 // store them with st.v4.b32.
1861 if (EltVT == MVT::v2f16) {
1862 assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
1863 EltVT = MVT::i32;
1864 ToType = NVPTX::PTXLdStInstCode::Untyped;
1865 ToTypeWidth = 32;
1866 }
1867
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001868 StOps.push_back(getI32Imm(IsVolatile, DL));
1869 StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1870 StOps.push_back(getI32Imm(VecType, DL));
1871 StOps.push_back(getI32Imm(ToType, DL));
1872 StOps.push_back(getI32Imm(ToTypeWidth, DL));
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001873
1874 if (SelectDirectAddr(N2, Addr)) {
1875 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001876 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001877 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001878 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001879 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1880 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1881 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1882 NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1883 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001884 break;
1885 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001886 Opcode =
1887 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
1888 NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
1889 NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1890 NVPTX::STV_f32_v4_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001891 break;
1892 }
1893 StOps.push_back(Addr);
Eric Christopher02389e32015-02-19 00:08:27 +00001894 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1895 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001896 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001897 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001898 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001899 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001900 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1901 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1902 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1903 NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1904 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001905 break;
1906 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001907 Opcode =
1908 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
1909 NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
1910 NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1911 NVPTX::STV_f32_v4_asi, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001912 break;
1913 }
1914 StOps.push_back(Base);
1915 StOps.push_back(Offset);
Eric Christopher02389e32015-02-19 00:08:27 +00001916 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1917 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1918 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001919 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001920 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001921 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001922 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001923 Opcode = pickOpcodeForVT(
1924 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
1925 NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
1926 NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
1927 NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
1928 NVPTX::STV_f64_v2_ari_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001929 break;
1930 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001931 Opcode = pickOpcodeForVT(
1932 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
1933 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
1934 NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
1935 NVPTX::STV_f32_v4_ari_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001936 break;
1937 }
1938 } else {
1939 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001940 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001941 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001942 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001943 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1944 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
1945 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
1946 NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
1947 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001948 break;
1949 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001950 Opcode =
1951 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
1952 NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
1953 NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
1954 NVPTX::STV_f32_v4_ari, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001955 break;
1956 }
1957 }
1958 StOps.push_back(Base);
1959 StOps.push_back(Offset);
1960 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001961 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001962 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001963 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001964 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001965 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001966 Opcode = pickOpcodeForVT(
1967 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
1968 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
1969 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
1970 NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
1971 NVPTX::STV_f64_v2_areg_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001972 break;
1973 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001974 Opcode = pickOpcodeForVT(
1975 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
1976 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
1977 NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
1978 NVPTX::STV_f32_v4_areg_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001979 break;
1980 }
1981 } else {
1982 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001983 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001984 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001985 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001986 Opcode =
1987 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
1988 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
1989 NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
1990 NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
1991 NVPTX::STV_f64_v2_areg);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001992 break;
1993 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001994 Opcode =
1995 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
1996 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None,
1997 NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
1998 NVPTX::STV_f32_v4_areg, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001999 break;
2000 }
2001 }
2002 StOps.push_back(N2);
2003 }
2004
Artem Belevichee7dd122017-03-02 19:14:14 +00002005 if (!Opcode)
2006 return false;
2007
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002008 StOps.push_back(Chain);
2009
Artem Belevichee7dd122017-03-02 19:14:14 +00002010 ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002011
2012 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2013 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2014 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2015
Justin Bogner8d83fb62016-05-13 21:12:53 +00002016 ReplaceNode(N, ST);
2017 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002018}
2019
Justin Bogner8d83fb62016-05-13 21:12:53 +00002020bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00002021 SDValue Chain = Node->getOperand(0);
2022 SDValue Offset = Node->getOperand(2);
2023 SDValue Flag = Node->getOperand(3);
2024 SDLoc DL(Node);
2025 MemSDNode *Mem = cast<MemSDNode>(Node);
2026
2027 unsigned VecSize;
2028 switch (Node->getOpcode()) {
2029 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002030 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002031 case NVPTXISD::LoadParam:
2032 VecSize = 1;
2033 break;
2034 case NVPTXISD::LoadParamV2:
2035 VecSize = 2;
2036 break;
2037 case NVPTXISD::LoadParamV4:
2038 VecSize = 4;
2039 break;
2040 }
2041
2042 EVT EltVT = Node->getValueType(0);
2043 EVT MemVT = Mem->getMemoryVT();
2044
Artem Belevichee7dd122017-03-02 19:14:14 +00002045 Optional<unsigned> Opcode;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002046
2047 switch (VecSize) {
2048 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002049 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002050 case 1:
Artem Belevichee7dd122017-03-02 19:14:14 +00002051 Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
2052 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2053 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2054 NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2055 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002056 break;
2057 case 2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002058 Opcode =
2059 pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
2060 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2061 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2062 NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2063 NVPTX::LoadParamMemV2F64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002064 break;
2065 case 4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002066 Opcode = pickOpcodeForVT(
2067 MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
2068 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
2069 NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2070 NVPTX::LoadParamMemV4F32, None);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002071 break;
2072 }
Artem Belevichee7dd122017-03-02 19:14:14 +00002073 if (!Opcode)
2074 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002075
2076 SDVTList VTs;
2077 if (VecSize == 1) {
2078 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2079 } else if (VecSize == 2) {
2080 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2081 } else {
2082 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
Craig Topperabb4ac72014-04-16 06:10:51 +00002083 VTs = CurDAG->getVTList(EVTs);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002084 }
2085
2086 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2087
2088 SmallVector<SDValue, 2> Ops;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002089 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002090 Ops.push_back(Chain);
2091 Ops.push_back(Flag);
2092
Artem Belevichee7dd122017-03-02 19:14:14 +00002093 ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
Justin Bogner8d83fb62016-05-13 21:12:53 +00002094 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002095}
2096
Justin Bogner8d83fb62016-05-13 21:12:53 +00002097bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00002098 SDLoc DL(N);
2099 SDValue Chain = N->getOperand(0);
2100 SDValue Offset = N->getOperand(1);
2101 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2102 MemSDNode *Mem = cast<MemSDNode>(N);
2103
2104 // How many elements do we have?
2105 unsigned NumElts = 1;
2106 switch (N->getOpcode()) {
2107 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002108 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002109 case NVPTXISD::StoreRetval:
2110 NumElts = 1;
2111 break;
2112 case NVPTXISD::StoreRetvalV2:
2113 NumElts = 2;
2114 break;
2115 case NVPTXISD::StoreRetvalV4:
2116 NumElts = 4;
2117 break;
2118 }
2119
2120 // Build vector of operands
2121 SmallVector<SDValue, 6> Ops;
2122 for (unsigned i = 0; i < NumElts; ++i)
2123 Ops.push_back(N->getOperand(i + 2));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002124 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002125 Ops.push_back(Chain);
2126
2127 // Determine target opcode
2128 // If we have an i1, use an 8-bit store. The lowering code in
2129 // NVPTXISelLowering will have already emitted an upcast.
Artem Belevichee7dd122017-03-02 19:14:14 +00002130 Optional<unsigned> Opcode = 0;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002131 switch (NumElts) {
2132 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002133 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002134 case 1:
Artem Belevichee7dd122017-03-02 19:14:14 +00002135 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2136 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2137 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2138 NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2139 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002140 break;
2141 case 2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002142 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2143 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2144 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2145 NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2146 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002147 break;
2148 case 4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002149 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2150 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2151 NVPTX::StoreRetvalV4I32, None,
2152 NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2153 NVPTX::StoreRetvalV4F32, None);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002154 break;
2155 }
Artem Belevichee7dd122017-03-02 19:14:14 +00002156 if (!Opcode)
2157 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002158
Artem Belevichee7dd122017-03-02 19:14:14 +00002159 SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002160 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2161 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2162 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2163
Justin Bogner8d83fb62016-05-13 21:12:53 +00002164 ReplaceNode(N, Ret);
2165 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002166}
2167
Justin Bogner8d83fb62016-05-13 21:12:53 +00002168bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00002169 SDLoc DL(N);
2170 SDValue Chain = N->getOperand(0);
2171 SDValue Param = N->getOperand(1);
2172 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2173 SDValue Offset = N->getOperand(2);
2174 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2175 MemSDNode *Mem = cast<MemSDNode>(N);
2176 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2177
2178 // How many elements do we have?
2179 unsigned NumElts = 1;
2180 switch (N->getOpcode()) {
2181 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002182 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002183 case NVPTXISD::StoreParamU32:
2184 case NVPTXISD::StoreParamS32:
2185 case NVPTXISD::StoreParam:
2186 NumElts = 1;
2187 break;
2188 case NVPTXISD::StoreParamV2:
2189 NumElts = 2;
2190 break;
2191 case NVPTXISD::StoreParamV4:
2192 NumElts = 4;
2193 break;
2194 }
2195
2196 // Build vector of operands
2197 SmallVector<SDValue, 8> Ops;
2198 for (unsigned i = 0; i < NumElts; ++i)
2199 Ops.push_back(N->getOperand(i + 3));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002200 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2201 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002202 Ops.push_back(Chain);
2203 Ops.push_back(Flag);
2204
2205 // Determine target opcode
2206 // If we have an i1, use an 8-bit store. The lowering code in
2207 // NVPTXISelLowering will have already emitted an upcast.
Artem Belevichee7dd122017-03-02 19:14:14 +00002208 Optional<unsigned> Opcode = 0;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002209 switch (N->getOpcode()) {
2210 default:
2211 switch (NumElts) {
2212 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002213 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002214 case 1:
Artem Belevichee7dd122017-03-02 19:14:14 +00002215 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2216 NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2217 NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2218 NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2219 NVPTX::StoreParamF32, NVPTX::StoreParamF64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002220 break;
2221 case 2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002222 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2223 NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2224 NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2225 NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2226 NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002227 break;
2228 case 4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002229 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2230 NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2231 NVPTX::StoreParamV4I32, None,
2232 NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2233 NVPTX::StoreParamV4F32, None);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002234 break;
2235 }
Artem Belevichee7dd122017-03-02 19:14:14 +00002236 if (!Opcode)
2237 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002238 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002239 // Special case: if we have a sign-extend/zero-extend node, insert the
2240 // conversion instruction first, and use that as the value operand to
2241 // the selected StoreParam node.
2242 case NVPTXISD::StoreParamU32: {
2243 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002244 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002245 MVT::i32);
2246 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2247 MVT::i32, Ops[0], CvtNone);
2248 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002249 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002250 }
2251 case NVPTXISD::StoreParamS32: {
2252 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002253 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002254 MVT::i32);
2255 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2256 MVT::i32, Ops[0], CvtNone);
2257 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002258 break;
2259 }
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002260 }
Justin Holewinskif8f70912013-06-28 17:57:59 +00002261
Justin Holewinskidff28d22013-07-01 12:59:01 +00002262 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002263 SDNode *Ret =
Artem Belevichee7dd122017-03-02 19:14:14 +00002264 CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002265 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2266 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2267 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2268
Justin Bogner8d83fb62016-05-13 21:12:53 +00002269 ReplaceNode(N, Ret);
2270 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002271}
2272
Justin Bogner8d83fb62016-05-13 21:12:53 +00002273bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00002274 SDValue Chain = N->getOperand(0);
Justin Holewinski30d56a72014-04-09 15:39:15 +00002275 unsigned Opc = 0;
2276 SmallVector<SDValue, 8> Ops;
2277
2278 switch (N->getOpcode()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00002279 default: return false;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002280 case NVPTXISD::Tex1DFloatS32:
2281 Opc = NVPTX::TEX_1D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002282 break;
2283 case NVPTXISD::Tex1DFloatFloat:
2284 Opc = NVPTX::TEX_1D_F32_F32;
2285 break;
2286 case NVPTXISD::Tex1DFloatFloatLevel:
2287 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2288 break;
2289 case NVPTXISD::Tex1DFloatFloatGrad:
2290 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2291 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002292 case NVPTXISD::Tex1DS32S32:
2293 Opc = NVPTX::TEX_1D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002294 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002295 case NVPTXISD::Tex1DS32Float:
2296 Opc = NVPTX::TEX_1D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002297 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002298 case NVPTXISD::Tex1DS32FloatLevel:
2299 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002300 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002301 case NVPTXISD::Tex1DS32FloatGrad:
2302 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002303 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002304 case NVPTXISD::Tex1DU32S32:
2305 Opc = NVPTX::TEX_1D_U32_S32;
2306 break;
2307 case NVPTXISD::Tex1DU32Float:
2308 Opc = NVPTX::TEX_1D_U32_F32;
2309 break;
2310 case NVPTXISD::Tex1DU32FloatLevel:
2311 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
2312 break;
2313 case NVPTXISD::Tex1DU32FloatGrad:
2314 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
2315 break;
2316 case NVPTXISD::Tex1DArrayFloatS32:
2317 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002318 break;
2319 case NVPTXISD::Tex1DArrayFloatFloat:
2320 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2321 break;
2322 case NVPTXISD::Tex1DArrayFloatFloatLevel:
2323 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2324 break;
2325 case NVPTXISD::Tex1DArrayFloatFloatGrad:
2326 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2327 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002328 case NVPTXISD::Tex1DArrayS32S32:
2329 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002330 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002331 case NVPTXISD::Tex1DArrayS32Float:
2332 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002333 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002334 case NVPTXISD::Tex1DArrayS32FloatLevel:
2335 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002336 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002337 case NVPTXISD::Tex1DArrayS32FloatGrad:
2338 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002339 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002340 case NVPTXISD::Tex1DArrayU32S32:
2341 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
2342 break;
2343 case NVPTXISD::Tex1DArrayU32Float:
2344 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
2345 break;
2346 case NVPTXISD::Tex1DArrayU32FloatLevel:
2347 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
2348 break;
2349 case NVPTXISD::Tex1DArrayU32FloatGrad:
2350 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
2351 break;
2352 case NVPTXISD::Tex2DFloatS32:
2353 Opc = NVPTX::TEX_2D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002354 break;
2355 case NVPTXISD::Tex2DFloatFloat:
2356 Opc = NVPTX::TEX_2D_F32_F32;
2357 break;
2358 case NVPTXISD::Tex2DFloatFloatLevel:
2359 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2360 break;
2361 case NVPTXISD::Tex2DFloatFloatGrad:
2362 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2363 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002364 case NVPTXISD::Tex2DS32S32:
2365 Opc = NVPTX::TEX_2D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002366 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002367 case NVPTXISD::Tex2DS32Float:
2368 Opc = NVPTX::TEX_2D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002369 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002370 case NVPTXISD::Tex2DS32FloatLevel:
2371 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002372 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002373 case NVPTXISD::Tex2DS32FloatGrad:
2374 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002375 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002376 case NVPTXISD::Tex2DU32S32:
2377 Opc = NVPTX::TEX_2D_U32_S32;
2378 break;
2379 case NVPTXISD::Tex2DU32Float:
2380 Opc = NVPTX::TEX_2D_U32_F32;
2381 break;
2382 case NVPTXISD::Tex2DU32FloatLevel:
2383 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
2384 break;
2385 case NVPTXISD::Tex2DU32FloatGrad:
2386 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
2387 break;
2388 case NVPTXISD::Tex2DArrayFloatS32:
2389 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002390 break;
2391 case NVPTXISD::Tex2DArrayFloatFloat:
2392 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2393 break;
2394 case NVPTXISD::Tex2DArrayFloatFloatLevel:
2395 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2396 break;
2397 case NVPTXISD::Tex2DArrayFloatFloatGrad:
2398 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2399 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002400 case NVPTXISD::Tex2DArrayS32S32:
2401 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002402 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002403 case NVPTXISD::Tex2DArrayS32Float:
2404 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002405 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002406 case NVPTXISD::Tex2DArrayS32FloatLevel:
2407 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002408 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002409 case NVPTXISD::Tex2DArrayS32FloatGrad:
2410 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002411 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002412 case NVPTXISD::Tex2DArrayU32S32:
2413 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
2414 break;
2415 case NVPTXISD::Tex2DArrayU32Float:
2416 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
2417 break;
2418 case NVPTXISD::Tex2DArrayU32FloatLevel:
2419 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
2420 break;
2421 case NVPTXISD::Tex2DArrayU32FloatGrad:
2422 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
2423 break;
2424 case NVPTXISD::Tex3DFloatS32:
2425 Opc = NVPTX::TEX_3D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002426 break;
2427 case NVPTXISD::Tex3DFloatFloat:
2428 Opc = NVPTX::TEX_3D_F32_F32;
2429 break;
2430 case NVPTXISD::Tex3DFloatFloatLevel:
2431 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2432 break;
2433 case NVPTXISD::Tex3DFloatFloatGrad:
2434 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2435 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002436 case NVPTXISD::Tex3DS32S32:
2437 Opc = NVPTX::TEX_3D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002438 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002439 case NVPTXISD::Tex3DS32Float:
2440 Opc = NVPTX::TEX_3D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002441 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002442 case NVPTXISD::Tex3DS32FloatLevel:
2443 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002444 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002445 case NVPTXISD::Tex3DS32FloatGrad:
2446 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
2447 break;
2448 case NVPTXISD::Tex3DU32S32:
2449 Opc = NVPTX::TEX_3D_U32_S32;
2450 break;
2451 case NVPTXISD::Tex3DU32Float:
2452 Opc = NVPTX::TEX_3D_U32_F32;
2453 break;
2454 case NVPTXISD::Tex3DU32FloatLevel:
2455 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
2456 break;
2457 case NVPTXISD::Tex3DU32FloatGrad:
2458 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
2459 break;
2460 case NVPTXISD::TexCubeFloatFloat:
2461 Opc = NVPTX::TEX_CUBE_F32_F32;
2462 break;
2463 case NVPTXISD::TexCubeFloatFloatLevel:
2464 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
2465 break;
2466 case NVPTXISD::TexCubeS32Float:
2467 Opc = NVPTX::TEX_CUBE_S32_F32;
2468 break;
2469 case NVPTXISD::TexCubeS32FloatLevel:
2470 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
2471 break;
2472 case NVPTXISD::TexCubeU32Float:
2473 Opc = NVPTX::TEX_CUBE_U32_F32;
2474 break;
2475 case NVPTXISD::TexCubeU32FloatLevel:
2476 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
2477 break;
2478 case NVPTXISD::TexCubeArrayFloatFloat:
2479 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
2480 break;
2481 case NVPTXISD::TexCubeArrayFloatFloatLevel:
2482 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
2483 break;
2484 case NVPTXISD::TexCubeArrayS32Float:
2485 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
2486 break;
2487 case NVPTXISD::TexCubeArrayS32FloatLevel:
2488 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
2489 break;
2490 case NVPTXISD::TexCubeArrayU32Float:
2491 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
2492 break;
2493 case NVPTXISD::TexCubeArrayU32FloatLevel:
2494 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
2495 break;
2496 case NVPTXISD::Tld4R2DFloatFloat:
2497 Opc = NVPTX::TLD4_R_2D_F32_F32;
2498 break;
2499 case NVPTXISD::Tld4G2DFloatFloat:
2500 Opc = NVPTX::TLD4_G_2D_F32_F32;
2501 break;
2502 case NVPTXISD::Tld4B2DFloatFloat:
2503 Opc = NVPTX::TLD4_B_2D_F32_F32;
2504 break;
2505 case NVPTXISD::Tld4A2DFloatFloat:
2506 Opc = NVPTX::TLD4_A_2D_F32_F32;
2507 break;
2508 case NVPTXISD::Tld4R2DS64Float:
2509 Opc = NVPTX::TLD4_R_2D_S32_F32;
2510 break;
2511 case NVPTXISD::Tld4G2DS64Float:
2512 Opc = NVPTX::TLD4_G_2D_S32_F32;
2513 break;
2514 case NVPTXISD::Tld4B2DS64Float:
2515 Opc = NVPTX::TLD4_B_2D_S32_F32;
2516 break;
2517 case NVPTXISD::Tld4A2DS64Float:
2518 Opc = NVPTX::TLD4_A_2D_S32_F32;
2519 break;
2520 case NVPTXISD::Tld4R2DU64Float:
2521 Opc = NVPTX::TLD4_R_2D_U32_F32;
2522 break;
2523 case NVPTXISD::Tld4G2DU64Float:
2524 Opc = NVPTX::TLD4_G_2D_U32_F32;
2525 break;
2526 case NVPTXISD::Tld4B2DU64Float:
2527 Opc = NVPTX::TLD4_B_2D_U32_F32;
2528 break;
2529 case NVPTXISD::Tld4A2DU64Float:
2530 Opc = NVPTX::TLD4_A_2D_U32_F32;
2531 break;
2532 case NVPTXISD::TexUnified1DFloatS32:
2533 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
2534 break;
2535 case NVPTXISD::TexUnified1DFloatFloat:
2536 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
2537 break;
2538 case NVPTXISD::TexUnified1DFloatFloatLevel:
2539 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
2540 break;
2541 case NVPTXISD::TexUnified1DFloatFloatGrad:
2542 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
2543 break;
2544 case NVPTXISD::TexUnified1DS32S32:
2545 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
2546 break;
2547 case NVPTXISD::TexUnified1DS32Float:
2548 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
2549 break;
2550 case NVPTXISD::TexUnified1DS32FloatLevel:
2551 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
2552 break;
2553 case NVPTXISD::TexUnified1DS32FloatGrad:
2554 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
2555 break;
2556 case NVPTXISD::TexUnified1DU32S32:
2557 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
2558 break;
2559 case NVPTXISD::TexUnified1DU32Float:
2560 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
2561 break;
2562 case NVPTXISD::TexUnified1DU32FloatLevel:
2563 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
2564 break;
2565 case NVPTXISD::TexUnified1DU32FloatGrad:
2566 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
2567 break;
2568 case NVPTXISD::TexUnified1DArrayFloatS32:
2569 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
2570 break;
2571 case NVPTXISD::TexUnified1DArrayFloatFloat:
2572 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
2573 break;
2574 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
2575 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
2576 break;
2577 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
2578 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
2579 break;
2580 case NVPTXISD::TexUnified1DArrayS32S32:
2581 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
2582 break;
2583 case NVPTXISD::TexUnified1DArrayS32Float:
2584 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
2585 break;
2586 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
2587 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
2588 break;
2589 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
2590 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
2591 break;
2592 case NVPTXISD::TexUnified1DArrayU32S32:
2593 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
2594 break;
2595 case NVPTXISD::TexUnified1DArrayU32Float:
2596 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
2597 break;
2598 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
2599 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
2600 break;
2601 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
2602 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
2603 break;
2604 case NVPTXISD::TexUnified2DFloatS32:
2605 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
2606 break;
2607 case NVPTXISD::TexUnified2DFloatFloat:
2608 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
2609 break;
2610 case NVPTXISD::TexUnified2DFloatFloatLevel:
2611 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
2612 break;
2613 case NVPTXISD::TexUnified2DFloatFloatGrad:
2614 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
2615 break;
2616 case NVPTXISD::TexUnified2DS32S32:
2617 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
2618 break;
2619 case NVPTXISD::TexUnified2DS32Float:
2620 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
2621 break;
2622 case NVPTXISD::TexUnified2DS32FloatLevel:
2623 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
2624 break;
2625 case NVPTXISD::TexUnified2DS32FloatGrad:
2626 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
2627 break;
2628 case NVPTXISD::TexUnified2DU32S32:
2629 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
2630 break;
2631 case NVPTXISD::TexUnified2DU32Float:
2632 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
2633 break;
2634 case NVPTXISD::TexUnified2DU32FloatLevel:
2635 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
2636 break;
2637 case NVPTXISD::TexUnified2DU32FloatGrad:
2638 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
2639 break;
2640 case NVPTXISD::TexUnified2DArrayFloatS32:
2641 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
2642 break;
2643 case NVPTXISD::TexUnified2DArrayFloatFloat:
2644 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
2645 break;
2646 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
2647 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
2648 break;
2649 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
2650 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
2651 break;
2652 case NVPTXISD::TexUnified2DArrayS32S32:
2653 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
2654 break;
2655 case NVPTXISD::TexUnified2DArrayS32Float:
2656 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
2657 break;
2658 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
2659 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
2660 break;
2661 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
2662 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
2663 break;
2664 case NVPTXISD::TexUnified2DArrayU32S32:
2665 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
2666 break;
2667 case NVPTXISD::TexUnified2DArrayU32Float:
2668 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
2669 break;
2670 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
2671 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
2672 break;
2673 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
2674 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
2675 break;
2676 case NVPTXISD::TexUnified3DFloatS32:
2677 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
2678 break;
2679 case NVPTXISD::TexUnified3DFloatFloat:
2680 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
2681 break;
2682 case NVPTXISD::TexUnified3DFloatFloatLevel:
2683 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
2684 break;
2685 case NVPTXISD::TexUnified3DFloatFloatGrad:
2686 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
2687 break;
2688 case NVPTXISD::TexUnified3DS32S32:
2689 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
2690 break;
2691 case NVPTXISD::TexUnified3DS32Float:
2692 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
2693 break;
2694 case NVPTXISD::TexUnified3DS32FloatLevel:
2695 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
2696 break;
2697 case NVPTXISD::TexUnified3DS32FloatGrad:
2698 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
2699 break;
2700 case NVPTXISD::TexUnified3DU32S32:
2701 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
2702 break;
2703 case NVPTXISD::TexUnified3DU32Float:
2704 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
2705 break;
2706 case NVPTXISD::TexUnified3DU32FloatLevel:
2707 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
2708 break;
2709 case NVPTXISD::TexUnified3DU32FloatGrad:
2710 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
2711 break;
2712 case NVPTXISD::TexUnifiedCubeFloatFloat:
2713 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
2714 break;
2715 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
2716 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
2717 break;
2718 case NVPTXISD::TexUnifiedCubeS32Float:
2719 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
2720 break;
2721 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
2722 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
2723 break;
2724 case NVPTXISD::TexUnifiedCubeU32Float:
2725 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
2726 break;
2727 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
2728 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
2729 break;
2730 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
2731 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
2732 break;
2733 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
2734 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
2735 break;
2736 case NVPTXISD::TexUnifiedCubeArrayS32Float:
2737 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
2738 break;
2739 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
2740 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
2741 break;
2742 case NVPTXISD::TexUnifiedCubeArrayU32Float:
2743 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
2744 break;
2745 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
2746 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
2747 break;
2748 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
2749 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
2750 break;
2751 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
2752 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
2753 break;
2754 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
2755 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
2756 break;
2757 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
2758 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
2759 break;
2760 case NVPTXISD::Tld4UnifiedR2DS64Float:
2761 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
2762 break;
2763 case NVPTXISD::Tld4UnifiedG2DS64Float:
2764 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
2765 break;
2766 case NVPTXISD::Tld4UnifiedB2DS64Float:
2767 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
2768 break;
2769 case NVPTXISD::Tld4UnifiedA2DS64Float:
2770 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
2771 break;
2772 case NVPTXISD::Tld4UnifiedR2DU64Float:
2773 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
2774 break;
2775 case NVPTXISD::Tld4UnifiedG2DU64Float:
2776 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
2777 break;
2778 case NVPTXISD::Tld4UnifiedB2DU64Float:
2779 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
2780 break;
2781 case NVPTXISD::Tld4UnifiedA2DU64Float:
2782 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002783 break;
2784 }
2785
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002786 // Copy over operands
2787 for (unsigned i = 1; i < N->getNumOperands(); ++i) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00002788 Ops.push_back(N->getOperand(i));
2789 }
2790
2791 Ops.push_back(Chain);
Justin Bogner8d83fb62016-05-13 21:12:53 +00002792 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2793 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002794}
2795
Justin Bogner8d83fb62016-05-13 21:12:53 +00002796bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00002797 SDValue Chain = N->getOperand(0);
2798 SDValue TexHandle = N->getOperand(1);
Justin Holewinski30d56a72014-04-09 15:39:15 +00002799 unsigned Opc = 0;
2800 SmallVector<SDValue, 8> Ops;
2801 switch (N->getOpcode()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00002802 default: return false;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002803 case NVPTXISD::Suld1DI8Clamp:
2804 Opc = NVPTX::SULD_1D_I8_CLAMP;
2805 Ops.push_back(TexHandle);
2806 Ops.push_back(N->getOperand(2));
2807 Ops.push_back(Chain);
2808 break;
2809 case NVPTXISD::Suld1DI16Clamp:
2810 Opc = NVPTX::SULD_1D_I16_CLAMP;
2811 Ops.push_back(TexHandle);
2812 Ops.push_back(N->getOperand(2));
2813 Ops.push_back(Chain);
2814 break;
2815 case NVPTXISD::Suld1DI32Clamp:
2816 Opc = NVPTX::SULD_1D_I32_CLAMP;
2817 Ops.push_back(TexHandle);
2818 Ops.push_back(N->getOperand(2));
2819 Ops.push_back(Chain);
2820 break;
2821 case NVPTXISD::Suld1DI64Clamp:
2822 Opc = NVPTX::SULD_1D_I64_CLAMP;
2823 Ops.push_back(TexHandle);
2824 Ops.push_back(N->getOperand(2));
2825 Ops.push_back(Chain);
2826 break;
2827 case NVPTXISD::Suld1DV2I8Clamp:
2828 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
2829 Ops.push_back(TexHandle);
2830 Ops.push_back(N->getOperand(2));
2831 Ops.push_back(Chain);
2832 break;
2833 case NVPTXISD::Suld1DV2I16Clamp:
2834 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
2835 Ops.push_back(TexHandle);
2836 Ops.push_back(N->getOperand(2));
2837 Ops.push_back(Chain);
2838 break;
2839 case NVPTXISD::Suld1DV2I32Clamp:
2840 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
2841 Ops.push_back(TexHandle);
2842 Ops.push_back(N->getOperand(2));
2843 Ops.push_back(Chain);
2844 break;
2845 case NVPTXISD::Suld1DV2I64Clamp:
2846 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
2847 Ops.push_back(TexHandle);
2848 Ops.push_back(N->getOperand(2));
2849 Ops.push_back(Chain);
2850 break;
2851 case NVPTXISD::Suld1DV4I8Clamp:
2852 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
2853 Ops.push_back(TexHandle);
2854 Ops.push_back(N->getOperand(2));
2855 Ops.push_back(Chain);
2856 break;
2857 case NVPTXISD::Suld1DV4I16Clamp:
2858 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
2859 Ops.push_back(TexHandle);
2860 Ops.push_back(N->getOperand(2));
2861 Ops.push_back(Chain);
2862 break;
2863 case NVPTXISD::Suld1DV4I32Clamp:
2864 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
2865 Ops.push_back(TexHandle);
2866 Ops.push_back(N->getOperand(2));
2867 Ops.push_back(Chain);
2868 break;
2869 case NVPTXISD::Suld1DArrayI8Clamp:
2870 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
2871 Ops.push_back(TexHandle);
2872 Ops.push_back(N->getOperand(2));
2873 Ops.push_back(N->getOperand(3));
2874 Ops.push_back(Chain);
2875 break;
2876 case NVPTXISD::Suld1DArrayI16Clamp:
2877 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
2878 Ops.push_back(TexHandle);
2879 Ops.push_back(N->getOperand(2));
2880 Ops.push_back(N->getOperand(3));
2881 Ops.push_back(Chain);
2882 break;
2883 case NVPTXISD::Suld1DArrayI32Clamp:
2884 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
2885 Ops.push_back(TexHandle);
2886 Ops.push_back(N->getOperand(2));
2887 Ops.push_back(N->getOperand(3));
2888 Ops.push_back(Chain);
2889 break;
2890 case NVPTXISD::Suld1DArrayI64Clamp:
2891 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
2892 Ops.push_back(TexHandle);
2893 Ops.push_back(N->getOperand(2));
2894 Ops.push_back(N->getOperand(3));
2895 Ops.push_back(Chain);
2896 break;
2897 case NVPTXISD::Suld1DArrayV2I8Clamp:
2898 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
2899 Ops.push_back(TexHandle);
2900 Ops.push_back(N->getOperand(2));
2901 Ops.push_back(N->getOperand(3));
2902 Ops.push_back(Chain);
2903 break;
2904 case NVPTXISD::Suld1DArrayV2I16Clamp:
2905 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
2906 Ops.push_back(TexHandle);
2907 Ops.push_back(N->getOperand(2));
2908 Ops.push_back(N->getOperand(3));
2909 Ops.push_back(Chain);
2910 break;
2911 case NVPTXISD::Suld1DArrayV2I32Clamp:
2912 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
2913 Ops.push_back(TexHandle);
2914 Ops.push_back(N->getOperand(2));
2915 Ops.push_back(N->getOperand(3));
2916 Ops.push_back(Chain);
2917 break;
2918 case NVPTXISD::Suld1DArrayV2I64Clamp:
2919 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
2920 Ops.push_back(TexHandle);
2921 Ops.push_back(N->getOperand(2));
2922 Ops.push_back(N->getOperand(3));
2923 Ops.push_back(Chain);
2924 break;
2925 case NVPTXISD::Suld1DArrayV4I8Clamp:
2926 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
2927 Ops.push_back(TexHandle);
2928 Ops.push_back(N->getOperand(2));
2929 Ops.push_back(N->getOperand(3));
2930 Ops.push_back(Chain);
2931 break;
2932 case NVPTXISD::Suld1DArrayV4I16Clamp:
2933 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
2934 Ops.push_back(TexHandle);
2935 Ops.push_back(N->getOperand(2));
2936 Ops.push_back(N->getOperand(3));
2937 Ops.push_back(Chain);
2938 break;
2939 case NVPTXISD::Suld1DArrayV4I32Clamp:
2940 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
2941 Ops.push_back(TexHandle);
2942 Ops.push_back(N->getOperand(2));
2943 Ops.push_back(N->getOperand(3));
2944 Ops.push_back(Chain);
2945 break;
2946 case NVPTXISD::Suld2DI8Clamp:
2947 Opc = NVPTX::SULD_2D_I8_CLAMP;
2948 Ops.push_back(TexHandle);
2949 Ops.push_back(N->getOperand(2));
2950 Ops.push_back(N->getOperand(3));
2951 Ops.push_back(Chain);
2952 break;
2953 case NVPTXISD::Suld2DI16Clamp:
2954 Opc = NVPTX::SULD_2D_I16_CLAMP;
2955 Ops.push_back(TexHandle);
2956 Ops.push_back(N->getOperand(2));
2957 Ops.push_back(N->getOperand(3));
2958 Ops.push_back(Chain);
2959 break;
2960 case NVPTXISD::Suld2DI32Clamp:
2961 Opc = NVPTX::SULD_2D_I32_CLAMP;
2962 Ops.push_back(TexHandle);
2963 Ops.push_back(N->getOperand(2));
2964 Ops.push_back(N->getOperand(3));
2965 Ops.push_back(Chain);
2966 break;
2967 case NVPTXISD::Suld2DI64Clamp:
2968 Opc = NVPTX::SULD_2D_I64_CLAMP;
2969 Ops.push_back(TexHandle);
2970 Ops.push_back(N->getOperand(2));
2971 Ops.push_back(N->getOperand(3));
2972 Ops.push_back(Chain);
2973 break;
2974 case NVPTXISD::Suld2DV2I8Clamp:
2975 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
2976 Ops.push_back(TexHandle);
2977 Ops.push_back(N->getOperand(2));
2978 Ops.push_back(N->getOperand(3));
2979 Ops.push_back(Chain);
2980 break;
2981 case NVPTXISD::Suld2DV2I16Clamp:
2982 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
2983 Ops.push_back(TexHandle);
2984 Ops.push_back(N->getOperand(2));
2985 Ops.push_back(N->getOperand(3));
2986 Ops.push_back(Chain);
2987 break;
2988 case NVPTXISD::Suld2DV2I32Clamp:
2989 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
2990 Ops.push_back(TexHandle);
2991 Ops.push_back(N->getOperand(2));
2992 Ops.push_back(N->getOperand(3));
2993 Ops.push_back(Chain);
2994 break;
2995 case NVPTXISD::Suld2DV2I64Clamp:
2996 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
2997 Ops.push_back(TexHandle);
2998 Ops.push_back(N->getOperand(2));
2999 Ops.push_back(N->getOperand(3));
3000 Ops.push_back(Chain);
3001 break;
3002 case NVPTXISD::Suld2DV4I8Clamp:
3003 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
3004 Ops.push_back(TexHandle);
3005 Ops.push_back(N->getOperand(2));
3006 Ops.push_back(N->getOperand(3));
3007 Ops.push_back(Chain);
3008 break;
3009 case NVPTXISD::Suld2DV4I16Clamp:
3010 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
3011 Ops.push_back(TexHandle);
3012 Ops.push_back(N->getOperand(2));
3013 Ops.push_back(N->getOperand(3));
3014 Ops.push_back(Chain);
3015 break;
3016 case NVPTXISD::Suld2DV4I32Clamp:
3017 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
3018 Ops.push_back(TexHandle);
3019 Ops.push_back(N->getOperand(2));
3020 Ops.push_back(N->getOperand(3));
3021 Ops.push_back(Chain);
3022 break;
3023 case NVPTXISD::Suld2DArrayI8Clamp:
3024 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
3025 Ops.push_back(TexHandle);
3026 Ops.push_back(N->getOperand(2));
3027 Ops.push_back(N->getOperand(3));
3028 Ops.push_back(N->getOperand(4));
3029 Ops.push_back(Chain);
3030 break;
3031 case NVPTXISD::Suld2DArrayI16Clamp:
3032 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
3033 Ops.push_back(TexHandle);
3034 Ops.push_back(N->getOperand(2));
3035 Ops.push_back(N->getOperand(3));
3036 Ops.push_back(N->getOperand(4));
3037 Ops.push_back(Chain);
3038 break;
3039 case NVPTXISD::Suld2DArrayI32Clamp:
3040 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
3041 Ops.push_back(TexHandle);
3042 Ops.push_back(N->getOperand(2));
3043 Ops.push_back(N->getOperand(3));
3044 Ops.push_back(N->getOperand(4));
3045 Ops.push_back(Chain);
3046 break;
3047 case NVPTXISD::Suld2DArrayI64Clamp:
3048 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
3049 Ops.push_back(TexHandle);
3050 Ops.push_back(N->getOperand(2));
3051 Ops.push_back(N->getOperand(3));
3052 Ops.push_back(N->getOperand(4));
3053 Ops.push_back(Chain);
3054 break;
3055 case NVPTXISD::Suld2DArrayV2I8Clamp:
3056 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
3057 Ops.push_back(TexHandle);
3058 Ops.push_back(N->getOperand(2));
3059 Ops.push_back(N->getOperand(3));
3060 Ops.push_back(N->getOperand(4));
3061 Ops.push_back(Chain);
3062 break;
3063 case NVPTXISD::Suld2DArrayV2I16Clamp:
3064 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
3065 Ops.push_back(TexHandle);
3066 Ops.push_back(N->getOperand(2));
3067 Ops.push_back(N->getOperand(3));
3068 Ops.push_back(N->getOperand(4));
3069 Ops.push_back(Chain);
3070 break;
3071 case NVPTXISD::Suld2DArrayV2I32Clamp:
3072 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
3073 Ops.push_back(TexHandle);
3074 Ops.push_back(N->getOperand(2));
3075 Ops.push_back(N->getOperand(3));
3076 Ops.push_back(N->getOperand(4));
3077 Ops.push_back(Chain);
3078 break;
3079 case NVPTXISD::Suld2DArrayV2I64Clamp:
3080 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
3081 Ops.push_back(TexHandle);
3082 Ops.push_back(N->getOperand(2));
3083 Ops.push_back(N->getOperand(3));
3084 Ops.push_back(N->getOperand(4));
3085 Ops.push_back(Chain);
3086 break;
3087 case NVPTXISD::Suld2DArrayV4I8Clamp:
3088 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
3089 Ops.push_back(TexHandle);
3090 Ops.push_back(N->getOperand(2));
3091 Ops.push_back(N->getOperand(3));
3092 Ops.push_back(N->getOperand(4));
3093 Ops.push_back(Chain);
3094 break;
3095 case NVPTXISD::Suld2DArrayV4I16Clamp:
3096 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
3097 Ops.push_back(TexHandle);
3098 Ops.push_back(N->getOperand(2));
3099 Ops.push_back(N->getOperand(3));
3100 Ops.push_back(N->getOperand(4));
3101 Ops.push_back(Chain);
3102 break;
3103 case NVPTXISD::Suld2DArrayV4I32Clamp:
3104 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
3105 Ops.push_back(TexHandle);
3106 Ops.push_back(N->getOperand(2));
3107 Ops.push_back(N->getOperand(3));
3108 Ops.push_back(N->getOperand(4));
3109 Ops.push_back(Chain);
3110 break;
3111 case NVPTXISD::Suld3DI8Clamp:
3112 Opc = NVPTX::SULD_3D_I8_CLAMP;
3113 Ops.push_back(TexHandle);
3114 Ops.push_back(N->getOperand(2));
3115 Ops.push_back(N->getOperand(3));
3116 Ops.push_back(N->getOperand(4));
3117 Ops.push_back(Chain);
3118 break;
3119 case NVPTXISD::Suld3DI16Clamp:
3120 Opc = NVPTX::SULD_3D_I16_CLAMP;
3121 Ops.push_back(TexHandle);
3122 Ops.push_back(N->getOperand(2));
3123 Ops.push_back(N->getOperand(3));
3124 Ops.push_back(N->getOperand(4));
3125 Ops.push_back(Chain);
3126 break;
3127 case NVPTXISD::Suld3DI32Clamp:
3128 Opc = NVPTX::SULD_3D_I32_CLAMP;
3129 Ops.push_back(TexHandle);
3130 Ops.push_back(N->getOperand(2));
3131 Ops.push_back(N->getOperand(3));
3132 Ops.push_back(N->getOperand(4));
3133 Ops.push_back(Chain);
3134 break;
3135 case NVPTXISD::Suld3DI64Clamp:
3136 Opc = NVPTX::SULD_3D_I64_CLAMP;
3137 Ops.push_back(TexHandle);
3138 Ops.push_back(N->getOperand(2));
3139 Ops.push_back(N->getOperand(3));
3140 Ops.push_back(N->getOperand(4));
3141 Ops.push_back(Chain);
3142 break;
3143 case NVPTXISD::Suld3DV2I8Clamp:
3144 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
3145 Ops.push_back(TexHandle);
3146 Ops.push_back(N->getOperand(2));
3147 Ops.push_back(N->getOperand(3));
3148 Ops.push_back(N->getOperand(4));
3149 Ops.push_back(Chain);
3150 break;
3151 case NVPTXISD::Suld3DV2I16Clamp:
3152 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
3153 Ops.push_back(TexHandle);
3154 Ops.push_back(N->getOperand(2));
3155 Ops.push_back(N->getOperand(3));
3156 Ops.push_back(N->getOperand(4));
3157 Ops.push_back(Chain);
3158 break;
3159 case NVPTXISD::Suld3DV2I32Clamp:
3160 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
3161 Ops.push_back(TexHandle);
3162 Ops.push_back(N->getOperand(2));
3163 Ops.push_back(N->getOperand(3));
3164 Ops.push_back(N->getOperand(4));
3165 Ops.push_back(Chain);
3166 break;
3167 case NVPTXISD::Suld3DV2I64Clamp:
3168 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
3169 Ops.push_back(TexHandle);
3170 Ops.push_back(N->getOperand(2));
3171 Ops.push_back(N->getOperand(3));
3172 Ops.push_back(N->getOperand(4));
3173 Ops.push_back(Chain);
3174 break;
3175 case NVPTXISD::Suld3DV4I8Clamp:
3176 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
3177 Ops.push_back(TexHandle);
3178 Ops.push_back(N->getOperand(2));
3179 Ops.push_back(N->getOperand(3));
3180 Ops.push_back(N->getOperand(4));
3181 Ops.push_back(Chain);
3182 break;
3183 case NVPTXISD::Suld3DV4I16Clamp:
3184 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
3185 Ops.push_back(TexHandle);
3186 Ops.push_back(N->getOperand(2));
3187 Ops.push_back(N->getOperand(3));
3188 Ops.push_back(N->getOperand(4));
3189 Ops.push_back(Chain);
3190 break;
3191 case NVPTXISD::Suld3DV4I32Clamp:
3192 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
3193 Ops.push_back(TexHandle);
3194 Ops.push_back(N->getOperand(2));
3195 Ops.push_back(N->getOperand(3));
3196 Ops.push_back(N->getOperand(4));
3197 Ops.push_back(Chain);
3198 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003199 case NVPTXISD::Suld1DI8Trap:
3200 Opc = NVPTX::SULD_1D_I8_TRAP;
3201 Ops.push_back(TexHandle);
3202 Ops.push_back(N->getOperand(2));
3203 Ops.push_back(Chain);
3204 break;
3205 case NVPTXISD::Suld1DI16Trap:
3206 Opc = NVPTX::SULD_1D_I16_TRAP;
3207 Ops.push_back(TexHandle);
3208 Ops.push_back(N->getOperand(2));
3209 Ops.push_back(Chain);
3210 break;
3211 case NVPTXISD::Suld1DI32Trap:
3212 Opc = NVPTX::SULD_1D_I32_TRAP;
3213 Ops.push_back(TexHandle);
3214 Ops.push_back(N->getOperand(2));
3215 Ops.push_back(Chain);
3216 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003217 case NVPTXISD::Suld1DI64Trap:
3218 Opc = NVPTX::SULD_1D_I64_TRAP;
3219 Ops.push_back(TexHandle);
3220 Ops.push_back(N->getOperand(2));
3221 Ops.push_back(Chain);
3222 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003223 case NVPTXISD::Suld1DV2I8Trap:
3224 Opc = NVPTX::SULD_1D_V2I8_TRAP;
3225 Ops.push_back(TexHandle);
3226 Ops.push_back(N->getOperand(2));
3227 Ops.push_back(Chain);
3228 break;
3229 case NVPTXISD::Suld1DV2I16Trap:
3230 Opc = NVPTX::SULD_1D_V2I16_TRAP;
3231 Ops.push_back(TexHandle);
3232 Ops.push_back(N->getOperand(2));
3233 Ops.push_back(Chain);
3234 break;
3235 case NVPTXISD::Suld1DV2I32Trap:
3236 Opc = NVPTX::SULD_1D_V2I32_TRAP;
3237 Ops.push_back(TexHandle);
3238 Ops.push_back(N->getOperand(2));
3239 Ops.push_back(Chain);
3240 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003241 case NVPTXISD::Suld1DV2I64Trap:
3242 Opc = NVPTX::SULD_1D_V2I64_TRAP;
3243 Ops.push_back(TexHandle);
3244 Ops.push_back(N->getOperand(2));
3245 Ops.push_back(Chain);
3246 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003247 case NVPTXISD::Suld1DV4I8Trap:
3248 Opc = NVPTX::SULD_1D_V4I8_TRAP;
3249 Ops.push_back(TexHandle);
3250 Ops.push_back(N->getOperand(2));
3251 Ops.push_back(Chain);
3252 break;
3253 case NVPTXISD::Suld1DV4I16Trap:
3254 Opc = NVPTX::SULD_1D_V4I16_TRAP;
3255 Ops.push_back(TexHandle);
3256 Ops.push_back(N->getOperand(2));
3257 Ops.push_back(Chain);
3258 break;
3259 case NVPTXISD::Suld1DV4I32Trap:
3260 Opc = NVPTX::SULD_1D_V4I32_TRAP;
3261 Ops.push_back(TexHandle);
3262 Ops.push_back(N->getOperand(2));
3263 Ops.push_back(Chain);
3264 break;
3265 case NVPTXISD::Suld1DArrayI8Trap:
3266 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
3267 Ops.push_back(TexHandle);
3268 Ops.push_back(N->getOperand(2));
3269 Ops.push_back(N->getOperand(3));
3270 Ops.push_back(Chain);
3271 break;
3272 case NVPTXISD::Suld1DArrayI16Trap:
3273 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
3274 Ops.push_back(TexHandle);
3275 Ops.push_back(N->getOperand(2));
3276 Ops.push_back(N->getOperand(3));
3277 Ops.push_back(Chain);
3278 break;
3279 case NVPTXISD::Suld1DArrayI32Trap:
3280 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
3281 Ops.push_back(TexHandle);
3282 Ops.push_back(N->getOperand(2));
3283 Ops.push_back(N->getOperand(3));
3284 Ops.push_back(Chain);
3285 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003286 case NVPTXISD::Suld1DArrayI64Trap:
3287 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
3288 Ops.push_back(TexHandle);
3289 Ops.push_back(N->getOperand(2));
3290 Ops.push_back(N->getOperand(3));
3291 Ops.push_back(Chain);
3292 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003293 case NVPTXISD::Suld1DArrayV2I8Trap:
3294 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
3295 Ops.push_back(TexHandle);
3296 Ops.push_back(N->getOperand(2));
3297 Ops.push_back(N->getOperand(3));
3298 Ops.push_back(Chain);
3299 break;
3300 case NVPTXISD::Suld1DArrayV2I16Trap:
3301 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
3302 Ops.push_back(TexHandle);
3303 Ops.push_back(N->getOperand(2));
3304 Ops.push_back(N->getOperand(3));
3305 Ops.push_back(Chain);
3306 break;
3307 case NVPTXISD::Suld1DArrayV2I32Trap:
3308 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
3309 Ops.push_back(TexHandle);
3310 Ops.push_back(N->getOperand(2));
3311 Ops.push_back(N->getOperand(3));
3312 Ops.push_back(Chain);
3313 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003314 case NVPTXISD::Suld1DArrayV2I64Trap:
3315 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
3316 Ops.push_back(TexHandle);
3317 Ops.push_back(N->getOperand(2));
3318 Ops.push_back(N->getOperand(3));
3319 Ops.push_back(Chain);
3320 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003321 case NVPTXISD::Suld1DArrayV4I8Trap:
3322 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
3323 Ops.push_back(TexHandle);
3324 Ops.push_back(N->getOperand(2));
3325 Ops.push_back(N->getOperand(3));
3326 Ops.push_back(Chain);
3327 break;
3328 case NVPTXISD::Suld1DArrayV4I16Trap:
3329 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
3330 Ops.push_back(TexHandle);
3331 Ops.push_back(N->getOperand(2));
3332 Ops.push_back(N->getOperand(3));
3333 Ops.push_back(Chain);
3334 break;
3335 case NVPTXISD::Suld1DArrayV4I32Trap:
3336 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
3337 Ops.push_back(TexHandle);
3338 Ops.push_back(N->getOperand(2));
3339 Ops.push_back(N->getOperand(3));
3340 Ops.push_back(Chain);
3341 break;
3342 case NVPTXISD::Suld2DI8Trap:
3343 Opc = NVPTX::SULD_2D_I8_TRAP;
3344 Ops.push_back(TexHandle);
3345 Ops.push_back(N->getOperand(2));
3346 Ops.push_back(N->getOperand(3));
3347 Ops.push_back(Chain);
3348 break;
3349 case NVPTXISD::Suld2DI16Trap:
3350 Opc = NVPTX::SULD_2D_I16_TRAP;
3351 Ops.push_back(TexHandle);
3352 Ops.push_back(N->getOperand(2));
3353 Ops.push_back(N->getOperand(3));
3354 Ops.push_back(Chain);
3355 break;
3356 case NVPTXISD::Suld2DI32Trap:
3357 Opc = NVPTX::SULD_2D_I32_TRAP;
3358 Ops.push_back(TexHandle);
3359 Ops.push_back(N->getOperand(2));
3360 Ops.push_back(N->getOperand(3));
3361 Ops.push_back(Chain);
3362 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003363 case NVPTXISD::Suld2DI64Trap:
3364 Opc = NVPTX::SULD_2D_I64_TRAP;
3365 Ops.push_back(TexHandle);
3366 Ops.push_back(N->getOperand(2));
3367 Ops.push_back(N->getOperand(3));
3368 Ops.push_back(Chain);
3369 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003370 case NVPTXISD::Suld2DV2I8Trap:
3371 Opc = NVPTX::SULD_2D_V2I8_TRAP;
3372 Ops.push_back(TexHandle);
3373 Ops.push_back(N->getOperand(2));
3374 Ops.push_back(N->getOperand(3));
3375 Ops.push_back(Chain);
3376 break;
3377 case NVPTXISD::Suld2DV2I16Trap:
3378 Opc = NVPTX::SULD_2D_V2I16_TRAP;
3379 Ops.push_back(TexHandle);
3380 Ops.push_back(N->getOperand(2));
3381 Ops.push_back(N->getOperand(3));
3382 Ops.push_back(Chain);
3383 break;
3384 case NVPTXISD::Suld2DV2I32Trap:
3385 Opc = NVPTX::SULD_2D_V2I32_TRAP;
3386 Ops.push_back(TexHandle);
3387 Ops.push_back(N->getOperand(2));
3388 Ops.push_back(N->getOperand(3));
3389 Ops.push_back(Chain);
3390 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003391 case NVPTXISD::Suld2DV2I64Trap:
3392 Opc = NVPTX::SULD_2D_V2I64_TRAP;
3393 Ops.push_back(TexHandle);
3394 Ops.push_back(N->getOperand(2));
3395 Ops.push_back(N->getOperand(3));
3396 Ops.push_back(Chain);
3397 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003398 case NVPTXISD::Suld2DV4I8Trap:
3399 Opc = NVPTX::SULD_2D_V4I8_TRAP;
3400 Ops.push_back(TexHandle);
3401 Ops.push_back(N->getOperand(2));
3402 Ops.push_back(N->getOperand(3));
3403 Ops.push_back(Chain);
3404 break;
3405 case NVPTXISD::Suld2DV4I16Trap:
3406 Opc = NVPTX::SULD_2D_V4I16_TRAP;
3407 Ops.push_back(TexHandle);
3408 Ops.push_back(N->getOperand(2));
3409 Ops.push_back(N->getOperand(3));
3410 Ops.push_back(Chain);
3411 break;
3412 case NVPTXISD::Suld2DV4I32Trap:
3413 Opc = NVPTX::SULD_2D_V4I32_TRAP;
3414 Ops.push_back(TexHandle);
3415 Ops.push_back(N->getOperand(2));
3416 Ops.push_back(N->getOperand(3));
3417 Ops.push_back(Chain);
3418 break;
3419 case NVPTXISD::Suld2DArrayI8Trap:
3420 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
3421 Ops.push_back(TexHandle);
3422 Ops.push_back(N->getOperand(2));
3423 Ops.push_back(N->getOperand(3));
3424 Ops.push_back(N->getOperand(4));
3425 Ops.push_back(Chain);
3426 break;
3427 case NVPTXISD::Suld2DArrayI16Trap:
3428 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
3429 Ops.push_back(TexHandle);
3430 Ops.push_back(N->getOperand(2));
3431 Ops.push_back(N->getOperand(3));
3432 Ops.push_back(N->getOperand(4));
3433 Ops.push_back(Chain);
3434 break;
3435 case NVPTXISD::Suld2DArrayI32Trap:
3436 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
3437 Ops.push_back(TexHandle);
3438 Ops.push_back(N->getOperand(2));
3439 Ops.push_back(N->getOperand(3));
3440 Ops.push_back(N->getOperand(4));
3441 Ops.push_back(Chain);
3442 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003443 case NVPTXISD::Suld2DArrayI64Trap:
3444 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
3445 Ops.push_back(TexHandle);
3446 Ops.push_back(N->getOperand(2));
3447 Ops.push_back(N->getOperand(3));
3448 Ops.push_back(N->getOperand(4));
3449 Ops.push_back(Chain);
3450 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003451 case NVPTXISD::Suld2DArrayV2I8Trap:
3452 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
3453 Ops.push_back(TexHandle);
3454 Ops.push_back(N->getOperand(2));
3455 Ops.push_back(N->getOperand(3));
3456 Ops.push_back(N->getOperand(4));
3457 Ops.push_back(Chain);
3458 break;
3459 case NVPTXISD::Suld2DArrayV2I16Trap:
3460 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
3461 Ops.push_back(TexHandle);
3462 Ops.push_back(N->getOperand(2));
3463 Ops.push_back(N->getOperand(3));
3464 Ops.push_back(N->getOperand(4));
3465 Ops.push_back(Chain);
3466 break;
3467 case NVPTXISD::Suld2DArrayV2I32Trap:
3468 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
3469 Ops.push_back(TexHandle);
3470 Ops.push_back(N->getOperand(2));
3471 Ops.push_back(N->getOperand(3));
3472 Ops.push_back(N->getOperand(4));
3473 Ops.push_back(Chain);
3474 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003475 case NVPTXISD::Suld2DArrayV2I64Trap:
3476 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
3477 Ops.push_back(TexHandle);
3478 Ops.push_back(N->getOperand(2));
3479 Ops.push_back(N->getOperand(3));
3480 Ops.push_back(N->getOperand(4));
3481 Ops.push_back(Chain);
3482 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003483 case NVPTXISD::Suld2DArrayV4I8Trap:
3484 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
3485 Ops.push_back(TexHandle);
3486 Ops.push_back(N->getOperand(2));
3487 Ops.push_back(N->getOperand(3));
3488 Ops.push_back(N->getOperand(4));
3489 Ops.push_back(Chain);
3490 break;
3491 case NVPTXISD::Suld2DArrayV4I16Trap:
3492 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
3493 Ops.push_back(TexHandle);
3494 Ops.push_back(N->getOperand(2));
3495 Ops.push_back(N->getOperand(3));
3496 Ops.push_back(N->getOperand(4));
3497 Ops.push_back(Chain);
3498 break;
3499 case NVPTXISD::Suld2DArrayV4I32Trap:
3500 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
3501 Ops.push_back(TexHandle);
3502 Ops.push_back(N->getOperand(2));
3503 Ops.push_back(N->getOperand(3));
3504 Ops.push_back(N->getOperand(4));
3505 Ops.push_back(Chain);
3506 break;
3507 case NVPTXISD::Suld3DI8Trap:
3508 Opc = NVPTX::SULD_3D_I8_TRAP;
3509 Ops.push_back(TexHandle);
3510 Ops.push_back(N->getOperand(2));
3511 Ops.push_back(N->getOperand(3));
3512 Ops.push_back(N->getOperand(4));
3513 Ops.push_back(Chain);
3514 break;
3515 case NVPTXISD::Suld3DI16Trap:
3516 Opc = NVPTX::SULD_3D_I16_TRAP;
3517 Ops.push_back(TexHandle);
3518 Ops.push_back(N->getOperand(2));
3519 Ops.push_back(N->getOperand(3));
3520 Ops.push_back(N->getOperand(4));
3521 Ops.push_back(Chain);
3522 break;
3523 case NVPTXISD::Suld3DI32Trap:
3524 Opc = NVPTX::SULD_3D_I32_TRAP;
3525 Ops.push_back(TexHandle);
3526 Ops.push_back(N->getOperand(2));
3527 Ops.push_back(N->getOperand(3));
3528 Ops.push_back(N->getOperand(4));
3529 Ops.push_back(Chain);
3530 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003531 case NVPTXISD::Suld3DI64Trap:
3532 Opc = NVPTX::SULD_3D_I64_TRAP;
3533 Ops.push_back(TexHandle);
3534 Ops.push_back(N->getOperand(2));
3535 Ops.push_back(N->getOperand(3));
3536 Ops.push_back(N->getOperand(4));
3537 Ops.push_back(Chain);
3538 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003539 case NVPTXISD::Suld3DV2I8Trap:
3540 Opc = NVPTX::SULD_3D_V2I8_TRAP;
3541 Ops.push_back(TexHandle);
3542 Ops.push_back(N->getOperand(2));
3543 Ops.push_back(N->getOperand(3));
3544 Ops.push_back(N->getOperand(4));
3545 Ops.push_back(Chain);
3546 break;
3547 case NVPTXISD::Suld3DV2I16Trap:
3548 Opc = NVPTX::SULD_3D_V2I16_TRAP;
3549 Ops.push_back(TexHandle);
3550 Ops.push_back(N->getOperand(2));
3551 Ops.push_back(N->getOperand(3));
3552 Ops.push_back(N->getOperand(4));
3553 Ops.push_back(Chain);
3554 break;
3555 case NVPTXISD::Suld3DV2I32Trap:
3556 Opc = NVPTX::SULD_3D_V2I32_TRAP;
3557 Ops.push_back(TexHandle);
3558 Ops.push_back(N->getOperand(2));
3559 Ops.push_back(N->getOperand(3));
3560 Ops.push_back(N->getOperand(4));
3561 Ops.push_back(Chain);
3562 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003563 case NVPTXISD::Suld3DV2I64Trap:
3564 Opc = NVPTX::SULD_3D_V2I64_TRAP;
3565 Ops.push_back(TexHandle);
3566 Ops.push_back(N->getOperand(2));
3567 Ops.push_back(N->getOperand(3));
3568 Ops.push_back(N->getOperand(4));
3569 Ops.push_back(Chain);
3570 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003571 case NVPTXISD::Suld3DV4I8Trap:
3572 Opc = NVPTX::SULD_3D_V4I8_TRAP;
3573 Ops.push_back(TexHandle);
3574 Ops.push_back(N->getOperand(2));
3575 Ops.push_back(N->getOperand(3));
3576 Ops.push_back(N->getOperand(4));
3577 Ops.push_back(Chain);
3578 break;
3579 case NVPTXISD::Suld3DV4I16Trap:
3580 Opc = NVPTX::SULD_3D_V4I16_TRAP;
3581 Ops.push_back(TexHandle);
3582 Ops.push_back(N->getOperand(2));
3583 Ops.push_back(N->getOperand(3));
3584 Ops.push_back(N->getOperand(4));
3585 Ops.push_back(Chain);
3586 break;
3587 case NVPTXISD::Suld3DV4I32Trap:
3588 Opc = NVPTX::SULD_3D_V4I32_TRAP;
3589 Ops.push_back(TexHandle);
3590 Ops.push_back(N->getOperand(2));
3591 Ops.push_back(N->getOperand(3));
3592 Ops.push_back(N->getOperand(4));
3593 Ops.push_back(Chain);
3594 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003595 case NVPTXISD::Suld1DI8Zero:
3596 Opc = NVPTX::SULD_1D_I8_ZERO;
3597 Ops.push_back(TexHandle);
3598 Ops.push_back(N->getOperand(2));
3599 Ops.push_back(Chain);
3600 break;
3601 case NVPTXISD::Suld1DI16Zero:
3602 Opc = NVPTX::SULD_1D_I16_ZERO;
3603 Ops.push_back(TexHandle);
3604 Ops.push_back(N->getOperand(2));
3605 Ops.push_back(Chain);
3606 break;
3607 case NVPTXISD::Suld1DI32Zero:
3608 Opc = NVPTX::SULD_1D_I32_ZERO;
3609 Ops.push_back(TexHandle);
3610 Ops.push_back(N->getOperand(2));
3611 Ops.push_back(Chain);
3612 break;
3613 case NVPTXISD::Suld1DI64Zero:
3614 Opc = NVPTX::SULD_1D_I64_ZERO;
3615 Ops.push_back(TexHandle);
3616 Ops.push_back(N->getOperand(2));
3617 Ops.push_back(Chain);
3618 break;
3619 case NVPTXISD::Suld1DV2I8Zero:
3620 Opc = NVPTX::SULD_1D_V2I8_ZERO;
3621 Ops.push_back(TexHandle);
3622 Ops.push_back(N->getOperand(2));
3623 Ops.push_back(Chain);
3624 break;
3625 case NVPTXISD::Suld1DV2I16Zero:
3626 Opc = NVPTX::SULD_1D_V2I16_ZERO;
3627 Ops.push_back(TexHandle);
3628 Ops.push_back(N->getOperand(2));
3629 Ops.push_back(Chain);
3630 break;
3631 case NVPTXISD::Suld1DV2I32Zero:
3632 Opc = NVPTX::SULD_1D_V2I32_ZERO;
3633 Ops.push_back(TexHandle);
3634 Ops.push_back(N->getOperand(2));
3635 Ops.push_back(Chain);
3636 break;
3637 case NVPTXISD::Suld1DV2I64Zero:
3638 Opc = NVPTX::SULD_1D_V2I64_ZERO;
3639 Ops.push_back(TexHandle);
3640 Ops.push_back(N->getOperand(2));
3641 Ops.push_back(Chain);
3642 break;
3643 case NVPTXISD::Suld1DV4I8Zero:
3644 Opc = NVPTX::SULD_1D_V4I8_ZERO;
3645 Ops.push_back(TexHandle);
3646 Ops.push_back(N->getOperand(2));
3647 Ops.push_back(Chain);
3648 break;
3649 case NVPTXISD::Suld1DV4I16Zero:
3650 Opc = NVPTX::SULD_1D_V4I16_ZERO;
3651 Ops.push_back(TexHandle);
3652 Ops.push_back(N->getOperand(2));
3653 Ops.push_back(Chain);
3654 break;
3655 case NVPTXISD::Suld1DV4I32Zero:
3656 Opc = NVPTX::SULD_1D_V4I32_ZERO;
3657 Ops.push_back(TexHandle);
3658 Ops.push_back(N->getOperand(2));
3659 Ops.push_back(Chain);
3660 break;
3661 case NVPTXISD::Suld1DArrayI8Zero:
3662 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
3663 Ops.push_back(TexHandle);
3664 Ops.push_back(N->getOperand(2));
3665 Ops.push_back(N->getOperand(3));
3666 Ops.push_back(Chain);
3667 break;
3668 case NVPTXISD::Suld1DArrayI16Zero:
3669 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
3670 Ops.push_back(TexHandle);
3671 Ops.push_back(N->getOperand(2));
3672 Ops.push_back(N->getOperand(3));
3673 Ops.push_back(Chain);
3674 break;
3675 case NVPTXISD::Suld1DArrayI32Zero:
3676 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
3677 Ops.push_back(TexHandle);
3678 Ops.push_back(N->getOperand(2));
3679 Ops.push_back(N->getOperand(3));
3680 Ops.push_back(Chain);
3681 break;
3682 case NVPTXISD::Suld1DArrayI64Zero:
3683 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
3684 Ops.push_back(TexHandle);
3685 Ops.push_back(N->getOperand(2));
3686 Ops.push_back(N->getOperand(3));
3687 Ops.push_back(Chain);
3688 break;
3689 case NVPTXISD::Suld1DArrayV2I8Zero:
3690 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
3691 Ops.push_back(TexHandle);
3692 Ops.push_back(N->getOperand(2));
3693 Ops.push_back(N->getOperand(3));
3694 Ops.push_back(Chain);
3695 break;
3696 case NVPTXISD::Suld1DArrayV2I16Zero:
3697 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
3698 Ops.push_back(TexHandle);
3699 Ops.push_back(N->getOperand(2));
3700 Ops.push_back(N->getOperand(3));
3701 Ops.push_back(Chain);
3702 break;
3703 case NVPTXISD::Suld1DArrayV2I32Zero:
3704 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
3705 Ops.push_back(TexHandle);
3706 Ops.push_back(N->getOperand(2));
3707 Ops.push_back(N->getOperand(3));
3708 Ops.push_back(Chain);
3709 break;
3710 case NVPTXISD::Suld1DArrayV2I64Zero:
3711 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
3712 Ops.push_back(TexHandle);
3713 Ops.push_back(N->getOperand(2));
3714 Ops.push_back(N->getOperand(3));
3715 Ops.push_back(Chain);
3716 break;
3717 case NVPTXISD::Suld1DArrayV4I8Zero:
3718 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
3719 Ops.push_back(TexHandle);
3720 Ops.push_back(N->getOperand(2));
3721 Ops.push_back(N->getOperand(3));
3722 Ops.push_back(Chain);
3723 break;
3724 case NVPTXISD::Suld1DArrayV4I16Zero:
3725 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
3726 Ops.push_back(TexHandle);
3727 Ops.push_back(N->getOperand(2));
3728 Ops.push_back(N->getOperand(3));
3729 Ops.push_back(Chain);
3730 break;
3731 case NVPTXISD::Suld1DArrayV4I32Zero:
3732 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
3733 Ops.push_back(TexHandle);
3734 Ops.push_back(N->getOperand(2));
3735 Ops.push_back(N->getOperand(3));
3736 Ops.push_back(Chain);
3737 break;
3738 case NVPTXISD::Suld2DI8Zero:
3739 Opc = NVPTX::SULD_2D_I8_ZERO;
3740 Ops.push_back(TexHandle);
3741 Ops.push_back(N->getOperand(2));
3742 Ops.push_back(N->getOperand(3));
3743 Ops.push_back(Chain);
3744 break;
3745 case NVPTXISD::Suld2DI16Zero:
3746 Opc = NVPTX::SULD_2D_I16_ZERO;
3747 Ops.push_back(TexHandle);
3748 Ops.push_back(N->getOperand(2));
3749 Ops.push_back(N->getOperand(3));
3750 Ops.push_back(Chain);
3751 break;
3752 case NVPTXISD::Suld2DI32Zero:
3753 Opc = NVPTX::SULD_2D_I32_ZERO;
3754 Ops.push_back(TexHandle);
3755 Ops.push_back(N->getOperand(2));
3756 Ops.push_back(N->getOperand(3));
3757 Ops.push_back(Chain);
3758 break;
3759 case NVPTXISD::Suld2DI64Zero:
3760 Opc = NVPTX::SULD_2D_I64_ZERO;
3761 Ops.push_back(TexHandle);
3762 Ops.push_back(N->getOperand(2));
3763 Ops.push_back(N->getOperand(3));
3764 Ops.push_back(Chain);
3765 break;
3766 case NVPTXISD::Suld2DV2I8Zero:
3767 Opc = NVPTX::SULD_2D_V2I8_ZERO;
3768 Ops.push_back(TexHandle);
3769 Ops.push_back(N->getOperand(2));
3770 Ops.push_back(N->getOperand(3));
3771 Ops.push_back(Chain);
3772 break;
3773 case NVPTXISD::Suld2DV2I16Zero:
3774 Opc = NVPTX::SULD_2D_V2I16_ZERO;
3775 Ops.push_back(TexHandle);
3776 Ops.push_back(N->getOperand(2));
3777 Ops.push_back(N->getOperand(3));
3778 Ops.push_back(Chain);
3779 break;
3780 case NVPTXISD::Suld2DV2I32Zero:
3781 Opc = NVPTX::SULD_2D_V2I32_ZERO;
3782 Ops.push_back(TexHandle);
3783 Ops.push_back(N->getOperand(2));
3784 Ops.push_back(N->getOperand(3));
3785 Ops.push_back(Chain);
3786 break;
3787 case NVPTXISD::Suld2DV2I64Zero:
3788 Opc = NVPTX::SULD_2D_V2I64_ZERO;
3789 Ops.push_back(TexHandle);
3790 Ops.push_back(N->getOperand(2));
3791 Ops.push_back(N->getOperand(3));
3792 Ops.push_back(Chain);
3793 break;
3794 case NVPTXISD::Suld2DV4I8Zero:
3795 Opc = NVPTX::SULD_2D_V4I8_ZERO;
3796 Ops.push_back(TexHandle);
3797 Ops.push_back(N->getOperand(2));
3798 Ops.push_back(N->getOperand(3));
3799 Ops.push_back(Chain);
3800 break;
3801 case NVPTXISD::Suld2DV4I16Zero:
3802 Opc = NVPTX::SULD_2D_V4I16_ZERO;
3803 Ops.push_back(TexHandle);
3804 Ops.push_back(N->getOperand(2));
3805 Ops.push_back(N->getOperand(3));
3806 Ops.push_back(Chain);
3807 break;
3808 case NVPTXISD::Suld2DV4I32Zero:
3809 Opc = NVPTX::SULD_2D_V4I32_ZERO;
3810 Ops.push_back(TexHandle);
3811 Ops.push_back(N->getOperand(2));
3812 Ops.push_back(N->getOperand(3));
3813 Ops.push_back(Chain);
3814 break;
3815 case NVPTXISD::Suld2DArrayI8Zero:
3816 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
3817 Ops.push_back(TexHandle);
3818 Ops.push_back(N->getOperand(2));
3819 Ops.push_back(N->getOperand(3));
3820 Ops.push_back(N->getOperand(4));
3821 Ops.push_back(Chain);
3822 break;
3823 case NVPTXISD::Suld2DArrayI16Zero:
3824 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
3825 Ops.push_back(TexHandle);
3826 Ops.push_back(N->getOperand(2));
3827 Ops.push_back(N->getOperand(3));
3828 Ops.push_back(N->getOperand(4));
3829 Ops.push_back(Chain);
3830 break;
3831 case NVPTXISD::Suld2DArrayI32Zero:
3832 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
3833 Ops.push_back(TexHandle);
3834 Ops.push_back(N->getOperand(2));
3835 Ops.push_back(N->getOperand(3));
3836 Ops.push_back(N->getOperand(4));
3837 Ops.push_back(Chain);
3838 break;
3839 case NVPTXISD::Suld2DArrayI64Zero:
3840 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
3841 Ops.push_back(TexHandle);
3842 Ops.push_back(N->getOperand(2));
3843 Ops.push_back(N->getOperand(3));
3844 Ops.push_back(N->getOperand(4));
3845 Ops.push_back(Chain);
3846 break;
3847 case NVPTXISD::Suld2DArrayV2I8Zero:
3848 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
3849 Ops.push_back(TexHandle);
3850 Ops.push_back(N->getOperand(2));
3851 Ops.push_back(N->getOperand(3));
3852 Ops.push_back(N->getOperand(4));
3853 Ops.push_back(Chain);
3854 break;
3855 case NVPTXISD::Suld2DArrayV2I16Zero:
3856 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
3857 Ops.push_back(TexHandle);
3858 Ops.push_back(N->getOperand(2));
3859 Ops.push_back(N->getOperand(3));
3860 Ops.push_back(N->getOperand(4));
3861 Ops.push_back(Chain);
3862 break;
3863 case NVPTXISD::Suld2DArrayV2I32Zero:
3864 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
3865 Ops.push_back(TexHandle);
3866 Ops.push_back(N->getOperand(2));
3867 Ops.push_back(N->getOperand(3));
3868 Ops.push_back(N->getOperand(4));
3869 Ops.push_back(Chain);
3870 break;
3871 case NVPTXISD::Suld2DArrayV2I64Zero:
3872 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
3873 Ops.push_back(TexHandle);
3874 Ops.push_back(N->getOperand(2));
3875 Ops.push_back(N->getOperand(3));
3876 Ops.push_back(N->getOperand(4));
3877 Ops.push_back(Chain);
3878 break;
3879 case NVPTXISD::Suld2DArrayV4I8Zero:
3880 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
3881 Ops.push_back(TexHandle);
3882 Ops.push_back(N->getOperand(2));
3883 Ops.push_back(N->getOperand(3));
3884 Ops.push_back(N->getOperand(4));
3885 Ops.push_back(Chain);
3886 break;
3887 case NVPTXISD::Suld2DArrayV4I16Zero:
3888 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
3889 Ops.push_back(TexHandle);
3890 Ops.push_back(N->getOperand(2));
3891 Ops.push_back(N->getOperand(3));
3892 Ops.push_back(N->getOperand(4));
3893 Ops.push_back(Chain);
3894 break;
3895 case NVPTXISD::Suld2DArrayV4I32Zero:
3896 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
3897 Ops.push_back(TexHandle);
3898 Ops.push_back(N->getOperand(2));
3899 Ops.push_back(N->getOperand(3));
3900 Ops.push_back(N->getOperand(4));
3901 Ops.push_back(Chain);
3902 break;
3903 case NVPTXISD::Suld3DI8Zero:
3904 Opc = NVPTX::SULD_3D_I8_ZERO;
3905 Ops.push_back(TexHandle);
3906 Ops.push_back(N->getOperand(2));
3907 Ops.push_back(N->getOperand(3));
3908 Ops.push_back(N->getOperand(4));
3909 Ops.push_back(Chain);
3910 break;
3911 case NVPTXISD::Suld3DI16Zero:
3912 Opc = NVPTX::SULD_3D_I16_ZERO;
3913 Ops.push_back(TexHandle);
3914 Ops.push_back(N->getOperand(2));
3915 Ops.push_back(N->getOperand(3));
3916 Ops.push_back(N->getOperand(4));
3917 Ops.push_back(Chain);
3918 break;
3919 case NVPTXISD::Suld3DI32Zero:
3920 Opc = NVPTX::SULD_3D_I32_ZERO;
3921 Ops.push_back(TexHandle);
3922 Ops.push_back(N->getOperand(2));
3923 Ops.push_back(N->getOperand(3));
3924 Ops.push_back(N->getOperand(4));
3925 Ops.push_back(Chain);
3926 break;
3927 case NVPTXISD::Suld3DI64Zero:
3928 Opc = NVPTX::SULD_3D_I64_ZERO;
3929 Ops.push_back(TexHandle);
3930 Ops.push_back(N->getOperand(2));
3931 Ops.push_back(N->getOperand(3));
3932 Ops.push_back(N->getOperand(4));
3933 Ops.push_back(Chain);
3934 break;
3935 case NVPTXISD::Suld3DV2I8Zero:
3936 Opc = NVPTX::SULD_3D_V2I8_ZERO;
3937 Ops.push_back(TexHandle);
3938 Ops.push_back(N->getOperand(2));
3939 Ops.push_back(N->getOperand(3));
3940 Ops.push_back(N->getOperand(4));
3941 Ops.push_back(Chain);
3942 break;
3943 case NVPTXISD::Suld3DV2I16Zero:
3944 Opc = NVPTX::SULD_3D_V2I16_ZERO;
3945 Ops.push_back(TexHandle);
3946 Ops.push_back(N->getOperand(2));
3947 Ops.push_back(N->getOperand(3));
3948 Ops.push_back(N->getOperand(4));
3949 Ops.push_back(Chain);
3950 break;
3951 case NVPTXISD::Suld3DV2I32Zero:
3952 Opc = NVPTX::SULD_3D_V2I32_ZERO;
3953 Ops.push_back(TexHandle);
3954 Ops.push_back(N->getOperand(2));
3955 Ops.push_back(N->getOperand(3));
3956 Ops.push_back(N->getOperand(4));
3957 Ops.push_back(Chain);
3958 break;
3959 case NVPTXISD::Suld3DV2I64Zero:
3960 Opc = NVPTX::SULD_3D_V2I64_ZERO;
3961 Ops.push_back(TexHandle);
3962 Ops.push_back(N->getOperand(2));
3963 Ops.push_back(N->getOperand(3));
3964 Ops.push_back(N->getOperand(4));
3965 Ops.push_back(Chain);
3966 break;
3967 case NVPTXISD::Suld3DV4I8Zero:
3968 Opc = NVPTX::SULD_3D_V4I8_ZERO;
3969 Ops.push_back(TexHandle);
3970 Ops.push_back(N->getOperand(2));
3971 Ops.push_back(N->getOperand(3));
3972 Ops.push_back(N->getOperand(4));
3973 Ops.push_back(Chain);
3974 break;
3975 case NVPTXISD::Suld3DV4I16Zero:
3976 Opc = NVPTX::SULD_3D_V4I16_ZERO;
3977 Ops.push_back(TexHandle);
3978 Ops.push_back(N->getOperand(2));
3979 Ops.push_back(N->getOperand(3));
3980 Ops.push_back(N->getOperand(4));
3981 Ops.push_back(Chain);
3982 break;
3983 case NVPTXISD::Suld3DV4I32Zero:
3984 Opc = NVPTX::SULD_3D_V4I32_ZERO;
3985 Ops.push_back(TexHandle);
3986 Ops.push_back(N->getOperand(2));
3987 Ops.push_back(N->getOperand(3));
3988 Ops.push_back(N->getOperand(4));
3989 Ops.push_back(Chain);
3990 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003991 }
Justin Bogner8d83fb62016-05-13 21:12:53 +00003992 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3993 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003994}
3995
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003996
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003997/// SelectBFE - Look for instruction sequences that can be made more efficient
3998/// by using the 'bfe' (bit-field extract) PTX instruction
Justin Bogner8d83fb62016-05-13 21:12:53 +00003999bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004000 SDLoc DL(N);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004001 SDValue LHS = N->getOperand(0);
4002 SDValue RHS = N->getOperand(1);
4003 SDValue Len;
4004 SDValue Start;
4005 SDValue Val;
4006 bool IsSigned = false;
4007
4008 if (N->getOpcode() == ISD::AND) {
4009 // Canonicalize the operands
4010 // We want 'and %val, %mask'
4011 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
4012 std::swap(LHS, RHS);
4013 }
4014
4015 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
4016 if (!Mask) {
4017 // We need a constant mask on the RHS of the AND
Justin Bogner8d83fb62016-05-13 21:12:53 +00004018 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004019 }
4020
4021 // Extract the mask bits
4022 uint64_t MaskVal = Mask->getZExtValue();
4023 if (!isMask_64(MaskVal)) {
4024 // We *could* handle shifted masks here, but doing so would require an
4025 // 'and' operation to fix up the low-order bits so we would trade
4026 // shr+and for bfe+and, which has the same throughput
Justin Bogner8d83fb62016-05-13 21:12:53 +00004027 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004028 }
4029
4030 // How many bits are in our mask?
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00004031 uint64_t NumBits = countTrailingOnes(MaskVal);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004032 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004033
4034 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
4035 // We have a 'srl/and' pair, extract the effective start bit and length
4036 Val = LHS.getNode()->getOperand(0);
4037 Start = LHS.getNode()->getOperand(1);
4038 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
4039 if (StartConst) {
4040 uint64_t StartVal = StartConst->getZExtValue();
4041 // How many "good" bits do we have left? "good" is defined here as bits
4042 // that exist in the original value, not shifted in.
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00004043 uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004044 if (NumBits > GoodBits) {
4045 // Do not handle the case where bits have been shifted in. In theory
4046 // we could handle this, but the cost is likely higher than just
4047 // emitting the srl/and pair.
Justin Bogner8d83fb62016-05-13 21:12:53 +00004048 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004049 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004050 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004051 } else {
4052 // Do not handle the case where the shift amount (can be zero if no srl
4053 // was found) is not constant. We could handle this case, but it would
4054 // require run-time logic that would be more expensive than just
4055 // emitting the srl/and pair.
Justin Bogner8d83fb62016-05-13 21:12:53 +00004056 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004057 }
4058 } else {
4059 // Do not handle the case where the LHS of the and is not a shift. While
4060 // it would be trivial to handle this case, it would just transform
4061 // 'and' -> 'bfe', but 'and' has higher-throughput.
Justin Bogner8d83fb62016-05-13 21:12:53 +00004062 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004063 }
4064 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
4065 if (LHS->getOpcode() == ISD::AND) {
4066 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
4067 if (!ShiftCnst) {
4068 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00004069 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004070 }
4071
4072 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
4073
4074 SDValue AndLHS = LHS->getOperand(0);
4075 SDValue AndRHS = LHS->getOperand(1);
4076
4077 // Canonicalize the AND to have the mask on the RHS
4078 if (isa<ConstantSDNode>(AndLHS)) {
4079 std::swap(AndLHS, AndRHS);
4080 }
4081
4082 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
4083 if (!MaskCnst) {
4084 // Mask must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00004085 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004086 }
4087
4088 uint64_t MaskVal = MaskCnst->getZExtValue();
4089 uint64_t NumZeros;
4090 uint64_t NumBits;
4091 if (isMask_64(MaskVal)) {
4092 NumZeros = 0;
4093 // The number of bits in the result bitfield will be the number of
4094 // trailing ones (the AND) minus the number of bits we shift off
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00004095 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004096 } else if (isShiftedMask_64(MaskVal)) {
4097 NumZeros = countTrailingZeros(MaskVal);
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00004098 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004099 // The number of bits in the result bitfield will be the number of
4100 // trailing zeros plus the number of set bits in the mask minus the
4101 // number of bits we shift off
4102 NumBits = NumZeros + NumOnes - ShiftAmt;
4103 } else {
4104 // This is not a mask we can handle
Justin Bogner8d83fb62016-05-13 21:12:53 +00004105 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004106 }
4107
4108 if (ShiftAmt < NumZeros) {
4109 // Handling this case would require extra logic that would make this
4110 // transformation non-profitable
Justin Bogner8d83fb62016-05-13 21:12:53 +00004111 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004112 }
4113
4114 Val = AndLHS;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004115 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
4116 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004117 } else if (LHS->getOpcode() == ISD::SHL) {
4118 // Here, we have a pattern like:
4119 //
4120 // (sra (shl val, NN), MM)
4121 // or
4122 // (srl (shl val, NN), MM)
4123 //
4124 // If MM >= NN, we can efficiently optimize this with bfe
4125 Val = LHS->getOperand(0);
4126
4127 SDValue ShlRHS = LHS->getOperand(1);
4128 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
4129 if (!ShlCnst) {
4130 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00004131 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004132 }
4133 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
4134
4135 SDValue ShrRHS = RHS;
4136 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
4137 if (!ShrCnst) {
4138 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00004139 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004140 }
4141 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
4142
4143 // To avoid extra codegen and be profitable, we need Outer >= Inner
4144 if (OuterShiftAmt < InnerShiftAmt) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00004145 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004146 }
4147
4148 // If the outer shift is more than the type size, we have no bitfield to
4149 // extract (since we also check that the inner shift is <= the outer shift
4150 // then this also implies that the inner shift is < the type size)
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00004151 if (OuterShiftAmt >= Val.getValueSizeInBits()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00004152 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004153 }
4154
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00004155 Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
4156 MVT::i32);
4157 Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
4158 DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004159
4160 if (N->getOpcode() == ISD::SRA) {
4161 // If we have a arithmetic right shift, we need to use the signed bfe
4162 // variant
4163 IsSigned = true;
4164 }
4165 } else {
4166 // No can do...
Justin Bogner8d83fb62016-05-13 21:12:53 +00004167 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004168 }
4169 } else {
4170 // No can do...
Justin Bogner8d83fb62016-05-13 21:12:53 +00004171 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004172 }
4173
4174
4175 unsigned Opc;
4176 // For the BFE operations we form here from "and" and "srl", always use the
4177 // unsigned variants.
4178 if (Val.getValueType() == MVT::i32) {
4179 if (IsSigned) {
4180 Opc = NVPTX::BFE_S32rii;
4181 } else {
4182 Opc = NVPTX::BFE_U32rii;
4183 }
4184 } else if (Val.getValueType() == MVT::i64) {
4185 if (IsSigned) {
4186 Opc = NVPTX::BFE_S64rii;
4187 } else {
4188 Opc = NVPTX::BFE_U64rii;
4189 }
4190 } else {
4191 // We cannot handle this type
Justin Bogner8d83fb62016-05-13 21:12:53 +00004192 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004193 }
4194
4195 SDValue Ops[] = {
4196 Val, Start, Len
4197 };
4198
Justin Bogner8d83fb62016-05-13 21:12:53 +00004199 ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
4200 return true;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00004201}
4202
Justin Holewinskiae556d32012-05-04 20:18:50 +00004203// SelectDirectAddr - Match a direct address for DAG.
4204// A direct address could be a globaladdress or externalsymbol.
4205bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
4206 // Return true if TGA or ES.
Justin Holewinski0497ab12013-03-30 14:29:21 +00004207 if (N.getOpcode() == ISD::TargetGlobalAddress ||
4208 N.getOpcode() == ISD::TargetExternalSymbol) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00004209 Address = N;
4210 return true;
4211 }
4212 if (N.getOpcode() == NVPTXISD::Wrapper) {
4213 Address = N.getOperand(0);
4214 return true;
4215 }
Artem Belevichb2e76a52016-07-20 18:39:47 +00004216 // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
4217 if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
4218 if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
4219 CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
4220 CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
4221 return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
Justin Holewinskiae556d32012-05-04 20:18:50 +00004222 }
4223 return false;
4224}
4225
4226// symbol+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00004227bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
4228 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00004229 if (Addr.getOpcode() == ISD::ADD) {
4230 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00004231 SDValue base = Addr.getOperand(0);
Justin Holewinskiae556d32012-05-04 20:18:50 +00004232 if (SelectDirectAddr(base, Base)) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004233 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
4234 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00004235 return true;
4236 }
4237 }
4238 }
4239 return false;
4240}
4241
4242// symbol+offset
4243bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
4244 SDValue &Base, SDValue &Offset) {
4245 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
4246}
4247
4248// symbol+offset
4249bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
4250 SDValue &Base, SDValue &Offset) {
4251 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
4252}
4253
4254// register+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00004255bool NVPTXDAGToDAGISel::SelectADDRri_imp(
4256 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00004257 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
4258 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004259 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00004260 return true;
4261 }
4262 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
4263 Addr.getOpcode() == ISD::TargetGlobalAddress)
Justin Holewinski0497ab12013-03-30 14:29:21 +00004264 return false; // direct calls.
Justin Holewinskiae556d32012-05-04 20:18:50 +00004265
4266 if (Addr.getOpcode() == ISD::ADD) {
4267 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
4268 return false;
4269 }
4270 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
4271 if (FrameIndexSDNode *FIN =
Justin Holewinski0497ab12013-03-30 14:29:21 +00004272 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
Justin Holewinskiae556d32012-05-04 20:18:50 +00004273 // Constant offset from frame ref.
4274 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
4275 else
4276 Base = Addr.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004277 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
4278 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00004279 return true;
4280 }
4281 }
4282 return false;
4283}
4284
4285// register+offset
4286bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
4287 SDValue &Base, SDValue &Offset) {
4288 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
4289}
4290
4291// register+offset
4292bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
4293 SDValue &Base, SDValue &Offset) {
4294 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
4295}
4296
4297bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
4298 unsigned int spN) const {
Craig Topper062a2ba2014-04-25 05:30:21 +00004299 const Value *Src = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00004300 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +00004301 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
4302 return true;
4303 Src = mN->getMemOperand()->getValue();
Justin Holewinskiae556d32012-05-04 20:18:50 +00004304 }
4305 if (!Src)
4306 return false;
Craig Toppere3dcce92015-08-01 22:20:21 +00004307 if (auto *PT = dyn_cast<PointerType>(Src->getType()))
Justin Holewinskiae556d32012-05-04 20:18:50 +00004308 return (PT->getAddressSpace() == spN);
4309 return false;
4310}
4311
4312/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
4313/// inline asm expressions.
Justin Holewinski0497ab12013-03-30 14:29:21 +00004314bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
Daniel Sanders60f1db02015-03-13 12:45:09 +00004315 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00004316 SDValue Op0, Op1;
Daniel Sanders60f1db02015-03-13 12:45:09 +00004317 switch (ConstraintID) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00004318 default:
4319 return true;
Daniel Sanders60f1db02015-03-13 12:45:09 +00004320 case InlineAsm::Constraint_m: // memory
Justin Holewinskiae556d32012-05-04 20:18:50 +00004321 if (SelectDirectAddr(Op, Op0)) {
4322 OutOps.push_back(Op0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00004323 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
Justin Holewinskiae556d32012-05-04 20:18:50 +00004324 return false;
4325 }
4326 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
4327 OutOps.push_back(Op0);
4328 OutOps.push_back(Op1);
4329 return false;
4330 }
4331 break;
4332 }
4333 return true;
4334}
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00004335
4336/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
4337/// conversion from \p SrcTy to \p DestTy.
4338unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
4339 bool IsSigned) {
4340 switch (SrcTy.SimpleTy) {
4341 default:
4342 llvm_unreachable("Unhandled source type");
4343 case MVT::i8:
4344 switch (DestTy.SimpleTy) {
4345 default:
4346 llvm_unreachable("Unhandled dest type");
4347 case MVT::i16:
4348 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
4349 case MVT::i32:
4350 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
4351 case MVT::i64:
4352 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
4353 }
4354 case MVT::i16:
4355 switch (DestTy.SimpleTy) {
4356 default:
4357 llvm_unreachable("Unhandled dest type");
4358 case MVT::i8:
4359 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
4360 case MVT::i32:
4361 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
4362 case MVT::i64:
4363 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
4364 }
4365 case MVT::i32:
4366 switch (DestTy.SimpleTy) {
4367 default:
4368 llvm_unreachable("Unhandled dest type");
4369 case MVT::i8:
4370 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
4371 case MVT::i16:
4372 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
4373 case MVT::i64:
4374 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
4375 }
4376 case MVT::i64:
4377 switch (DestTy.SimpleTy) {
4378 default:
4379 llvm_unreachable("Unhandled dest type");
4380 case MVT::i8:
4381 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
4382 case MVT::i16:
4383 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
4384 case MVT::i32:
4385 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
4386 }
4387 }
4388}