blob: ffc6a59cd6c8772d41484819eb2e46666040aead [file] [log] [blame]
//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//
13
Justin Holewinskiae556d32012-05-04 20:18:50 +000014#include "NVPTXISelDAGToDAG.h"
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +000015#include "NVPTXUtilities.h"
Jingyue Wu48a9bdc2015-07-20 21:28:54 +000016#include "llvm/Analysis/ValueTracking.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000017#include "llvm/IR/GlobalValue.h"
18#include "llvm/IR/Instructions.h"
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +000019#include "llvm/Support/AtomicOrdering.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000020#include "llvm/Support/CommandLine.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000021#include "llvm/Support/Debug.h"
22#include "llvm/Support/ErrorHandling.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000023#include "llvm/Support/raw_ostream.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000024#include "llvm/Target/TargetIntrinsicInfo.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000025
Justin Holewinskiae556d32012-05-04 20:18:50 +000026using namespace llvm;
27
Chandler Carruth84e68b22014-04-22 02:41:26 +000028#define DEBUG_TYPE "nvptx-isel"
29
Justin Holewinskiae556d32012-05-04 20:18:50 +000030/// createNVPTXISelDag - This pass converts a legalized DAG into a
31/// NVPTX-specific DAG, ready for instruction scheduling.
32FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
33 llvm::CodeGenOpt::Level OptLevel) {
34 return new NVPTXDAGToDAGISel(TM, OptLevel);
35}
36
Justin Holewinskiae556d32012-05-04 20:18:50 +000037NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
38 CodeGenOpt::Level OptLevel)
Eric Christopher02389e32015-02-19 00:08:27 +000039 : SelectionDAGISel(tm, OptLevel), TM(tm) {
Justin Holewinskiae556d32012-05-04 20:18:50 +000040 doMulWide = (OptLevel > 0);
Justin Holewinskicd069e62013-07-22 12:18:04 +000041}
Justin Holewinskiae556d32012-05-04 20:18:50 +000042
Eric Christopher147bba22015-01-30 01:40:59 +000043bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
Justin Lebar077f8fb2017-01-21 01:00:14 +000044 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
45 return SelectionDAGISel::runOnMachineFunction(MF);
Eric Christopher147bba22015-01-30 01:40:59 +000046}
47
Justin Holewinskicd069e62013-07-22 12:18:04 +000048int NVPTXDAGToDAGISel::getDivF32Level() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000049 return Subtarget->getTargetLowering()->getDivF32Level();
Justin Holewinskicd069e62013-07-22 12:18:04 +000050}
Justin Holewinskiae556d32012-05-04 20:18:50 +000051
Justin Holewinskicd069e62013-07-22 12:18:04 +000052bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000053 return Subtarget->getTargetLowering()->usePrecSqrtF32();
Justin Holewinskicd069e62013-07-22 12:18:04 +000054}
55
56bool NVPTXDAGToDAGISel::useF32FTZ() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000057 return Subtarget->getTargetLowering()->useF32FTZ(*MF);
Justin Holewinskiae556d32012-05-04 20:18:50 +000058}
59
Justin Holewinski428cf0e2014-07-17 18:10:09 +000060bool NVPTXDAGToDAGISel::allowFMA() const {
Eric Christopher147bba22015-01-30 01:40:59 +000061 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
Justin Holewinski428cf0e2014-07-17 18:10:09 +000062 return TL->allowFMA(*MF, OptLevel);
63}
64
Artem Belevichd109f462017-01-13 18:48:13 +000065bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
66 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
67 return TL->allowUnsafeFPMath(*MF);
68}
69
Artem Belevich2f348ea2018-05-09 23:46:19 +000070bool NVPTXDAGToDAGISel::useShortPointers() const {
71 return TM.useShortPointers();
72}
73
Justin Holewinskiae556d32012-05-04 20:18:50 +000074/// Select - Select instructions not customized! Used for
75/// expanded, promoted and normal instructions.
Justin Bogner8d83fb62016-05-13 21:12:53 +000076void NVPTXDAGToDAGISel::Select(SDNode *N) {
Justin Holewinskiae556d32012-05-04 20:18:50 +000077
Tim Northover31d093c2013-09-22 08:21:56 +000078 if (N->isMachineOpcode()) {
79 N->setNodeId(-1);
Justin Bogner8d83fb62016-05-13 21:12:53 +000080 return; // Already selected.
Tim Northover31d093c2013-09-22 08:21:56 +000081 }
Justin Holewinskiae556d32012-05-04 20:18:50 +000082
Justin Holewinskiae556d32012-05-04 20:18:50 +000083 switch (N->getOpcode()) {
84 case ISD::LOAD:
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +000085 case ISD::ATOMIC_LOAD:
Justin Bogner8d83fb62016-05-13 21:12:53 +000086 if (tryLoad(N))
87 return;
Justin Holewinskiae556d32012-05-04 20:18:50 +000088 break;
89 case ISD::STORE:
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +000090 case ISD::ATOMIC_STORE:
Justin Bogner8d83fb62016-05-13 21:12:53 +000091 if (tryStore(N))
92 return;
Justin Holewinskiae556d32012-05-04 20:18:50 +000093 break;
Artem Belevich620db1f2017-02-23 22:38:24 +000094 case ISD::EXTRACT_VECTOR_ELT:
95 if (tryEXTRACT_VECTOR_ELEMENT(N))
96 return;
97 break;
98 case NVPTXISD::SETP_F16X2:
99 SelectSETP_F16X2(N);
100 return;
101
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000102 case NVPTXISD::LoadV2:
103 case NVPTXISD::LoadV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000104 if (tryLoadVector(N))
105 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000106 break;
107 case NVPTXISD::LDGV2:
108 case NVPTXISD::LDGV4:
109 case NVPTXISD::LDUV2:
110 case NVPTXISD::LDUV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000111 if (tryLDGLDU(N))
112 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000113 break;
114 case NVPTXISD::StoreV2:
115 case NVPTXISD::StoreV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000116 if (tryStoreVector(N))
117 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000118 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000119 case NVPTXISD::LoadParam:
120 case NVPTXISD::LoadParamV2:
121 case NVPTXISD::LoadParamV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000122 if (tryLoadParam(N))
123 return;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000124 break;
125 case NVPTXISD::StoreRetval:
126 case NVPTXISD::StoreRetvalV2:
127 case NVPTXISD::StoreRetvalV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000128 if (tryStoreRetval(N))
129 return;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000130 break;
131 case NVPTXISD::StoreParam:
132 case NVPTXISD::StoreParamV2:
133 case NVPTXISD::StoreParamV4:
134 case NVPTXISD::StoreParamS32:
135 case NVPTXISD::StoreParamU32:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000136 if (tryStoreParam(N))
137 return;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000138 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000139 case ISD::INTRINSIC_WO_CHAIN:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000140 if (tryIntrinsicNoChain(N))
141 return;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000142 break;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000143 case ISD::INTRINSIC_W_CHAIN:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000144 if (tryIntrinsicChain(N))
145 return;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000146 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000147 case NVPTXISD::Tex1DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000148 case NVPTXISD::Tex1DFloatFloat:
149 case NVPTXISD::Tex1DFloatFloatLevel:
150 case NVPTXISD::Tex1DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000151 case NVPTXISD::Tex1DS32S32:
152 case NVPTXISD::Tex1DS32Float:
153 case NVPTXISD::Tex1DS32FloatLevel:
154 case NVPTXISD::Tex1DS32FloatGrad:
155 case NVPTXISD::Tex1DU32S32:
156 case NVPTXISD::Tex1DU32Float:
157 case NVPTXISD::Tex1DU32FloatLevel:
158 case NVPTXISD::Tex1DU32FloatGrad:
159 case NVPTXISD::Tex1DArrayFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000160 case NVPTXISD::Tex1DArrayFloatFloat:
161 case NVPTXISD::Tex1DArrayFloatFloatLevel:
162 case NVPTXISD::Tex1DArrayFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000163 case NVPTXISD::Tex1DArrayS32S32:
164 case NVPTXISD::Tex1DArrayS32Float:
165 case NVPTXISD::Tex1DArrayS32FloatLevel:
166 case NVPTXISD::Tex1DArrayS32FloatGrad:
167 case NVPTXISD::Tex1DArrayU32S32:
168 case NVPTXISD::Tex1DArrayU32Float:
169 case NVPTXISD::Tex1DArrayU32FloatLevel:
170 case NVPTXISD::Tex1DArrayU32FloatGrad:
171 case NVPTXISD::Tex2DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000172 case NVPTXISD::Tex2DFloatFloat:
173 case NVPTXISD::Tex2DFloatFloatLevel:
174 case NVPTXISD::Tex2DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000175 case NVPTXISD::Tex2DS32S32:
176 case NVPTXISD::Tex2DS32Float:
177 case NVPTXISD::Tex2DS32FloatLevel:
178 case NVPTXISD::Tex2DS32FloatGrad:
179 case NVPTXISD::Tex2DU32S32:
180 case NVPTXISD::Tex2DU32Float:
181 case NVPTXISD::Tex2DU32FloatLevel:
182 case NVPTXISD::Tex2DU32FloatGrad:
183 case NVPTXISD::Tex2DArrayFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000184 case NVPTXISD::Tex2DArrayFloatFloat:
185 case NVPTXISD::Tex2DArrayFloatFloatLevel:
186 case NVPTXISD::Tex2DArrayFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000187 case NVPTXISD::Tex2DArrayS32S32:
188 case NVPTXISD::Tex2DArrayS32Float:
189 case NVPTXISD::Tex2DArrayS32FloatLevel:
190 case NVPTXISD::Tex2DArrayS32FloatGrad:
191 case NVPTXISD::Tex2DArrayU32S32:
192 case NVPTXISD::Tex2DArrayU32Float:
193 case NVPTXISD::Tex2DArrayU32FloatLevel:
194 case NVPTXISD::Tex2DArrayU32FloatGrad:
195 case NVPTXISD::Tex3DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000196 case NVPTXISD::Tex3DFloatFloat:
197 case NVPTXISD::Tex3DFloatFloatLevel:
198 case NVPTXISD::Tex3DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000199 case NVPTXISD::Tex3DS32S32:
200 case NVPTXISD::Tex3DS32Float:
201 case NVPTXISD::Tex3DS32FloatLevel:
202 case NVPTXISD::Tex3DS32FloatGrad:
203 case NVPTXISD::Tex3DU32S32:
204 case NVPTXISD::Tex3DU32Float:
205 case NVPTXISD::Tex3DU32FloatLevel:
206 case NVPTXISD::Tex3DU32FloatGrad:
207 case NVPTXISD::TexCubeFloatFloat:
208 case NVPTXISD::TexCubeFloatFloatLevel:
209 case NVPTXISD::TexCubeS32Float:
210 case NVPTXISD::TexCubeS32FloatLevel:
211 case NVPTXISD::TexCubeU32Float:
212 case NVPTXISD::TexCubeU32FloatLevel:
213 case NVPTXISD::TexCubeArrayFloatFloat:
214 case NVPTXISD::TexCubeArrayFloatFloatLevel:
215 case NVPTXISD::TexCubeArrayS32Float:
216 case NVPTXISD::TexCubeArrayS32FloatLevel:
217 case NVPTXISD::TexCubeArrayU32Float:
218 case NVPTXISD::TexCubeArrayU32FloatLevel:
219 case NVPTXISD::Tld4R2DFloatFloat:
220 case NVPTXISD::Tld4G2DFloatFloat:
221 case NVPTXISD::Tld4B2DFloatFloat:
222 case NVPTXISD::Tld4A2DFloatFloat:
223 case NVPTXISD::Tld4R2DS64Float:
224 case NVPTXISD::Tld4G2DS64Float:
225 case NVPTXISD::Tld4B2DS64Float:
226 case NVPTXISD::Tld4A2DS64Float:
227 case NVPTXISD::Tld4R2DU64Float:
228 case NVPTXISD::Tld4G2DU64Float:
229 case NVPTXISD::Tld4B2DU64Float:
230 case NVPTXISD::Tld4A2DU64Float:
231 case NVPTXISD::TexUnified1DFloatS32:
232 case NVPTXISD::TexUnified1DFloatFloat:
233 case NVPTXISD::TexUnified1DFloatFloatLevel:
234 case NVPTXISD::TexUnified1DFloatFloatGrad:
235 case NVPTXISD::TexUnified1DS32S32:
236 case NVPTXISD::TexUnified1DS32Float:
237 case NVPTXISD::TexUnified1DS32FloatLevel:
238 case NVPTXISD::TexUnified1DS32FloatGrad:
239 case NVPTXISD::TexUnified1DU32S32:
240 case NVPTXISD::TexUnified1DU32Float:
241 case NVPTXISD::TexUnified1DU32FloatLevel:
242 case NVPTXISD::TexUnified1DU32FloatGrad:
243 case NVPTXISD::TexUnified1DArrayFloatS32:
244 case NVPTXISD::TexUnified1DArrayFloatFloat:
245 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
246 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
247 case NVPTXISD::TexUnified1DArrayS32S32:
248 case NVPTXISD::TexUnified1DArrayS32Float:
249 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
250 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
251 case NVPTXISD::TexUnified1DArrayU32S32:
252 case NVPTXISD::TexUnified1DArrayU32Float:
253 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
254 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
255 case NVPTXISD::TexUnified2DFloatS32:
256 case NVPTXISD::TexUnified2DFloatFloat:
257 case NVPTXISD::TexUnified2DFloatFloatLevel:
258 case NVPTXISD::TexUnified2DFloatFloatGrad:
259 case NVPTXISD::TexUnified2DS32S32:
260 case NVPTXISD::TexUnified2DS32Float:
261 case NVPTXISD::TexUnified2DS32FloatLevel:
262 case NVPTXISD::TexUnified2DS32FloatGrad:
263 case NVPTXISD::TexUnified2DU32S32:
264 case NVPTXISD::TexUnified2DU32Float:
265 case NVPTXISD::TexUnified2DU32FloatLevel:
266 case NVPTXISD::TexUnified2DU32FloatGrad:
267 case NVPTXISD::TexUnified2DArrayFloatS32:
268 case NVPTXISD::TexUnified2DArrayFloatFloat:
269 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
270 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
271 case NVPTXISD::TexUnified2DArrayS32S32:
272 case NVPTXISD::TexUnified2DArrayS32Float:
273 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
274 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
275 case NVPTXISD::TexUnified2DArrayU32S32:
276 case NVPTXISD::TexUnified2DArrayU32Float:
277 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
278 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
279 case NVPTXISD::TexUnified3DFloatS32:
280 case NVPTXISD::TexUnified3DFloatFloat:
281 case NVPTXISD::TexUnified3DFloatFloatLevel:
282 case NVPTXISD::TexUnified3DFloatFloatGrad:
283 case NVPTXISD::TexUnified3DS32S32:
284 case NVPTXISD::TexUnified3DS32Float:
285 case NVPTXISD::TexUnified3DS32FloatLevel:
286 case NVPTXISD::TexUnified3DS32FloatGrad:
287 case NVPTXISD::TexUnified3DU32S32:
288 case NVPTXISD::TexUnified3DU32Float:
289 case NVPTXISD::TexUnified3DU32FloatLevel:
290 case NVPTXISD::TexUnified3DU32FloatGrad:
291 case NVPTXISD::TexUnifiedCubeFloatFloat:
292 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
293 case NVPTXISD::TexUnifiedCubeS32Float:
294 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
295 case NVPTXISD::TexUnifiedCubeU32Float:
296 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
297 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
298 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
299 case NVPTXISD::TexUnifiedCubeArrayS32Float:
300 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
301 case NVPTXISD::TexUnifiedCubeArrayU32Float:
302 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
303 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
304 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
305 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
306 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
307 case NVPTXISD::Tld4UnifiedR2DS64Float:
308 case NVPTXISD::Tld4UnifiedG2DS64Float:
309 case NVPTXISD::Tld4UnifiedB2DS64Float:
310 case NVPTXISD::Tld4UnifiedA2DS64Float:
311 case NVPTXISD::Tld4UnifiedR2DU64Float:
312 case NVPTXISD::Tld4UnifiedG2DU64Float:
313 case NVPTXISD::Tld4UnifiedB2DU64Float:
314 case NVPTXISD::Tld4UnifiedA2DU64Float:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000315 if (tryTextureIntrinsic(N))
316 return;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000317 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000318 case NVPTXISD::Suld1DI8Clamp:
319 case NVPTXISD::Suld1DI16Clamp:
320 case NVPTXISD::Suld1DI32Clamp:
321 case NVPTXISD::Suld1DI64Clamp:
322 case NVPTXISD::Suld1DV2I8Clamp:
323 case NVPTXISD::Suld1DV2I16Clamp:
324 case NVPTXISD::Suld1DV2I32Clamp:
325 case NVPTXISD::Suld1DV2I64Clamp:
326 case NVPTXISD::Suld1DV4I8Clamp:
327 case NVPTXISD::Suld1DV4I16Clamp:
328 case NVPTXISD::Suld1DV4I32Clamp:
329 case NVPTXISD::Suld1DArrayI8Clamp:
330 case NVPTXISD::Suld1DArrayI16Clamp:
331 case NVPTXISD::Suld1DArrayI32Clamp:
332 case NVPTXISD::Suld1DArrayI64Clamp:
333 case NVPTXISD::Suld1DArrayV2I8Clamp:
334 case NVPTXISD::Suld1DArrayV2I16Clamp:
335 case NVPTXISD::Suld1DArrayV2I32Clamp:
336 case NVPTXISD::Suld1DArrayV2I64Clamp:
337 case NVPTXISD::Suld1DArrayV4I8Clamp:
338 case NVPTXISD::Suld1DArrayV4I16Clamp:
339 case NVPTXISD::Suld1DArrayV4I32Clamp:
340 case NVPTXISD::Suld2DI8Clamp:
341 case NVPTXISD::Suld2DI16Clamp:
342 case NVPTXISD::Suld2DI32Clamp:
343 case NVPTXISD::Suld2DI64Clamp:
344 case NVPTXISD::Suld2DV2I8Clamp:
345 case NVPTXISD::Suld2DV2I16Clamp:
346 case NVPTXISD::Suld2DV2I32Clamp:
347 case NVPTXISD::Suld2DV2I64Clamp:
348 case NVPTXISD::Suld2DV4I8Clamp:
349 case NVPTXISD::Suld2DV4I16Clamp:
350 case NVPTXISD::Suld2DV4I32Clamp:
351 case NVPTXISD::Suld2DArrayI8Clamp:
352 case NVPTXISD::Suld2DArrayI16Clamp:
353 case NVPTXISD::Suld2DArrayI32Clamp:
354 case NVPTXISD::Suld2DArrayI64Clamp:
355 case NVPTXISD::Suld2DArrayV2I8Clamp:
356 case NVPTXISD::Suld2DArrayV2I16Clamp:
357 case NVPTXISD::Suld2DArrayV2I32Clamp:
358 case NVPTXISD::Suld2DArrayV2I64Clamp:
359 case NVPTXISD::Suld2DArrayV4I8Clamp:
360 case NVPTXISD::Suld2DArrayV4I16Clamp:
361 case NVPTXISD::Suld2DArrayV4I32Clamp:
362 case NVPTXISD::Suld3DI8Clamp:
363 case NVPTXISD::Suld3DI16Clamp:
364 case NVPTXISD::Suld3DI32Clamp:
365 case NVPTXISD::Suld3DI64Clamp:
366 case NVPTXISD::Suld3DV2I8Clamp:
367 case NVPTXISD::Suld3DV2I16Clamp:
368 case NVPTXISD::Suld3DV2I32Clamp:
369 case NVPTXISD::Suld3DV2I64Clamp:
370 case NVPTXISD::Suld3DV4I8Clamp:
371 case NVPTXISD::Suld3DV4I16Clamp:
372 case NVPTXISD::Suld3DV4I32Clamp:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000373 case NVPTXISD::Suld1DI8Trap:
374 case NVPTXISD::Suld1DI16Trap:
375 case NVPTXISD::Suld1DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000376 case NVPTXISD::Suld1DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000377 case NVPTXISD::Suld1DV2I8Trap:
378 case NVPTXISD::Suld1DV2I16Trap:
379 case NVPTXISD::Suld1DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000380 case NVPTXISD::Suld1DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000381 case NVPTXISD::Suld1DV4I8Trap:
382 case NVPTXISD::Suld1DV4I16Trap:
383 case NVPTXISD::Suld1DV4I32Trap:
384 case NVPTXISD::Suld1DArrayI8Trap:
385 case NVPTXISD::Suld1DArrayI16Trap:
386 case NVPTXISD::Suld1DArrayI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000387 case NVPTXISD::Suld1DArrayI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000388 case NVPTXISD::Suld1DArrayV2I8Trap:
389 case NVPTXISD::Suld1DArrayV2I16Trap:
390 case NVPTXISD::Suld1DArrayV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000391 case NVPTXISD::Suld1DArrayV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000392 case NVPTXISD::Suld1DArrayV4I8Trap:
393 case NVPTXISD::Suld1DArrayV4I16Trap:
394 case NVPTXISD::Suld1DArrayV4I32Trap:
395 case NVPTXISD::Suld2DI8Trap:
396 case NVPTXISD::Suld2DI16Trap:
397 case NVPTXISD::Suld2DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000398 case NVPTXISD::Suld2DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000399 case NVPTXISD::Suld2DV2I8Trap:
400 case NVPTXISD::Suld2DV2I16Trap:
401 case NVPTXISD::Suld2DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000402 case NVPTXISD::Suld2DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000403 case NVPTXISD::Suld2DV4I8Trap:
404 case NVPTXISD::Suld2DV4I16Trap:
405 case NVPTXISD::Suld2DV4I32Trap:
406 case NVPTXISD::Suld2DArrayI8Trap:
407 case NVPTXISD::Suld2DArrayI16Trap:
408 case NVPTXISD::Suld2DArrayI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000409 case NVPTXISD::Suld2DArrayI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000410 case NVPTXISD::Suld2DArrayV2I8Trap:
411 case NVPTXISD::Suld2DArrayV2I16Trap:
412 case NVPTXISD::Suld2DArrayV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000413 case NVPTXISD::Suld2DArrayV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000414 case NVPTXISD::Suld2DArrayV4I8Trap:
415 case NVPTXISD::Suld2DArrayV4I16Trap:
416 case NVPTXISD::Suld2DArrayV4I32Trap:
417 case NVPTXISD::Suld3DI8Trap:
418 case NVPTXISD::Suld3DI16Trap:
419 case NVPTXISD::Suld3DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000420 case NVPTXISD::Suld3DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000421 case NVPTXISD::Suld3DV2I8Trap:
422 case NVPTXISD::Suld3DV2I16Trap:
423 case NVPTXISD::Suld3DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000424 case NVPTXISD::Suld3DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000425 case NVPTXISD::Suld3DV4I8Trap:
426 case NVPTXISD::Suld3DV4I16Trap:
427 case NVPTXISD::Suld3DV4I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000428 case NVPTXISD::Suld1DI8Zero:
429 case NVPTXISD::Suld1DI16Zero:
430 case NVPTXISD::Suld1DI32Zero:
431 case NVPTXISD::Suld1DI64Zero:
432 case NVPTXISD::Suld1DV2I8Zero:
433 case NVPTXISD::Suld1DV2I16Zero:
434 case NVPTXISD::Suld1DV2I32Zero:
435 case NVPTXISD::Suld1DV2I64Zero:
436 case NVPTXISD::Suld1DV4I8Zero:
437 case NVPTXISD::Suld1DV4I16Zero:
438 case NVPTXISD::Suld1DV4I32Zero:
439 case NVPTXISD::Suld1DArrayI8Zero:
440 case NVPTXISD::Suld1DArrayI16Zero:
441 case NVPTXISD::Suld1DArrayI32Zero:
442 case NVPTXISD::Suld1DArrayI64Zero:
443 case NVPTXISD::Suld1DArrayV2I8Zero:
444 case NVPTXISD::Suld1DArrayV2I16Zero:
445 case NVPTXISD::Suld1DArrayV2I32Zero:
446 case NVPTXISD::Suld1DArrayV2I64Zero:
447 case NVPTXISD::Suld1DArrayV4I8Zero:
448 case NVPTXISD::Suld1DArrayV4I16Zero:
449 case NVPTXISD::Suld1DArrayV4I32Zero:
450 case NVPTXISD::Suld2DI8Zero:
451 case NVPTXISD::Suld2DI16Zero:
452 case NVPTXISD::Suld2DI32Zero:
453 case NVPTXISD::Suld2DI64Zero:
454 case NVPTXISD::Suld2DV2I8Zero:
455 case NVPTXISD::Suld2DV2I16Zero:
456 case NVPTXISD::Suld2DV2I32Zero:
457 case NVPTXISD::Suld2DV2I64Zero:
458 case NVPTXISD::Suld2DV4I8Zero:
459 case NVPTXISD::Suld2DV4I16Zero:
460 case NVPTXISD::Suld2DV4I32Zero:
461 case NVPTXISD::Suld2DArrayI8Zero:
462 case NVPTXISD::Suld2DArrayI16Zero:
463 case NVPTXISD::Suld2DArrayI32Zero:
464 case NVPTXISD::Suld2DArrayI64Zero:
465 case NVPTXISD::Suld2DArrayV2I8Zero:
466 case NVPTXISD::Suld2DArrayV2I16Zero:
467 case NVPTXISD::Suld2DArrayV2I32Zero:
468 case NVPTXISD::Suld2DArrayV2I64Zero:
469 case NVPTXISD::Suld2DArrayV4I8Zero:
470 case NVPTXISD::Suld2DArrayV4I16Zero:
471 case NVPTXISD::Suld2DArrayV4I32Zero:
472 case NVPTXISD::Suld3DI8Zero:
473 case NVPTXISD::Suld3DI16Zero:
474 case NVPTXISD::Suld3DI32Zero:
475 case NVPTXISD::Suld3DI64Zero:
476 case NVPTXISD::Suld3DV2I8Zero:
477 case NVPTXISD::Suld3DV2I16Zero:
478 case NVPTXISD::Suld3DV2I32Zero:
479 case NVPTXISD::Suld3DV2I64Zero:
480 case NVPTXISD::Suld3DV4I8Zero:
481 case NVPTXISD::Suld3DV4I16Zero:
482 case NVPTXISD::Suld3DV4I32Zero:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000483 if (trySurfaceIntrinsic(N))
484 return;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000485 break;
Justin Holewinskica7a4f12014-06-27 18:35:27 +0000486 case ISD::AND:
487 case ISD::SRA:
488 case ISD::SRL:
489 // Try to select BFE
Justin Bogner8d83fb62016-05-13 21:12:53 +0000490 if (tryBFE(N))
491 return;
Justin Holewinskica7a4f12014-06-27 18:35:27 +0000492 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000493 case ISD::ADDRSPACECAST:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000494 SelectAddrSpaceCast(N);
495 return;
Artem Belevich64dc9be2017-01-13 20:56:17 +0000496 case ISD::ConstantFP:
497 if (tryConstantFP16(N))
498 return;
499 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000500 default:
501 break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000502 }
Justin Bogner8d83fb62016-05-13 21:12:53 +0000503 SelectCode(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000504}
505
Justin Bogner8d83fb62016-05-13 21:12:53 +0000506bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000507 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
508 switch (IID) {
509 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000510 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000511 case Intrinsic::nvvm_ldg_global_f:
512 case Intrinsic::nvvm_ldg_global_i:
513 case Intrinsic::nvvm_ldg_global_p:
514 case Intrinsic::nvvm_ldu_global_f:
515 case Intrinsic::nvvm_ldu_global_i:
516 case Intrinsic::nvvm_ldu_global_p:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000517 return tryLDGLDU(N);
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000518 }
519}
520
Artem Belevich64dc9be2017-01-13 20:56:17 +0000521// There's no way to specify FP16 immediates in .f16 ops, so we have to
522// load them into an .f16 register first.
523bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
524 if (N->getValueType(0) != MVT::f16)
525 return false;
526 SDValue Val = CurDAG->getTargetConstantFP(
527 cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
528 SDNode *LoadConstF16 =
529 CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
530 ReplaceNode(N, LoadConstF16);
531 return true;
532}
533
Artem Belevich620db1f2017-02-23 22:38:24 +0000534// Map ISD:CONDCODE value to appropriate CmpMode expected by
535// NVPTXInstPrinter::printCmpMode()
536static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
537 using NVPTX::PTXCmpMode::CmpMode;
538 unsigned PTXCmpMode = [](ISD::CondCode CC) {
539 switch (CC) {
540 default:
541 llvm_unreachable("Unexpected condition code.");
542 case ISD::SETOEQ:
543 return CmpMode::EQ;
544 case ISD::SETOGT:
545 return CmpMode::GT;
546 case ISD::SETOGE:
547 return CmpMode::GE;
548 case ISD::SETOLT:
549 return CmpMode::LT;
550 case ISD::SETOLE:
551 return CmpMode::LE;
552 case ISD::SETONE:
553 return CmpMode::NE;
554 case ISD::SETO:
555 return CmpMode::NUM;
556 case ISD::SETUO:
557 return CmpMode::NotANumber;
558 case ISD::SETUEQ:
559 return CmpMode::EQU;
560 case ISD::SETUGT:
561 return CmpMode::GTU;
562 case ISD::SETUGE:
563 return CmpMode::GEU;
564 case ISD::SETULT:
565 return CmpMode::LTU;
566 case ISD::SETULE:
567 return CmpMode::LEU;
568 case ISD::SETUNE:
569 return CmpMode::NEU;
570 case ISD::SETEQ:
571 return CmpMode::EQ;
572 case ISD::SETGT:
573 return CmpMode::GT;
574 case ISD::SETGE:
575 return CmpMode::GE;
576 case ISD::SETLT:
577 return CmpMode::LT;
578 case ISD::SETLE:
579 return CmpMode::LE;
580 case ISD::SETNE:
581 return CmpMode::NE;
582 }
583 }(CondCode.get());
584
585 if (FTZ)
586 PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
587
588 return PTXCmpMode;
589}
590
591bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
592 unsigned PTXCmpMode =
593 getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
594 SDLoc DL(N);
595 SDNode *SetP = CurDAG->getMachineNode(
596 NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
597 N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
598 ReplaceNode(N, SetP);
599 return true;
600}
601
602// Find all instances of extract_vector_elt that use this v2f16 vector
603// and coalesce them into a scattering move instruction.
604bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
605 SDValue Vector = N->getOperand(0);
606
607 // We only care about f16x2 as it's the only real vector type we
608 // need to deal with.
609 if (Vector.getSimpleValueType() != MVT::v2f16)
610 return false;
611
612 // Find and record all uses of this vector that extract element 0 or 1.
613 SmallVector<SDNode *, 4> E0, E1;
614 for (const auto &U : Vector.getNode()->uses()) {
615 if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
616 continue;
617 if (U->getOperand(0) != Vector)
618 continue;
619 if (const ConstantSDNode *IdxConst =
620 dyn_cast<ConstantSDNode>(U->getOperand(1))) {
621 if (IdxConst->getZExtValue() == 0)
622 E0.push_back(U);
623 else if (IdxConst->getZExtValue() == 1)
624 E1.push_back(U);
625 else
626 llvm_unreachable("Invalid vector index.");
627 }
628 }
629
630 // There's no point scattering f16x2 if we only ever access one
631 // element of it.
632 if (E0.empty() || E1.empty())
633 return false;
634
635 unsigned Op = NVPTX::SplitF16x2;
636 // If the vector has been BITCAST'ed from i32, we can use original
637 // value directly and avoid register-to-register move.
638 SDValue Source = Vector;
639 if (Vector->getOpcode() == ISD::BITCAST) {
640 Op = NVPTX::SplitI32toF16x2;
641 Source = Vector->getOperand(0);
642 }
643 // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
644 // into f16,f16 SplitF16x2(V)
645 SDNode *ScatterOp =
646 CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
647 for (auto *Node : E0)
648 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
649 for (auto *Node : E1)
650 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
651
652 return true;
653}
654
Eric Christopher9745b3a2015-01-30 01:41:01 +0000655static unsigned int getCodeAddrSpace(MemSDNode *N) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +0000656 const Value *Src = N->getMemOperand()->getValue();
Justin Holewinskib96d1392013-06-10 13:29:47 +0000657
Justin Holewinskiae556d32012-05-04 20:18:50 +0000658 if (!Src)
Justin Holewinskib96d1392013-06-10 13:29:47 +0000659 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000660
Craig Toppere3dcce92015-08-01 22:20:21 +0000661 if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000662 switch (PT->getAddressSpace()) {
Justin Holewinskib96d1392013-06-10 13:29:47 +0000663 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
664 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
665 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
666 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
667 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
668 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
669 default: break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000670 }
671 }
Justin Holewinskib96d1392013-06-10 13:29:47 +0000672 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000673}
674
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000675static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000676 unsigned CodeAddrSpace, MachineFunction *F) {
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000677 // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
678 // space.
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000679 //
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000680 // We have two ways of identifying invariant loads: Loads may be explicitly
681 // marked as invariant, or we may infer them to be invariant.
682 //
Justin Lebarfaaf2d22018-02-28 23:58:05 +0000683 // We currently infer invariance for loads from
684 // - constant global variables, and
685 // - kernel function pointer params that are noalias (i.e. __restrict) and
686 // never written to.
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000687 //
688 // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
689 // not during the SelectionDAG phase).
690 //
691 // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
692 // explicitly invariant loads because these are how clang tells us to use ldg
693 // when the user uses a builtin.
694 if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000695 return false;
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000696
697 if (N->isInvariant())
698 return true;
699
Justin Lebarfaaf2d22018-02-28 23:58:05 +0000700 bool IsKernelFn = isKernelFunction(F->getFunction());
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000701
Justin Lebarfaaf2d22018-02-28 23:58:05 +0000702 // We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly
703 // because the former looks through phi nodes while the latter does not. We
704 // need to look through phi nodes to handle pointer induction variables.
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000705 SmallVector<Value *, 8> Objs;
706 GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
707 Objs, F->getDataLayout());
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000708
Justin Lebarfaaf2d22018-02-28 23:58:05 +0000709 return all_of(Objs, [&](Value *V) {
710 if (auto *A = dyn_cast<const Argument>(V))
711 return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
712 if (auto *GV = dyn_cast<const GlobalVariable>(V))
713 return GV->isConstant();
714 return false;
715 });
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000716}
717
Justin Bogner8d83fb62016-05-13 21:12:53 +0000718bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +0000719 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
720 switch (IID) {
721 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000722 return false;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000723 case Intrinsic::nvvm_texsurf_handle_internal:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000724 SelectTexSurfHandle(N);
725 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000726 }
727}
728
Justin Bogner8d83fb62016-05-13 21:12:53 +0000729void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +0000730 // Op 0 is the intrinsic ID
731 SDValue Wrapper = N->getOperand(1);
732 SDValue GlobalVal = Wrapper.getOperand(0);
Justin Bogner8d83fb62016-05-13 21:12:53 +0000733 ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
734 MVT::i64, GlobalVal));
Justin Holewinski30d56a72014-04-09 15:39:15 +0000735}
736
Justin Bogner8d83fb62016-05-13 21:12:53 +0000737void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000738 SDValue Src = N->getOperand(0);
739 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
740 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
741 unsigned DstAddrSpace = CastN->getDestAddressSpace();
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000742 assert(SrcAddrSpace != DstAddrSpace &&
743 "addrspacecast must be between different address spaces");
744
745 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
746 // Specific to generic
747 unsigned Opc;
748 switch (SrcAddrSpace) {
749 default: report_fatal_error("Bad address space in addrspacecast");
750 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000751 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000752 break;
753 case ADDRESS_SPACE_SHARED:
Artem Belevich2f348ea2018-05-09 23:46:19 +0000754 Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_shared_yes_6432
755 : NVPTX::cvta_shared_yes_64)
756 : NVPTX::cvta_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000757 break;
758 case ADDRESS_SPACE_CONST:
Artem Belevich2f348ea2018-05-09 23:46:19 +0000759 Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_const_yes_6432
760 : NVPTX::cvta_const_yes_64)
761 : NVPTX::cvta_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000762 break;
763 case ADDRESS_SPACE_LOCAL:
Artem Belevich2f348ea2018-05-09 23:46:19 +0000764 Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_local_yes_6432
765 : NVPTX::cvta_local_yes_64)
766 : NVPTX::cvta_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000767 break;
768 }
Justin Bogner8d83fb62016-05-13 21:12:53 +0000769 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
770 Src));
771 return;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000772 } else {
773 // Generic to specific
774 if (SrcAddrSpace != 0)
775 report_fatal_error("Cannot cast between two non-generic address spaces");
776 unsigned Opc;
777 switch (DstAddrSpace) {
778 default: report_fatal_error("Bad address space in addrspacecast");
779 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000780 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
781 : NVPTX::cvta_to_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000782 break;
783 case ADDRESS_SPACE_SHARED:
Artem Belevich2f348ea2018-05-09 23:46:19 +0000784 Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_shared_yes_3264
785 : NVPTX::cvta_to_shared_yes_64)
Eric Christopher02389e32015-02-19 00:08:27 +0000786 : NVPTX::cvta_to_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000787 break;
788 case ADDRESS_SPACE_CONST:
Artem Belevich2f348ea2018-05-09 23:46:19 +0000789 Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_const_yes_3264
790 : NVPTX::cvta_to_const_yes_64)
791 : NVPTX::cvta_to_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000792 break;
793 case ADDRESS_SPACE_LOCAL:
Artem Belevich2f348ea2018-05-09 23:46:19 +0000794 Opc = TM.is64Bit() ? (useShortPointers() ? NVPTX::cvta_to_local_yes_3264
795 : NVPTX::cvta_to_local_yes_64)
796 : NVPTX::cvta_to_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000797 break;
Jingyue Wua2f60272015-06-04 21:28:26 +0000798 case ADDRESS_SPACE_PARAM:
799 Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
800 : NVPTX::nvvm_ptr_gen_to_param;
801 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000802 }
Justin Bogner8d83fb62016-05-13 21:12:53 +0000803 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
804 Src));
805 return;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000806 }
807}
808
Artem Belevichee7dd122017-03-02 19:14:14 +0000809// Helper function template to reduce amount of boilerplate code for
810// opcode selection.
811static Optional<unsigned> pickOpcodeForVT(
812 MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
813 unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
814 unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
815 switch (VT) {
816 case MVT::i1:
817 case MVT::i8:
818 return Opcode_i8;
819 case MVT::i16:
820 return Opcode_i16;
821 case MVT::i32:
822 return Opcode_i32;
823 case MVT::i64:
824 return Opcode_i64;
825 case MVT::f16:
826 return Opcode_f16;
827 case MVT::v2f16:
828 return Opcode_f16x2;
829 case MVT::f32:
830 return Opcode_f32;
831 case MVT::f64:
832 return Opcode_f64;
833 default:
834 return None;
835 }
836}
837
Justin Bogner8d83fb62016-05-13 21:12:53 +0000838bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000839 SDLoc dl(N);
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +0000840 MemSDNode *LD = cast<MemSDNode>(N);
841 assert(LD->readMem() && "Expected load");
842 LoadSDNode *PlainLoad = dyn_cast<LoadSDNode>(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000843 EVT LoadedVT = LD->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +0000844 SDNode *NVPTXLD = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000845
846 // do not support pre/post inc/dec
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +0000847 if (PlainLoad && PlainLoad->isIndexed())
Justin Bogner8d83fb62016-05-13 21:12:53 +0000848 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000849
850 if (!LoadedVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +0000851 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000852
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +0000853 AtomicOrdering Ordering = LD->getOrdering();
854 // In order to lower atomic loads with stronger guarantees we would need to
855 // use load.acquire or insert fences. However these features were only added
856 // with PTX ISA 6.0 / sm_70.
857 // TODO: Check if we can actually use the new instructions and implement them.
858 if (isStrongerThanMonotonic(Ordering))
859 return false;
860
Justin Holewinskiae556d32012-05-04 20:18:50 +0000861 // Address Space Setting
Artem Belevich2f348ea2018-05-09 23:46:19 +0000862 unsigned int CodeAddrSpace = getCodeAddrSpace(LD);
863 if (canLowerToLDG(LD, *Subtarget, CodeAddrSpace, MF)) {
Justin Bogner8d83fb62016-05-13 21:12:53 +0000864 return tryLDGLDU(N);
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000865 }
866
Artem Belevich2f348ea2018-05-09 23:46:19 +0000867 unsigned int PointerSize =
868 CurDAG->getDataLayout().getPointerSizeInBits(LD->getAddressSpace());
869
Justin Holewinskiae556d32012-05-04 20:18:50 +0000870 // Volatile Setting
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +0000871 // - .volatile is only available for .global and .shared
872 // - .volatile has the same memory synchronization semantics as .relaxed.sys
873 bool isVolatile = LD->isVolatile() || Ordering == AtomicOrdering::Monotonic;
Artem Belevich2f348ea2018-05-09 23:46:19 +0000874 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
875 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
876 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
Justin Holewinskiae556d32012-05-04 20:18:50 +0000877 isVolatile = false;
878
Justin Holewinskiae556d32012-05-04 20:18:50 +0000879 // Type Setting: fromType + fromTypeWidth
880 //
881 // Sign : ISD::SEXTLOAD
882 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
883 // type is integer
884 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
Artem Belevich620db1f2017-02-23 22:38:24 +0000885 MVT SimpleVT = LoadedVT.getSimpleVT();
Justin Holewinskiae556d32012-05-04 20:18:50 +0000886 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +0000887 // Read at least 8 bits (predicates are stored as 8-bit values)
888 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskiae556d32012-05-04 20:18:50 +0000889 unsigned int fromType;
Artem Belevich620db1f2017-02-23 22:38:24 +0000890
891 // Vector Setting
892 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
893 if (SimpleVT.isVector()) {
894 assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
895 // v2f16 is loaded using ld.b32
896 fromTypeWidth = 32;
897 }
898
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +0000899 if (PlainLoad && (PlainLoad->getExtensionType() == ISD::SEXTLOAD))
Justin Holewinskiae556d32012-05-04 20:18:50 +0000900 fromType = NVPTX::PTXLdStInstCode::Signed;
901 else if (ScalarVT.isFloatingPoint())
Artem Belevich64dc9be2017-01-13 20:56:17 +0000902 // f16 uses .b16 as its storage type.
903 fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
904 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000905 else
906 fromType = NVPTX::PTXLdStInstCode::Unsigned;
907
908 // Create the machine instruction DAG
909 SDValue Chain = N->getOperand(0);
910 SDValue N1 = N->getOperand(1);
911 SDValue Addr;
912 SDValue Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +0000913 Optional<unsigned> Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +0000914 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000915
916 if (SelectDirectAddr(N1, Addr)) {
Artem Belevichee7dd122017-03-02 19:14:14 +0000917 Opcode = pickOpcodeForVT(
918 TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
919 NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
920 NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
921 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +0000922 return false;
Artem Belevich2f348ea2018-05-09 23:46:19 +0000923 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000924 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
925 getI32Imm(fromTypeWidth, dl), Addr, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000926 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
927 MVT::Other, Ops);
Artem Belevich2f348ea2018-05-09 23:46:19 +0000928 } else if (PointerSize == 64 ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
929 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +0000930 Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
931 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
932 NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
933 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
934 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +0000935 return false;
Artem Belevich2f348ea2018-05-09 23:46:19 +0000936 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000937 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
938 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000939 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
940 MVT::Other, Ops);
Artem Belevich2f348ea2018-05-09 23:46:19 +0000941 } else if (PointerSize == 64 ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
942 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
943 if (PointerSize == 64)
Artem Belevichee7dd122017-03-02 19:14:14 +0000944 Opcode = pickOpcodeForVT(
945 TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
946 NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
947 NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
948 else
949 Opcode = pickOpcodeForVT(
950 TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
951 NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
952 NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
953 if (!Opcode)
954 return false;
Artem Belevich2f348ea2018-05-09 23:46:19 +0000955 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000956 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
957 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000958 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
959 MVT::Other, Ops);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000960 } else {
Artem Belevich2f348ea2018-05-09 23:46:19 +0000961 if (PointerSize == 64)
Artem Belevichee7dd122017-03-02 19:14:14 +0000962 Opcode = pickOpcodeForVT(
963 TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
964 NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
965 NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
966 NVPTX::LD_f64_areg_64);
967 else
968 Opcode = pickOpcodeForVT(
969 TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
970 NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
971 NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
972 if (!Opcode)
973 return false;
Artem Belevich2f348ea2018-05-09 23:46:19 +0000974 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(CodeAddrSpace, dl),
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000975 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
976 getI32Imm(fromTypeWidth, dl), N1, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000977 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
978 MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000979 }
980
Justin Bogner8d83fb62016-05-13 21:12:53 +0000981 if (!NVPTXLD)
982 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000983
Chandler Carruth66654b72018-08-14 23:30:32 +0000984 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
985 CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXLD), {MemRef});
Justin Bogner8d83fb62016-05-13 21:12:53 +0000986
987 ReplaceNode(N, NVPTXLD);
988 return true;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000989}
990
Justin Bogner8d83fb62016-05-13 21:12:53 +0000991bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000992
993 SDValue Chain = N->getOperand(0);
994 SDValue Op1 = N->getOperand(1);
995 SDValue Addr, Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +0000996 Optional<unsigned> Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +0000997 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000998 SDNode *LD;
999 MemSDNode *MemSD = cast<MemSDNode>(N);
1000 EVT LoadedVT = MemSD->getMemoryVT();
1001
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001002 if (!LoadedVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +00001003 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001004
1005 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00001006 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +00001007 if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00001008 return tryLDGLDU(N);
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001009 }
1010
Artem Belevich2f348ea2018-05-09 23:46:19 +00001011 unsigned int PointerSize =
1012 CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
1013
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001014 // Volatile Setting
1015 // - .volatile is only availalble for .global and .shared
1016 bool IsVolatile = MemSD->isVolatile();
1017 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1018 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1019 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1020 IsVolatile = false;
1021
1022 // Vector Setting
1023 MVT SimpleVT = LoadedVT.getSimpleVT();
1024
1025 // Type Setting: fromType + fromTypeWidth
1026 //
1027 // Sign : ISD::SEXTLOAD
1028 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
1029 // type is integer
1030 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
1031 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +00001032 // Read at least 8 bits (predicates are stored as 8-bit values)
1033 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001034 unsigned int FromType;
1035 // The last operand holds the original LoadSDNode::getExtensionType() value
Justin Holewinski0497ab12013-03-30 14:29:21 +00001036 unsigned ExtensionType = cast<ConstantSDNode>(
1037 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001038 if (ExtensionType == ISD::SEXTLOAD)
1039 FromType = NVPTX::PTXLdStInstCode::Signed;
1040 else if (ScalarVT.isFloatingPoint())
Artem Belevich620db1f2017-02-23 22:38:24 +00001041 FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1042 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001043 else
1044 FromType = NVPTX::PTXLdStInstCode::Unsigned;
1045
1046 unsigned VecType;
1047
1048 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001049 case NVPTXISD::LoadV2:
1050 VecType = NVPTX::PTXLdStInstCode::V2;
1051 break;
1052 case NVPTXISD::LoadV4:
1053 VecType = NVPTX::PTXLdStInstCode::V4;
1054 break;
1055 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001056 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001057 }
1058
1059 EVT EltVT = N->getValueType(0);
1060
Artem Belevich620db1f2017-02-23 22:38:24 +00001061 // v8f16 is a special case. PTX doesn't have ld.v8.f16
1062 // instruction. Instead, we split the vector into v2f16 chunks and
1063 // load them with ld.v4.b32.
1064 if (EltVT == MVT::v2f16) {
1065 assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
1066 EltVT = MVT::i32;
1067 FromType = NVPTX::PTXLdStInstCode::Untyped;
1068 FromTypeWidth = 32;
1069 }
1070
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001071 if (SelectDirectAddr(Op1, Addr)) {
1072 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001073 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001074 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001075 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001076 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1077 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1078 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1079 NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
1080 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001081 break;
1082 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001083 Opcode =
1084 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
1085 NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
1086 NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
1087 NVPTX::LDV_f32_v4_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001088 break;
1089 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001090 if (!Opcode)
1091 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001092 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1093 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1094 getI32Imm(FromTypeWidth, DL), Addr, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001095 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Artem Belevich2f348ea2018-05-09 23:46:19 +00001096 } else if (PointerSize == 64
1097 ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1098 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001099 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001100 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001101 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001102 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001103 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1104 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1105 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1106 NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
1107 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001108 break;
1109 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001110 Opcode =
1111 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
1112 NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
1113 NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
1114 NVPTX::LDV_f32_v4_asi, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001115 break;
1116 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001117 if (!Opcode)
1118 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001119 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1120 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1121 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001122 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Artem Belevich2f348ea2018-05-09 23:46:19 +00001123 } else if (PointerSize == 64
1124 ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1125 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1126 if (PointerSize == 64) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001127 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001128 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001129 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001130 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001131 Opcode = pickOpcodeForVT(
1132 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
1133 NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
1134 NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
1135 NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
1136 NVPTX::LDV_f64_v2_ari_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001137 break;
1138 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001139 Opcode = pickOpcodeForVT(
1140 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
1141 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
1142 NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
1143 NVPTX::LDV_f32_v4_ari_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001144 break;
1145 }
1146 } else {
1147 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001148 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001149 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001150 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001151 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1152 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1153 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1154 NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
1155 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001156 break;
1157 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001158 Opcode =
1159 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
1160 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
1161 NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
1162 NVPTX::LDV_f32_v4_ari, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001163 break;
1164 }
1165 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001166 if (!Opcode)
1167 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001168 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1169 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1170 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001171
Artem Belevichee7dd122017-03-02 19:14:14 +00001172 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001173 } else {
Artem Belevich2f348ea2018-05-09 23:46:19 +00001174 if (PointerSize == 64) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001175 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001176 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001177 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001178 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001179 Opcode = pickOpcodeForVT(
1180 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
1181 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1182 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
1183 NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1184 NVPTX::LDV_f64_v2_areg_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001185 break;
1186 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001187 Opcode = pickOpcodeForVT(
1188 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
1189 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
1190 NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
1191 NVPTX::LDV_f32_v4_areg_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001192 break;
1193 }
1194 } else {
1195 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001196 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001197 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001198 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001199 Opcode =
1200 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
1201 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1202 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
1203 NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
1204 NVPTX::LDV_f64_v2_areg);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001205 break;
1206 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001207 Opcode = pickOpcodeForVT(
1208 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
1209 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
1210 NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
1211 NVPTX::LDV_f32_v4_areg, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001212 break;
1213 }
1214 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001215 if (!Opcode)
1216 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001217 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1218 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1219 getI32Imm(FromTypeWidth, DL), Op1, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001220 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001221 }
1222
Chandler Carruth66654b72018-08-14 23:30:32 +00001223 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
1224 CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001225
Justin Bogner8d83fb62016-05-13 21:12:53 +00001226 ReplaceNode(N, LD);
1227 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001228}
1229
Justin Bogner8d83fb62016-05-13 21:12:53 +00001230bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001231
1232 SDValue Chain = N->getOperand(0);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001233 SDValue Op1;
1234 MemSDNode *Mem;
1235 bool IsLDG = true;
1236
Justin Holewinskic7997922016-04-05 12:38:01 +00001237 // If this is an LDG intrinsic, the address is the third operand. If its an
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001238 // LDG/LDU SD node (from custom vector handling), then its the second operand
1239 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1240 Op1 = N->getOperand(2);
1241 Mem = cast<MemIntrinsicSDNode>(N);
1242 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1243 switch (IID) {
1244 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001245 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001246 case Intrinsic::nvvm_ldg_global_f:
1247 case Intrinsic::nvvm_ldg_global_i:
1248 case Intrinsic::nvvm_ldg_global_p:
1249 IsLDG = true;
1250 break;
1251 case Intrinsic::nvvm_ldu_global_f:
1252 case Intrinsic::nvvm_ldu_global_i:
1253 case Intrinsic::nvvm_ldu_global_p:
1254 IsLDG = false;
1255 break;
1256 }
1257 } else {
1258 Op1 = N->getOperand(1);
1259 Mem = cast<MemSDNode>(N);
1260 }
1261
Artem Belevichee7dd122017-03-02 19:14:14 +00001262 Optional<unsigned> Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001263 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001264 SDNode *LD;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001265 SDValue Base, Offset, Addr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00001266
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001267 EVT EltVT = Mem->getMemoryVT();
Justin Holewinskic7997922016-04-05 12:38:01 +00001268 unsigned NumElts = 1;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001269 if (EltVT.isVector()) {
Justin Holewinskic7997922016-04-05 12:38:01 +00001270 NumElts = EltVT.getVectorNumElements();
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001271 EltVT = EltVT.getVectorElementType();
Artem Belevicha28e5982018-04-06 21:10:24 +00001272 // vectors of f16 are loaded/stored as multiples of v2f16 elements.
1273 if (EltVT == MVT::f16 && N->getValueType(0) == MVT::v2f16) {
1274 assert(NumElts % 2 == 0 && "Vector must have even number of elements");
1275 EltVT = MVT::v2f16;
1276 NumElts /= 2;
1277 }
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001278 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001279
Justin Holewinskic7997922016-04-05 12:38:01 +00001280 // Build the "promoted" result VTList for the load. If we are really loading
1281 // i8s, then the return type will be promoted to i16 since we do not expose
1282 // 8-bit registers in NVPTX.
1283 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1284 SmallVector<EVT, 5> InstVTs;
1285 for (unsigned i = 0; i != NumElts; ++i) {
1286 InstVTs.push_back(NodeVT);
1287 }
1288 InstVTs.push_back(MVT::Other);
1289 SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1290
Justin Holewinskie40e9292013-07-01 12:58:52 +00001291 if (SelectDirectAddr(Op1, Addr)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001292 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001293 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001294 return false;
Justin Lebarfaaf2d22018-02-28 23:58:05 +00001295 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001296 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001297 if (IsLDG)
1298 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1299 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1300 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1301 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1302 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1303 NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
1304 NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
1305 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1306 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1307 else
1308 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1309 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1310 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1311 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1312 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1313 NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
1314 NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
1315 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1316 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001317 break;
Justin Lebarfaaf2d22018-02-28 23:58:05 +00001318 case NVPTXISD::LoadV2:
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001319 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001320 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1321 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1322 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1323 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1324 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1325 NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
1326 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
1327 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1328 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001329 break;
1330 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001331 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1332 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1333 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1334 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1335 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1336 NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
1337 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
1338 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1339 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001340 break;
Justin Lebarfaaf2d22018-02-28 23:58:05 +00001341 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001342 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001343 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1344 NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1345 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1346 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
1347 NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
1348 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
1349 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001350 break;
1351 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001352 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1353 NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1354 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1355 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
1356 NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
1357 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
1358 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001359 break;
1360 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001361 if (!Opcode)
1362 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001363 SDValue Ops[] = { Addr, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001364 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001365 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1366 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1367 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001368 switch (N->getOpcode()) {
1369 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001370 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001371 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001372 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001373 if (IsLDG)
1374 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1375 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1376 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1377 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1378 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1379 NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
1380 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
1381 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1382 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1383 else
1384 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1385 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1386 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1387 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1388 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1389 NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
1390 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
1391 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1392 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001393 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001394 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001395 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001396 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1397 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1398 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1399 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1400 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1401 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
1402 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
1403 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1404 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001405 break;
1406 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001407 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1408 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1409 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1410 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1411 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1412 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
1413 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
1414 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1415 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001416 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001417 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001418 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001419 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1420 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1421 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1422 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
1423 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
1424 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
1425 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001426 break;
1427 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001428 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1429 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1430 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1431 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
1432 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
1433 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
1434 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001435 break;
1436 }
1437 } else {
1438 switch (N->getOpcode()) {
1439 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001440 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001441 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001442 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001443 if (IsLDG)
1444 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1445 NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1446 NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1447 NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1448 NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1449 NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
1450 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
1451 NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1452 NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1453 else
1454 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1455 NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1456 NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1457 NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1458 NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1459 NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
1460 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
1461 NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1462 NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001463 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001464 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001465 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001466 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1467 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1468 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1469 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1470 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1471 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
1472 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
1473 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1474 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001475 break;
1476 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001477 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1478 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1479 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1480 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1481 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1482 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
1483 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
1484 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1485 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001486 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001487 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001488 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001489 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1490 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1491 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1492 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
1493 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
1494 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
1495 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001496 break;
1497 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001498 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1499 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1500 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1501 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
1502 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
1503 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
1504 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001505 break;
1506 }
1507 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001508 if (!Opcode)
1509 return false;
1510 SDValue Ops[] = {Base, Offset, Chain};
1511 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001512 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001513 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001514 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001515 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001516 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001517 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001518 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001519 if (IsLDG)
1520 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1521 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1522 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1523 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1524 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1525 NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
1526 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
1527 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1528 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1529 else
1530 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1531 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1532 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1533 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1534 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1535 NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
1536 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
1537 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1538 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001539 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001540 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001541 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001542 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1543 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1544 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1545 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1546 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1547 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
1548 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
1549 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1550 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001551 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001552 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001553 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1554 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1555 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1556 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1557 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1558 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
1559 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
1560 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1561 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001562 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001563 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001564 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001565 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1566 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1567 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1568 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
1569 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
1570 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
1571 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001572 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001573 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001574 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1575 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1576 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1577 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
1578 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
1579 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
1580 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001581 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001582 }
Justin Holewinskie40e9292013-07-01 12:58:52 +00001583 } else {
1584 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001585 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001586 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001587 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001588 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001589 if (IsLDG)
1590 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1591 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1592 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1593 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1594 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1595 NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
1596 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
1597 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1598 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1599 else
1600 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1601 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1602 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1603 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1604 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1605 NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
1606 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
1607 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1608 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001609 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001610 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001611 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001612 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1613 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1614 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1615 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1616 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1617 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
1618 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
1619 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1620 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001621 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001622 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001623 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1624 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1625 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1626 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1627 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1628 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
1629 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
1630 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1631 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001632 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001633 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001634 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001635 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1636 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1637 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1638 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
1639 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
1640 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
1641 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001642 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001643 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001644 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1645 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1646 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1647 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
1648 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
1649 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
1650 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001651 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001652 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001653 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001654 if (!Opcode)
1655 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001656 SDValue Ops[] = { Op1, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001657 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001658 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001659
Chandler Carruth66654b72018-08-14 23:30:32 +00001660 MachineMemOperand *MemRef = Mem->getMemOperand();
1661 CurDAG->setNodeMemRefs(cast<MachineSDNode>(LD), {MemRef});
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001662
Justin Holewinskic7997922016-04-05 12:38:01 +00001663 // For automatic generation of LDG (through SelectLoad[Vector], not the
1664 // intrinsics), we may have an extending load like:
1665 //
1666 // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
1667 //
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001668 // In this case, the matching logic above will select a load for the original
1669 // memory type (in this case, i8) and our types will not match (the node needs
1670 // to return an i32 in this case). Our LDG/LDU nodes do not support the
1671 // concept of sign-/zero-extension, so emulate it here by adding an explicit
1672 // CVT instruction. Ptxas should clean up any redundancies here.
1673
Justin Holewinskic7997922016-04-05 12:38:01 +00001674 EVT OrigType = N->getValueType(0);
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001675 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
Justin Holewinskic7997922016-04-05 12:38:01 +00001676
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001677 if (OrigType != EltVT && LdNode) {
1678 // We have an extending-load. The instruction we selected operates on the
1679 // smaller type, but the SDNode we are replacing has the larger type. We
1680 // need to emit a CVT to make the types match.
1681 bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
1682 unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
1683 EltVT.getSimpleVT(), IsSigned);
Justin Holewinskic7997922016-04-05 12:38:01 +00001684
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001685 // For each output value, apply the manual sign/zero-extension and make sure
1686 // all users of the load go through that CVT.
Justin Holewinskic7997922016-04-05 12:38:01 +00001687 for (unsigned i = 0; i != NumElts; ++i) {
1688 SDValue Res(LD, i);
1689 SDValue OrigVal(N, i);
1690
1691 SDNode *CvtNode =
1692 CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001693 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
1694 DL, MVT::i32));
Justin Holewinskic7997922016-04-05 12:38:01 +00001695 ReplaceUses(OrigVal, SDValue(CvtNode, 0));
1696 }
1697 }
1698
Justin Bogner8d83fb62016-05-13 21:12:53 +00001699 ReplaceNode(N, LD);
1700 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001701}
1702
Justin Bogner8d83fb62016-05-13 21:12:53 +00001703bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001704 SDLoc dl(N);
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +00001705 MemSDNode *ST = cast<MemSDNode>(N);
1706 assert(ST->writeMem() && "Expected store");
1707 StoreSDNode *PlainStore = dyn_cast<StoreSDNode>(N);
1708 AtomicSDNode *AtomicStore = dyn_cast<AtomicSDNode>(N);
1709 assert((PlainStore || AtomicStore) && "Expected store");
Justin Holewinskiae556d32012-05-04 20:18:50 +00001710 EVT StoreVT = ST->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +00001711 SDNode *NVPTXST = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001712
1713 // do not support pre/post inc/dec
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +00001714 if (PlainStore && PlainStore->isIndexed())
Justin Bogner8d83fb62016-05-13 21:12:53 +00001715 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001716
1717 if (!StoreVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +00001718 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001719
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +00001720 AtomicOrdering Ordering = ST->getOrdering();
1721 // In order to lower atomic loads with stronger guarantees we would need to
1722 // use store.release or insert fences. However these features were only added
1723 // with PTX ISA 6.0 / sm_70.
1724 // TODO: Check if we can actually use the new instructions and implement them.
1725 if (isStrongerThanMonotonic(Ordering))
1726 return false;
1727
Justin Holewinskiae556d32012-05-04 20:18:50 +00001728 // Address Space Setting
Artem Belevich2f348ea2018-05-09 23:46:19 +00001729 unsigned int CodeAddrSpace = getCodeAddrSpace(ST);
1730 unsigned int PointerSize =
1731 CurDAG->getDataLayout().getPointerSizeInBits(ST->getAddressSpace());
Justin Holewinskiae556d32012-05-04 20:18:50 +00001732
1733 // Volatile Setting
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +00001734 // - .volatile is only available for .global and .shared
1735 // - .volatile has the same memory synchronization semantics as .relaxed.sys
1736 bool isVolatile = ST->isVolatile() || Ordering == AtomicOrdering::Monotonic;
Artem Belevich2f348ea2018-05-09 23:46:19 +00001737 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1738 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1739 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
Justin Holewinskiae556d32012-05-04 20:18:50 +00001740 isVolatile = false;
1741
1742 // Vector Setting
1743 MVT SimpleVT = StoreVT.getSimpleVT();
1744 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001745
1746 // Type Setting: toType + toTypeWidth
1747 // - for integer type, always use 'u'
1748 //
1749 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00001750 unsigned toTypeWidth = ScalarVT.getSizeInBits();
Artem Belevich620db1f2017-02-23 22:38:24 +00001751 if (SimpleVT.isVector()) {
1752 assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
1753 // v2f16 is stored using st.b32
1754 toTypeWidth = 32;
1755 }
1756
Justin Holewinskiae556d32012-05-04 20:18:50 +00001757 unsigned int toType;
1758 if (ScalarVT.isFloatingPoint())
Artem Belevich64dc9be2017-01-13 20:56:17 +00001759 // f16 uses .b16 as its storage type.
1760 toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1761 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001762 else
1763 toType = NVPTX::PTXLdStInstCode::Unsigned;
1764
1765 // Create the machine instruction DAG
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +00001766 SDValue Chain = ST->getChain();
1767 SDValue Value = PlainStore ? PlainStore->getValue() : AtomicStore->getVal();
1768 SDValue BasePtr = ST->getBasePtr();
Justin Holewinskiae556d32012-05-04 20:18:50 +00001769 SDValue Addr;
1770 SDValue Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +00001771 Optional<unsigned> Opcode;
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +00001772 MVT::SimpleValueType SourceVT =
1773 Value.getNode()->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001774
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +00001775 if (SelectDirectAddr(BasePtr, Addr)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00001776 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1777 NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1778 NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
1779 NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1780 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +00001781 return false;
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +00001782 SDValue Ops[] = {Value,
1783 getI32Imm(isVolatile, dl),
1784 getI32Imm(CodeAddrSpace, dl),
1785 getI32Imm(vecType, dl),
1786 getI32Imm(toType, dl),
1787 getI32Imm(toTypeWidth, dl),
1788 Addr,
1789 Chain};
Artem Belevichee7dd122017-03-02 19:14:14 +00001790 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +00001791 } else if (PointerSize == 64
1792 ? SelectADDRsi64(BasePtr.getNode(), BasePtr, Base, Offset)
1793 : SelectADDRsi(BasePtr.getNode(), BasePtr, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00001794 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1795 NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1796 NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
1797 NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1798 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +00001799 return false;
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +00001800 SDValue Ops[] = {Value,
1801 getI32Imm(isVolatile, dl),
1802 getI32Imm(CodeAddrSpace, dl),
1803 getI32Imm(vecType, dl),
1804 getI32Imm(toType, dl),
1805 getI32Imm(toTypeWidth, dl),
1806 Base,
1807 Offset,
1808 Chain};
Artem Belevichee7dd122017-03-02 19:14:14 +00001809 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +00001810 } else if (PointerSize == 64
1811 ? SelectADDRri64(BasePtr.getNode(), BasePtr, Base, Offset)
1812 : SelectADDRri(BasePtr.getNode(), BasePtr, Base, Offset)) {
Artem Belevich2f348ea2018-05-09 23:46:19 +00001813 if (PointerSize == 64)
Artem Belevichee7dd122017-03-02 19:14:14 +00001814 Opcode = pickOpcodeForVT(
1815 SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
1816 NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
1817 NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1818 else
1819 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1820 NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1821 NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
1822 NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1823 if (!Opcode)
1824 return false;
1825
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +00001826 SDValue Ops[] = {Value,
1827 getI32Imm(isVolatile, dl),
1828 getI32Imm(CodeAddrSpace, dl),
1829 getI32Imm(vecType, dl),
1830 getI32Imm(toType, dl),
1831 getI32Imm(toTypeWidth, dl),
1832 Base,
1833 Offset,
1834 Chain};
Artem Belevichee7dd122017-03-02 19:14:14 +00001835 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001836 } else {
Artem Belevich2f348ea2018-05-09 23:46:19 +00001837 if (PointerSize == 64)
Artem Belevichee7dd122017-03-02 19:14:14 +00001838 Opcode =
1839 pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1840 NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1841 NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
1842 NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1843 else
1844 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1845 NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1846 NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
1847 NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1848 if (!Opcode)
1849 return false;
Jonas Hahnfeld20526bf2018-08-09 07:45:49 +00001850 SDValue Ops[] = {Value,
1851 getI32Imm(isVolatile, dl),
1852 getI32Imm(CodeAddrSpace, dl),
1853 getI32Imm(vecType, dl),
1854 getI32Imm(toType, dl),
1855 getI32Imm(toTypeWidth, dl),
1856 BasePtr,
1857 Chain};
Artem Belevichee7dd122017-03-02 19:14:14 +00001858 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001859 }
1860
Justin Bogner8d83fb62016-05-13 21:12:53 +00001861 if (!NVPTXST)
1862 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001863
Chandler Carruth66654b72018-08-14 23:30:32 +00001864 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
1865 CurDAG->setNodeMemRefs(cast<MachineSDNode>(NVPTXST), {MemRef});
Justin Bogner8d83fb62016-05-13 21:12:53 +00001866 ReplaceNode(N, NVPTXST);
1867 return true;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001868}
1869
Justin Bogner8d83fb62016-05-13 21:12:53 +00001870bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001871 SDValue Chain = N->getOperand(0);
1872 SDValue Op1 = N->getOperand(1);
1873 SDValue Addr, Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +00001874 Optional<unsigned> Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001875 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001876 SDNode *ST;
1877 EVT EltVT = Op1.getValueType();
1878 MemSDNode *MemSD = cast<MemSDNode>(N);
1879 EVT StoreVT = MemSD->getMemoryVT();
1880
1881 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00001882 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001883 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1884 report_fatal_error("Cannot store to pointer that points to constant "
1885 "memory space");
1886 }
Artem Belevich2f348ea2018-05-09 23:46:19 +00001887 unsigned int PointerSize =
1888 CurDAG->getDataLayout().getPointerSizeInBits(MemSD->getAddressSpace());
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001889
1890 // Volatile Setting
1891 // - .volatile is only available for .global and .shared
1892 bool IsVolatile = MemSD->isVolatile();
1893 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1894 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1895 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1896 IsVolatile = false;
1897
1898 // Type Setting: toType + toTypeWidth
1899 // - for integer type, always use 'u'
1900 assert(StoreVT.isSimple() && "Store value is not simple");
1901 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00001902 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001903 unsigned ToType;
1904 if (ScalarVT.isFloatingPoint())
Artem Belevich620db1f2017-02-23 22:38:24 +00001905 ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1906 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001907 else
1908 ToType = NVPTX::PTXLdStInstCode::Unsigned;
1909
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001910 SmallVector<SDValue, 12> StOps;
1911 SDValue N2;
1912 unsigned VecType;
1913
1914 switch (N->getOpcode()) {
1915 case NVPTXISD::StoreV2:
1916 VecType = NVPTX::PTXLdStInstCode::V2;
1917 StOps.push_back(N->getOperand(1));
1918 StOps.push_back(N->getOperand(2));
1919 N2 = N->getOperand(3);
1920 break;
1921 case NVPTXISD::StoreV4:
1922 VecType = NVPTX::PTXLdStInstCode::V4;
1923 StOps.push_back(N->getOperand(1));
1924 StOps.push_back(N->getOperand(2));
1925 StOps.push_back(N->getOperand(3));
1926 StOps.push_back(N->getOperand(4));
1927 N2 = N->getOperand(5);
1928 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001929 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001930 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001931 }
1932
Artem Belevich620db1f2017-02-23 22:38:24 +00001933 // v8f16 is a special case. PTX doesn't have st.v8.f16
1934 // instruction. Instead, we split the vector into v2f16 chunks and
1935 // store them with st.v4.b32.
1936 if (EltVT == MVT::v2f16) {
1937 assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
1938 EltVT = MVT::i32;
1939 ToType = NVPTX::PTXLdStInstCode::Untyped;
1940 ToTypeWidth = 32;
1941 }
1942
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001943 StOps.push_back(getI32Imm(IsVolatile, DL));
1944 StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1945 StOps.push_back(getI32Imm(VecType, DL));
1946 StOps.push_back(getI32Imm(ToType, DL));
1947 StOps.push_back(getI32Imm(ToTypeWidth, DL));
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001948
1949 if (SelectDirectAddr(N2, Addr)) {
1950 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001951 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001952 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001953 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001954 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1955 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1956 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1957 NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1958 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001959 break;
1960 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001961 Opcode =
1962 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
1963 NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
1964 NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1965 NVPTX::STV_f32_v4_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001966 break;
1967 }
1968 StOps.push_back(Addr);
Artem Belevich2f348ea2018-05-09 23:46:19 +00001969 } else if (PointerSize == 64 ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1970 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001971 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001972 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001973 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001974 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001975 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1976 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1977 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1978 NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1979 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001980 break;
1981 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001982 Opcode =
1983 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
1984 NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
1985 NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1986 NVPTX::STV_f32_v4_asi, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001987 break;
1988 }
1989 StOps.push_back(Base);
1990 StOps.push_back(Offset);
Artem Belevich2f348ea2018-05-09 23:46:19 +00001991 } else if (PointerSize == 64 ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1992 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1993 if (PointerSize == 64) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001994 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001995 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001996 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001997 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001998 Opcode = pickOpcodeForVT(
1999 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
2000 NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
2001 NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
2002 NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
2003 NVPTX::STV_f64_v2_ari_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002004 break;
2005 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002006 Opcode = pickOpcodeForVT(
2007 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
2008 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
2009 NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
2010 NVPTX::STV_f32_v4_ari_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002011 break;
2012 }
2013 } else {
2014 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002015 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002016 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002017 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002018 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
2019 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
2020 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
2021 NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
2022 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002023 break;
2024 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002025 Opcode =
2026 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
2027 NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
2028 NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
2029 NVPTX::STV_f32_v4_ari, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002030 break;
2031 }
2032 }
2033 StOps.push_back(Base);
2034 StOps.push_back(Offset);
2035 } else {
Artem Belevich2f348ea2018-05-09 23:46:19 +00002036 if (PointerSize == 64) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002037 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002038 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002039 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002040 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002041 Opcode = pickOpcodeForVT(
2042 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
2043 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
2044 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
2045 NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
2046 NVPTX::STV_f64_v2_areg_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002047 break;
2048 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002049 Opcode = pickOpcodeForVT(
2050 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
2051 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
2052 NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
2053 NVPTX::STV_f32_v4_areg_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002054 break;
2055 }
2056 } else {
2057 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00002058 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002059 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002060 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002061 Opcode =
2062 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
2063 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
2064 NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
2065 NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
2066 NVPTX::STV_f64_v2_areg);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002067 break;
2068 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002069 Opcode =
2070 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
2071 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None,
2072 NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
2073 NVPTX::STV_f32_v4_areg, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002074 break;
2075 }
2076 }
2077 StOps.push_back(N2);
2078 }
2079
Artem Belevichee7dd122017-03-02 19:14:14 +00002080 if (!Opcode)
2081 return false;
2082
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002083 StOps.push_back(Chain);
2084
Artem Belevichee7dd122017-03-02 19:14:14 +00002085 ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002086
Chandler Carruth66654b72018-08-14 23:30:32 +00002087 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2088 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ST), {MemRef});
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002089
Justin Bogner8d83fb62016-05-13 21:12:53 +00002090 ReplaceNode(N, ST);
2091 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002092}
2093
Justin Bogner8d83fb62016-05-13 21:12:53 +00002094bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00002095 SDValue Chain = Node->getOperand(0);
2096 SDValue Offset = Node->getOperand(2);
2097 SDValue Flag = Node->getOperand(3);
2098 SDLoc DL(Node);
2099 MemSDNode *Mem = cast<MemSDNode>(Node);
2100
2101 unsigned VecSize;
2102 switch (Node->getOpcode()) {
2103 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002104 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002105 case NVPTXISD::LoadParam:
2106 VecSize = 1;
2107 break;
2108 case NVPTXISD::LoadParamV2:
2109 VecSize = 2;
2110 break;
2111 case NVPTXISD::LoadParamV4:
2112 VecSize = 4;
2113 break;
2114 }
2115
2116 EVT EltVT = Node->getValueType(0);
2117 EVT MemVT = Mem->getMemoryVT();
2118
Artem Belevichee7dd122017-03-02 19:14:14 +00002119 Optional<unsigned> Opcode;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002120
2121 switch (VecSize) {
2122 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002123 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002124 case 1:
Artem Belevichee7dd122017-03-02 19:14:14 +00002125 Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
2126 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2127 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2128 NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2129 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002130 break;
2131 case 2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002132 Opcode =
2133 pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
2134 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2135 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2136 NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2137 NVPTX::LoadParamMemV2F64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002138 break;
2139 case 4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002140 Opcode = pickOpcodeForVT(
2141 MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
2142 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
2143 NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2144 NVPTX::LoadParamMemV4F32, None);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002145 break;
2146 }
Artem Belevichee7dd122017-03-02 19:14:14 +00002147 if (!Opcode)
2148 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002149
2150 SDVTList VTs;
2151 if (VecSize == 1) {
2152 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2153 } else if (VecSize == 2) {
2154 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2155 } else {
2156 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
Craig Topperabb4ac72014-04-16 06:10:51 +00002157 VTs = CurDAG->getVTList(EVTs);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002158 }
2159
2160 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2161
2162 SmallVector<SDValue, 2> Ops;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002163 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002164 Ops.push_back(Chain);
2165 Ops.push_back(Flag);
2166
Artem Belevichee7dd122017-03-02 19:14:14 +00002167 ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
Justin Bogner8d83fb62016-05-13 21:12:53 +00002168 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002169}
2170
Justin Bogner8d83fb62016-05-13 21:12:53 +00002171bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00002172 SDLoc DL(N);
2173 SDValue Chain = N->getOperand(0);
2174 SDValue Offset = N->getOperand(1);
2175 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2176 MemSDNode *Mem = cast<MemSDNode>(N);
2177
2178 // How many elements do we have?
2179 unsigned NumElts = 1;
2180 switch (N->getOpcode()) {
2181 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002182 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002183 case NVPTXISD::StoreRetval:
2184 NumElts = 1;
2185 break;
2186 case NVPTXISD::StoreRetvalV2:
2187 NumElts = 2;
2188 break;
2189 case NVPTXISD::StoreRetvalV4:
2190 NumElts = 4;
2191 break;
2192 }
2193
2194 // Build vector of operands
2195 SmallVector<SDValue, 6> Ops;
2196 for (unsigned i = 0; i < NumElts; ++i)
2197 Ops.push_back(N->getOperand(i + 2));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002198 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002199 Ops.push_back(Chain);
2200
2201 // Determine target opcode
2202 // If we have an i1, use an 8-bit store. The lowering code in
2203 // NVPTXISelLowering will have already emitted an upcast.
Artem Belevichee7dd122017-03-02 19:14:14 +00002204 Optional<unsigned> Opcode = 0;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002205 switch (NumElts) {
2206 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002207 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002208 case 1:
Artem Belevichee7dd122017-03-02 19:14:14 +00002209 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2210 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2211 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2212 NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2213 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002214 break;
2215 case 2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002216 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2217 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2218 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2219 NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2220 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002221 break;
2222 case 4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002223 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2224 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2225 NVPTX::StoreRetvalV4I32, None,
2226 NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2227 NVPTX::StoreRetvalV4F32, None);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002228 break;
2229 }
Artem Belevichee7dd122017-03-02 19:14:14 +00002230 if (!Opcode)
2231 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002232
Artem Belevichee7dd122017-03-02 19:14:14 +00002233 SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
Chandler Carruth66654b72018-08-14 23:30:32 +00002234 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2235 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
Justin Holewinskif8f70912013-06-28 17:57:59 +00002236
Justin Bogner8d83fb62016-05-13 21:12:53 +00002237 ReplaceNode(N, Ret);
2238 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002239}
2240
Justin Bogner8d83fb62016-05-13 21:12:53 +00002241bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00002242 SDLoc DL(N);
2243 SDValue Chain = N->getOperand(0);
2244 SDValue Param = N->getOperand(1);
2245 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2246 SDValue Offset = N->getOperand(2);
2247 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2248 MemSDNode *Mem = cast<MemSDNode>(N);
2249 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2250
2251 // How many elements do we have?
2252 unsigned NumElts = 1;
2253 switch (N->getOpcode()) {
2254 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002255 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002256 case NVPTXISD::StoreParamU32:
2257 case NVPTXISD::StoreParamS32:
2258 case NVPTXISD::StoreParam:
2259 NumElts = 1;
2260 break;
2261 case NVPTXISD::StoreParamV2:
2262 NumElts = 2;
2263 break;
2264 case NVPTXISD::StoreParamV4:
2265 NumElts = 4;
2266 break;
2267 }
2268
2269 // Build vector of operands
2270 SmallVector<SDValue, 8> Ops;
2271 for (unsigned i = 0; i < NumElts; ++i)
2272 Ops.push_back(N->getOperand(i + 3));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002273 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2274 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002275 Ops.push_back(Chain);
2276 Ops.push_back(Flag);
2277
2278 // Determine target opcode
2279 // If we have an i1, use an 8-bit store. The lowering code in
2280 // NVPTXISelLowering will have already emitted an upcast.
Artem Belevichee7dd122017-03-02 19:14:14 +00002281 Optional<unsigned> Opcode = 0;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002282 switch (N->getOpcode()) {
2283 default:
2284 switch (NumElts) {
2285 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002286 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002287 case 1:
Artem Belevichee7dd122017-03-02 19:14:14 +00002288 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2289 NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2290 NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2291 NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2292 NVPTX::StoreParamF32, NVPTX::StoreParamF64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002293 break;
2294 case 2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002295 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2296 NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2297 NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2298 NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2299 NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002300 break;
2301 case 4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002302 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2303 NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2304 NVPTX::StoreParamV4I32, None,
2305 NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2306 NVPTX::StoreParamV4F32, None);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002307 break;
2308 }
Artem Belevichee7dd122017-03-02 19:14:14 +00002309 if (!Opcode)
2310 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002311 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002312 // Special case: if we have a sign-extend/zero-extend node, insert the
2313 // conversion instruction first, and use that as the value operand to
2314 // the selected StoreParam node.
2315 case NVPTXISD::StoreParamU32: {
2316 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002317 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002318 MVT::i32);
2319 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2320 MVT::i32, Ops[0], CvtNone);
2321 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002322 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002323 }
2324 case NVPTXISD::StoreParamS32: {
2325 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002326 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002327 MVT::i32);
2328 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2329 MVT::i32, Ops[0], CvtNone);
2330 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002331 break;
2332 }
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002333 }
Justin Holewinskif8f70912013-06-28 17:57:59 +00002334
Justin Holewinskidff28d22013-07-01 12:59:01 +00002335 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002336 SDNode *Ret =
Artem Belevichee7dd122017-03-02 19:14:14 +00002337 CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
Chandler Carruth66654b72018-08-14 23:30:32 +00002338 MachineMemOperand *MemRef = cast<MemSDNode>(N)->getMemOperand();
2339 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ret), {MemRef});
Justin Holewinskif8f70912013-06-28 17:57:59 +00002340
Justin Bogner8d83fb62016-05-13 21:12:53 +00002341 ReplaceNode(N, Ret);
2342 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002343}
2344
Justin Bogner8d83fb62016-05-13 21:12:53 +00002345bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00002346 unsigned Opc = 0;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002347
2348 switch (N->getOpcode()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00002349 default: return false;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002350 case NVPTXISD::Tex1DFloatS32:
2351 Opc = NVPTX::TEX_1D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002352 break;
2353 case NVPTXISD::Tex1DFloatFloat:
2354 Opc = NVPTX::TEX_1D_F32_F32;
2355 break;
2356 case NVPTXISD::Tex1DFloatFloatLevel:
2357 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2358 break;
2359 case NVPTXISD::Tex1DFloatFloatGrad:
2360 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2361 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002362 case NVPTXISD::Tex1DS32S32:
2363 Opc = NVPTX::TEX_1D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002364 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002365 case NVPTXISD::Tex1DS32Float:
2366 Opc = NVPTX::TEX_1D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002367 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002368 case NVPTXISD::Tex1DS32FloatLevel:
2369 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002370 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002371 case NVPTXISD::Tex1DS32FloatGrad:
2372 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002373 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002374 case NVPTXISD::Tex1DU32S32:
2375 Opc = NVPTX::TEX_1D_U32_S32;
2376 break;
2377 case NVPTXISD::Tex1DU32Float:
2378 Opc = NVPTX::TEX_1D_U32_F32;
2379 break;
2380 case NVPTXISD::Tex1DU32FloatLevel:
2381 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
2382 break;
2383 case NVPTXISD::Tex1DU32FloatGrad:
2384 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
2385 break;
2386 case NVPTXISD::Tex1DArrayFloatS32:
2387 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002388 break;
2389 case NVPTXISD::Tex1DArrayFloatFloat:
2390 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2391 break;
2392 case NVPTXISD::Tex1DArrayFloatFloatLevel:
2393 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2394 break;
2395 case NVPTXISD::Tex1DArrayFloatFloatGrad:
2396 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2397 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002398 case NVPTXISD::Tex1DArrayS32S32:
2399 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002400 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002401 case NVPTXISD::Tex1DArrayS32Float:
2402 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002403 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002404 case NVPTXISD::Tex1DArrayS32FloatLevel:
2405 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002406 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002407 case NVPTXISD::Tex1DArrayS32FloatGrad:
2408 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002409 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002410 case NVPTXISD::Tex1DArrayU32S32:
2411 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
2412 break;
2413 case NVPTXISD::Tex1DArrayU32Float:
2414 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
2415 break;
2416 case NVPTXISD::Tex1DArrayU32FloatLevel:
2417 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
2418 break;
2419 case NVPTXISD::Tex1DArrayU32FloatGrad:
2420 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
2421 break;
2422 case NVPTXISD::Tex2DFloatS32:
2423 Opc = NVPTX::TEX_2D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002424 break;
2425 case NVPTXISD::Tex2DFloatFloat:
2426 Opc = NVPTX::TEX_2D_F32_F32;
2427 break;
2428 case NVPTXISD::Tex2DFloatFloatLevel:
2429 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2430 break;
2431 case NVPTXISD::Tex2DFloatFloatGrad:
2432 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2433 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002434 case NVPTXISD::Tex2DS32S32:
2435 Opc = NVPTX::TEX_2D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002436 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002437 case NVPTXISD::Tex2DS32Float:
2438 Opc = NVPTX::TEX_2D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002439 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002440 case NVPTXISD::Tex2DS32FloatLevel:
2441 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002442 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002443 case NVPTXISD::Tex2DS32FloatGrad:
2444 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002445 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002446 case NVPTXISD::Tex2DU32S32:
2447 Opc = NVPTX::TEX_2D_U32_S32;
2448 break;
2449 case NVPTXISD::Tex2DU32Float:
2450 Opc = NVPTX::TEX_2D_U32_F32;
2451 break;
2452 case NVPTXISD::Tex2DU32FloatLevel:
2453 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
2454 break;
2455 case NVPTXISD::Tex2DU32FloatGrad:
2456 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
2457 break;
2458 case NVPTXISD::Tex2DArrayFloatS32:
2459 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002460 break;
2461 case NVPTXISD::Tex2DArrayFloatFloat:
2462 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2463 break;
2464 case NVPTXISD::Tex2DArrayFloatFloatLevel:
2465 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2466 break;
2467 case NVPTXISD::Tex2DArrayFloatFloatGrad:
2468 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2469 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002470 case NVPTXISD::Tex2DArrayS32S32:
2471 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002472 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002473 case NVPTXISD::Tex2DArrayS32Float:
2474 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002475 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002476 case NVPTXISD::Tex2DArrayS32FloatLevel:
2477 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002478 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002479 case NVPTXISD::Tex2DArrayS32FloatGrad:
2480 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002481 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002482 case NVPTXISD::Tex2DArrayU32S32:
2483 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
2484 break;
2485 case NVPTXISD::Tex2DArrayU32Float:
2486 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
2487 break;
2488 case NVPTXISD::Tex2DArrayU32FloatLevel:
2489 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
2490 break;
2491 case NVPTXISD::Tex2DArrayU32FloatGrad:
2492 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
2493 break;
2494 case NVPTXISD::Tex3DFloatS32:
2495 Opc = NVPTX::TEX_3D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002496 break;
2497 case NVPTXISD::Tex3DFloatFloat:
2498 Opc = NVPTX::TEX_3D_F32_F32;
2499 break;
2500 case NVPTXISD::Tex3DFloatFloatLevel:
2501 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2502 break;
2503 case NVPTXISD::Tex3DFloatFloatGrad:
2504 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2505 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002506 case NVPTXISD::Tex3DS32S32:
2507 Opc = NVPTX::TEX_3D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002508 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002509 case NVPTXISD::Tex3DS32Float:
2510 Opc = NVPTX::TEX_3D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002511 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002512 case NVPTXISD::Tex3DS32FloatLevel:
2513 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002514 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002515 case NVPTXISD::Tex3DS32FloatGrad:
2516 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
2517 break;
2518 case NVPTXISD::Tex3DU32S32:
2519 Opc = NVPTX::TEX_3D_U32_S32;
2520 break;
2521 case NVPTXISD::Tex3DU32Float:
2522 Opc = NVPTX::TEX_3D_U32_F32;
2523 break;
2524 case NVPTXISD::Tex3DU32FloatLevel:
2525 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
2526 break;
2527 case NVPTXISD::Tex3DU32FloatGrad:
2528 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
2529 break;
2530 case NVPTXISD::TexCubeFloatFloat:
2531 Opc = NVPTX::TEX_CUBE_F32_F32;
2532 break;
2533 case NVPTXISD::TexCubeFloatFloatLevel:
2534 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
2535 break;
2536 case NVPTXISD::TexCubeS32Float:
2537 Opc = NVPTX::TEX_CUBE_S32_F32;
2538 break;
2539 case NVPTXISD::TexCubeS32FloatLevel:
2540 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
2541 break;
2542 case NVPTXISD::TexCubeU32Float:
2543 Opc = NVPTX::TEX_CUBE_U32_F32;
2544 break;
2545 case NVPTXISD::TexCubeU32FloatLevel:
2546 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
2547 break;
2548 case NVPTXISD::TexCubeArrayFloatFloat:
2549 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
2550 break;
2551 case NVPTXISD::TexCubeArrayFloatFloatLevel:
2552 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
2553 break;
2554 case NVPTXISD::TexCubeArrayS32Float:
2555 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
2556 break;
2557 case NVPTXISD::TexCubeArrayS32FloatLevel:
2558 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
2559 break;
2560 case NVPTXISD::TexCubeArrayU32Float:
2561 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
2562 break;
2563 case NVPTXISD::TexCubeArrayU32FloatLevel:
2564 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
2565 break;
2566 case NVPTXISD::Tld4R2DFloatFloat:
2567 Opc = NVPTX::TLD4_R_2D_F32_F32;
2568 break;
2569 case NVPTXISD::Tld4G2DFloatFloat:
2570 Opc = NVPTX::TLD4_G_2D_F32_F32;
2571 break;
2572 case NVPTXISD::Tld4B2DFloatFloat:
2573 Opc = NVPTX::TLD4_B_2D_F32_F32;
2574 break;
2575 case NVPTXISD::Tld4A2DFloatFloat:
2576 Opc = NVPTX::TLD4_A_2D_F32_F32;
2577 break;
2578 case NVPTXISD::Tld4R2DS64Float:
2579 Opc = NVPTX::TLD4_R_2D_S32_F32;
2580 break;
2581 case NVPTXISD::Tld4G2DS64Float:
2582 Opc = NVPTX::TLD4_G_2D_S32_F32;
2583 break;
2584 case NVPTXISD::Tld4B2DS64Float:
2585 Opc = NVPTX::TLD4_B_2D_S32_F32;
2586 break;
2587 case NVPTXISD::Tld4A2DS64Float:
2588 Opc = NVPTX::TLD4_A_2D_S32_F32;
2589 break;
2590 case NVPTXISD::Tld4R2DU64Float:
2591 Opc = NVPTX::TLD4_R_2D_U32_F32;
2592 break;
2593 case NVPTXISD::Tld4G2DU64Float:
2594 Opc = NVPTX::TLD4_G_2D_U32_F32;
2595 break;
2596 case NVPTXISD::Tld4B2DU64Float:
2597 Opc = NVPTX::TLD4_B_2D_U32_F32;
2598 break;
2599 case NVPTXISD::Tld4A2DU64Float:
2600 Opc = NVPTX::TLD4_A_2D_U32_F32;
2601 break;
2602 case NVPTXISD::TexUnified1DFloatS32:
2603 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
2604 break;
2605 case NVPTXISD::TexUnified1DFloatFloat:
2606 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
2607 break;
2608 case NVPTXISD::TexUnified1DFloatFloatLevel:
2609 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
2610 break;
2611 case NVPTXISD::TexUnified1DFloatFloatGrad:
2612 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
2613 break;
2614 case NVPTXISD::TexUnified1DS32S32:
2615 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
2616 break;
2617 case NVPTXISD::TexUnified1DS32Float:
2618 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
2619 break;
2620 case NVPTXISD::TexUnified1DS32FloatLevel:
2621 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
2622 break;
2623 case NVPTXISD::TexUnified1DS32FloatGrad:
2624 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
2625 break;
2626 case NVPTXISD::TexUnified1DU32S32:
2627 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
2628 break;
2629 case NVPTXISD::TexUnified1DU32Float:
2630 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
2631 break;
2632 case NVPTXISD::TexUnified1DU32FloatLevel:
2633 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
2634 break;
2635 case NVPTXISD::TexUnified1DU32FloatGrad:
2636 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
2637 break;
2638 case NVPTXISD::TexUnified1DArrayFloatS32:
2639 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
2640 break;
2641 case NVPTXISD::TexUnified1DArrayFloatFloat:
2642 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
2643 break;
2644 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
2645 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
2646 break;
2647 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
2648 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
2649 break;
2650 case NVPTXISD::TexUnified1DArrayS32S32:
2651 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
2652 break;
2653 case NVPTXISD::TexUnified1DArrayS32Float:
2654 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
2655 break;
2656 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
2657 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
2658 break;
2659 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
2660 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
2661 break;
2662 case NVPTXISD::TexUnified1DArrayU32S32:
2663 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
2664 break;
2665 case NVPTXISD::TexUnified1DArrayU32Float:
2666 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
2667 break;
2668 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
2669 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
2670 break;
2671 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
2672 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
2673 break;
2674 case NVPTXISD::TexUnified2DFloatS32:
2675 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
2676 break;
2677 case NVPTXISD::TexUnified2DFloatFloat:
2678 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
2679 break;
2680 case NVPTXISD::TexUnified2DFloatFloatLevel:
2681 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
2682 break;
2683 case NVPTXISD::TexUnified2DFloatFloatGrad:
2684 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
2685 break;
2686 case NVPTXISD::TexUnified2DS32S32:
2687 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
2688 break;
2689 case NVPTXISD::TexUnified2DS32Float:
2690 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
2691 break;
2692 case NVPTXISD::TexUnified2DS32FloatLevel:
2693 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
2694 break;
2695 case NVPTXISD::TexUnified2DS32FloatGrad:
2696 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
2697 break;
2698 case NVPTXISD::TexUnified2DU32S32:
2699 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
2700 break;
2701 case NVPTXISD::TexUnified2DU32Float:
2702 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
2703 break;
2704 case NVPTXISD::TexUnified2DU32FloatLevel:
2705 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
2706 break;
2707 case NVPTXISD::TexUnified2DU32FloatGrad:
2708 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
2709 break;
2710 case NVPTXISD::TexUnified2DArrayFloatS32:
2711 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
2712 break;
2713 case NVPTXISD::TexUnified2DArrayFloatFloat:
2714 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
2715 break;
2716 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
2717 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
2718 break;
2719 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
2720 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
2721 break;
2722 case NVPTXISD::TexUnified2DArrayS32S32:
2723 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
2724 break;
2725 case NVPTXISD::TexUnified2DArrayS32Float:
2726 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
2727 break;
2728 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
2729 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
2730 break;
2731 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
2732 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
2733 break;
2734 case NVPTXISD::TexUnified2DArrayU32S32:
2735 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
2736 break;
2737 case NVPTXISD::TexUnified2DArrayU32Float:
2738 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
2739 break;
2740 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
2741 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
2742 break;
2743 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
2744 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
2745 break;
2746 case NVPTXISD::TexUnified3DFloatS32:
2747 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
2748 break;
2749 case NVPTXISD::TexUnified3DFloatFloat:
2750 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
2751 break;
2752 case NVPTXISD::TexUnified3DFloatFloatLevel:
2753 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
2754 break;
2755 case NVPTXISD::TexUnified3DFloatFloatGrad:
2756 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
2757 break;
2758 case NVPTXISD::TexUnified3DS32S32:
2759 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
2760 break;
2761 case NVPTXISD::TexUnified3DS32Float:
2762 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
2763 break;
2764 case NVPTXISD::TexUnified3DS32FloatLevel:
2765 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
2766 break;
2767 case NVPTXISD::TexUnified3DS32FloatGrad:
2768 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
2769 break;
2770 case NVPTXISD::TexUnified3DU32S32:
2771 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
2772 break;
2773 case NVPTXISD::TexUnified3DU32Float:
2774 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
2775 break;
2776 case NVPTXISD::TexUnified3DU32FloatLevel:
2777 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
2778 break;
2779 case NVPTXISD::TexUnified3DU32FloatGrad:
2780 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
2781 break;
2782 case NVPTXISD::TexUnifiedCubeFloatFloat:
2783 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
2784 break;
2785 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
2786 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
2787 break;
2788 case NVPTXISD::TexUnifiedCubeS32Float:
2789 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
2790 break;
2791 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
2792 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
2793 break;
2794 case NVPTXISD::TexUnifiedCubeU32Float:
2795 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
2796 break;
2797 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
2798 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
2799 break;
2800 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
2801 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
2802 break;
2803 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
2804 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
2805 break;
2806 case NVPTXISD::TexUnifiedCubeArrayS32Float:
2807 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
2808 break;
2809 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
2810 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
2811 break;
2812 case NVPTXISD::TexUnifiedCubeArrayU32Float:
2813 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
2814 break;
2815 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
2816 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
2817 break;
2818 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
2819 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
2820 break;
2821 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
2822 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
2823 break;
2824 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
2825 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
2826 break;
2827 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
2828 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
2829 break;
2830 case NVPTXISD::Tld4UnifiedR2DS64Float:
2831 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
2832 break;
2833 case NVPTXISD::Tld4UnifiedG2DS64Float:
2834 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
2835 break;
2836 case NVPTXISD::Tld4UnifiedB2DS64Float:
2837 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
2838 break;
2839 case NVPTXISD::Tld4UnifiedA2DS64Float:
2840 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
2841 break;
2842 case NVPTXISD::Tld4UnifiedR2DU64Float:
2843 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
2844 break;
2845 case NVPTXISD::Tld4UnifiedG2DU64Float:
2846 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
2847 break;
2848 case NVPTXISD::Tld4UnifiedB2DU64Float:
2849 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
2850 break;
2851 case NVPTXISD::Tld4UnifiedA2DU64Float:
2852 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002853 break;
2854 }
2855
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002856 // Copy over operands
Benjamin Kramer806ae442017-08-20 17:30:32 +00002857 SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
2858 Ops.push_back(N->getOperand(0)); // Move chain to the back.
Justin Holewinski30d56a72014-04-09 15:39:15 +00002859
Justin Bogner8d83fb62016-05-13 21:12:53 +00002860 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2861 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002862}
2863
Justin Bogner8d83fb62016-05-13 21:12:53 +00002864bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00002865 unsigned Opc = 0;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002866 switch (N->getOpcode()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00002867 default: return false;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002868 case NVPTXISD::Suld1DI8Clamp:
2869 Opc = NVPTX::SULD_1D_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002870 break;
2871 case NVPTXISD::Suld1DI16Clamp:
2872 Opc = NVPTX::SULD_1D_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002873 break;
2874 case NVPTXISD::Suld1DI32Clamp:
2875 Opc = NVPTX::SULD_1D_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002876 break;
2877 case NVPTXISD::Suld1DI64Clamp:
2878 Opc = NVPTX::SULD_1D_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002879 break;
2880 case NVPTXISD::Suld1DV2I8Clamp:
2881 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002882 break;
2883 case NVPTXISD::Suld1DV2I16Clamp:
2884 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002885 break;
2886 case NVPTXISD::Suld1DV2I32Clamp:
2887 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002888 break;
2889 case NVPTXISD::Suld1DV2I64Clamp:
2890 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002891 break;
2892 case NVPTXISD::Suld1DV4I8Clamp:
2893 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002894 break;
2895 case NVPTXISD::Suld1DV4I16Clamp:
2896 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002897 break;
2898 case NVPTXISD::Suld1DV4I32Clamp:
2899 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002900 break;
2901 case NVPTXISD::Suld1DArrayI8Clamp:
2902 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002903 break;
2904 case NVPTXISD::Suld1DArrayI16Clamp:
2905 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002906 break;
2907 case NVPTXISD::Suld1DArrayI32Clamp:
2908 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002909 break;
2910 case NVPTXISD::Suld1DArrayI64Clamp:
2911 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002912 break;
2913 case NVPTXISD::Suld1DArrayV2I8Clamp:
2914 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002915 break;
2916 case NVPTXISD::Suld1DArrayV2I16Clamp:
2917 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002918 break;
2919 case NVPTXISD::Suld1DArrayV2I32Clamp:
2920 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002921 break;
2922 case NVPTXISD::Suld1DArrayV2I64Clamp:
2923 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002924 break;
2925 case NVPTXISD::Suld1DArrayV4I8Clamp:
2926 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002927 break;
2928 case NVPTXISD::Suld1DArrayV4I16Clamp:
2929 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002930 break;
2931 case NVPTXISD::Suld1DArrayV4I32Clamp:
2932 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002933 break;
2934 case NVPTXISD::Suld2DI8Clamp:
2935 Opc = NVPTX::SULD_2D_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002936 break;
2937 case NVPTXISD::Suld2DI16Clamp:
2938 Opc = NVPTX::SULD_2D_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002939 break;
2940 case NVPTXISD::Suld2DI32Clamp:
2941 Opc = NVPTX::SULD_2D_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002942 break;
2943 case NVPTXISD::Suld2DI64Clamp:
2944 Opc = NVPTX::SULD_2D_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002945 break;
2946 case NVPTXISD::Suld2DV2I8Clamp:
2947 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002948 break;
2949 case NVPTXISD::Suld2DV2I16Clamp:
2950 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002951 break;
2952 case NVPTXISD::Suld2DV2I32Clamp:
2953 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002954 break;
2955 case NVPTXISD::Suld2DV2I64Clamp:
2956 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002957 break;
2958 case NVPTXISD::Suld2DV4I8Clamp:
2959 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002960 break;
2961 case NVPTXISD::Suld2DV4I16Clamp:
2962 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002963 break;
2964 case NVPTXISD::Suld2DV4I32Clamp:
2965 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002966 break;
2967 case NVPTXISD::Suld2DArrayI8Clamp:
2968 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002969 break;
2970 case NVPTXISD::Suld2DArrayI16Clamp:
2971 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002972 break;
2973 case NVPTXISD::Suld2DArrayI32Clamp:
2974 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002975 break;
2976 case NVPTXISD::Suld2DArrayI64Clamp:
2977 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002978 break;
2979 case NVPTXISD::Suld2DArrayV2I8Clamp:
2980 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002981 break;
2982 case NVPTXISD::Suld2DArrayV2I16Clamp:
2983 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002984 break;
2985 case NVPTXISD::Suld2DArrayV2I32Clamp:
2986 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002987 break;
2988 case NVPTXISD::Suld2DArrayV2I64Clamp:
2989 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002990 break;
2991 case NVPTXISD::Suld2DArrayV4I8Clamp:
2992 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002993 break;
2994 case NVPTXISD::Suld2DArrayV4I16Clamp:
2995 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002996 break;
2997 case NVPTXISD::Suld2DArrayV4I32Clamp:
2998 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002999 break;
3000 case NVPTXISD::Suld3DI8Clamp:
3001 Opc = NVPTX::SULD_3D_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003002 break;
3003 case NVPTXISD::Suld3DI16Clamp:
3004 Opc = NVPTX::SULD_3D_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003005 break;
3006 case NVPTXISD::Suld3DI32Clamp:
3007 Opc = NVPTX::SULD_3D_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003008 break;
3009 case NVPTXISD::Suld3DI64Clamp:
3010 Opc = NVPTX::SULD_3D_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003011 break;
3012 case NVPTXISD::Suld3DV2I8Clamp:
3013 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003014 break;
3015 case NVPTXISD::Suld3DV2I16Clamp:
3016 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003017 break;
3018 case NVPTXISD::Suld3DV2I32Clamp:
3019 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003020 break;
3021 case NVPTXISD::Suld3DV2I64Clamp:
3022 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003023 break;
3024 case NVPTXISD::Suld3DV4I8Clamp:
3025 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003026 break;
3027 case NVPTXISD::Suld3DV4I16Clamp:
3028 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003029 break;
3030 case NVPTXISD::Suld3DV4I32Clamp:
3031 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003032 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003033 case NVPTXISD::Suld1DI8Trap:
3034 Opc = NVPTX::SULD_1D_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003035 break;
3036 case NVPTXISD::Suld1DI16Trap:
3037 Opc = NVPTX::SULD_1D_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003038 break;
3039 case NVPTXISD::Suld1DI32Trap:
3040 Opc = NVPTX::SULD_1D_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003041 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003042 case NVPTXISD::Suld1DI64Trap:
3043 Opc = NVPTX::SULD_1D_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003044 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003045 case NVPTXISD::Suld1DV2I8Trap:
3046 Opc = NVPTX::SULD_1D_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003047 break;
3048 case NVPTXISD::Suld1DV2I16Trap:
3049 Opc = NVPTX::SULD_1D_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003050 break;
3051 case NVPTXISD::Suld1DV2I32Trap:
3052 Opc = NVPTX::SULD_1D_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003053 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003054 case NVPTXISD::Suld1DV2I64Trap:
3055 Opc = NVPTX::SULD_1D_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003056 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003057 case NVPTXISD::Suld1DV4I8Trap:
3058 Opc = NVPTX::SULD_1D_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003059 break;
3060 case NVPTXISD::Suld1DV4I16Trap:
3061 Opc = NVPTX::SULD_1D_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003062 break;
3063 case NVPTXISD::Suld1DV4I32Trap:
3064 Opc = NVPTX::SULD_1D_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003065 break;
3066 case NVPTXISD::Suld1DArrayI8Trap:
3067 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003068 break;
3069 case NVPTXISD::Suld1DArrayI16Trap:
3070 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003071 break;
3072 case NVPTXISD::Suld1DArrayI32Trap:
3073 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003074 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003075 case NVPTXISD::Suld1DArrayI64Trap:
3076 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003077 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003078 case NVPTXISD::Suld1DArrayV2I8Trap:
3079 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003080 break;
3081 case NVPTXISD::Suld1DArrayV2I16Trap:
3082 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003083 break;
3084 case NVPTXISD::Suld1DArrayV2I32Trap:
3085 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003086 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003087 case NVPTXISD::Suld1DArrayV2I64Trap:
3088 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003089 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003090 case NVPTXISD::Suld1DArrayV4I8Trap:
3091 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003092 break;
3093 case NVPTXISD::Suld1DArrayV4I16Trap:
3094 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003095 break;
3096 case NVPTXISD::Suld1DArrayV4I32Trap:
3097 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003098 break;
3099 case NVPTXISD::Suld2DI8Trap:
3100 Opc = NVPTX::SULD_2D_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003101 break;
3102 case NVPTXISD::Suld2DI16Trap:
3103 Opc = NVPTX::SULD_2D_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003104 break;
3105 case NVPTXISD::Suld2DI32Trap:
3106 Opc = NVPTX::SULD_2D_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003107 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003108 case NVPTXISD::Suld2DI64Trap:
3109 Opc = NVPTX::SULD_2D_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003110 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003111 case NVPTXISD::Suld2DV2I8Trap:
3112 Opc = NVPTX::SULD_2D_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003113 break;
3114 case NVPTXISD::Suld2DV2I16Trap:
3115 Opc = NVPTX::SULD_2D_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003116 break;
3117 case NVPTXISD::Suld2DV2I32Trap:
3118 Opc = NVPTX::SULD_2D_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003119 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003120 case NVPTXISD::Suld2DV2I64Trap:
3121 Opc = NVPTX::SULD_2D_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003122 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003123 case NVPTXISD::Suld2DV4I8Trap:
3124 Opc = NVPTX::SULD_2D_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003125 break;
3126 case NVPTXISD::Suld2DV4I16Trap:
3127 Opc = NVPTX::SULD_2D_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003128 break;
3129 case NVPTXISD::Suld2DV4I32Trap:
3130 Opc = NVPTX::SULD_2D_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003131 break;
3132 case NVPTXISD::Suld2DArrayI8Trap:
3133 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003134 break;
3135 case NVPTXISD::Suld2DArrayI16Trap:
3136 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003137 break;
3138 case NVPTXISD::Suld2DArrayI32Trap:
3139 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003140 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003141 case NVPTXISD::Suld2DArrayI64Trap:
3142 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003143 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003144 case NVPTXISD::Suld2DArrayV2I8Trap:
3145 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003146 break;
3147 case NVPTXISD::Suld2DArrayV2I16Trap:
3148 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003149 break;
3150 case NVPTXISD::Suld2DArrayV2I32Trap:
3151 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003152 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003153 case NVPTXISD::Suld2DArrayV2I64Trap:
3154 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003155 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003156 case NVPTXISD::Suld2DArrayV4I8Trap:
3157 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003158 break;
3159 case NVPTXISD::Suld2DArrayV4I16Trap:
3160 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003161 break;
3162 case NVPTXISD::Suld2DArrayV4I32Trap:
3163 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003164 break;
3165 case NVPTXISD::Suld3DI8Trap:
3166 Opc = NVPTX::SULD_3D_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003167 break;
3168 case NVPTXISD::Suld3DI16Trap:
3169 Opc = NVPTX::SULD_3D_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003170 break;
3171 case NVPTXISD::Suld3DI32Trap:
3172 Opc = NVPTX::SULD_3D_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003173 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003174 case NVPTXISD::Suld3DI64Trap:
3175 Opc = NVPTX::SULD_3D_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003176 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003177 case NVPTXISD::Suld3DV2I8Trap:
3178 Opc = NVPTX::SULD_3D_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003179 break;
3180 case NVPTXISD::Suld3DV2I16Trap:
3181 Opc = NVPTX::SULD_3D_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003182 break;
3183 case NVPTXISD::Suld3DV2I32Trap:
3184 Opc = NVPTX::SULD_3D_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003185 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003186 case NVPTXISD::Suld3DV2I64Trap:
3187 Opc = NVPTX::SULD_3D_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003188 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003189 case NVPTXISD::Suld3DV4I8Trap:
3190 Opc = NVPTX::SULD_3D_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003191 break;
3192 case NVPTXISD::Suld3DV4I16Trap:
3193 Opc = NVPTX::SULD_3D_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003194 break;
3195 case NVPTXISD::Suld3DV4I32Trap:
3196 Opc = NVPTX::SULD_3D_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003197 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003198 case NVPTXISD::Suld1DI8Zero:
3199 Opc = NVPTX::SULD_1D_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003200 break;
3201 case NVPTXISD::Suld1DI16Zero:
3202 Opc = NVPTX::SULD_1D_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003203 break;
3204 case NVPTXISD::Suld1DI32Zero:
3205 Opc = NVPTX::SULD_1D_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003206 break;
3207 case NVPTXISD::Suld1DI64Zero:
3208 Opc = NVPTX::SULD_1D_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003209 break;
3210 case NVPTXISD::Suld1DV2I8Zero:
3211 Opc = NVPTX::SULD_1D_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003212 break;
3213 case NVPTXISD::Suld1DV2I16Zero:
3214 Opc = NVPTX::SULD_1D_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003215 break;
3216 case NVPTXISD::Suld1DV2I32Zero:
3217 Opc = NVPTX::SULD_1D_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003218 break;
3219 case NVPTXISD::Suld1DV2I64Zero:
3220 Opc = NVPTX::SULD_1D_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003221 break;
3222 case NVPTXISD::Suld1DV4I8Zero:
3223 Opc = NVPTX::SULD_1D_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003224 break;
3225 case NVPTXISD::Suld1DV4I16Zero:
3226 Opc = NVPTX::SULD_1D_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003227 break;
3228 case NVPTXISD::Suld1DV4I32Zero:
3229 Opc = NVPTX::SULD_1D_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003230 break;
3231 case NVPTXISD::Suld1DArrayI8Zero:
3232 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003233 break;
3234 case NVPTXISD::Suld1DArrayI16Zero:
3235 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003236 break;
3237 case NVPTXISD::Suld1DArrayI32Zero:
3238 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003239 break;
3240 case NVPTXISD::Suld1DArrayI64Zero:
3241 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003242 break;
3243 case NVPTXISD::Suld1DArrayV2I8Zero:
3244 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003245 break;
3246 case NVPTXISD::Suld1DArrayV2I16Zero:
3247 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003248 break;
3249 case NVPTXISD::Suld1DArrayV2I32Zero:
3250 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003251 break;
3252 case NVPTXISD::Suld1DArrayV2I64Zero:
3253 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003254 break;
3255 case NVPTXISD::Suld1DArrayV4I8Zero:
3256 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003257 break;
3258 case NVPTXISD::Suld1DArrayV4I16Zero:
3259 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003260 break;
3261 case NVPTXISD::Suld1DArrayV4I32Zero:
3262 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003263 break;
3264 case NVPTXISD::Suld2DI8Zero:
3265 Opc = NVPTX::SULD_2D_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003266 break;
3267 case NVPTXISD::Suld2DI16Zero:
3268 Opc = NVPTX::SULD_2D_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003269 break;
3270 case NVPTXISD::Suld2DI32Zero:
3271 Opc = NVPTX::SULD_2D_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003272 break;
3273 case NVPTXISD::Suld2DI64Zero:
3274 Opc = NVPTX::SULD_2D_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003275 break;
3276 case NVPTXISD::Suld2DV2I8Zero:
3277 Opc = NVPTX::SULD_2D_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003278 break;
3279 case NVPTXISD::Suld2DV2I16Zero:
3280 Opc = NVPTX::SULD_2D_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003281 break;
3282 case NVPTXISD::Suld2DV2I32Zero:
3283 Opc = NVPTX::SULD_2D_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003284 break;
3285 case NVPTXISD::Suld2DV2I64Zero:
3286 Opc = NVPTX::SULD_2D_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003287 break;
3288 case NVPTXISD::Suld2DV4I8Zero:
3289 Opc = NVPTX::SULD_2D_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003290 break;
3291 case NVPTXISD::Suld2DV4I16Zero:
3292 Opc = NVPTX::SULD_2D_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003293 break;
3294 case NVPTXISD::Suld2DV4I32Zero:
3295 Opc = NVPTX::SULD_2D_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003296 break;
3297 case NVPTXISD::Suld2DArrayI8Zero:
3298 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003299 break;
3300 case NVPTXISD::Suld2DArrayI16Zero:
3301 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003302 break;
3303 case NVPTXISD::Suld2DArrayI32Zero:
3304 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003305 break;
3306 case NVPTXISD::Suld2DArrayI64Zero:
3307 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003308 break;
3309 case NVPTXISD::Suld2DArrayV2I8Zero:
3310 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003311 break;
3312 case NVPTXISD::Suld2DArrayV2I16Zero:
3313 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003314 break;
3315 case NVPTXISD::Suld2DArrayV2I32Zero:
3316 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003317 break;
3318 case NVPTXISD::Suld2DArrayV2I64Zero:
3319 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003320 break;
3321 case NVPTXISD::Suld2DArrayV4I8Zero:
3322 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003323 break;
3324 case NVPTXISD::Suld2DArrayV4I16Zero:
3325 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003326 break;
3327 case NVPTXISD::Suld2DArrayV4I32Zero:
3328 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003329 break;
3330 case NVPTXISD::Suld3DI8Zero:
3331 Opc = NVPTX::SULD_3D_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003332 break;
3333 case NVPTXISD::Suld3DI16Zero:
3334 Opc = NVPTX::SULD_3D_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003335 break;
3336 case NVPTXISD::Suld3DI32Zero:
3337 Opc = NVPTX::SULD_3D_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003338 break;
3339 case NVPTXISD::Suld3DI64Zero:
3340 Opc = NVPTX::SULD_3D_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003341 break;
3342 case NVPTXISD::Suld3DV2I8Zero:
3343 Opc = NVPTX::SULD_3D_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003344 break;
3345 case NVPTXISD::Suld3DV2I16Zero:
3346 Opc = NVPTX::SULD_3D_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003347 break;
3348 case NVPTXISD::Suld3DV2I32Zero:
3349 Opc = NVPTX::SULD_3D_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003350 break;
3351 case NVPTXISD::Suld3DV2I64Zero:
3352 Opc = NVPTX::SULD_3D_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003353 break;
3354 case NVPTXISD::Suld3DV4I8Zero:
3355 Opc = NVPTX::SULD_3D_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003356 break;
3357 case NVPTXISD::Suld3DV4I16Zero:
3358 Opc = NVPTX::SULD_3D_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003359 break;
3360 case NVPTXISD::Suld3DV4I32Zero:
3361 Opc = NVPTX::SULD_3D_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003362 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003363 }
Benjamin Kramer806ae442017-08-20 17:30:32 +00003364
3365 // Copy over operands
3366 SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
3367 Ops.push_back(N->getOperand(0)); // Move chain to the back.
3368
Justin Bogner8d83fb62016-05-13 21:12:53 +00003369 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3370 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003371}
3372
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003373
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003374/// SelectBFE - Look for instruction sequences that can be made more efficient
3375/// by using the 'bfe' (bit-field extract) PTX instruction
Justin Bogner8d83fb62016-05-13 21:12:53 +00003376bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003377 SDLoc DL(N);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003378 SDValue LHS = N->getOperand(0);
3379 SDValue RHS = N->getOperand(1);
3380 SDValue Len;
3381 SDValue Start;
3382 SDValue Val;
3383 bool IsSigned = false;
3384
3385 if (N->getOpcode() == ISD::AND) {
3386 // Canonicalize the operands
3387 // We want 'and %val, %mask'
3388 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3389 std::swap(LHS, RHS);
3390 }
3391
3392 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3393 if (!Mask) {
3394 // We need a constant mask on the RHS of the AND
Justin Bogner8d83fb62016-05-13 21:12:53 +00003395 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003396 }
3397
3398 // Extract the mask bits
3399 uint64_t MaskVal = Mask->getZExtValue();
3400 if (!isMask_64(MaskVal)) {
3401 // We *could* handle shifted masks here, but doing so would require an
3402 // 'and' operation to fix up the low-order bits so we would trade
3403 // shr+and for bfe+and, which has the same throughput
Justin Bogner8d83fb62016-05-13 21:12:53 +00003404 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003405 }
3406
3407 // How many bits are in our mask?
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00003408 uint64_t NumBits = countTrailingOnes(MaskVal);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003409 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003410
3411 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3412 // We have a 'srl/and' pair, extract the effective start bit and length
3413 Val = LHS.getNode()->getOperand(0);
3414 Start = LHS.getNode()->getOperand(1);
3415 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3416 if (StartConst) {
3417 uint64_t StartVal = StartConst->getZExtValue();
3418 // How many "good" bits do we have left? "good" is defined here as bits
3419 // that exist in the original value, not shifted in.
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00003420 uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003421 if (NumBits > GoodBits) {
3422 // Do not handle the case where bits have been shifted in. In theory
3423 // we could handle this, but the cost is likely higher than just
3424 // emitting the srl/and pair.
Justin Bogner8d83fb62016-05-13 21:12:53 +00003425 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003426 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003427 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003428 } else {
3429 // Do not handle the case where the shift amount (can be zero if no srl
3430 // was found) is not constant. We could handle this case, but it would
3431 // require run-time logic that would be more expensive than just
3432 // emitting the srl/and pair.
Justin Bogner8d83fb62016-05-13 21:12:53 +00003433 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003434 }
3435 } else {
3436 // Do not handle the case where the LHS of the and is not a shift. While
3437 // it would be trivial to handle this case, it would just transform
3438 // 'and' -> 'bfe', but 'and' has higher-throughput.
Justin Bogner8d83fb62016-05-13 21:12:53 +00003439 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003440 }
3441 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3442 if (LHS->getOpcode() == ISD::AND) {
3443 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3444 if (!ShiftCnst) {
3445 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003446 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003447 }
3448
3449 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3450
3451 SDValue AndLHS = LHS->getOperand(0);
3452 SDValue AndRHS = LHS->getOperand(1);
3453
3454 // Canonicalize the AND to have the mask on the RHS
3455 if (isa<ConstantSDNode>(AndLHS)) {
3456 std::swap(AndLHS, AndRHS);
3457 }
3458
3459 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3460 if (!MaskCnst) {
3461 // Mask must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003462 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003463 }
3464
3465 uint64_t MaskVal = MaskCnst->getZExtValue();
3466 uint64_t NumZeros;
3467 uint64_t NumBits;
3468 if (isMask_64(MaskVal)) {
3469 NumZeros = 0;
3470 // The number of bits in the result bitfield will be the number of
3471 // trailing ones (the AND) minus the number of bits we shift off
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00003472 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003473 } else if (isShiftedMask_64(MaskVal)) {
3474 NumZeros = countTrailingZeros(MaskVal);
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00003475 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003476 // The number of bits in the result bitfield will be the number of
3477 // trailing zeros plus the number of set bits in the mask minus the
3478 // number of bits we shift off
3479 NumBits = NumZeros + NumOnes - ShiftAmt;
3480 } else {
3481 // This is not a mask we can handle
Justin Bogner8d83fb62016-05-13 21:12:53 +00003482 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003483 }
3484
3485 if (ShiftAmt < NumZeros) {
3486 // Handling this case would require extra logic that would make this
3487 // transformation non-profitable
Justin Bogner8d83fb62016-05-13 21:12:53 +00003488 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003489 }
3490
3491 Val = AndLHS;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003492 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3493 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003494 } else if (LHS->getOpcode() == ISD::SHL) {
3495 // Here, we have a pattern like:
3496 //
3497 // (sra (shl val, NN), MM)
3498 // or
3499 // (srl (shl val, NN), MM)
3500 //
3501 // If MM >= NN, we can efficiently optimize this with bfe
3502 Val = LHS->getOperand(0);
3503
3504 SDValue ShlRHS = LHS->getOperand(1);
3505 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3506 if (!ShlCnst) {
3507 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003508 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003509 }
3510 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3511
3512 SDValue ShrRHS = RHS;
3513 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3514 if (!ShrCnst) {
3515 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003516 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003517 }
3518 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3519
3520 // To avoid extra codegen and be profitable, we need Outer >= Inner
3521 if (OuterShiftAmt < InnerShiftAmt) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00003522 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003523 }
3524
3525 // If the outer shift is more than the type size, we have no bitfield to
3526 // extract (since we also check that the inner shift is <= the outer shift
3527 // then this also implies that the inner shift is < the type size)
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00003528 if (OuterShiftAmt >= Val.getValueSizeInBits()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00003529 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003530 }
3531
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00003532 Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3533 MVT::i32);
3534 Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3535 DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003536
3537 if (N->getOpcode() == ISD::SRA) {
3538 // If we have a arithmetic right shift, we need to use the signed bfe
3539 // variant
3540 IsSigned = true;
3541 }
3542 } else {
3543 // No can do...
Justin Bogner8d83fb62016-05-13 21:12:53 +00003544 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003545 }
3546 } else {
3547 // No can do...
Justin Bogner8d83fb62016-05-13 21:12:53 +00003548 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003549 }
3550
3551
3552 unsigned Opc;
3553 // For the BFE operations we form here from "and" and "srl", always use the
3554 // unsigned variants.
3555 if (Val.getValueType() == MVT::i32) {
3556 if (IsSigned) {
3557 Opc = NVPTX::BFE_S32rii;
3558 } else {
3559 Opc = NVPTX::BFE_U32rii;
3560 }
3561 } else if (Val.getValueType() == MVT::i64) {
3562 if (IsSigned) {
3563 Opc = NVPTX::BFE_S64rii;
3564 } else {
3565 Opc = NVPTX::BFE_U64rii;
3566 }
3567 } else {
3568 // We cannot handle this type
Justin Bogner8d83fb62016-05-13 21:12:53 +00003569 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003570 }
3571
3572 SDValue Ops[] = {
3573 Val, Start, Len
3574 };
3575
Justin Bogner8d83fb62016-05-13 21:12:53 +00003576 ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3577 return true;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003578}
3579
Justin Holewinskiae556d32012-05-04 20:18:50 +00003580// SelectDirectAddr - Match a direct address for DAG.
3581// A direct address could be a globaladdress or externalsymbol.
3582bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3583 // Return true if TGA or ES.
Justin Holewinski0497ab12013-03-30 14:29:21 +00003584 if (N.getOpcode() == ISD::TargetGlobalAddress ||
3585 N.getOpcode() == ISD::TargetExternalSymbol) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003586 Address = N;
3587 return true;
3588 }
3589 if (N.getOpcode() == NVPTXISD::Wrapper) {
3590 Address = N.getOperand(0);
3591 return true;
3592 }
Artem Belevichb2e76a52016-07-20 18:39:47 +00003593 // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3594 if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3595 if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3596 CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
3597 CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
3598 return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003599 }
3600 return false;
3601}
3602
3603// symbol+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00003604bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3605 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003606 if (Addr.getOpcode() == ISD::ADD) {
3607 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00003608 SDValue base = Addr.getOperand(0);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003609 if (SelectDirectAddr(base, Base)) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003610 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3611 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003612 return true;
3613 }
3614 }
3615 }
3616 return false;
3617}
3618
3619// symbol+offset
3620bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3621 SDValue &Base, SDValue &Offset) {
3622 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3623}
3624
3625// symbol+offset
3626bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3627 SDValue &Base, SDValue &Offset) {
3628 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3629}
3630
3631// register+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00003632bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3633 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003634 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3635 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003636 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003637 return true;
3638 }
3639 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3640 Addr.getOpcode() == ISD::TargetGlobalAddress)
Justin Holewinski0497ab12013-03-30 14:29:21 +00003641 return false; // direct calls.
Justin Holewinskiae556d32012-05-04 20:18:50 +00003642
3643 if (Addr.getOpcode() == ISD::ADD) {
3644 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3645 return false;
3646 }
3647 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3648 if (FrameIndexSDNode *FIN =
Justin Holewinski0497ab12013-03-30 14:29:21 +00003649 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
Justin Holewinskiae556d32012-05-04 20:18:50 +00003650 // Constant offset from frame ref.
3651 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3652 else
3653 Base = Addr.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003654 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3655 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003656 return true;
3657 }
3658 }
3659 return false;
3660}
3661
3662// register+offset
3663bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3664 SDValue &Base, SDValue &Offset) {
3665 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3666}
3667
3668// register+offset
3669bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3670 SDValue &Base, SDValue &Offset) {
3671 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3672}
3673
3674bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3675 unsigned int spN) const {
Craig Topper062a2ba2014-04-25 05:30:21 +00003676 const Value *Src = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00003677 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +00003678 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3679 return true;
3680 Src = mN->getMemOperand()->getValue();
Justin Holewinskiae556d32012-05-04 20:18:50 +00003681 }
3682 if (!Src)
3683 return false;
Craig Toppere3dcce92015-08-01 22:20:21 +00003684 if (auto *PT = dyn_cast<PointerType>(Src->getType()))
Justin Holewinskiae556d32012-05-04 20:18:50 +00003685 return (PT->getAddressSpace() == spN);
3686 return false;
3687}
3688
3689/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3690/// inline asm expressions.
Justin Holewinski0497ab12013-03-30 14:29:21 +00003691bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
Daniel Sanders60f1db02015-03-13 12:45:09 +00003692 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003693 SDValue Op0, Op1;
Daniel Sanders60f1db02015-03-13 12:45:09 +00003694 switch (ConstraintID) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00003695 default:
3696 return true;
Daniel Sanders60f1db02015-03-13 12:45:09 +00003697 case InlineAsm::Constraint_m: // memory
Justin Holewinskiae556d32012-05-04 20:18:50 +00003698 if (SelectDirectAddr(Op, Op0)) {
3699 OutOps.push_back(Op0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003700 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
Justin Holewinskiae556d32012-05-04 20:18:50 +00003701 return false;
3702 }
3703 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3704 OutOps.push_back(Op0);
3705 OutOps.push_back(Op1);
3706 return false;
3707 }
3708 break;
3709 }
3710 return true;
3711}
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00003712
3713/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
3714/// conversion from \p SrcTy to \p DestTy.
3715unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
3716 bool IsSigned) {
3717 switch (SrcTy.SimpleTy) {
3718 default:
3719 llvm_unreachable("Unhandled source type");
3720 case MVT::i8:
3721 switch (DestTy.SimpleTy) {
3722 default:
3723 llvm_unreachable("Unhandled dest type");
3724 case MVT::i16:
3725 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3726 case MVT::i32:
3727 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3728 case MVT::i64:
3729 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3730 }
3731 case MVT::i16:
3732 switch (DestTy.SimpleTy) {
3733 default:
3734 llvm_unreachable("Unhandled dest type");
3735 case MVT::i8:
3736 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3737 case MVT::i32:
3738 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3739 case MVT::i64:
3740 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3741 }
3742 case MVT::i32:
3743 switch (DestTy.SimpleTy) {
3744 default:
3745 llvm_unreachable("Unhandled dest type");
3746 case MVT::i8:
3747 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3748 case MVT::i16:
3749 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3750 case MVT::i64:
3751 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3752 }
3753 case MVT::i64:
3754 switch (DestTy.SimpleTy) {
3755 default:
3756 llvm_unreachable("Unhandled dest type");
3757 case MVT::i8:
3758 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3759 case MVT::i16:
3760 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3761 case MVT::i32:
3762 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3763 }
3764 }
3765}