blob: 99305440eefac9b722b767e218dde23ff220dc49 [file] [log] [blame]
Justin Holewinskiae556d32012-05-04 20:18:50 +00001//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines an instruction selector for the NVPTX target.
11//
12//===----------------------------------------------------------------------===//
13
Justin Holewinskiae556d32012-05-04 20:18:50 +000014#include "NVPTXISelDAGToDAG.h"
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +000015#include "NVPTXUtilities.h"
Jingyue Wu48a9bdc2015-07-20 21:28:54 +000016#include "llvm/Analysis/ValueTracking.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000017#include "llvm/IR/GlobalValue.h"
18#include "llvm/IR/Instructions.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000019#include "llvm/Support/CommandLine.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000020#include "llvm/Support/Debug.h"
21#include "llvm/Support/ErrorHandling.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000022#include "llvm/Support/raw_ostream.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000023#include "llvm/Target/TargetIntrinsicInfo.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000024
Justin Holewinskiae556d32012-05-04 20:18:50 +000025using namespace llvm;
26
Chandler Carruth84e68b22014-04-22 02:41:26 +000027#define DEBUG_TYPE "nvptx-isel"
28
Justin Holewinskiae556d32012-05-04 20:18:50 +000029/// createNVPTXISelDag - This pass converts a legalized DAG into a
30/// NVPTX-specific DAG, ready for instruction scheduling.
31FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
32 llvm::CodeGenOpt::Level OptLevel) {
33 return new NVPTXDAGToDAGISel(TM, OptLevel);
34}
35
Justin Holewinskiae556d32012-05-04 20:18:50 +000036NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
37 CodeGenOpt::Level OptLevel)
Eric Christopher02389e32015-02-19 00:08:27 +000038 : SelectionDAGISel(tm, OptLevel), TM(tm) {
Justin Holewinskiae556d32012-05-04 20:18:50 +000039 doMulWide = (OptLevel > 0);
Justin Holewinskicd069e62013-07-22 12:18:04 +000040}
Justin Holewinskiae556d32012-05-04 20:18:50 +000041
Eric Christopher147bba22015-01-30 01:40:59 +000042bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
Justin Lebar077f8fb2017-01-21 01:00:14 +000043 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
44 return SelectionDAGISel::runOnMachineFunction(MF);
Eric Christopher147bba22015-01-30 01:40:59 +000045}
46
Justin Holewinskicd069e62013-07-22 12:18:04 +000047int NVPTXDAGToDAGISel::getDivF32Level() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000048 return Subtarget->getTargetLowering()->getDivF32Level();
Justin Holewinskicd069e62013-07-22 12:18:04 +000049}
Justin Holewinskiae556d32012-05-04 20:18:50 +000050
Justin Holewinskicd069e62013-07-22 12:18:04 +000051bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000052 return Subtarget->getTargetLowering()->usePrecSqrtF32();
Justin Holewinskicd069e62013-07-22 12:18:04 +000053}
54
55bool NVPTXDAGToDAGISel::useF32FTZ() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000056 return Subtarget->getTargetLowering()->useF32FTZ(*MF);
Justin Holewinskiae556d32012-05-04 20:18:50 +000057}
58
Justin Holewinski428cf0e2014-07-17 18:10:09 +000059bool NVPTXDAGToDAGISel::allowFMA() const {
Eric Christopher147bba22015-01-30 01:40:59 +000060 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
Justin Holewinski428cf0e2014-07-17 18:10:09 +000061 return TL->allowFMA(*MF, OptLevel);
62}
63
Artem Belevichd109f462017-01-13 18:48:13 +000064bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
65 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
66 return TL->allowUnsafeFPMath(*MF);
67}
68
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
///
/// Dispatches on the opcode of \p N to the hand-written selection helpers in
/// this class. When a `try*` helper returns true the node has been handled
/// and this function returns immediately; when it returns false, or when no
/// case matches, control falls through to SelectCode(N) at the bottom.
void NVPTXDAGToDAGISel::Select(SDNode *N) {

  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  switch (N->getOpcode()) {
  case ISD::LOAD:
    if (tryLoad(N))
      return;
    break;
  case ISD::STORE:
    if (tryStore(N))
      return;
    break;
  case ISD::EXTRACT_VECTOR_ELT:
    if (tryEXTRACT_VECTOR_ELEMENT(N))
      return;
    break;
  case NVPTXISD::SETP_F16X2:
    // Returns unconditionally: there is no SelectCode fallback for this node.
    SelectSETP_F16X2(N);
    return;

  // Vector loads/stores.
  case NVPTXISD::LoadV2:
  case NVPTXISD::LoadV4:
    if (tryLoadVector(N))
      return;
    break;
  case NVPTXISD::LDGV2:
  case NVPTXISD::LDGV4:
  case NVPTXISD::LDUV2:
  case NVPTXISD::LDUV4:
    if (tryLDGLDU(N))
      return;
    break;
  case NVPTXISD::StoreV2:
  case NVPTXISD::StoreV4:
    if (tryStoreVector(N))
      return;
    break;
  // Parameter and return-value traffic.
  case NVPTXISD::LoadParam:
  case NVPTXISD::LoadParamV2:
  case NVPTXISD::LoadParamV4:
    if (tryLoadParam(N))
      return;
    break;
  case NVPTXISD::StoreRetval:
  case NVPTXISD::StoreRetvalV2:
  case NVPTXISD::StoreRetvalV4:
    if (tryStoreRetval(N))
      return;
    break;
  case NVPTXISD::StoreParam:
  case NVPTXISD::StoreParamV2:
  case NVPTXISD::StoreParamV4:
  case NVPTXISD::StoreParamS32:
  case NVPTXISD::StoreParamU32:
    if (tryStoreParam(N))
      return;
    break;
  case ISD::INTRINSIC_WO_CHAIN:
    if (tryIntrinsicNoChain(N))
      return;
    break;
  case ISD::INTRINSIC_W_CHAIN:
    if (tryIntrinsicChain(N))
      return;
    break;
  // Every Tex*/Tld4* opcode is routed to tryTextureIntrinsic().
  case NVPTXISD::Tex1DFloatS32:
  case NVPTXISD::Tex1DFloatFloat:
  case NVPTXISD::Tex1DFloatFloatLevel:
  case NVPTXISD::Tex1DFloatFloatGrad:
  case NVPTXISD::Tex1DS32S32:
  case NVPTXISD::Tex1DS32Float:
  case NVPTXISD::Tex1DS32FloatLevel:
  case NVPTXISD::Tex1DS32FloatGrad:
  case NVPTXISD::Tex1DU32S32:
  case NVPTXISD::Tex1DU32Float:
  case NVPTXISD::Tex1DU32FloatLevel:
  case NVPTXISD::Tex1DU32FloatGrad:
  case NVPTXISD::Tex1DArrayFloatS32:
  case NVPTXISD::Tex1DArrayFloatFloat:
  case NVPTXISD::Tex1DArrayFloatFloatLevel:
  case NVPTXISD::Tex1DArrayFloatFloatGrad:
  case NVPTXISD::Tex1DArrayS32S32:
  case NVPTXISD::Tex1DArrayS32Float:
  case NVPTXISD::Tex1DArrayS32FloatLevel:
  case NVPTXISD::Tex1DArrayS32FloatGrad:
  case NVPTXISD::Tex1DArrayU32S32:
  case NVPTXISD::Tex1DArrayU32Float:
  case NVPTXISD::Tex1DArrayU32FloatLevel:
  case NVPTXISD::Tex1DArrayU32FloatGrad:
  case NVPTXISD::Tex2DFloatS32:
  case NVPTXISD::Tex2DFloatFloat:
  case NVPTXISD::Tex2DFloatFloatLevel:
  case NVPTXISD::Tex2DFloatFloatGrad:
  case NVPTXISD::Tex2DS32S32:
  case NVPTXISD::Tex2DS32Float:
  case NVPTXISD::Tex2DS32FloatLevel:
  case NVPTXISD::Tex2DS32FloatGrad:
  case NVPTXISD::Tex2DU32S32:
  case NVPTXISD::Tex2DU32Float:
  case NVPTXISD::Tex2DU32FloatLevel:
  case NVPTXISD::Tex2DU32FloatGrad:
  case NVPTXISD::Tex2DArrayFloatS32:
  case NVPTXISD::Tex2DArrayFloatFloat:
  case NVPTXISD::Tex2DArrayFloatFloatLevel:
  case NVPTXISD::Tex2DArrayFloatFloatGrad:
  case NVPTXISD::Tex2DArrayS32S32:
  case NVPTXISD::Tex2DArrayS32Float:
  case NVPTXISD::Tex2DArrayS32FloatLevel:
  case NVPTXISD::Tex2DArrayS32FloatGrad:
  case NVPTXISD::Tex2DArrayU32S32:
  case NVPTXISD::Tex2DArrayU32Float:
  case NVPTXISD::Tex2DArrayU32FloatLevel:
  case NVPTXISD::Tex2DArrayU32FloatGrad:
  case NVPTXISD::Tex3DFloatS32:
  case NVPTXISD::Tex3DFloatFloat:
  case NVPTXISD::Tex3DFloatFloatLevel:
  case NVPTXISD::Tex3DFloatFloatGrad:
  case NVPTXISD::Tex3DS32S32:
  case NVPTXISD::Tex3DS32Float:
  case NVPTXISD::Tex3DS32FloatLevel:
  case NVPTXISD::Tex3DS32FloatGrad:
  case NVPTXISD::Tex3DU32S32:
  case NVPTXISD::Tex3DU32Float:
  case NVPTXISD::Tex3DU32FloatLevel:
  case NVPTXISD::Tex3DU32FloatGrad:
  case NVPTXISD::TexCubeFloatFloat:
  case NVPTXISD::TexCubeFloatFloatLevel:
  case NVPTXISD::TexCubeS32Float:
  case NVPTXISD::TexCubeS32FloatLevel:
  case NVPTXISD::TexCubeU32Float:
  case NVPTXISD::TexCubeU32FloatLevel:
  case NVPTXISD::TexCubeArrayFloatFloat:
  case NVPTXISD::TexCubeArrayFloatFloatLevel:
  case NVPTXISD::TexCubeArrayS32Float:
  case NVPTXISD::TexCubeArrayS32FloatLevel:
  case NVPTXISD::TexCubeArrayU32Float:
  case NVPTXISD::TexCubeArrayU32FloatLevel:
  case NVPTXISD::Tld4R2DFloatFloat:
  case NVPTXISD::Tld4G2DFloatFloat:
  case NVPTXISD::Tld4B2DFloatFloat:
  case NVPTXISD::Tld4A2DFloatFloat:
  case NVPTXISD::Tld4R2DS64Float:
  case NVPTXISD::Tld4G2DS64Float:
  case NVPTXISD::Tld4B2DS64Float:
  case NVPTXISD::Tld4A2DS64Float:
  case NVPTXISD::Tld4R2DU64Float:
  case NVPTXISD::Tld4G2DU64Float:
  case NVPTXISD::Tld4B2DU64Float:
  case NVPTXISD::Tld4A2DU64Float:
  case NVPTXISD::TexUnified1DFloatS32:
  case NVPTXISD::TexUnified1DFloatFloat:
  case NVPTXISD::TexUnified1DFloatFloatLevel:
  case NVPTXISD::TexUnified1DFloatFloatGrad:
  case NVPTXISD::TexUnified1DS32S32:
  case NVPTXISD::TexUnified1DS32Float:
  case NVPTXISD::TexUnified1DS32FloatLevel:
  case NVPTXISD::TexUnified1DS32FloatGrad:
  case NVPTXISD::TexUnified1DU32S32:
  case NVPTXISD::TexUnified1DU32Float:
  case NVPTXISD::TexUnified1DU32FloatLevel:
  case NVPTXISD::TexUnified1DU32FloatGrad:
  case NVPTXISD::TexUnified1DArrayFloatS32:
  case NVPTXISD::TexUnified1DArrayFloatFloat:
  case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
  case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
  case NVPTXISD::TexUnified1DArrayS32S32:
  case NVPTXISD::TexUnified1DArrayS32Float:
  case NVPTXISD::TexUnified1DArrayS32FloatLevel:
  case NVPTXISD::TexUnified1DArrayS32FloatGrad:
  case NVPTXISD::TexUnified1DArrayU32S32:
  case NVPTXISD::TexUnified1DArrayU32Float:
  case NVPTXISD::TexUnified1DArrayU32FloatLevel:
  case NVPTXISD::TexUnified1DArrayU32FloatGrad:
  case NVPTXISD::TexUnified2DFloatS32:
  case NVPTXISD::TexUnified2DFloatFloat:
  case NVPTXISD::TexUnified2DFloatFloatLevel:
  case NVPTXISD::TexUnified2DFloatFloatGrad:
  case NVPTXISD::TexUnified2DS32S32:
  case NVPTXISD::TexUnified2DS32Float:
  case NVPTXISD::TexUnified2DS32FloatLevel:
  case NVPTXISD::TexUnified2DS32FloatGrad:
  case NVPTXISD::TexUnified2DU32S32:
  case NVPTXISD::TexUnified2DU32Float:
  case NVPTXISD::TexUnified2DU32FloatLevel:
  case NVPTXISD::TexUnified2DU32FloatGrad:
  case NVPTXISD::TexUnified2DArrayFloatS32:
  case NVPTXISD::TexUnified2DArrayFloatFloat:
  case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
  case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
  case NVPTXISD::TexUnified2DArrayS32S32:
  case NVPTXISD::TexUnified2DArrayS32Float:
  case NVPTXISD::TexUnified2DArrayS32FloatLevel:
  case NVPTXISD::TexUnified2DArrayS32FloatGrad:
  case NVPTXISD::TexUnified2DArrayU32S32:
  case NVPTXISD::TexUnified2DArrayU32Float:
  case NVPTXISD::TexUnified2DArrayU32FloatLevel:
  case NVPTXISD::TexUnified2DArrayU32FloatGrad:
  case NVPTXISD::TexUnified3DFloatS32:
  case NVPTXISD::TexUnified3DFloatFloat:
  case NVPTXISD::TexUnified3DFloatFloatLevel:
  case NVPTXISD::TexUnified3DFloatFloatGrad:
  case NVPTXISD::TexUnified3DS32S32:
  case NVPTXISD::TexUnified3DS32Float:
  case NVPTXISD::TexUnified3DS32FloatLevel:
  case NVPTXISD::TexUnified3DS32FloatGrad:
  case NVPTXISD::TexUnified3DU32S32:
  case NVPTXISD::TexUnified3DU32Float:
  case NVPTXISD::TexUnified3DU32FloatLevel:
  case NVPTXISD::TexUnified3DU32FloatGrad:
  case NVPTXISD::TexUnifiedCubeFloatFloat:
  case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
  case NVPTXISD::TexUnifiedCubeS32Float:
  case NVPTXISD::TexUnifiedCubeS32FloatLevel:
  case NVPTXISD::TexUnifiedCubeU32Float:
  case NVPTXISD::TexUnifiedCubeU32FloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
  case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayS32Float:
  case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
  case NVPTXISD::TexUnifiedCubeArrayU32Float:
  case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
  case NVPTXISD::Tld4UnifiedR2DFloatFloat:
  case NVPTXISD::Tld4UnifiedG2DFloatFloat:
  case NVPTXISD::Tld4UnifiedB2DFloatFloat:
  case NVPTXISD::Tld4UnifiedA2DFloatFloat:
  case NVPTXISD::Tld4UnifiedR2DS64Float:
  case NVPTXISD::Tld4UnifiedG2DS64Float:
  case NVPTXISD::Tld4UnifiedB2DS64Float:
  case NVPTXISD::Tld4UnifiedA2DS64Float:
  case NVPTXISD::Tld4UnifiedR2DU64Float:
  case NVPTXISD::Tld4UnifiedG2DU64Float:
  case NVPTXISD::Tld4UnifiedB2DU64Float:
  case NVPTXISD::Tld4UnifiedA2DU64Float:
    if (tryTextureIntrinsic(N))
      return;
    break;
  // Every Suld* opcode (Clamp, Trap and Zero variants) is routed to
  // trySurfaceIntrinsic().
  case NVPTXISD::Suld1DI8Clamp:
  case NVPTXISD::Suld1DI16Clamp:
  case NVPTXISD::Suld1DI32Clamp:
  case NVPTXISD::Suld1DI64Clamp:
  case NVPTXISD::Suld1DV2I8Clamp:
  case NVPTXISD::Suld1DV2I16Clamp:
  case NVPTXISD::Suld1DV2I32Clamp:
  case NVPTXISD::Suld1DV2I64Clamp:
  case NVPTXISD::Suld1DV4I8Clamp:
  case NVPTXISD::Suld1DV4I16Clamp:
  case NVPTXISD::Suld1DV4I32Clamp:
  case NVPTXISD::Suld1DArrayI8Clamp:
  case NVPTXISD::Suld1DArrayI16Clamp:
  case NVPTXISD::Suld1DArrayI32Clamp:
  case NVPTXISD::Suld1DArrayI64Clamp:
  case NVPTXISD::Suld1DArrayV2I8Clamp:
  case NVPTXISD::Suld1DArrayV2I16Clamp:
  case NVPTXISD::Suld1DArrayV2I32Clamp:
  case NVPTXISD::Suld1DArrayV2I64Clamp:
  case NVPTXISD::Suld1DArrayV4I8Clamp:
  case NVPTXISD::Suld1DArrayV4I16Clamp:
  case NVPTXISD::Suld1DArrayV4I32Clamp:
  case NVPTXISD::Suld2DI8Clamp:
  case NVPTXISD::Suld2DI16Clamp:
  case NVPTXISD::Suld2DI32Clamp:
  case NVPTXISD::Suld2DI64Clamp:
  case NVPTXISD::Suld2DV2I8Clamp:
  case NVPTXISD::Suld2DV2I16Clamp:
  case NVPTXISD::Suld2DV2I32Clamp:
  case NVPTXISD::Suld2DV2I64Clamp:
  case NVPTXISD::Suld2DV4I8Clamp:
  case NVPTXISD::Suld2DV4I16Clamp:
  case NVPTXISD::Suld2DV4I32Clamp:
  case NVPTXISD::Suld2DArrayI8Clamp:
  case NVPTXISD::Suld2DArrayI16Clamp:
  case NVPTXISD::Suld2DArrayI32Clamp:
  case NVPTXISD::Suld2DArrayI64Clamp:
  case NVPTXISD::Suld2DArrayV2I8Clamp:
  case NVPTXISD::Suld2DArrayV2I16Clamp:
  case NVPTXISD::Suld2DArrayV2I32Clamp:
  case NVPTXISD::Suld2DArrayV2I64Clamp:
  case NVPTXISD::Suld2DArrayV4I8Clamp:
  case NVPTXISD::Suld2DArrayV4I16Clamp:
  case NVPTXISD::Suld2DArrayV4I32Clamp:
  case NVPTXISD::Suld3DI8Clamp:
  case NVPTXISD::Suld3DI16Clamp:
  case NVPTXISD::Suld3DI32Clamp:
  case NVPTXISD::Suld3DI64Clamp:
  case NVPTXISD::Suld3DV2I8Clamp:
  case NVPTXISD::Suld3DV2I16Clamp:
  case NVPTXISD::Suld3DV2I32Clamp:
  case NVPTXISD::Suld3DV2I64Clamp:
  case NVPTXISD::Suld3DV4I8Clamp:
  case NVPTXISD::Suld3DV4I16Clamp:
  case NVPTXISD::Suld3DV4I32Clamp:
  case NVPTXISD::Suld1DI8Trap:
  case NVPTXISD::Suld1DI16Trap:
  case NVPTXISD::Suld1DI32Trap:
  case NVPTXISD::Suld1DI64Trap:
  case NVPTXISD::Suld1DV2I8Trap:
  case NVPTXISD::Suld1DV2I16Trap:
  case NVPTXISD::Suld1DV2I32Trap:
  case NVPTXISD::Suld1DV2I64Trap:
  case NVPTXISD::Suld1DV4I8Trap:
  case NVPTXISD::Suld1DV4I16Trap:
  case NVPTXISD::Suld1DV4I32Trap:
  case NVPTXISD::Suld1DArrayI8Trap:
  case NVPTXISD::Suld1DArrayI16Trap:
  case NVPTXISD::Suld1DArrayI32Trap:
  case NVPTXISD::Suld1DArrayI64Trap:
  case NVPTXISD::Suld1DArrayV2I8Trap:
  case NVPTXISD::Suld1DArrayV2I16Trap:
  case NVPTXISD::Suld1DArrayV2I32Trap:
  case NVPTXISD::Suld1DArrayV2I64Trap:
  case NVPTXISD::Suld1DArrayV4I8Trap:
  case NVPTXISD::Suld1DArrayV4I16Trap:
  case NVPTXISD::Suld1DArrayV4I32Trap:
  case NVPTXISD::Suld2DI8Trap:
  case NVPTXISD::Suld2DI16Trap:
  case NVPTXISD::Suld2DI32Trap:
  case NVPTXISD::Suld2DI64Trap:
  case NVPTXISD::Suld2DV2I8Trap:
  case NVPTXISD::Suld2DV2I16Trap:
  case NVPTXISD::Suld2DV2I32Trap:
  case NVPTXISD::Suld2DV2I64Trap:
  case NVPTXISD::Suld2DV4I8Trap:
  case NVPTXISD::Suld2DV4I16Trap:
  case NVPTXISD::Suld2DV4I32Trap:
  case NVPTXISD::Suld2DArrayI8Trap:
  case NVPTXISD::Suld2DArrayI16Trap:
  case NVPTXISD::Suld2DArrayI32Trap:
  case NVPTXISD::Suld2DArrayI64Trap:
  case NVPTXISD::Suld2DArrayV2I8Trap:
  case NVPTXISD::Suld2DArrayV2I16Trap:
  case NVPTXISD::Suld2DArrayV2I32Trap:
  case NVPTXISD::Suld2DArrayV2I64Trap:
  case NVPTXISD::Suld2DArrayV4I8Trap:
  case NVPTXISD::Suld2DArrayV4I16Trap:
  case NVPTXISD::Suld2DArrayV4I32Trap:
  case NVPTXISD::Suld3DI8Trap:
  case NVPTXISD::Suld3DI16Trap:
  case NVPTXISD::Suld3DI32Trap:
  case NVPTXISD::Suld3DI64Trap:
  case NVPTXISD::Suld3DV2I8Trap:
  case NVPTXISD::Suld3DV2I16Trap:
  case NVPTXISD::Suld3DV2I32Trap:
  case NVPTXISD::Suld3DV2I64Trap:
  case NVPTXISD::Suld3DV4I8Trap:
  case NVPTXISD::Suld3DV4I16Trap:
  case NVPTXISD::Suld3DV4I32Trap:
  case NVPTXISD::Suld1DI8Zero:
  case NVPTXISD::Suld1DI16Zero:
  case NVPTXISD::Suld1DI32Zero:
  case NVPTXISD::Suld1DI64Zero:
  case NVPTXISD::Suld1DV2I8Zero:
  case NVPTXISD::Suld1DV2I16Zero:
  case NVPTXISD::Suld1DV2I32Zero:
  case NVPTXISD::Suld1DV2I64Zero:
  case NVPTXISD::Suld1DV4I8Zero:
  case NVPTXISD::Suld1DV4I16Zero:
  case NVPTXISD::Suld1DV4I32Zero:
  case NVPTXISD::Suld1DArrayI8Zero:
  case NVPTXISD::Suld1DArrayI16Zero:
  case NVPTXISD::Suld1DArrayI32Zero:
  case NVPTXISD::Suld1DArrayI64Zero:
  case NVPTXISD::Suld1DArrayV2I8Zero:
  case NVPTXISD::Suld1DArrayV2I16Zero:
  case NVPTXISD::Suld1DArrayV2I32Zero:
  case NVPTXISD::Suld1DArrayV2I64Zero:
  case NVPTXISD::Suld1DArrayV4I8Zero:
  case NVPTXISD::Suld1DArrayV4I16Zero:
  case NVPTXISD::Suld1DArrayV4I32Zero:
  case NVPTXISD::Suld2DI8Zero:
  case NVPTXISD::Suld2DI16Zero:
  case NVPTXISD::Suld2DI32Zero:
  case NVPTXISD::Suld2DI64Zero:
  case NVPTXISD::Suld2DV2I8Zero:
  case NVPTXISD::Suld2DV2I16Zero:
  case NVPTXISD::Suld2DV2I32Zero:
  case NVPTXISD::Suld2DV2I64Zero:
  case NVPTXISD::Suld2DV4I8Zero:
  case NVPTXISD::Suld2DV4I16Zero:
  case NVPTXISD::Suld2DV4I32Zero:
  case NVPTXISD::Suld2DArrayI8Zero:
  case NVPTXISD::Suld2DArrayI16Zero:
  case NVPTXISD::Suld2DArrayI32Zero:
  case NVPTXISD::Suld2DArrayI64Zero:
  case NVPTXISD::Suld2DArrayV2I8Zero:
  case NVPTXISD::Suld2DArrayV2I16Zero:
  case NVPTXISD::Suld2DArrayV2I32Zero:
  case NVPTXISD::Suld2DArrayV2I64Zero:
  case NVPTXISD::Suld2DArrayV4I8Zero:
  case NVPTXISD::Suld2DArrayV4I16Zero:
  case NVPTXISD::Suld2DArrayV4I32Zero:
  case NVPTXISD::Suld3DI8Zero:
  case NVPTXISD::Suld3DI16Zero:
  case NVPTXISD::Suld3DI32Zero:
  case NVPTXISD::Suld3DI64Zero:
  case NVPTXISD::Suld3DV2I8Zero:
  case NVPTXISD::Suld3DV2I16Zero:
  case NVPTXISD::Suld3DV2I32Zero:
  case NVPTXISD::Suld3DV2I64Zero:
  case NVPTXISD::Suld3DV4I8Zero:
  case NVPTXISD::Suld3DV4I16Zero:
  case NVPTXISD::Suld3DV4I32Zero:
    if (trySurfaceIntrinsic(N))
      return;
    break;
  case ISD::AND:
  case ISD::SRA:
  case ISD::SRL:
    // Try to select BFE
    if (tryBFE(N))
      return;
    break;
  case ISD::ADDRSPACECAST:
    // Returns unconditionally: there is no SelectCode fallback for this node.
    SelectAddrSpaceCast(N);
    return;
  case ISD::ConstantFP:
    if (tryConstantFP16(N))
      return;
    break;
  default:
    break;
  }
  // No custom selection applied (or the helper declined); fall back to
  // SelectCode().
  SelectCode(N);
}
498
Justin Bogner8d83fb62016-05-13 21:12:53 +0000499bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000500 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
501 switch (IID) {
502 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000503 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000504 case Intrinsic::nvvm_ldg_global_f:
505 case Intrinsic::nvvm_ldg_global_i:
506 case Intrinsic::nvvm_ldg_global_p:
507 case Intrinsic::nvvm_ldu_global_f:
508 case Intrinsic::nvvm_ldu_global_i:
509 case Intrinsic::nvvm_ldu_global_p:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000510 return tryLDGLDU(N);
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000511 }
512}
513
Artem Belevich64dc9be2017-01-13 20:56:17 +0000514// There's no way to specify FP16 immediates in .f16 ops, so we have to
515// load them into an .f16 register first.
516bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
517 if (N->getValueType(0) != MVT::f16)
518 return false;
519 SDValue Val = CurDAG->getTargetConstantFP(
520 cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
521 SDNode *LoadConstF16 =
522 CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
523 ReplaceNode(N, LoadConstF16);
524 return true;
525}
526
Artem Belevich620db1f2017-02-23 22:38:24 +0000527// Map ISD:CONDCODE value to appropriate CmpMode expected by
528// NVPTXInstPrinter::printCmpMode()
529static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
530 using NVPTX::PTXCmpMode::CmpMode;
531 unsigned PTXCmpMode = [](ISD::CondCode CC) {
532 switch (CC) {
533 default:
534 llvm_unreachable("Unexpected condition code.");
535 case ISD::SETOEQ:
536 return CmpMode::EQ;
537 case ISD::SETOGT:
538 return CmpMode::GT;
539 case ISD::SETOGE:
540 return CmpMode::GE;
541 case ISD::SETOLT:
542 return CmpMode::LT;
543 case ISD::SETOLE:
544 return CmpMode::LE;
545 case ISD::SETONE:
546 return CmpMode::NE;
547 case ISD::SETO:
548 return CmpMode::NUM;
549 case ISD::SETUO:
550 return CmpMode::NotANumber;
551 case ISD::SETUEQ:
552 return CmpMode::EQU;
553 case ISD::SETUGT:
554 return CmpMode::GTU;
555 case ISD::SETUGE:
556 return CmpMode::GEU;
557 case ISD::SETULT:
558 return CmpMode::LTU;
559 case ISD::SETULE:
560 return CmpMode::LEU;
561 case ISD::SETUNE:
562 return CmpMode::NEU;
563 case ISD::SETEQ:
564 return CmpMode::EQ;
565 case ISD::SETGT:
566 return CmpMode::GT;
567 case ISD::SETGE:
568 return CmpMode::GE;
569 case ISD::SETLT:
570 return CmpMode::LT;
571 case ISD::SETLE:
572 return CmpMode::LE;
573 case ISD::SETNE:
574 return CmpMode::NE;
575 }
576 }(CondCode.get());
577
578 if (FTZ)
579 PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
580
581 return PTXCmpMode;
582}
583
584bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
585 unsigned PTXCmpMode =
586 getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
587 SDLoc DL(N);
588 SDNode *SetP = CurDAG->getMachineNode(
589 NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
590 N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
591 ReplaceNode(N, SetP);
592 return true;
593}
594
595// Find all instances of extract_vector_elt that use this v2f16 vector
596// and coalesce them into a scattering move instruction.
597bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
598 SDValue Vector = N->getOperand(0);
599
600 // We only care about f16x2 as it's the only real vector type we
601 // need to deal with.
602 if (Vector.getSimpleValueType() != MVT::v2f16)
603 return false;
604
605 // Find and record all uses of this vector that extract element 0 or 1.
606 SmallVector<SDNode *, 4> E0, E1;
607 for (const auto &U : Vector.getNode()->uses()) {
608 if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
609 continue;
610 if (U->getOperand(0) != Vector)
611 continue;
612 if (const ConstantSDNode *IdxConst =
613 dyn_cast<ConstantSDNode>(U->getOperand(1))) {
614 if (IdxConst->getZExtValue() == 0)
615 E0.push_back(U);
616 else if (IdxConst->getZExtValue() == 1)
617 E1.push_back(U);
618 else
619 llvm_unreachable("Invalid vector index.");
620 }
621 }
622
623 // There's no point scattering f16x2 if we only ever access one
624 // element of it.
625 if (E0.empty() || E1.empty())
626 return false;
627
628 unsigned Op = NVPTX::SplitF16x2;
629 // If the vector has been BITCAST'ed from i32, we can use original
630 // value directly and avoid register-to-register move.
631 SDValue Source = Vector;
632 if (Vector->getOpcode() == ISD::BITCAST) {
633 Op = NVPTX::SplitI32toF16x2;
634 Source = Vector->getOperand(0);
635 }
636 // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
637 // into f16,f16 SplitF16x2(V)
638 SDNode *ScatterOp =
639 CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
640 for (auto *Node : E0)
641 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
642 for (auto *Node : E1)
643 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
644
645 return true;
646}
647
Eric Christopher9745b3a2015-01-30 01:41:01 +0000648static unsigned int getCodeAddrSpace(MemSDNode *N) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +0000649 const Value *Src = N->getMemOperand()->getValue();
Justin Holewinskib96d1392013-06-10 13:29:47 +0000650
Justin Holewinskiae556d32012-05-04 20:18:50 +0000651 if (!Src)
Justin Holewinskib96d1392013-06-10 13:29:47 +0000652 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000653
Craig Toppere3dcce92015-08-01 22:20:21 +0000654 if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000655 switch (PT->getAddressSpace()) {
Justin Holewinskib96d1392013-06-10 13:29:47 +0000656 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
657 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
658 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
659 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
660 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
661 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
662 default: break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000663 }
664 }
Justin Holewinskib96d1392013-06-10 13:29:47 +0000665 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000666}
667
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000668static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000669 unsigned CodeAddrSpace, MachineFunction *F) {
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000670 // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
671 // space.
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000672 //
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000673 // We have two ways of identifying invariant loads: Loads may be explicitly
674 // marked as invariant, or we may infer them to be invariant.
675 //
Justin Lebarfaaf2d22018-02-28 23:58:05 +0000676 // We currently infer invariance for loads from
677 // - constant global variables, and
678 // - kernel function pointer params that are noalias (i.e. __restrict) and
679 // never written to.
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000680 //
681 // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
682 // not during the SelectionDAG phase).
683 //
684 // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
685 // explicitly invariant loads because these are how clang tells us to use ldg
686 // when the user uses a builtin.
687 if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000688 return false;
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000689
690 if (N->isInvariant())
691 return true;
692
Justin Lebarfaaf2d22018-02-28 23:58:05 +0000693 bool IsKernelFn = isKernelFunction(F->getFunction());
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000694
Justin Lebarfaaf2d22018-02-28 23:58:05 +0000695 // We use GetUnderlyingObjects() here instead of GetUnderlyingObject() mainly
696 // because the former looks through phi nodes while the latter does not. We
697 // need to look through phi nodes to handle pointer induction variables.
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000698 SmallVector<Value *, 8> Objs;
699 GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
700 Objs, F->getDataLayout());
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000701
Justin Lebarfaaf2d22018-02-28 23:58:05 +0000702 return all_of(Objs, [&](Value *V) {
703 if (auto *A = dyn_cast<const Argument>(V))
704 return IsKernelFn && A->onlyReadsMemory() && A->hasNoAliasAttr();
705 if (auto *GV = dyn_cast<const GlobalVariable>(V))
706 return GV->isConstant();
707 return false;
708 });
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000709}
710
Justin Bogner8d83fb62016-05-13 21:12:53 +0000711bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +0000712 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
713 switch (IID) {
714 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000715 return false;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000716 case Intrinsic::nvvm_texsurf_handle_internal:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000717 SelectTexSurfHandle(N);
718 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000719 }
720}
721
Justin Bogner8d83fb62016-05-13 21:12:53 +0000722void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +0000723 // Op 0 is the intrinsic ID
724 SDValue Wrapper = N->getOperand(1);
725 SDValue GlobalVal = Wrapper.getOperand(0);
Justin Bogner8d83fb62016-05-13 21:12:53 +0000726 ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
727 MVT::i64, GlobalVal));
Justin Holewinski30d56a72014-04-09 15:39:15 +0000728}
729
Justin Bogner8d83fb62016-05-13 21:12:53 +0000730void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000731 SDValue Src = N->getOperand(0);
732 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
733 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
734 unsigned DstAddrSpace = CastN->getDestAddressSpace();
735
736 assert(SrcAddrSpace != DstAddrSpace &&
737 "addrspacecast must be between different address spaces");
738
739 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
740 // Specific to generic
741 unsigned Opc;
742 switch (SrcAddrSpace) {
743 default: report_fatal_error("Bad address space in addrspacecast");
744 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000745 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000746 break;
747 case ADDRESS_SPACE_SHARED:
Eric Christopher02389e32015-02-19 00:08:27 +0000748 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000749 break;
750 case ADDRESS_SPACE_CONST:
Eric Christopher02389e32015-02-19 00:08:27 +0000751 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000752 break;
753 case ADDRESS_SPACE_LOCAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000754 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000755 break;
756 }
Justin Bogner8d83fb62016-05-13 21:12:53 +0000757 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
758 Src));
759 return;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000760 } else {
761 // Generic to specific
762 if (SrcAddrSpace != 0)
763 report_fatal_error("Cannot cast between two non-generic address spaces");
764 unsigned Opc;
765 switch (DstAddrSpace) {
766 default: report_fatal_error("Bad address space in addrspacecast");
767 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000768 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
769 : NVPTX::cvta_to_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000770 break;
771 case ADDRESS_SPACE_SHARED:
Eric Christopher02389e32015-02-19 00:08:27 +0000772 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
773 : NVPTX::cvta_to_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000774 break;
775 case ADDRESS_SPACE_CONST:
Eric Christopher02389e32015-02-19 00:08:27 +0000776 Opc =
777 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000778 break;
779 case ADDRESS_SPACE_LOCAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000780 Opc =
781 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000782 break;
Jingyue Wua2f60272015-06-04 21:28:26 +0000783 case ADDRESS_SPACE_PARAM:
784 Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
785 : NVPTX::nvvm_ptr_gen_to_param;
786 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000787 }
Justin Bogner8d83fb62016-05-13 21:12:53 +0000788 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
789 Src));
790 return;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000791 }
792}
793
Artem Belevichee7dd122017-03-02 19:14:14 +0000794// Helper function template to reduce amount of boilerplate code for
795// opcode selection.
796static Optional<unsigned> pickOpcodeForVT(
797 MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
798 unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
799 unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
800 switch (VT) {
801 case MVT::i1:
802 case MVT::i8:
803 return Opcode_i8;
804 case MVT::i16:
805 return Opcode_i16;
806 case MVT::i32:
807 return Opcode_i32;
808 case MVT::i64:
809 return Opcode_i64;
810 case MVT::f16:
811 return Opcode_f16;
812 case MVT::v2f16:
813 return Opcode_f16x2;
814 case MVT::f32:
815 return Opcode_f32;
816 case MVT::f64:
817 return Opcode_f64;
818 default:
819 return None;
820 }
821}
822
Justin Bogner8d83fb62016-05-13 21:12:53 +0000823bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000824 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000825 LoadSDNode *LD = cast<LoadSDNode>(N);
826 EVT LoadedVT = LD->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +0000827 SDNode *NVPTXLD = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000828
829 // do not support pre/post inc/dec
830 if (LD->isIndexed())
Justin Bogner8d83fb62016-05-13 21:12:53 +0000831 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000832
833 if (!LoadedVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +0000834 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000835
836 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +0000837 unsigned int codeAddrSpace = getCodeAddrSpace(LD);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000838
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000839 if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
Justin Bogner8d83fb62016-05-13 21:12:53 +0000840 return tryLDGLDU(N);
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000841 }
842
Justin Holewinskiae556d32012-05-04 20:18:50 +0000843 // Volatile Setting
844 // - .volatile is only availalble for .global and .shared
845 bool isVolatile = LD->isVolatile();
846 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
847 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
848 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
849 isVolatile = false;
850
Justin Holewinskiae556d32012-05-04 20:18:50 +0000851 // Type Setting: fromType + fromTypeWidth
852 //
853 // Sign : ISD::SEXTLOAD
854 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
855 // type is integer
856 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
Artem Belevich620db1f2017-02-23 22:38:24 +0000857 MVT SimpleVT = LoadedVT.getSimpleVT();
Justin Holewinskiae556d32012-05-04 20:18:50 +0000858 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +0000859 // Read at least 8 bits (predicates are stored as 8-bit values)
860 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskiae556d32012-05-04 20:18:50 +0000861 unsigned int fromType;
Artem Belevich620db1f2017-02-23 22:38:24 +0000862
863 // Vector Setting
864 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
865 if (SimpleVT.isVector()) {
866 assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
867 // v2f16 is loaded using ld.b32
868 fromTypeWidth = 32;
869 }
870
Justin Holewinskiae556d32012-05-04 20:18:50 +0000871 if ((LD->getExtensionType() == ISD::SEXTLOAD))
872 fromType = NVPTX::PTXLdStInstCode::Signed;
873 else if (ScalarVT.isFloatingPoint())
Artem Belevich64dc9be2017-01-13 20:56:17 +0000874 // f16 uses .b16 as its storage type.
875 fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
876 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000877 else
878 fromType = NVPTX::PTXLdStInstCode::Unsigned;
879
880 // Create the machine instruction DAG
881 SDValue Chain = N->getOperand(0);
882 SDValue N1 = N->getOperand(1);
883 SDValue Addr;
884 SDValue Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +0000885 Optional<unsigned> Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +0000886 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000887
888 if (SelectDirectAddr(N1, Addr)) {
Artem Belevichee7dd122017-03-02 19:14:14 +0000889 Opcode = pickOpcodeForVT(
890 TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
891 NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
892 NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
893 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +0000894 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000895 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
896 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
897 getI32Imm(fromTypeWidth, dl), Addr, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000898 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
899 MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +0000900 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
901 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +0000902 Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
903 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
904 NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
905 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
906 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +0000907 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000908 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
909 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
910 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000911 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
912 MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +0000913 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
914 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +0000915 if (TM.is64Bit())
916 Opcode = pickOpcodeForVT(
917 TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
918 NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
919 NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
920 else
921 Opcode = pickOpcodeForVT(
922 TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
923 NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
924 NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
925 if (!Opcode)
926 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000927 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
928 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
929 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000930 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
931 MVT::Other, Ops);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000932 } else {
Artem Belevichee7dd122017-03-02 19:14:14 +0000933 if (TM.is64Bit())
934 Opcode = pickOpcodeForVT(
935 TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
936 NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
937 NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
938 NVPTX::LD_f64_areg_64);
939 else
940 Opcode = pickOpcodeForVT(
941 TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
942 NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
943 NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
944 if (!Opcode)
945 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000946 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
947 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
948 getI32Imm(fromTypeWidth, dl), N1, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000949 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
950 MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000951 }
952
Justin Bogner8d83fb62016-05-13 21:12:53 +0000953 if (!NVPTXLD)
954 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000955
Justin Bogner8d83fb62016-05-13 21:12:53 +0000956 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
957 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
958 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
959
960 ReplaceNode(N, NVPTXLD);
961 return true;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000962}
963
Justin Bogner8d83fb62016-05-13 21:12:53 +0000964bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000965
966 SDValue Chain = N->getOperand(0);
967 SDValue Op1 = N->getOperand(1);
968 SDValue Addr, Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +0000969 Optional<unsigned> Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +0000970 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000971 SDNode *LD;
972 MemSDNode *MemSD = cast<MemSDNode>(N);
973 EVT LoadedVT = MemSD->getMemoryVT();
974
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000975 if (!LoadedVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +0000976 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000977
978 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +0000979 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000980
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000981 if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
Justin Bogner8d83fb62016-05-13 21:12:53 +0000982 return tryLDGLDU(N);
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000983 }
984
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000985 // Volatile Setting
986 // - .volatile is only availalble for .global and .shared
987 bool IsVolatile = MemSD->isVolatile();
988 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
989 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
990 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
991 IsVolatile = false;
992
993 // Vector Setting
994 MVT SimpleVT = LoadedVT.getSimpleVT();
995
996 // Type Setting: fromType + fromTypeWidth
997 //
998 // Sign : ISD::SEXTLOAD
999 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
1000 // type is integer
1001 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
1002 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +00001003 // Read at least 8 bits (predicates are stored as 8-bit values)
1004 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001005 unsigned int FromType;
1006 // The last operand holds the original LoadSDNode::getExtensionType() value
Justin Holewinski0497ab12013-03-30 14:29:21 +00001007 unsigned ExtensionType = cast<ConstantSDNode>(
1008 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001009 if (ExtensionType == ISD::SEXTLOAD)
1010 FromType = NVPTX::PTXLdStInstCode::Signed;
1011 else if (ScalarVT.isFloatingPoint())
Artem Belevich620db1f2017-02-23 22:38:24 +00001012 FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1013 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001014 else
1015 FromType = NVPTX::PTXLdStInstCode::Unsigned;
1016
1017 unsigned VecType;
1018
1019 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001020 case NVPTXISD::LoadV2:
1021 VecType = NVPTX::PTXLdStInstCode::V2;
1022 break;
1023 case NVPTXISD::LoadV4:
1024 VecType = NVPTX::PTXLdStInstCode::V4;
1025 break;
1026 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001027 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001028 }
1029
1030 EVT EltVT = N->getValueType(0);
1031
Artem Belevich620db1f2017-02-23 22:38:24 +00001032 // v8f16 is a special case. PTX doesn't have ld.v8.f16
1033 // instruction. Instead, we split the vector into v2f16 chunks and
1034 // load them with ld.v4.b32.
1035 if (EltVT == MVT::v2f16) {
1036 assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
1037 EltVT = MVT::i32;
1038 FromType = NVPTX::PTXLdStInstCode::Untyped;
1039 FromTypeWidth = 32;
1040 }
1041
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001042 if (SelectDirectAddr(Op1, Addr)) {
1043 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001044 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001045 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001046 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001047 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1048 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1049 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1050 NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
1051 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001052 break;
1053 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001054 Opcode =
1055 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
1056 NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
1057 NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
1058 NVPTX::LDV_f32_v4_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001059 break;
1060 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001061 if (!Opcode)
1062 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001063 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1064 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1065 getI32Imm(FromTypeWidth, DL), Addr, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001066 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001067 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1068 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001069 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001070 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001071 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001072 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001073 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1074 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1075 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1076 NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
1077 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001078 break;
1079 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001080 Opcode =
1081 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
1082 NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
1083 NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
1084 NVPTX::LDV_f32_v4_asi, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001085 break;
1086 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001087 if (!Opcode)
1088 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001089 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1090 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1091 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001092 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001093 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1094 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1095 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001096 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001097 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001098 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001099 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001100 Opcode = pickOpcodeForVT(
1101 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
1102 NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
1103 NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
1104 NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
1105 NVPTX::LDV_f64_v2_ari_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001106 break;
1107 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001108 Opcode = pickOpcodeForVT(
1109 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
1110 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
1111 NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
1112 NVPTX::LDV_f32_v4_ari_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001113 break;
1114 }
1115 } else {
1116 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001117 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001118 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001119 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001120 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1121 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1122 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1123 NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
1124 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001125 break;
1126 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001127 Opcode =
1128 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
1129 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
1130 NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
1131 NVPTX::LDV_f32_v4_ari, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001132 break;
1133 }
1134 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001135 if (!Opcode)
1136 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001137 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1138 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1139 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001140
Artem Belevichee7dd122017-03-02 19:14:14 +00001141 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001142 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001143 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001144 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001145 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001146 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001147 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001148 Opcode = pickOpcodeForVT(
1149 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
1150 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1151 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
1152 NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1153 NVPTX::LDV_f64_v2_areg_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001154 break;
1155 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001156 Opcode = pickOpcodeForVT(
1157 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
1158 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
1159 NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
1160 NVPTX::LDV_f32_v4_areg_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001161 break;
1162 }
1163 } else {
1164 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001165 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001166 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001167 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001168 Opcode =
1169 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
1170 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1171 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
1172 NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
1173 NVPTX::LDV_f64_v2_areg);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001174 break;
1175 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001176 Opcode = pickOpcodeForVT(
1177 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
1178 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
1179 NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
1180 NVPTX::LDV_f32_v4_areg, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001181 break;
1182 }
1183 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001184 if (!Opcode)
1185 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001186 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1187 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1188 getI32Imm(FromTypeWidth, DL), Op1, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001189 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001190 }
1191
1192 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1193 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1194 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1195
Justin Bogner8d83fb62016-05-13 21:12:53 +00001196 ReplaceNode(N, LD);
1197 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001198}
1199
Justin Bogner8d83fb62016-05-13 21:12:53 +00001200bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001201
1202 SDValue Chain = N->getOperand(0);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001203 SDValue Op1;
1204 MemSDNode *Mem;
1205 bool IsLDG = true;
1206
Justin Holewinskic7997922016-04-05 12:38:01 +00001207 // If this is an LDG intrinsic, the address is the third operand. If its an
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001208 // LDG/LDU SD node (from custom vector handling), then its the second operand
1209 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1210 Op1 = N->getOperand(2);
1211 Mem = cast<MemIntrinsicSDNode>(N);
1212 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1213 switch (IID) {
1214 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001215 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001216 case Intrinsic::nvvm_ldg_global_f:
1217 case Intrinsic::nvvm_ldg_global_i:
1218 case Intrinsic::nvvm_ldg_global_p:
1219 IsLDG = true;
1220 break;
1221 case Intrinsic::nvvm_ldu_global_f:
1222 case Intrinsic::nvvm_ldu_global_i:
1223 case Intrinsic::nvvm_ldu_global_p:
1224 IsLDG = false;
1225 break;
1226 }
1227 } else {
1228 Op1 = N->getOperand(1);
1229 Mem = cast<MemSDNode>(N);
1230 }
1231
Artem Belevichee7dd122017-03-02 19:14:14 +00001232 Optional<unsigned> Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001233 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001234 SDNode *LD;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001235 SDValue Base, Offset, Addr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00001236
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001237 EVT EltVT = Mem->getMemoryVT();
Justin Holewinskic7997922016-04-05 12:38:01 +00001238 unsigned NumElts = 1;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001239 if (EltVT.isVector()) {
Justin Holewinskic7997922016-04-05 12:38:01 +00001240 NumElts = EltVT.getVectorNumElements();
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001241 EltVT = EltVT.getVectorElementType();
1242 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001243
Justin Holewinskic7997922016-04-05 12:38:01 +00001244 // Build the "promoted" result VTList for the load. If we are really loading
1245 // i8s, then the return type will be promoted to i16 since we do not expose
1246 // 8-bit registers in NVPTX.
1247 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1248 SmallVector<EVT, 5> InstVTs;
1249 for (unsigned i = 0; i != NumElts; ++i) {
1250 InstVTs.push_back(NodeVT);
1251 }
1252 InstVTs.push_back(MVT::Other);
1253 SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1254
Justin Holewinskie40e9292013-07-01 12:58:52 +00001255 if (SelectDirectAddr(Op1, Addr)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001256 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001257 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001258 return false;
Justin Lebarfaaf2d22018-02-28 23:58:05 +00001259 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001260 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001261 if (IsLDG)
1262 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1263 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1264 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1265 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1266 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1267 NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
1268 NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
1269 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1270 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1271 else
1272 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1273 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1274 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1275 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1276 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1277 NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
1278 NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
1279 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1280 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001281 break;
Justin Lebarfaaf2d22018-02-28 23:58:05 +00001282 case NVPTXISD::LoadV2:
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001283 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001284 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1285 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1286 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1287 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1288 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1289 NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
1290 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
1291 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1292 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001293 break;
1294 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001295 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1296 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1297 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1298 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1299 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1300 NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
1301 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
1302 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1303 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001304 break;
Justin Lebarfaaf2d22018-02-28 23:58:05 +00001305 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001306 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001307 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1308 NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1309 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1310 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
1311 NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
1312 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
1313 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001314 break;
1315 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001316 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1317 NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1318 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1319 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
1320 NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
1321 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
1322 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001323 break;
1324 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001325 if (!Opcode)
1326 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001327 SDValue Ops[] = { Addr, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001328 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001329 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1330 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1331 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001332 switch (N->getOpcode()) {
1333 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001334 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001335 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001336 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001337 if (IsLDG)
1338 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1339 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1340 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1341 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1342 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1343 NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
1344 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
1345 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1346 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1347 else
1348 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1349 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1350 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1351 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1352 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1353 NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
1354 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
1355 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1356 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001357 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001358 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001359 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001360 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1361 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1362 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1363 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1364 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1365 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
1366 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
1367 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1368 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001369 break;
1370 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001371 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1372 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1373 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1374 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1375 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1376 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
1377 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
1378 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1379 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001380 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001381 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001382 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001383 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1384 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1385 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1386 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
1387 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
1388 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
1389 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001390 break;
1391 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001392 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1393 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1394 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1395 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
1396 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
1397 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
1398 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001399 break;
1400 }
1401 } else {
1402 switch (N->getOpcode()) {
1403 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001404 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001405 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001406 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001407 if (IsLDG)
1408 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1409 NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1410 NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1411 NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1412 NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1413 NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
1414 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
1415 NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1416 NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1417 else
1418 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1419 NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1420 NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1421 NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1422 NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1423 NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
1424 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
1425 NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1426 NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001427 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001428 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001429 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001430 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1431 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1432 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1433 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1434 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1435 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
1436 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
1437 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1438 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001439 break;
1440 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001441 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1442 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1443 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1444 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1445 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1446 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
1447 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
1448 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1449 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001450 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001451 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001452 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001453 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1454 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1455 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1456 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
1457 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
1458 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
1459 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001460 break;
1461 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001462 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1463 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1464 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1465 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
1466 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
1467 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
1468 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001469 break;
1470 }
1471 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001472 if (!Opcode)
1473 return false;
1474 SDValue Ops[] = {Base, Offset, Chain};
1475 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001476 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001477 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001478 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001479 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001480 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001481 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001482 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001483 if (IsLDG)
1484 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1485 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1486 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1487 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1488 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1489 NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
1490 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
1491 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1492 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1493 else
1494 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1495 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1496 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1497 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1498 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1499 NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
1500 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
1501 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1502 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001503 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001504 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001505 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001506 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1507 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1508 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1509 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1510 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1511 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
1512 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
1513 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1514 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001515 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001516 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001517 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1518 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1519 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1520 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1521 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1522 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
1523 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
1524 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1525 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001526 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001527 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001528 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001529 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1530 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1531 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1532 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
1533 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
1534 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
1535 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001536 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001537 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001538 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1539 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1540 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1541 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
1542 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
1543 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
1544 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001545 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001546 }
Justin Holewinskie40e9292013-07-01 12:58:52 +00001547 } else {
1548 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001549 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001550 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001551 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001552 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001553 if (IsLDG)
1554 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1555 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1556 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1557 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1558 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1559 NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
1560 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
1561 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1562 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1563 else
1564 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1565 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1566 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1567 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1568 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1569 NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
1570 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
1571 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1572 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001573 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001574 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001575 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001576 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1577 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1578 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1579 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1580 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1581 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
1582 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
1583 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1584 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001585 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001586 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001587 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1588 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1589 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1590 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1591 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1592 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
1593 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
1594 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1595 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001596 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001597 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001598 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001599 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1600 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1601 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1602 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
1603 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
1604 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
1605 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001606 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001607 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001608 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1609 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1610 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1611 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
1612 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
1613 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
1614 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001615 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001616 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001617 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001618 if (!Opcode)
1619 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001620 SDValue Ops[] = { Op1, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001621 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001622 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001623
1624 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001625 MemRefs0[0] = Mem->getMemOperand();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001626 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1627
Justin Holewinskic7997922016-04-05 12:38:01 +00001628 // For automatic generation of LDG (through SelectLoad[Vector], not the
1629 // intrinsics), we may have an extending load like:
1630 //
1631 // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
1632 //
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001633 // In this case, the matching logic above will select a load for the original
1634 // memory type (in this case, i8) and our types will not match (the node needs
1635 // to return an i32 in this case). Our LDG/LDU nodes do not support the
1636 // concept of sign-/zero-extension, so emulate it here by adding an explicit
1637 // CVT instruction. Ptxas should clean up any redundancies here.
1638
Justin Holewinskic7997922016-04-05 12:38:01 +00001639 EVT OrigType = N->getValueType(0);
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001640 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
Justin Holewinskic7997922016-04-05 12:38:01 +00001641
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001642 if (OrigType != EltVT && LdNode) {
1643 // We have an extending-load. The instruction we selected operates on the
1644 // smaller type, but the SDNode we are replacing has the larger type. We
1645 // need to emit a CVT to make the types match.
1646 bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
1647 unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
1648 EltVT.getSimpleVT(), IsSigned);
Justin Holewinskic7997922016-04-05 12:38:01 +00001649
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001650 // For each output value, apply the manual sign/zero-extension and make sure
1651 // all users of the load go through that CVT.
Justin Holewinskic7997922016-04-05 12:38:01 +00001652 for (unsigned i = 0; i != NumElts; ++i) {
1653 SDValue Res(LD, i);
1654 SDValue OrigVal(N, i);
1655
1656 SDNode *CvtNode =
1657 CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001658 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
1659 DL, MVT::i32));
Justin Holewinskic7997922016-04-05 12:38:01 +00001660 ReplaceUses(OrigVal, SDValue(CvtNode, 0));
1661 }
1662 }
1663
Justin Bogner8d83fb62016-05-13 21:12:53 +00001664 ReplaceNode(N, LD);
1665 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001666}
1667
Justin Bogner8d83fb62016-05-13 21:12:53 +00001668bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001669 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001670 StoreSDNode *ST = cast<StoreSDNode>(N);
1671 EVT StoreVT = ST->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +00001672 SDNode *NVPTXST = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001673
1674 // do not support pre/post inc/dec
1675 if (ST->isIndexed())
Justin Bogner8d83fb62016-05-13 21:12:53 +00001676 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001677
1678 if (!StoreVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +00001679 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001680
1681 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00001682 unsigned int codeAddrSpace = getCodeAddrSpace(ST);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001683
1684 // Volatile Setting
1685 // - .volatile is only availalble for .global and .shared
1686 bool isVolatile = ST->isVolatile();
1687 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1688 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1689 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1690 isVolatile = false;
1691
1692 // Vector Setting
1693 MVT SimpleVT = StoreVT.getSimpleVT();
1694 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001695
1696 // Type Setting: toType + toTypeWidth
1697 // - for integer type, always use 'u'
1698 //
1699 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00001700 unsigned toTypeWidth = ScalarVT.getSizeInBits();
Artem Belevich620db1f2017-02-23 22:38:24 +00001701 if (SimpleVT.isVector()) {
1702 assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
1703 // v2f16 is stored using st.b32
1704 toTypeWidth = 32;
1705 }
1706
Justin Holewinskiae556d32012-05-04 20:18:50 +00001707 unsigned int toType;
1708 if (ScalarVT.isFloatingPoint())
Artem Belevich64dc9be2017-01-13 20:56:17 +00001709 // f16 uses .b16 as its storage type.
1710 toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1711 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001712 else
1713 toType = NVPTX::PTXLdStInstCode::Unsigned;
1714
1715 // Create the machine instruction DAG
1716 SDValue Chain = N->getOperand(0);
1717 SDValue N1 = N->getOperand(1);
1718 SDValue N2 = N->getOperand(2);
1719 SDValue Addr;
1720 SDValue Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +00001721 Optional<unsigned> Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +00001722 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001723
1724 if (SelectDirectAddr(N2, Addr)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00001725 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1726 NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1727 NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
1728 NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1729 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +00001730 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001731 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1732 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1733 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
1734 Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001735 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001736 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1737 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00001738 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1739 NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1740 NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
1741 NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1742 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +00001743 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001744 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1745 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1746 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
1747 Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001748 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001749 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1750 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00001751 if (TM.is64Bit())
1752 Opcode = pickOpcodeForVT(
1753 SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
1754 NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
1755 NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1756 else
1757 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1758 NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1759 NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
1760 NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1761 if (!Opcode)
1762 return false;
1763
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001764 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1765 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1766 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
1767 Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001768 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001769 } else {
Artem Belevichee7dd122017-03-02 19:14:14 +00001770 if (TM.is64Bit())
1771 Opcode =
1772 pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1773 NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1774 NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
1775 NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1776 else
1777 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1778 NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1779 NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
1780 NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1781 if (!Opcode)
1782 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001783 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1784 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1785 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
1786 Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001787 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001788 }
1789
Justin Bogner8d83fb62016-05-13 21:12:53 +00001790 if (!NVPTXST)
1791 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001792
Justin Bogner8d83fb62016-05-13 21:12:53 +00001793 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1794 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1795 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1796 ReplaceNode(N, NVPTXST);
1797 return true;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001798}
1799
Justin Bogner8d83fb62016-05-13 21:12:53 +00001800bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001801 SDValue Chain = N->getOperand(0);
1802 SDValue Op1 = N->getOperand(1);
1803 SDValue Addr, Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +00001804 Optional<unsigned> Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001805 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001806 SDNode *ST;
1807 EVT EltVT = Op1.getValueType();
1808 MemSDNode *MemSD = cast<MemSDNode>(N);
1809 EVT StoreVT = MemSD->getMemoryVT();
1810
1811 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00001812 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001813
1814 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1815 report_fatal_error("Cannot store to pointer that points to constant "
1816 "memory space");
1817 }
1818
1819 // Volatile Setting
1820 // - .volatile is only availalble for .global and .shared
1821 bool IsVolatile = MemSD->isVolatile();
1822 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1823 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1824 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1825 IsVolatile = false;
1826
1827 // Type Setting: toType + toTypeWidth
1828 // - for integer type, always use 'u'
1829 assert(StoreVT.isSimple() && "Store value is not simple");
1830 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00001831 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001832 unsigned ToType;
1833 if (ScalarVT.isFloatingPoint())
Artem Belevich620db1f2017-02-23 22:38:24 +00001834 ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1835 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001836 else
1837 ToType = NVPTX::PTXLdStInstCode::Unsigned;
1838
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001839 SmallVector<SDValue, 12> StOps;
1840 SDValue N2;
1841 unsigned VecType;
1842
1843 switch (N->getOpcode()) {
1844 case NVPTXISD::StoreV2:
1845 VecType = NVPTX::PTXLdStInstCode::V2;
1846 StOps.push_back(N->getOperand(1));
1847 StOps.push_back(N->getOperand(2));
1848 N2 = N->getOperand(3);
1849 break;
1850 case NVPTXISD::StoreV4:
1851 VecType = NVPTX::PTXLdStInstCode::V4;
1852 StOps.push_back(N->getOperand(1));
1853 StOps.push_back(N->getOperand(2));
1854 StOps.push_back(N->getOperand(3));
1855 StOps.push_back(N->getOperand(4));
1856 N2 = N->getOperand(5);
1857 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001858 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001859 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001860 }
1861
Artem Belevich620db1f2017-02-23 22:38:24 +00001862 // v8f16 is a special case. PTX doesn't have st.v8.f16
1863 // instruction. Instead, we split the vector into v2f16 chunks and
1864 // store them with st.v4.b32.
1865 if (EltVT == MVT::v2f16) {
1866 assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
1867 EltVT = MVT::i32;
1868 ToType = NVPTX::PTXLdStInstCode::Untyped;
1869 ToTypeWidth = 32;
1870 }
1871
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001872 StOps.push_back(getI32Imm(IsVolatile, DL));
1873 StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1874 StOps.push_back(getI32Imm(VecType, DL));
1875 StOps.push_back(getI32Imm(ToType, DL));
1876 StOps.push_back(getI32Imm(ToTypeWidth, DL));
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001877
1878 if (SelectDirectAddr(N2, Addr)) {
1879 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001880 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001881 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001882 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001883 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1884 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1885 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1886 NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1887 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001888 break;
1889 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001890 Opcode =
1891 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
1892 NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
1893 NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1894 NVPTX::STV_f32_v4_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001895 break;
1896 }
1897 StOps.push_back(Addr);
Eric Christopher02389e32015-02-19 00:08:27 +00001898 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1899 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001900 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001901 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001902 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001903 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001904 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1905 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1906 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1907 NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1908 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001909 break;
1910 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001911 Opcode =
1912 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
1913 NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
1914 NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1915 NVPTX::STV_f32_v4_asi, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001916 break;
1917 }
1918 StOps.push_back(Base);
1919 StOps.push_back(Offset);
Eric Christopher02389e32015-02-19 00:08:27 +00001920 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1921 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1922 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001923 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001924 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001925 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001926 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001927 Opcode = pickOpcodeForVT(
1928 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
1929 NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
1930 NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
1931 NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
1932 NVPTX::STV_f64_v2_ari_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001933 break;
1934 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001935 Opcode = pickOpcodeForVT(
1936 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
1937 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
1938 NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
1939 NVPTX::STV_f32_v4_ari_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001940 break;
1941 }
1942 } else {
1943 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001944 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001945 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001946 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001947 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1948 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
1949 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
1950 NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
1951 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001952 break;
1953 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001954 Opcode =
1955 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
1956 NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
1957 NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
1958 NVPTX::STV_f32_v4_ari, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001959 break;
1960 }
1961 }
1962 StOps.push_back(Base);
1963 StOps.push_back(Offset);
1964 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001965 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001966 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001967 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001968 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001969 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001970 Opcode = pickOpcodeForVT(
1971 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
1972 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
1973 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
1974 NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
1975 NVPTX::STV_f64_v2_areg_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001976 break;
1977 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001978 Opcode = pickOpcodeForVT(
1979 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
1980 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
1981 NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
1982 NVPTX::STV_f32_v4_areg_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001983 break;
1984 }
1985 } else {
1986 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001987 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001988 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001989 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001990 Opcode =
1991 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
1992 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
1993 NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
1994 NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
1995 NVPTX::STV_f64_v2_areg);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001996 break;
1997 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001998 Opcode =
1999 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
2000 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None,
2001 NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
2002 NVPTX::STV_f32_v4_areg, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002003 break;
2004 }
2005 }
2006 StOps.push_back(N2);
2007 }
2008
Artem Belevichee7dd122017-03-02 19:14:14 +00002009 if (!Opcode)
2010 return false;
2011
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002012 StOps.push_back(Chain);
2013
Artem Belevichee7dd122017-03-02 19:14:14 +00002014 ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002015
2016 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2017 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2018 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2019
Justin Bogner8d83fb62016-05-13 21:12:53 +00002020 ReplaceNode(N, ST);
2021 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002022}
2023
Justin Bogner8d83fb62016-05-13 21:12:53 +00002024bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00002025 SDValue Chain = Node->getOperand(0);
2026 SDValue Offset = Node->getOperand(2);
2027 SDValue Flag = Node->getOperand(3);
2028 SDLoc DL(Node);
2029 MemSDNode *Mem = cast<MemSDNode>(Node);
2030
2031 unsigned VecSize;
2032 switch (Node->getOpcode()) {
2033 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002034 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002035 case NVPTXISD::LoadParam:
2036 VecSize = 1;
2037 break;
2038 case NVPTXISD::LoadParamV2:
2039 VecSize = 2;
2040 break;
2041 case NVPTXISD::LoadParamV4:
2042 VecSize = 4;
2043 break;
2044 }
2045
2046 EVT EltVT = Node->getValueType(0);
2047 EVT MemVT = Mem->getMemoryVT();
2048
Artem Belevichee7dd122017-03-02 19:14:14 +00002049 Optional<unsigned> Opcode;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002050
2051 switch (VecSize) {
2052 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002053 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002054 case 1:
Artem Belevichee7dd122017-03-02 19:14:14 +00002055 Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
2056 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2057 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2058 NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2059 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002060 break;
2061 case 2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002062 Opcode =
2063 pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
2064 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2065 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2066 NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2067 NVPTX::LoadParamMemV2F64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002068 break;
2069 case 4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002070 Opcode = pickOpcodeForVT(
2071 MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
2072 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
2073 NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2074 NVPTX::LoadParamMemV4F32, None);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002075 break;
2076 }
Artem Belevichee7dd122017-03-02 19:14:14 +00002077 if (!Opcode)
2078 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002079
2080 SDVTList VTs;
2081 if (VecSize == 1) {
2082 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2083 } else if (VecSize == 2) {
2084 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2085 } else {
2086 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
Craig Topperabb4ac72014-04-16 06:10:51 +00002087 VTs = CurDAG->getVTList(EVTs);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002088 }
2089
2090 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2091
2092 SmallVector<SDValue, 2> Ops;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002093 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002094 Ops.push_back(Chain);
2095 Ops.push_back(Flag);
2096
Artem Belevichee7dd122017-03-02 19:14:14 +00002097 ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
Justin Bogner8d83fb62016-05-13 21:12:53 +00002098 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002099}
2100
Justin Bogner8d83fb62016-05-13 21:12:53 +00002101bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00002102 SDLoc DL(N);
2103 SDValue Chain = N->getOperand(0);
2104 SDValue Offset = N->getOperand(1);
2105 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2106 MemSDNode *Mem = cast<MemSDNode>(N);
2107
2108 // How many elements do we have?
2109 unsigned NumElts = 1;
2110 switch (N->getOpcode()) {
2111 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002112 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002113 case NVPTXISD::StoreRetval:
2114 NumElts = 1;
2115 break;
2116 case NVPTXISD::StoreRetvalV2:
2117 NumElts = 2;
2118 break;
2119 case NVPTXISD::StoreRetvalV4:
2120 NumElts = 4;
2121 break;
2122 }
2123
2124 // Build vector of operands
2125 SmallVector<SDValue, 6> Ops;
2126 for (unsigned i = 0; i < NumElts; ++i)
2127 Ops.push_back(N->getOperand(i + 2));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002128 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002129 Ops.push_back(Chain);
2130
2131 // Determine target opcode
2132 // If we have an i1, use an 8-bit store. The lowering code in
2133 // NVPTXISelLowering will have already emitted an upcast.
Artem Belevichee7dd122017-03-02 19:14:14 +00002134 Optional<unsigned> Opcode = 0;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002135 switch (NumElts) {
2136 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002137 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002138 case 1:
Artem Belevichee7dd122017-03-02 19:14:14 +00002139 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2140 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2141 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2142 NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2143 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002144 break;
2145 case 2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002146 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2147 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2148 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2149 NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2150 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002151 break;
2152 case 4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002153 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2154 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2155 NVPTX::StoreRetvalV4I32, None,
2156 NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2157 NVPTX::StoreRetvalV4F32, None);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002158 break;
2159 }
Artem Belevichee7dd122017-03-02 19:14:14 +00002160 if (!Opcode)
2161 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002162
Artem Belevichee7dd122017-03-02 19:14:14 +00002163 SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002164 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2165 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2166 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2167
Justin Bogner8d83fb62016-05-13 21:12:53 +00002168 ReplaceNode(N, Ret);
2169 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002170}
2171
Justin Bogner8d83fb62016-05-13 21:12:53 +00002172bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00002173 SDLoc DL(N);
2174 SDValue Chain = N->getOperand(0);
2175 SDValue Param = N->getOperand(1);
2176 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2177 SDValue Offset = N->getOperand(2);
2178 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2179 MemSDNode *Mem = cast<MemSDNode>(N);
2180 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2181
2182 // How many elements do we have?
2183 unsigned NumElts = 1;
2184 switch (N->getOpcode()) {
2185 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002186 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002187 case NVPTXISD::StoreParamU32:
2188 case NVPTXISD::StoreParamS32:
2189 case NVPTXISD::StoreParam:
2190 NumElts = 1;
2191 break;
2192 case NVPTXISD::StoreParamV2:
2193 NumElts = 2;
2194 break;
2195 case NVPTXISD::StoreParamV4:
2196 NumElts = 4;
2197 break;
2198 }
2199
2200 // Build vector of operands
2201 SmallVector<SDValue, 8> Ops;
2202 for (unsigned i = 0; i < NumElts; ++i)
2203 Ops.push_back(N->getOperand(i + 3));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002204 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2205 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002206 Ops.push_back(Chain);
2207 Ops.push_back(Flag);
2208
2209 // Determine target opcode
2210 // If we have an i1, use an 8-bit store. The lowering code in
2211 // NVPTXISelLowering will have already emitted an upcast.
Artem Belevichee7dd122017-03-02 19:14:14 +00002212 Optional<unsigned> Opcode = 0;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002213 switch (N->getOpcode()) {
2214 default:
2215 switch (NumElts) {
2216 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002217 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002218 case 1:
Artem Belevichee7dd122017-03-02 19:14:14 +00002219 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2220 NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2221 NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2222 NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2223 NVPTX::StoreParamF32, NVPTX::StoreParamF64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002224 break;
2225 case 2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002226 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2227 NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2228 NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2229 NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2230 NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002231 break;
2232 case 4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002233 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2234 NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2235 NVPTX::StoreParamV4I32, None,
2236 NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2237 NVPTX::StoreParamV4F32, None);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002238 break;
2239 }
Artem Belevichee7dd122017-03-02 19:14:14 +00002240 if (!Opcode)
2241 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002242 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002243 // Special case: if we have a sign-extend/zero-extend node, insert the
2244 // conversion instruction first, and use that as the value operand to
2245 // the selected StoreParam node.
2246 case NVPTXISD::StoreParamU32: {
2247 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002248 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002249 MVT::i32);
2250 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2251 MVT::i32, Ops[0], CvtNone);
2252 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002253 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002254 }
2255 case NVPTXISD::StoreParamS32: {
2256 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002257 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002258 MVT::i32);
2259 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2260 MVT::i32, Ops[0], CvtNone);
2261 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002262 break;
2263 }
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002264 }
Justin Holewinskif8f70912013-06-28 17:57:59 +00002265
Justin Holewinskidff28d22013-07-01 12:59:01 +00002266 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002267 SDNode *Ret =
Artem Belevichee7dd122017-03-02 19:14:14 +00002268 CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002269 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2270 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2271 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2272
Justin Bogner8d83fb62016-05-13 21:12:53 +00002273 ReplaceNode(N, Ret);
2274 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002275}
2276
Justin Bogner8d83fb62016-05-13 21:12:53 +00002277bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00002278 unsigned Opc = 0;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002279
2280 switch (N->getOpcode()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00002281 default: return false;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002282 case NVPTXISD::Tex1DFloatS32:
2283 Opc = NVPTX::TEX_1D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002284 break;
2285 case NVPTXISD::Tex1DFloatFloat:
2286 Opc = NVPTX::TEX_1D_F32_F32;
2287 break;
2288 case NVPTXISD::Tex1DFloatFloatLevel:
2289 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2290 break;
2291 case NVPTXISD::Tex1DFloatFloatGrad:
2292 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2293 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002294 case NVPTXISD::Tex1DS32S32:
2295 Opc = NVPTX::TEX_1D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002296 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002297 case NVPTXISD::Tex1DS32Float:
2298 Opc = NVPTX::TEX_1D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002299 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002300 case NVPTXISD::Tex1DS32FloatLevel:
2301 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002302 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002303 case NVPTXISD::Tex1DS32FloatGrad:
2304 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002305 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002306 case NVPTXISD::Tex1DU32S32:
2307 Opc = NVPTX::TEX_1D_U32_S32;
2308 break;
2309 case NVPTXISD::Tex1DU32Float:
2310 Opc = NVPTX::TEX_1D_U32_F32;
2311 break;
2312 case NVPTXISD::Tex1DU32FloatLevel:
2313 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
2314 break;
2315 case NVPTXISD::Tex1DU32FloatGrad:
2316 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
2317 break;
2318 case NVPTXISD::Tex1DArrayFloatS32:
2319 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002320 break;
2321 case NVPTXISD::Tex1DArrayFloatFloat:
2322 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2323 break;
2324 case NVPTXISD::Tex1DArrayFloatFloatLevel:
2325 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2326 break;
2327 case NVPTXISD::Tex1DArrayFloatFloatGrad:
2328 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2329 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002330 case NVPTXISD::Tex1DArrayS32S32:
2331 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002332 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002333 case NVPTXISD::Tex1DArrayS32Float:
2334 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002335 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002336 case NVPTXISD::Tex1DArrayS32FloatLevel:
2337 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002338 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002339 case NVPTXISD::Tex1DArrayS32FloatGrad:
2340 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002341 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002342 case NVPTXISD::Tex1DArrayU32S32:
2343 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
2344 break;
2345 case NVPTXISD::Tex1DArrayU32Float:
2346 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
2347 break;
2348 case NVPTXISD::Tex1DArrayU32FloatLevel:
2349 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
2350 break;
2351 case NVPTXISD::Tex1DArrayU32FloatGrad:
2352 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
2353 break;
2354 case NVPTXISD::Tex2DFloatS32:
2355 Opc = NVPTX::TEX_2D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002356 break;
2357 case NVPTXISD::Tex2DFloatFloat:
2358 Opc = NVPTX::TEX_2D_F32_F32;
2359 break;
2360 case NVPTXISD::Tex2DFloatFloatLevel:
2361 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2362 break;
2363 case NVPTXISD::Tex2DFloatFloatGrad:
2364 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2365 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002366 case NVPTXISD::Tex2DS32S32:
2367 Opc = NVPTX::TEX_2D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002368 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002369 case NVPTXISD::Tex2DS32Float:
2370 Opc = NVPTX::TEX_2D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002371 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002372 case NVPTXISD::Tex2DS32FloatLevel:
2373 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002374 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002375 case NVPTXISD::Tex2DS32FloatGrad:
2376 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002377 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002378 case NVPTXISD::Tex2DU32S32:
2379 Opc = NVPTX::TEX_2D_U32_S32;
2380 break;
2381 case NVPTXISD::Tex2DU32Float:
2382 Opc = NVPTX::TEX_2D_U32_F32;
2383 break;
2384 case NVPTXISD::Tex2DU32FloatLevel:
2385 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
2386 break;
2387 case NVPTXISD::Tex2DU32FloatGrad:
2388 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
2389 break;
2390 case NVPTXISD::Tex2DArrayFloatS32:
2391 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002392 break;
2393 case NVPTXISD::Tex2DArrayFloatFloat:
2394 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2395 break;
2396 case NVPTXISD::Tex2DArrayFloatFloatLevel:
2397 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2398 break;
2399 case NVPTXISD::Tex2DArrayFloatFloatGrad:
2400 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2401 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002402 case NVPTXISD::Tex2DArrayS32S32:
2403 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002404 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002405 case NVPTXISD::Tex2DArrayS32Float:
2406 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002407 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002408 case NVPTXISD::Tex2DArrayS32FloatLevel:
2409 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002410 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002411 case NVPTXISD::Tex2DArrayS32FloatGrad:
2412 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002413 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002414 case NVPTXISD::Tex2DArrayU32S32:
2415 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
2416 break;
2417 case NVPTXISD::Tex2DArrayU32Float:
2418 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
2419 break;
2420 case NVPTXISD::Tex2DArrayU32FloatLevel:
2421 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
2422 break;
2423 case NVPTXISD::Tex2DArrayU32FloatGrad:
2424 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
2425 break;
2426 case NVPTXISD::Tex3DFloatS32:
2427 Opc = NVPTX::TEX_3D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002428 break;
2429 case NVPTXISD::Tex3DFloatFloat:
2430 Opc = NVPTX::TEX_3D_F32_F32;
2431 break;
2432 case NVPTXISD::Tex3DFloatFloatLevel:
2433 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2434 break;
2435 case NVPTXISD::Tex3DFloatFloatGrad:
2436 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2437 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002438 case NVPTXISD::Tex3DS32S32:
2439 Opc = NVPTX::TEX_3D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002440 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002441 case NVPTXISD::Tex3DS32Float:
2442 Opc = NVPTX::TEX_3D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002443 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002444 case NVPTXISD::Tex3DS32FloatLevel:
2445 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002446 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002447 case NVPTXISD::Tex3DS32FloatGrad:
2448 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
2449 break;
2450 case NVPTXISD::Tex3DU32S32:
2451 Opc = NVPTX::TEX_3D_U32_S32;
2452 break;
2453 case NVPTXISD::Tex3DU32Float:
2454 Opc = NVPTX::TEX_3D_U32_F32;
2455 break;
2456 case NVPTXISD::Tex3DU32FloatLevel:
2457 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
2458 break;
2459 case NVPTXISD::Tex3DU32FloatGrad:
2460 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
2461 break;
2462 case NVPTXISD::TexCubeFloatFloat:
2463 Opc = NVPTX::TEX_CUBE_F32_F32;
2464 break;
2465 case NVPTXISD::TexCubeFloatFloatLevel:
2466 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
2467 break;
2468 case NVPTXISD::TexCubeS32Float:
2469 Opc = NVPTX::TEX_CUBE_S32_F32;
2470 break;
2471 case NVPTXISD::TexCubeS32FloatLevel:
2472 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
2473 break;
2474 case NVPTXISD::TexCubeU32Float:
2475 Opc = NVPTX::TEX_CUBE_U32_F32;
2476 break;
2477 case NVPTXISD::TexCubeU32FloatLevel:
2478 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
2479 break;
2480 case NVPTXISD::TexCubeArrayFloatFloat:
2481 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
2482 break;
2483 case NVPTXISD::TexCubeArrayFloatFloatLevel:
2484 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
2485 break;
2486 case NVPTXISD::TexCubeArrayS32Float:
2487 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
2488 break;
2489 case NVPTXISD::TexCubeArrayS32FloatLevel:
2490 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
2491 break;
2492 case NVPTXISD::TexCubeArrayU32Float:
2493 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
2494 break;
2495 case NVPTXISD::TexCubeArrayU32FloatLevel:
2496 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
2497 break;
2498 case NVPTXISD::Tld4R2DFloatFloat:
2499 Opc = NVPTX::TLD4_R_2D_F32_F32;
2500 break;
2501 case NVPTXISD::Tld4G2DFloatFloat:
2502 Opc = NVPTX::TLD4_G_2D_F32_F32;
2503 break;
2504 case NVPTXISD::Tld4B2DFloatFloat:
2505 Opc = NVPTX::TLD4_B_2D_F32_F32;
2506 break;
2507 case NVPTXISD::Tld4A2DFloatFloat:
2508 Opc = NVPTX::TLD4_A_2D_F32_F32;
2509 break;
2510 case NVPTXISD::Tld4R2DS64Float:
2511 Opc = NVPTX::TLD4_R_2D_S32_F32;
2512 break;
2513 case NVPTXISD::Tld4G2DS64Float:
2514 Opc = NVPTX::TLD4_G_2D_S32_F32;
2515 break;
2516 case NVPTXISD::Tld4B2DS64Float:
2517 Opc = NVPTX::TLD4_B_2D_S32_F32;
2518 break;
2519 case NVPTXISD::Tld4A2DS64Float:
2520 Opc = NVPTX::TLD4_A_2D_S32_F32;
2521 break;
2522 case NVPTXISD::Tld4R2DU64Float:
2523 Opc = NVPTX::TLD4_R_2D_U32_F32;
2524 break;
2525 case NVPTXISD::Tld4G2DU64Float:
2526 Opc = NVPTX::TLD4_G_2D_U32_F32;
2527 break;
2528 case NVPTXISD::Tld4B2DU64Float:
2529 Opc = NVPTX::TLD4_B_2D_U32_F32;
2530 break;
2531 case NVPTXISD::Tld4A2DU64Float:
2532 Opc = NVPTX::TLD4_A_2D_U32_F32;
2533 break;
2534 case NVPTXISD::TexUnified1DFloatS32:
2535 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
2536 break;
2537 case NVPTXISD::TexUnified1DFloatFloat:
2538 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
2539 break;
2540 case NVPTXISD::TexUnified1DFloatFloatLevel:
2541 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
2542 break;
2543 case NVPTXISD::TexUnified1DFloatFloatGrad:
2544 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
2545 break;
2546 case NVPTXISD::TexUnified1DS32S32:
2547 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
2548 break;
2549 case NVPTXISD::TexUnified1DS32Float:
2550 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
2551 break;
2552 case NVPTXISD::TexUnified1DS32FloatLevel:
2553 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
2554 break;
2555 case NVPTXISD::TexUnified1DS32FloatGrad:
2556 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
2557 break;
2558 case NVPTXISD::TexUnified1DU32S32:
2559 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
2560 break;
2561 case NVPTXISD::TexUnified1DU32Float:
2562 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
2563 break;
2564 case NVPTXISD::TexUnified1DU32FloatLevel:
2565 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
2566 break;
2567 case NVPTXISD::TexUnified1DU32FloatGrad:
2568 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
2569 break;
2570 case NVPTXISD::TexUnified1DArrayFloatS32:
2571 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
2572 break;
2573 case NVPTXISD::TexUnified1DArrayFloatFloat:
2574 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
2575 break;
2576 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
2577 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
2578 break;
2579 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
2580 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
2581 break;
2582 case NVPTXISD::TexUnified1DArrayS32S32:
2583 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
2584 break;
2585 case NVPTXISD::TexUnified1DArrayS32Float:
2586 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
2587 break;
2588 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
2589 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
2590 break;
2591 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
2592 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
2593 break;
2594 case NVPTXISD::TexUnified1DArrayU32S32:
2595 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
2596 break;
2597 case NVPTXISD::TexUnified1DArrayU32Float:
2598 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
2599 break;
2600 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
2601 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
2602 break;
2603 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
2604 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
2605 break;
2606 case NVPTXISD::TexUnified2DFloatS32:
2607 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
2608 break;
2609 case NVPTXISD::TexUnified2DFloatFloat:
2610 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
2611 break;
2612 case NVPTXISD::TexUnified2DFloatFloatLevel:
2613 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
2614 break;
2615 case NVPTXISD::TexUnified2DFloatFloatGrad:
2616 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
2617 break;
2618 case NVPTXISD::TexUnified2DS32S32:
2619 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
2620 break;
2621 case NVPTXISD::TexUnified2DS32Float:
2622 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
2623 break;
2624 case NVPTXISD::TexUnified2DS32FloatLevel:
2625 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
2626 break;
2627 case NVPTXISD::TexUnified2DS32FloatGrad:
2628 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
2629 break;
2630 case NVPTXISD::TexUnified2DU32S32:
2631 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
2632 break;
2633 case NVPTXISD::TexUnified2DU32Float:
2634 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
2635 break;
2636 case NVPTXISD::TexUnified2DU32FloatLevel:
2637 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
2638 break;
2639 case NVPTXISD::TexUnified2DU32FloatGrad:
2640 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
2641 break;
2642 case NVPTXISD::TexUnified2DArrayFloatS32:
2643 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
2644 break;
2645 case NVPTXISD::TexUnified2DArrayFloatFloat:
2646 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
2647 break;
2648 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
2649 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
2650 break;
2651 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
2652 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
2653 break;
2654 case NVPTXISD::TexUnified2DArrayS32S32:
2655 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
2656 break;
2657 case NVPTXISD::TexUnified2DArrayS32Float:
2658 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
2659 break;
2660 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
2661 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
2662 break;
2663 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
2664 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
2665 break;
2666 case NVPTXISD::TexUnified2DArrayU32S32:
2667 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
2668 break;
2669 case NVPTXISD::TexUnified2DArrayU32Float:
2670 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
2671 break;
2672 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
2673 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
2674 break;
2675 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
2676 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
2677 break;
2678 case NVPTXISD::TexUnified3DFloatS32:
2679 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
2680 break;
2681 case NVPTXISD::TexUnified3DFloatFloat:
2682 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
2683 break;
2684 case NVPTXISD::TexUnified3DFloatFloatLevel:
2685 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
2686 break;
2687 case NVPTXISD::TexUnified3DFloatFloatGrad:
2688 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
2689 break;
2690 case NVPTXISD::TexUnified3DS32S32:
2691 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
2692 break;
2693 case NVPTXISD::TexUnified3DS32Float:
2694 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
2695 break;
2696 case NVPTXISD::TexUnified3DS32FloatLevel:
2697 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
2698 break;
2699 case NVPTXISD::TexUnified3DS32FloatGrad:
2700 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
2701 break;
2702 case NVPTXISD::TexUnified3DU32S32:
2703 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
2704 break;
2705 case NVPTXISD::TexUnified3DU32Float:
2706 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
2707 break;
2708 case NVPTXISD::TexUnified3DU32FloatLevel:
2709 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
2710 break;
2711 case NVPTXISD::TexUnified3DU32FloatGrad:
2712 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
2713 break;
2714 case NVPTXISD::TexUnifiedCubeFloatFloat:
2715 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
2716 break;
2717 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
2718 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
2719 break;
2720 case NVPTXISD::TexUnifiedCubeS32Float:
2721 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
2722 break;
2723 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
2724 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
2725 break;
2726 case NVPTXISD::TexUnifiedCubeU32Float:
2727 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
2728 break;
2729 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
2730 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
2731 break;
2732 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
2733 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
2734 break;
2735 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
2736 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
2737 break;
2738 case NVPTXISD::TexUnifiedCubeArrayS32Float:
2739 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
2740 break;
2741 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
2742 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
2743 break;
2744 case NVPTXISD::TexUnifiedCubeArrayU32Float:
2745 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
2746 break;
2747 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
2748 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
2749 break;
2750 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
2751 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
2752 break;
2753 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
2754 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
2755 break;
2756 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
2757 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
2758 break;
2759 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
2760 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
2761 break;
2762 case NVPTXISD::Tld4UnifiedR2DS64Float:
2763 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
2764 break;
2765 case NVPTXISD::Tld4UnifiedG2DS64Float:
2766 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
2767 break;
2768 case NVPTXISD::Tld4UnifiedB2DS64Float:
2769 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
2770 break;
2771 case NVPTXISD::Tld4UnifiedA2DS64Float:
2772 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
2773 break;
2774 case NVPTXISD::Tld4UnifiedR2DU64Float:
2775 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
2776 break;
2777 case NVPTXISD::Tld4UnifiedG2DU64Float:
2778 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
2779 break;
2780 case NVPTXISD::Tld4UnifiedB2DU64Float:
2781 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
2782 break;
2783 case NVPTXISD::Tld4UnifiedA2DU64Float:
2784 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002785 break;
2786 }
2787
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002788 // Copy over operands
Benjamin Kramer806ae442017-08-20 17:30:32 +00002789 SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
2790 Ops.push_back(N->getOperand(0)); // Move chain to the back.
Justin Holewinski30d56a72014-04-09 15:39:15 +00002791
Justin Bogner8d83fb62016-05-13 21:12:53 +00002792 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2793 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002794}
2795
Justin Bogner8d83fb62016-05-13 21:12:53 +00002796bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00002797 unsigned Opc = 0;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002798 switch (N->getOpcode()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00002799 default: return false;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002800 case NVPTXISD::Suld1DI8Clamp:
2801 Opc = NVPTX::SULD_1D_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002802 break;
2803 case NVPTXISD::Suld1DI16Clamp:
2804 Opc = NVPTX::SULD_1D_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002805 break;
2806 case NVPTXISD::Suld1DI32Clamp:
2807 Opc = NVPTX::SULD_1D_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002808 break;
2809 case NVPTXISD::Suld1DI64Clamp:
2810 Opc = NVPTX::SULD_1D_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002811 break;
2812 case NVPTXISD::Suld1DV2I8Clamp:
2813 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002814 break;
2815 case NVPTXISD::Suld1DV2I16Clamp:
2816 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002817 break;
2818 case NVPTXISD::Suld1DV2I32Clamp:
2819 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002820 break;
2821 case NVPTXISD::Suld1DV2I64Clamp:
2822 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002823 break;
2824 case NVPTXISD::Suld1DV4I8Clamp:
2825 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002826 break;
2827 case NVPTXISD::Suld1DV4I16Clamp:
2828 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002829 break;
2830 case NVPTXISD::Suld1DV4I32Clamp:
2831 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002832 break;
2833 case NVPTXISD::Suld1DArrayI8Clamp:
2834 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002835 break;
2836 case NVPTXISD::Suld1DArrayI16Clamp:
2837 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002838 break;
2839 case NVPTXISD::Suld1DArrayI32Clamp:
2840 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002841 break;
2842 case NVPTXISD::Suld1DArrayI64Clamp:
2843 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002844 break;
2845 case NVPTXISD::Suld1DArrayV2I8Clamp:
2846 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002847 break;
2848 case NVPTXISD::Suld1DArrayV2I16Clamp:
2849 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002850 break;
2851 case NVPTXISD::Suld1DArrayV2I32Clamp:
2852 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002853 break;
2854 case NVPTXISD::Suld1DArrayV2I64Clamp:
2855 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002856 break;
2857 case NVPTXISD::Suld1DArrayV4I8Clamp:
2858 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002859 break;
2860 case NVPTXISD::Suld1DArrayV4I16Clamp:
2861 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002862 break;
2863 case NVPTXISD::Suld1DArrayV4I32Clamp:
2864 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002865 break;
2866 case NVPTXISD::Suld2DI8Clamp:
2867 Opc = NVPTX::SULD_2D_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002868 break;
2869 case NVPTXISD::Suld2DI16Clamp:
2870 Opc = NVPTX::SULD_2D_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002871 break;
2872 case NVPTXISD::Suld2DI32Clamp:
2873 Opc = NVPTX::SULD_2D_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002874 break;
2875 case NVPTXISD::Suld2DI64Clamp:
2876 Opc = NVPTX::SULD_2D_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002877 break;
2878 case NVPTXISD::Suld2DV2I8Clamp:
2879 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002880 break;
2881 case NVPTXISD::Suld2DV2I16Clamp:
2882 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002883 break;
2884 case NVPTXISD::Suld2DV2I32Clamp:
2885 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002886 break;
2887 case NVPTXISD::Suld2DV2I64Clamp:
2888 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002889 break;
2890 case NVPTXISD::Suld2DV4I8Clamp:
2891 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002892 break;
2893 case NVPTXISD::Suld2DV4I16Clamp:
2894 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002895 break;
2896 case NVPTXISD::Suld2DV4I32Clamp:
2897 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002898 break;
2899 case NVPTXISD::Suld2DArrayI8Clamp:
2900 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002901 break;
2902 case NVPTXISD::Suld2DArrayI16Clamp:
2903 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002904 break;
2905 case NVPTXISD::Suld2DArrayI32Clamp:
2906 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002907 break;
2908 case NVPTXISD::Suld2DArrayI64Clamp:
2909 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002910 break;
2911 case NVPTXISD::Suld2DArrayV2I8Clamp:
2912 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002913 break;
2914 case NVPTXISD::Suld2DArrayV2I16Clamp:
2915 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002916 break;
2917 case NVPTXISD::Suld2DArrayV2I32Clamp:
2918 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002919 break;
2920 case NVPTXISD::Suld2DArrayV2I64Clamp:
2921 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002922 break;
2923 case NVPTXISD::Suld2DArrayV4I8Clamp:
2924 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002925 break;
2926 case NVPTXISD::Suld2DArrayV4I16Clamp:
2927 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002928 break;
2929 case NVPTXISD::Suld2DArrayV4I32Clamp:
2930 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002931 break;
2932 case NVPTXISD::Suld3DI8Clamp:
2933 Opc = NVPTX::SULD_3D_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002934 break;
2935 case NVPTXISD::Suld3DI16Clamp:
2936 Opc = NVPTX::SULD_3D_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002937 break;
2938 case NVPTXISD::Suld3DI32Clamp:
2939 Opc = NVPTX::SULD_3D_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002940 break;
2941 case NVPTXISD::Suld3DI64Clamp:
2942 Opc = NVPTX::SULD_3D_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002943 break;
2944 case NVPTXISD::Suld3DV2I8Clamp:
2945 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002946 break;
2947 case NVPTXISD::Suld3DV2I16Clamp:
2948 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002949 break;
2950 case NVPTXISD::Suld3DV2I32Clamp:
2951 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002952 break;
2953 case NVPTXISD::Suld3DV2I64Clamp:
2954 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002955 break;
2956 case NVPTXISD::Suld3DV4I8Clamp:
2957 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002958 break;
2959 case NVPTXISD::Suld3DV4I16Clamp:
2960 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002961 break;
2962 case NVPTXISD::Suld3DV4I32Clamp:
2963 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002964 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002965 case NVPTXISD::Suld1DI8Trap:
2966 Opc = NVPTX::SULD_1D_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002967 break;
2968 case NVPTXISD::Suld1DI16Trap:
2969 Opc = NVPTX::SULD_1D_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002970 break;
2971 case NVPTXISD::Suld1DI32Trap:
2972 Opc = NVPTX::SULD_1D_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002973 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002974 case NVPTXISD::Suld1DI64Trap:
2975 Opc = NVPTX::SULD_1D_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002976 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002977 case NVPTXISD::Suld1DV2I8Trap:
2978 Opc = NVPTX::SULD_1D_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002979 break;
2980 case NVPTXISD::Suld1DV2I16Trap:
2981 Opc = NVPTX::SULD_1D_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002982 break;
2983 case NVPTXISD::Suld1DV2I32Trap:
2984 Opc = NVPTX::SULD_1D_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002985 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002986 case NVPTXISD::Suld1DV2I64Trap:
2987 Opc = NVPTX::SULD_1D_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002988 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002989 case NVPTXISD::Suld1DV4I8Trap:
2990 Opc = NVPTX::SULD_1D_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002991 break;
2992 case NVPTXISD::Suld1DV4I16Trap:
2993 Opc = NVPTX::SULD_1D_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002994 break;
2995 case NVPTXISD::Suld1DV4I32Trap:
2996 Opc = NVPTX::SULD_1D_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002997 break;
2998 case NVPTXISD::Suld1DArrayI8Trap:
2999 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003000 break;
3001 case NVPTXISD::Suld1DArrayI16Trap:
3002 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003003 break;
3004 case NVPTXISD::Suld1DArrayI32Trap:
3005 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003006 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003007 case NVPTXISD::Suld1DArrayI64Trap:
3008 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003009 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003010 case NVPTXISD::Suld1DArrayV2I8Trap:
3011 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003012 break;
3013 case NVPTXISD::Suld1DArrayV2I16Trap:
3014 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003015 break;
3016 case NVPTXISD::Suld1DArrayV2I32Trap:
3017 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003018 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003019 case NVPTXISD::Suld1DArrayV2I64Trap:
3020 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003021 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003022 case NVPTXISD::Suld1DArrayV4I8Trap:
3023 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003024 break;
3025 case NVPTXISD::Suld1DArrayV4I16Trap:
3026 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003027 break;
3028 case NVPTXISD::Suld1DArrayV4I32Trap:
3029 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003030 break;
3031 case NVPTXISD::Suld2DI8Trap:
3032 Opc = NVPTX::SULD_2D_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003033 break;
3034 case NVPTXISD::Suld2DI16Trap:
3035 Opc = NVPTX::SULD_2D_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003036 break;
3037 case NVPTXISD::Suld2DI32Trap:
3038 Opc = NVPTX::SULD_2D_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003039 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003040 case NVPTXISD::Suld2DI64Trap:
3041 Opc = NVPTX::SULD_2D_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003042 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003043 case NVPTXISD::Suld2DV2I8Trap:
3044 Opc = NVPTX::SULD_2D_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003045 break;
3046 case NVPTXISD::Suld2DV2I16Trap:
3047 Opc = NVPTX::SULD_2D_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003048 break;
3049 case NVPTXISD::Suld2DV2I32Trap:
3050 Opc = NVPTX::SULD_2D_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003051 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003052 case NVPTXISD::Suld2DV2I64Trap:
3053 Opc = NVPTX::SULD_2D_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003054 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003055 case NVPTXISD::Suld2DV4I8Trap:
3056 Opc = NVPTX::SULD_2D_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003057 break;
3058 case NVPTXISD::Suld2DV4I16Trap:
3059 Opc = NVPTX::SULD_2D_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003060 break;
3061 case NVPTXISD::Suld2DV4I32Trap:
3062 Opc = NVPTX::SULD_2D_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003063 break;
3064 case NVPTXISD::Suld2DArrayI8Trap:
3065 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003066 break;
3067 case NVPTXISD::Suld2DArrayI16Trap:
3068 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003069 break;
3070 case NVPTXISD::Suld2DArrayI32Trap:
3071 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003072 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003073 case NVPTXISD::Suld2DArrayI64Trap:
3074 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003075 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003076 case NVPTXISD::Suld2DArrayV2I8Trap:
3077 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003078 break;
3079 case NVPTXISD::Suld2DArrayV2I16Trap:
3080 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003081 break;
3082 case NVPTXISD::Suld2DArrayV2I32Trap:
3083 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003084 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003085 case NVPTXISD::Suld2DArrayV2I64Trap:
3086 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003087 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003088 case NVPTXISD::Suld2DArrayV4I8Trap:
3089 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003090 break;
3091 case NVPTXISD::Suld2DArrayV4I16Trap:
3092 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003093 break;
3094 case NVPTXISD::Suld2DArrayV4I32Trap:
3095 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003096 break;
3097 case NVPTXISD::Suld3DI8Trap:
3098 Opc = NVPTX::SULD_3D_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003099 break;
3100 case NVPTXISD::Suld3DI16Trap:
3101 Opc = NVPTX::SULD_3D_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003102 break;
3103 case NVPTXISD::Suld3DI32Trap:
3104 Opc = NVPTX::SULD_3D_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003105 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003106 case NVPTXISD::Suld3DI64Trap:
3107 Opc = NVPTX::SULD_3D_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003108 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003109 case NVPTXISD::Suld3DV2I8Trap:
3110 Opc = NVPTX::SULD_3D_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003111 break;
3112 case NVPTXISD::Suld3DV2I16Trap:
3113 Opc = NVPTX::SULD_3D_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003114 break;
3115 case NVPTXISD::Suld3DV2I32Trap:
3116 Opc = NVPTX::SULD_3D_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003117 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003118 case NVPTXISD::Suld3DV2I64Trap:
3119 Opc = NVPTX::SULD_3D_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003120 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003121 case NVPTXISD::Suld3DV4I8Trap:
3122 Opc = NVPTX::SULD_3D_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003123 break;
3124 case NVPTXISD::Suld3DV4I16Trap:
3125 Opc = NVPTX::SULD_3D_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003126 break;
3127 case NVPTXISD::Suld3DV4I32Trap:
3128 Opc = NVPTX::SULD_3D_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003129 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003130 case NVPTXISD::Suld1DI8Zero:
3131 Opc = NVPTX::SULD_1D_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003132 break;
3133 case NVPTXISD::Suld1DI16Zero:
3134 Opc = NVPTX::SULD_1D_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003135 break;
3136 case NVPTXISD::Suld1DI32Zero:
3137 Opc = NVPTX::SULD_1D_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003138 break;
3139 case NVPTXISD::Suld1DI64Zero:
3140 Opc = NVPTX::SULD_1D_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003141 break;
3142 case NVPTXISD::Suld1DV2I8Zero:
3143 Opc = NVPTX::SULD_1D_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003144 break;
3145 case NVPTXISD::Suld1DV2I16Zero:
3146 Opc = NVPTX::SULD_1D_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003147 break;
3148 case NVPTXISD::Suld1DV2I32Zero:
3149 Opc = NVPTX::SULD_1D_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003150 break;
3151 case NVPTXISD::Suld1DV2I64Zero:
3152 Opc = NVPTX::SULD_1D_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003153 break;
3154 case NVPTXISD::Suld1DV4I8Zero:
3155 Opc = NVPTX::SULD_1D_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003156 break;
3157 case NVPTXISD::Suld1DV4I16Zero:
3158 Opc = NVPTX::SULD_1D_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003159 break;
3160 case NVPTXISD::Suld1DV4I32Zero:
3161 Opc = NVPTX::SULD_1D_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003162 break;
3163 case NVPTXISD::Suld1DArrayI8Zero:
3164 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003165 break;
3166 case NVPTXISD::Suld1DArrayI16Zero:
3167 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003168 break;
3169 case NVPTXISD::Suld1DArrayI32Zero:
3170 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003171 break;
3172 case NVPTXISD::Suld1DArrayI64Zero:
3173 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003174 break;
3175 case NVPTXISD::Suld1DArrayV2I8Zero:
3176 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003177 break;
3178 case NVPTXISD::Suld1DArrayV2I16Zero:
3179 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003180 break;
3181 case NVPTXISD::Suld1DArrayV2I32Zero:
3182 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003183 break;
3184 case NVPTXISD::Suld1DArrayV2I64Zero:
3185 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003186 break;
3187 case NVPTXISD::Suld1DArrayV4I8Zero:
3188 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003189 break;
3190 case NVPTXISD::Suld1DArrayV4I16Zero:
3191 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003192 break;
3193 case NVPTXISD::Suld1DArrayV4I32Zero:
3194 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003195 break;
3196 case NVPTXISD::Suld2DI8Zero:
3197 Opc = NVPTX::SULD_2D_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003198 break;
3199 case NVPTXISD::Suld2DI16Zero:
3200 Opc = NVPTX::SULD_2D_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003201 break;
3202 case NVPTXISD::Suld2DI32Zero:
3203 Opc = NVPTX::SULD_2D_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003204 break;
3205 case NVPTXISD::Suld2DI64Zero:
3206 Opc = NVPTX::SULD_2D_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003207 break;
3208 case NVPTXISD::Suld2DV2I8Zero:
3209 Opc = NVPTX::SULD_2D_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003210 break;
3211 case NVPTXISD::Suld2DV2I16Zero:
3212 Opc = NVPTX::SULD_2D_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003213 break;
3214 case NVPTXISD::Suld2DV2I32Zero:
3215 Opc = NVPTX::SULD_2D_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003216 break;
3217 case NVPTXISD::Suld2DV2I64Zero:
3218 Opc = NVPTX::SULD_2D_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003219 break;
3220 case NVPTXISD::Suld2DV4I8Zero:
3221 Opc = NVPTX::SULD_2D_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003222 break;
3223 case NVPTXISD::Suld2DV4I16Zero:
3224 Opc = NVPTX::SULD_2D_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003225 break;
3226 case NVPTXISD::Suld2DV4I32Zero:
3227 Opc = NVPTX::SULD_2D_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003228 break;
3229 case NVPTXISD::Suld2DArrayI8Zero:
3230 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003231 break;
3232 case NVPTXISD::Suld2DArrayI16Zero:
3233 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003234 break;
3235 case NVPTXISD::Suld2DArrayI32Zero:
3236 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003237 break;
3238 case NVPTXISD::Suld2DArrayI64Zero:
3239 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003240 break;
3241 case NVPTXISD::Suld2DArrayV2I8Zero:
3242 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003243 break;
3244 case NVPTXISD::Suld2DArrayV2I16Zero:
3245 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003246 break;
3247 case NVPTXISD::Suld2DArrayV2I32Zero:
3248 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003249 break;
3250 case NVPTXISD::Suld2DArrayV2I64Zero:
3251 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003252 break;
3253 case NVPTXISD::Suld2DArrayV4I8Zero:
3254 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003255 break;
3256 case NVPTXISD::Suld2DArrayV4I16Zero:
3257 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003258 break;
3259 case NVPTXISD::Suld2DArrayV4I32Zero:
3260 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003261 break;
3262 case NVPTXISD::Suld3DI8Zero:
3263 Opc = NVPTX::SULD_3D_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003264 break;
3265 case NVPTXISD::Suld3DI16Zero:
3266 Opc = NVPTX::SULD_3D_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003267 break;
3268 case NVPTXISD::Suld3DI32Zero:
3269 Opc = NVPTX::SULD_3D_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003270 break;
3271 case NVPTXISD::Suld3DI64Zero:
3272 Opc = NVPTX::SULD_3D_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003273 break;
3274 case NVPTXISD::Suld3DV2I8Zero:
3275 Opc = NVPTX::SULD_3D_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003276 break;
3277 case NVPTXISD::Suld3DV2I16Zero:
3278 Opc = NVPTX::SULD_3D_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003279 break;
3280 case NVPTXISD::Suld3DV2I32Zero:
3281 Opc = NVPTX::SULD_3D_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003282 break;
3283 case NVPTXISD::Suld3DV2I64Zero:
3284 Opc = NVPTX::SULD_3D_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003285 break;
3286 case NVPTXISD::Suld3DV4I8Zero:
3287 Opc = NVPTX::SULD_3D_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003288 break;
3289 case NVPTXISD::Suld3DV4I16Zero:
3290 Opc = NVPTX::SULD_3D_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003291 break;
3292 case NVPTXISD::Suld3DV4I32Zero:
3293 Opc = NVPTX::SULD_3D_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003294 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003295 }
Benjamin Kramer806ae442017-08-20 17:30:32 +00003296
3297 // Copy over operands
3298 SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
3299 Ops.push_back(N->getOperand(0)); // Move chain to the back.
3300
Justin Bogner8d83fb62016-05-13 21:12:53 +00003301 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3302 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003303}
3304
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003305
/// tryBFE - Look for instruction sequences that can be made more efficient
/// by using the 'bfe' (bit-field extract) PTX instruction.
Justin Bogner8d83fb62016-05-13 21:12:53 +00003308bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003309 SDLoc DL(N);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003310 SDValue LHS = N->getOperand(0);
3311 SDValue RHS = N->getOperand(1);
3312 SDValue Len;
3313 SDValue Start;
3314 SDValue Val;
3315 bool IsSigned = false;
3316
3317 if (N->getOpcode() == ISD::AND) {
3318 // Canonicalize the operands
3319 // We want 'and %val, %mask'
3320 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3321 std::swap(LHS, RHS);
3322 }
3323
3324 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3325 if (!Mask) {
3326 // We need a constant mask on the RHS of the AND
Justin Bogner8d83fb62016-05-13 21:12:53 +00003327 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003328 }
3329
3330 // Extract the mask bits
3331 uint64_t MaskVal = Mask->getZExtValue();
3332 if (!isMask_64(MaskVal)) {
3333 // We *could* handle shifted masks here, but doing so would require an
3334 // 'and' operation to fix up the low-order bits so we would trade
3335 // shr+and for bfe+and, which has the same throughput
Justin Bogner8d83fb62016-05-13 21:12:53 +00003336 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003337 }
3338
3339 // How many bits are in our mask?
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00003340 uint64_t NumBits = countTrailingOnes(MaskVal);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003341 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003342
3343 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3344 // We have a 'srl/and' pair, extract the effective start bit and length
3345 Val = LHS.getNode()->getOperand(0);
3346 Start = LHS.getNode()->getOperand(1);
3347 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3348 if (StartConst) {
3349 uint64_t StartVal = StartConst->getZExtValue();
3350 // How many "good" bits do we have left? "good" is defined here as bits
3351 // that exist in the original value, not shifted in.
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00003352 uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003353 if (NumBits > GoodBits) {
3354 // Do not handle the case where bits have been shifted in. In theory
3355 // we could handle this, but the cost is likely higher than just
3356 // emitting the srl/and pair.
Justin Bogner8d83fb62016-05-13 21:12:53 +00003357 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003358 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003359 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003360 } else {
3361 // Do not handle the case where the shift amount (can be zero if no srl
3362 // was found) is not constant. We could handle this case, but it would
3363 // require run-time logic that would be more expensive than just
3364 // emitting the srl/and pair.
Justin Bogner8d83fb62016-05-13 21:12:53 +00003365 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003366 }
3367 } else {
3368 // Do not handle the case where the LHS of the and is not a shift. While
3369 // it would be trivial to handle this case, it would just transform
3370 // 'and' -> 'bfe', but 'and' has higher-throughput.
Justin Bogner8d83fb62016-05-13 21:12:53 +00003371 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003372 }
3373 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3374 if (LHS->getOpcode() == ISD::AND) {
3375 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3376 if (!ShiftCnst) {
3377 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003378 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003379 }
3380
3381 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3382
3383 SDValue AndLHS = LHS->getOperand(0);
3384 SDValue AndRHS = LHS->getOperand(1);
3385
3386 // Canonicalize the AND to have the mask on the RHS
3387 if (isa<ConstantSDNode>(AndLHS)) {
3388 std::swap(AndLHS, AndRHS);
3389 }
3390
3391 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3392 if (!MaskCnst) {
3393 // Mask must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003394 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003395 }
3396
3397 uint64_t MaskVal = MaskCnst->getZExtValue();
3398 uint64_t NumZeros;
3399 uint64_t NumBits;
3400 if (isMask_64(MaskVal)) {
3401 NumZeros = 0;
3402 // The number of bits in the result bitfield will be the number of
3403 // trailing ones (the AND) minus the number of bits we shift off
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00003404 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003405 } else if (isShiftedMask_64(MaskVal)) {
3406 NumZeros = countTrailingZeros(MaskVal);
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00003407 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003408 // The number of bits in the result bitfield will be the number of
3409 // trailing zeros plus the number of set bits in the mask minus the
3410 // number of bits we shift off
3411 NumBits = NumZeros + NumOnes - ShiftAmt;
3412 } else {
3413 // This is not a mask we can handle
Justin Bogner8d83fb62016-05-13 21:12:53 +00003414 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003415 }
3416
3417 if (ShiftAmt < NumZeros) {
3418 // Handling this case would require extra logic that would make this
3419 // transformation non-profitable
Justin Bogner8d83fb62016-05-13 21:12:53 +00003420 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003421 }
3422
3423 Val = AndLHS;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003424 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3425 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003426 } else if (LHS->getOpcode() == ISD::SHL) {
3427 // Here, we have a pattern like:
3428 //
3429 // (sra (shl val, NN), MM)
3430 // or
3431 // (srl (shl val, NN), MM)
3432 //
3433 // If MM >= NN, we can efficiently optimize this with bfe
3434 Val = LHS->getOperand(0);
3435
3436 SDValue ShlRHS = LHS->getOperand(1);
3437 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3438 if (!ShlCnst) {
3439 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003440 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003441 }
3442 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3443
3444 SDValue ShrRHS = RHS;
3445 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3446 if (!ShrCnst) {
3447 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003448 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003449 }
3450 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3451
3452 // To avoid extra codegen and be profitable, we need Outer >= Inner
3453 if (OuterShiftAmt < InnerShiftAmt) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00003454 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003455 }
3456
3457 // If the outer shift is more than the type size, we have no bitfield to
3458 // extract (since we also check that the inner shift is <= the outer shift
3459 // then this also implies that the inner shift is < the type size)
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00003460 if (OuterShiftAmt >= Val.getValueSizeInBits()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00003461 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003462 }
3463
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00003464 Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3465 MVT::i32);
3466 Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3467 DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003468
3469 if (N->getOpcode() == ISD::SRA) {
3470 // If we have a arithmetic right shift, we need to use the signed bfe
3471 // variant
3472 IsSigned = true;
3473 }
3474 } else {
3475 // No can do...
Justin Bogner8d83fb62016-05-13 21:12:53 +00003476 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003477 }
3478 } else {
3479 // No can do...
Justin Bogner8d83fb62016-05-13 21:12:53 +00003480 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003481 }
3482
3483
3484 unsigned Opc;
3485 // For the BFE operations we form here from "and" and "srl", always use the
3486 // unsigned variants.
3487 if (Val.getValueType() == MVT::i32) {
3488 if (IsSigned) {
3489 Opc = NVPTX::BFE_S32rii;
3490 } else {
3491 Opc = NVPTX::BFE_U32rii;
3492 }
3493 } else if (Val.getValueType() == MVT::i64) {
3494 if (IsSigned) {
3495 Opc = NVPTX::BFE_S64rii;
3496 } else {
3497 Opc = NVPTX::BFE_U64rii;
3498 }
3499 } else {
3500 // We cannot handle this type
Justin Bogner8d83fb62016-05-13 21:12:53 +00003501 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003502 }
3503
3504 SDValue Ops[] = {
3505 Val, Start, Len
3506 };
3507
Justin Bogner8d83fb62016-05-13 21:12:53 +00003508 ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3509 return true;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003510}
3511
Justin Holewinskiae556d32012-05-04 20:18:50 +00003512// SelectDirectAddr - Match a direct address for DAG.
3513// A direct address could be a globaladdress or externalsymbol.
3514bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3515 // Return true if TGA or ES.
Justin Holewinski0497ab12013-03-30 14:29:21 +00003516 if (N.getOpcode() == ISD::TargetGlobalAddress ||
3517 N.getOpcode() == ISD::TargetExternalSymbol) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003518 Address = N;
3519 return true;
3520 }
3521 if (N.getOpcode() == NVPTXISD::Wrapper) {
3522 Address = N.getOperand(0);
3523 return true;
3524 }
Artem Belevichb2e76a52016-07-20 18:39:47 +00003525 // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3526 if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3527 if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3528 CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
3529 CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
3530 return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003531 }
3532 return false;
3533}
3534
3535// symbol+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00003536bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3537 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003538 if (Addr.getOpcode() == ISD::ADD) {
3539 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00003540 SDValue base = Addr.getOperand(0);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003541 if (SelectDirectAddr(base, Base)) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003542 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3543 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003544 return true;
3545 }
3546 }
3547 }
3548 return false;
3549}
3550
3551// symbol+offset
3552bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3553 SDValue &Base, SDValue &Offset) {
3554 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3555}
3556
3557// symbol+offset
3558bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3559 SDValue &Base, SDValue &Offset) {
3560 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3561}
3562
3563// register+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00003564bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3565 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003566 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3567 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003568 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003569 return true;
3570 }
3571 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3572 Addr.getOpcode() == ISD::TargetGlobalAddress)
Justin Holewinski0497ab12013-03-30 14:29:21 +00003573 return false; // direct calls.
Justin Holewinskiae556d32012-05-04 20:18:50 +00003574
3575 if (Addr.getOpcode() == ISD::ADD) {
3576 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3577 return false;
3578 }
3579 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3580 if (FrameIndexSDNode *FIN =
Justin Holewinski0497ab12013-03-30 14:29:21 +00003581 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
Justin Holewinskiae556d32012-05-04 20:18:50 +00003582 // Constant offset from frame ref.
3583 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3584 else
3585 Base = Addr.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003586 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3587 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003588 return true;
3589 }
3590 }
3591 return false;
3592}
3593
3594// register+offset
3595bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3596 SDValue &Base, SDValue &Offset) {
3597 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3598}
3599
3600// register+offset
3601bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3602 SDValue &Base, SDValue &Offset) {
3603 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3604}
3605
Artem Belevich7b14e7f2018-03-15 21:40:56 +00003606// symbol
3607bool NVPTXDAGToDAGISel::SelectADDRvar(SDNode *OpNode, SDValue Addr,
3608 SDValue &Value) {
3609 return SelectDirectAddr(Addr, Value);
3610}
3611
Justin Holewinskiae556d32012-05-04 20:18:50 +00003612bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3613 unsigned int spN) const {
Craig Topper062a2ba2014-04-25 05:30:21 +00003614 const Value *Src = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00003615 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +00003616 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3617 return true;
3618 Src = mN->getMemOperand()->getValue();
Justin Holewinskiae556d32012-05-04 20:18:50 +00003619 }
3620 if (!Src)
3621 return false;
Craig Toppere3dcce92015-08-01 22:20:21 +00003622 if (auto *PT = dyn_cast<PointerType>(Src->getType()))
Justin Holewinskiae556d32012-05-04 20:18:50 +00003623 return (PT->getAddressSpace() == spN);
3624 return false;
3625}
3626
3627/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3628/// inline asm expressions.
Justin Holewinski0497ab12013-03-30 14:29:21 +00003629bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
Daniel Sanders60f1db02015-03-13 12:45:09 +00003630 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003631 SDValue Op0, Op1;
Daniel Sanders60f1db02015-03-13 12:45:09 +00003632 switch (ConstraintID) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00003633 default:
3634 return true;
Daniel Sanders60f1db02015-03-13 12:45:09 +00003635 case InlineAsm::Constraint_m: // memory
Justin Holewinskiae556d32012-05-04 20:18:50 +00003636 if (SelectDirectAddr(Op, Op0)) {
3637 OutOps.push_back(Op0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003638 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
Justin Holewinskiae556d32012-05-04 20:18:50 +00003639 return false;
3640 }
3641 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3642 OutOps.push_back(Op0);
3643 OutOps.push_back(Op1);
3644 return false;
3645 }
3646 break;
3647 }
3648 return true;
3649}
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00003650
3651/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
3652/// conversion from \p SrcTy to \p DestTy.
3653unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
3654 bool IsSigned) {
3655 switch (SrcTy.SimpleTy) {
3656 default:
3657 llvm_unreachable("Unhandled source type");
3658 case MVT::i8:
3659 switch (DestTy.SimpleTy) {
3660 default:
3661 llvm_unreachable("Unhandled dest type");
3662 case MVT::i16:
3663 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3664 case MVT::i32:
3665 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3666 case MVT::i64:
3667 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3668 }
3669 case MVT::i16:
3670 switch (DestTy.SimpleTy) {
3671 default:
3672 llvm_unreachable("Unhandled dest type");
3673 case MVT::i8:
3674 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3675 case MVT::i32:
3676 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3677 case MVT::i64:
3678 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3679 }
3680 case MVT::i32:
3681 switch (DestTy.SimpleTy) {
3682 default:
3683 llvm_unreachable("Unhandled dest type");
3684 case MVT::i8:
3685 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3686 case MVT::i16:
3687 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3688 case MVT::i64:
3689 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3690 }
3691 case MVT::i64:
3692 switch (DestTy.SimpleTy) {
3693 default:
3694 llvm_unreachable("Unhandled dest type");
3695 case MVT::i8:
3696 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3697 case MVT::i16:
3698 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3699 case MVT::i32:
3700 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3701 }
3702 }
3703}