blob: 7d507100d3e27a1843c0c42f008521dbf61f1c47 [file] [log] [blame]
//===-- NVPTXISelDAGToDAG.cpp - A dag to dag inst selector for NVPTX ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the NVPTX target.
//
//===----------------------------------------------------------------------===//

13
Justin Holewinskiae556d32012-05-04 20:18:50 +000014#include "NVPTXISelDAGToDAG.h"
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +000015#include "NVPTXUtilities.h"
Jingyue Wu48a9bdc2015-07-20 21:28:54 +000016#include "llvm/Analysis/ValueTracking.h"
Chandler Carruth9fb823b2013-01-02 11:36:10 +000017#include "llvm/IR/GlobalValue.h"
18#include "llvm/IR/Instructions.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000019#include "llvm/Support/CommandLine.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000020#include "llvm/Support/Debug.h"
21#include "llvm/Support/ErrorHandling.h"
Chandler Carruthed0881b2012-12-03 16:50:05 +000022#include "llvm/Support/raw_ostream.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000023#include "llvm/Target/TargetIntrinsicInfo.h"
Justin Holewinskiae556d32012-05-04 20:18:50 +000024
Justin Holewinskiae556d32012-05-04 20:18:50 +000025using namespace llvm;
26
Chandler Carruth84e68b22014-04-22 02:41:26 +000027#define DEBUG_TYPE "nvptx-isel"
28
Justin Holewinskiae556d32012-05-04 20:18:50 +000029/// createNVPTXISelDag - This pass converts a legalized DAG into a
30/// NVPTX-specific DAG, ready for instruction scheduling.
31FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
32 llvm::CodeGenOpt::Level OptLevel) {
33 return new NVPTXDAGToDAGISel(TM, OptLevel);
34}
35
Justin Holewinskiae556d32012-05-04 20:18:50 +000036NVPTXDAGToDAGISel::NVPTXDAGToDAGISel(NVPTXTargetMachine &tm,
37 CodeGenOpt::Level OptLevel)
Eric Christopher02389e32015-02-19 00:08:27 +000038 : SelectionDAGISel(tm, OptLevel), TM(tm) {
Justin Holewinskiae556d32012-05-04 20:18:50 +000039 doMulWide = (OptLevel > 0);
Justin Holewinskicd069e62013-07-22 12:18:04 +000040}
Justin Holewinskiae556d32012-05-04 20:18:50 +000041
Eric Christopher147bba22015-01-30 01:40:59 +000042bool NVPTXDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
Justin Lebar077f8fb2017-01-21 01:00:14 +000043 Subtarget = &static_cast<const NVPTXSubtarget &>(MF.getSubtarget());
44 return SelectionDAGISel::runOnMachineFunction(MF);
Eric Christopher147bba22015-01-30 01:40:59 +000045}
46
Justin Holewinskicd069e62013-07-22 12:18:04 +000047int NVPTXDAGToDAGISel::getDivF32Level() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000048 return Subtarget->getTargetLowering()->getDivF32Level();
Justin Holewinskicd069e62013-07-22 12:18:04 +000049}
Justin Holewinskiae556d32012-05-04 20:18:50 +000050
Justin Holewinskicd069e62013-07-22 12:18:04 +000051bool NVPTXDAGToDAGISel::usePrecSqrtF32() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000052 return Subtarget->getTargetLowering()->usePrecSqrtF32();
Justin Holewinskicd069e62013-07-22 12:18:04 +000053}
54
55bool NVPTXDAGToDAGISel::useF32FTZ() const {
Justin Lebar077f8fb2017-01-21 01:00:14 +000056 return Subtarget->getTargetLowering()->useF32FTZ(*MF);
Justin Holewinskiae556d32012-05-04 20:18:50 +000057}
58
Justin Holewinski428cf0e2014-07-17 18:10:09 +000059bool NVPTXDAGToDAGISel::allowFMA() const {
Eric Christopher147bba22015-01-30 01:40:59 +000060 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
Justin Holewinski428cf0e2014-07-17 18:10:09 +000061 return TL->allowFMA(*MF, OptLevel);
62}
63
Artem Belevichd109f462017-01-13 18:48:13 +000064bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
65 const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
66 return TL->allowUnsafeFPMath(*MF);
67}
68
Justin Holewinskiae556d32012-05-04 20:18:50 +000069/// Select - Select instructions not customized! Used for
70/// expanded, promoted and normal instructions.
Justin Bogner8d83fb62016-05-13 21:12:53 +000071void NVPTXDAGToDAGISel::Select(SDNode *N) {
Justin Holewinskiae556d32012-05-04 20:18:50 +000072
Tim Northover31d093c2013-09-22 08:21:56 +000073 if (N->isMachineOpcode()) {
74 N->setNodeId(-1);
Justin Bogner8d83fb62016-05-13 21:12:53 +000075 return; // Already selected.
Tim Northover31d093c2013-09-22 08:21:56 +000076 }
Justin Holewinskiae556d32012-05-04 20:18:50 +000077
Justin Holewinskiae556d32012-05-04 20:18:50 +000078 switch (N->getOpcode()) {
79 case ISD::LOAD:
Justin Bogner8d83fb62016-05-13 21:12:53 +000080 if (tryLoad(N))
81 return;
Justin Holewinskiae556d32012-05-04 20:18:50 +000082 break;
83 case ISD::STORE:
Justin Bogner8d83fb62016-05-13 21:12:53 +000084 if (tryStore(N))
85 return;
Justin Holewinskiae556d32012-05-04 20:18:50 +000086 break;
Artem Belevich620db1f2017-02-23 22:38:24 +000087 case ISD::EXTRACT_VECTOR_ELT:
88 if (tryEXTRACT_VECTOR_ELEMENT(N))
89 return;
90 break;
91 case NVPTXISD::SETP_F16X2:
92 SelectSETP_F16X2(N);
93 return;
94
Justin Holewinskibe8dc642013-02-12 14:18:49 +000095 case NVPTXISD::LoadV2:
96 case NVPTXISD::LoadV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +000097 if (tryLoadVector(N))
98 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +000099 break;
100 case NVPTXISD::LDGV2:
101 case NVPTXISD::LDGV4:
102 case NVPTXISD::LDUV2:
103 case NVPTXISD::LDUV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000104 if (tryLDGLDU(N))
105 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000106 break;
107 case NVPTXISD::StoreV2:
108 case NVPTXISD::StoreV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000109 if (tryStoreVector(N))
110 return;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000111 break;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000112 case NVPTXISD::LoadParam:
113 case NVPTXISD::LoadParamV2:
114 case NVPTXISD::LoadParamV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000115 if (tryLoadParam(N))
116 return;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000117 break;
118 case NVPTXISD::StoreRetval:
119 case NVPTXISD::StoreRetvalV2:
120 case NVPTXISD::StoreRetvalV4:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000121 if (tryStoreRetval(N))
122 return;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000123 break;
124 case NVPTXISD::StoreParam:
125 case NVPTXISD::StoreParamV2:
126 case NVPTXISD::StoreParamV4:
127 case NVPTXISD::StoreParamS32:
128 case NVPTXISD::StoreParamU32:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000129 if (tryStoreParam(N))
130 return;
Justin Holewinskif8f70912013-06-28 17:57:59 +0000131 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000132 case ISD::INTRINSIC_WO_CHAIN:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000133 if (tryIntrinsicNoChain(N))
134 return;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000135 break;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000136 case ISD::INTRINSIC_W_CHAIN:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000137 if (tryIntrinsicChain(N))
138 return;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000139 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000140 case NVPTXISD::Tex1DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000141 case NVPTXISD::Tex1DFloatFloat:
142 case NVPTXISD::Tex1DFloatFloatLevel:
143 case NVPTXISD::Tex1DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000144 case NVPTXISD::Tex1DS32S32:
145 case NVPTXISD::Tex1DS32Float:
146 case NVPTXISD::Tex1DS32FloatLevel:
147 case NVPTXISD::Tex1DS32FloatGrad:
148 case NVPTXISD::Tex1DU32S32:
149 case NVPTXISD::Tex1DU32Float:
150 case NVPTXISD::Tex1DU32FloatLevel:
151 case NVPTXISD::Tex1DU32FloatGrad:
152 case NVPTXISD::Tex1DArrayFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000153 case NVPTXISD::Tex1DArrayFloatFloat:
154 case NVPTXISD::Tex1DArrayFloatFloatLevel:
155 case NVPTXISD::Tex1DArrayFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000156 case NVPTXISD::Tex1DArrayS32S32:
157 case NVPTXISD::Tex1DArrayS32Float:
158 case NVPTXISD::Tex1DArrayS32FloatLevel:
159 case NVPTXISD::Tex1DArrayS32FloatGrad:
160 case NVPTXISD::Tex1DArrayU32S32:
161 case NVPTXISD::Tex1DArrayU32Float:
162 case NVPTXISD::Tex1DArrayU32FloatLevel:
163 case NVPTXISD::Tex1DArrayU32FloatGrad:
164 case NVPTXISD::Tex2DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000165 case NVPTXISD::Tex2DFloatFloat:
166 case NVPTXISD::Tex2DFloatFloatLevel:
167 case NVPTXISD::Tex2DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000168 case NVPTXISD::Tex2DS32S32:
169 case NVPTXISD::Tex2DS32Float:
170 case NVPTXISD::Tex2DS32FloatLevel:
171 case NVPTXISD::Tex2DS32FloatGrad:
172 case NVPTXISD::Tex2DU32S32:
173 case NVPTXISD::Tex2DU32Float:
174 case NVPTXISD::Tex2DU32FloatLevel:
175 case NVPTXISD::Tex2DU32FloatGrad:
176 case NVPTXISD::Tex2DArrayFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000177 case NVPTXISD::Tex2DArrayFloatFloat:
178 case NVPTXISD::Tex2DArrayFloatFloatLevel:
179 case NVPTXISD::Tex2DArrayFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000180 case NVPTXISD::Tex2DArrayS32S32:
181 case NVPTXISD::Tex2DArrayS32Float:
182 case NVPTXISD::Tex2DArrayS32FloatLevel:
183 case NVPTXISD::Tex2DArrayS32FloatGrad:
184 case NVPTXISD::Tex2DArrayU32S32:
185 case NVPTXISD::Tex2DArrayU32Float:
186 case NVPTXISD::Tex2DArrayU32FloatLevel:
187 case NVPTXISD::Tex2DArrayU32FloatGrad:
188 case NVPTXISD::Tex3DFloatS32:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000189 case NVPTXISD::Tex3DFloatFloat:
190 case NVPTXISD::Tex3DFloatFloatLevel:
191 case NVPTXISD::Tex3DFloatFloatGrad:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000192 case NVPTXISD::Tex3DS32S32:
193 case NVPTXISD::Tex3DS32Float:
194 case NVPTXISD::Tex3DS32FloatLevel:
195 case NVPTXISD::Tex3DS32FloatGrad:
196 case NVPTXISD::Tex3DU32S32:
197 case NVPTXISD::Tex3DU32Float:
198 case NVPTXISD::Tex3DU32FloatLevel:
199 case NVPTXISD::Tex3DU32FloatGrad:
200 case NVPTXISD::TexCubeFloatFloat:
201 case NVPTXISD::TexCubeFloatFloatLevel:
202 case NVPTXISD::TexCubeS32Float:
203 case NVPTXISD::TexCubeS32FloatLevel:
204 case NVPTXISD::TexCubeU32Float:
205 case NVPTXISD::TexCubeU32FloatLevel:
206 case NVPTXISD::TexCubeArrayFloatFloat:
207 case NVPTXISD::TexCubeArrayFloatFloatLevel:
208 case NVPTXISD::TexCubeArrayS32Float:
209 case NVPTXISD::TexCubeArrayS32FloatLevel:
210 case NVPTXISD::TexCubeArrayU32Float:
211 case NVPTXISD::TexCubeArrayU32FloatLevel:
212 case NVPTXISD::Tld4R2DFloatFloat:
213 case NVPTXISD::Tld4G2DFloatFloat:
214 case NVPTXISD::Tld4B2DFloatFloat:
215 case NVPTXISD::Tld4A2DFloatFloat:
216 case NVPTXISD::Tld4R2DS64Float:
217 case NVPTXISD::Tld4G2DS64Float:
218 case NVPTXISD::Tld4B2DS64Float:
219 case NVPTXISD::Tld4A2DS64Float:
220 case NVPTXISD::Tld4R2DU64Float:
221 case NVPTXISD::Tld4G2DU64Float:
222 case NVPTXISD::Tld4B2DU64Float:
223 case NVPTXISD::Tld4A2DU64Float:
224 case NVPTXISD::TexUnified1DFloatS32:
225 case NVPTXISD::TexUnified1DFloatFloat:
226 case NVPTXISD::TexUnified1DFloatFloatLevel:
227 case NVPTXISD::TexUnified1DFloatFloatGrad:
228 case NVPTXISD::TexUnified1DS32S32:
229 case NVPTXISD::TexUnified1DS32Float:
230 case NVPTXISD::TexUnified1DS32FloatLevel:
231 case NVPTXISD::TexUnified1DS32FloatGrad:
232 case NVPTXISD::TexUnified1DU32S32:
233 case NVPTXISD::TexUnified1DU32Float:
234 case NVPTXISD::TexUnified1DU32FloatLevel:
235 case NVPTXISD::TexUnified1DU32FloatGrad:
236 case NVPTXISD::TexUnified1DArrayFloatS32:
237 case NVPTXISD::TexUnified1DArrayFloatFloat:
238 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
239 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
240 case NVPTXISD::TexUnified1DArrayS32S32:
241 case NVPTXISD::TexUnified1DArrayS32Float:
242 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
243 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
244 case NVPTXISD::TexUnified1DArrayU32S32:
245 case NVPTXISD::TexUnified1DArrayU32Float:
246 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
247 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
248 case NVPTXISD::TexUnified2DFloatS32:
249 case NVPTXISD::TexUnified2DFloatFloat:
250 case NVPTXISD::TexUnified2DFloatFloatLevel:
251 case NVPTXISD::TexUnified2DFloatFloatGrad:
252 case NVPTXISD::TexUnified2DS32S32:
253 case NVPTXISD::TexUnified2DS32Float:
254 case NVPTXISD::TexUnified2DS32FloatLevel:
255 case NVPTXISD::TexUnified2DS32FloatGrad:
256 case NVPTXISD::TexUnified2DU32S32:
257 case NVPTXISD::TexUnified2DU32Float:
258 case NVPTXISD::TexUnified2DU32FloatLevel:
259 case NVPTXISD::TexUnified2DU32FloatGrad:
260 case NVPTXISD::TexUnified2DArrayFloatS32:
261 case NVPTXISD::TexUnified2DArrayFloatFloat:
262 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
263 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
264 case NVPTXISD::TexUnified2DArrayS32S32:
265 case NVPTXISD::TexUnified2DArrayS32Float:
266 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
267 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
268 case NVPTXISD::TexUnified2DArrayU32S32:
269 case NVPTXISD::TexUnified2DArrayU32Float:
270 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
271 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
272 case NVPTXISD::TexUnified3DFloatS32:
273 case NVPTXISD::TexUnified3DFloatFloat:
274 case NVPTXISD::TexUnified3DFloatFloatLevel:
275 case NVPTXISD::TexUnified3DFloatFloatGrad:
276 case NVPTXISD::TexUnified3DS32S32:
277 case NVPTXISD::TexUnified3DS32Float:
278 case NVPTXISD::TexUnified3DS32FloatLevel:
279 case NVPTXISD::TexUnified3DS32FloatGrad:
280 case NVPTXISD::TexUnified3DU32S32:
281 case NVPTXISD::TexUnified3DU32Float:
282 case NVPTXISD::TexUnified3DU32FloatLevel:
283 case NVPTXISD::TexUnified3DU32FloatGrad:
284 case NVPTXISD::TexUnifiedCubeFloatFloat:
285 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
286 case NVPTXISD::TexUnifiedCubeS32Float:
287 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
288 case NVPTXISD::TexUnifiedCubeU32Float:
289 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
290 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
291 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
292 case NVPTXISD::TexUnifiedCubeArrayS32Float:
293 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
294 case NVPTXISD::TexUnifiedCubeArrayU32Float:
295 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
296 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
297 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
298 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
299 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
300 case NVPTXISD::Tld4UnifiedR2DS64Float:
301 case NVPTXISD::Tld4UnifiedG2DS64Float:
302 case NVPTXISD::Tld4UnifiedB2DS64Float:
303 case NVPTXISD::Tld4UnifiedA2DS64Float:
304 case NVPTXISD::Tld4UnifiedR2DU64Float:
305 case NVPTXISD::Tld4UnifiedG2DU64Float:
306 case NVPTXISD::Tld4UnifiedB2DU64Float:
307 case NVPTXISD::Tld4UnifiedA2DU64Float:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000308 if (tryTextureIntrinsic(N))
309 return;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000310 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000311 case NVPTXISD::Suld1DI8Clamp:
312 case NVPTXISD::Suld1DI16Clamp:
313 case NVPTXISD::Suld1DI32Clamp:
314 case NVPTXISD::Suld1DI64Clamp:
315 case NVPTXISD::Suld1DV2I8Clamp:
316 case NVPTXISD::Suld1DV2I16Clamp:
317 case NVPTXISD::Suld1DV2I32Clamp:
318 case NVPTXISD::Suld1DV2I64Clamp:
319 case NVPTXISD::Suld1DV4I8Clamp:
320 case NVPTXISD::Suld1DV4I16Clamp:
321 case NVPTXISD::Suld1DV4I32Clamp:
322 case NVPTXISD::Suld1DArrayI8Clamp:
323 case NVPTXISD::Suld1DArrayI16Clamp:
324 case NVPTXISD::Suld1DArrayI32Clamp:
325 case NVPTXISD::Suld1DArrayI64Clamp:
326 case NVPTXISD::Suld1DArrayV2I8Clamp:
327 case NVPTXISD::Suld1DArrayV2I16Clamp:
328 case NVPTXISD::Suld1DArrayV2I32Clamp:
329 case NVPTXISD::Suld1DArrayV2I64Clamp:
330 case NVPTXISD::Suld1DArrayV4I8Clamp:
331 case NVPTXISD::Suld1DArrayV4I16Clamp:
332 case NVPTXISD::Suld1DArrayV4I32Clamp:
333 case NVPTXISD::Suld2DI8Clamp:
334 case NVPTXISD::Suld2DI16Clamp:
335 case NVPTXISD::Suld2DI32Clamp:
336 case NVPTXISD::Suld2DI64Clamp:
337 case NVPTXISD::Suld2DV2I8Clamp:
338 case NVPTXISD::Suld2DV2I16Clamp:
339 case NVPTXISD::Suld2DV2I32Clamp:
340 case NVPTXISD::Suld2DV2I64Clamp:
341 case NVPTXISD::Suld2DV4I8Clamp:
342 case NVPTXISD::Suld2DV4I16Clamp:
343 case NVPTXISD::Suld2DV4I32Clamp:
344 case NVPTXISD::Suld2DArrayI8Clamp:
345 case NVPTXISD::Suld2DArrayI16Clamp:
346 case NVPTXISD::Suld2DArrayI32Clamp:
347 case NVPTXISD::Suld2DArrayI64Clamp:
348 case NVPTXISD::Suld2DArrayV2I8Clamp:
349 case NVPTXISD::Suld2DArrayV2I16Clamp:
350 case NVPTXISD::Suld2DArrayV2I32Clamp:
351 case NVPTXISD::Suld2DArrayV2I64Clamp:
352 case NVPTXISD::Suld2DArrayV4I8Clamp:
353 case NVPTXISD::Suld2DArrayV4I16Clamp:
354 case NVPTXISD::Suld2DArrayV4I32Clamp:
355 case NVPTXISD::Suld3DI8Clamp:
356 case NVPTXISD::Suld3DI16Clamp:
357 case NVPTXISD::Suld3DI32Clamp:
358 case NVPTXISD::Suld3DI64Clamp:
359 case NVPTXISD::Suld3DV2I8Clamp:
360 case NVPTXISD::Suld3DV2I16Clamp:
361 case NVPTXISD::Suld3DV2I32Clamp:
362 case NVPTXISD::Suld3DV2I64Clamp:
363 case NVPTXISD::Suld3DV4I8Clamp:
364 case NVPTXISD::Suld3DV4I16Clamp:
365 case NVPTXISD::Suld3DV4I32Clamp:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000366 case NVPTXISD::Suld1DI8Trap:
367 case NVPTXISD::Suld1DI16Trap:
368 case NVPTXISD::Suld1DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000369 case NVPTXISD::Suld1DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000370 case NVPTXISD::Suld1DV2I8Trap:
371 case NVPTXISD::Suld1DV2I16Trap:
372 case NVPTXISD::Suld1DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000373 case NVPTXISD::Suld1DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000374 case NVPTXISD::Suld1DV4I8Trap:
375 case NVPTXISD::Suld1DV4I16Trap:
376 case NVPTXISD::Suld1DV4I32Trap:
377 case NVPTXISD::Suld1DArrayI8Trap:
378 case NVPTXISD::Suld1DArrayI16Trap:
379 case NVPTXISD::Suld1DArrayI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000380 case NVPTXISD::Suld1DArrayI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000381 case NVPTXISD::Suld1DArrayV2I8Trap:
382 case NVPTXISD::Suld1DArrayV2I16Trap:
383 case NVPTXISD::Suld1DArrayV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000384 case NVPTXISD::Suld1DArrayV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000385 case NVPTXISD::Suld1DArrayV4I8Trap:
386 case NVPTXISD::Suld1DArrayV4I16Trap:
387 case NVPTXISD::Suld1DArrayV4I32Trap:
388 case NVPTXISD::Suld2DI8Trap:
389 case NVPTXISD::Suld2DI16Trap:
390 case NVPTXISD::Suld2DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000391 case NVPTXISD::Suld2DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000392 case NVPTXISD::Suld2DV2I8Trap:
393 case NVPTXISD::Suld2DV2I16Trap:
394 case NVPTXISD::Suld2DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000395 case NVPTXISD::Suld2DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000396 case NVPTXISD::Suld2DV4I8Trap:
397 case NVPTXISD::Suld2DV4I16Trap:
398 case NVPTXISD::Suld2DV4I32Trap:
399 case NVPTXISD::Suld2DArrayI8Trap:
400 case NVPTXISD::Suld2DArrayI16Trap:
401 case NVPTXISD::Suld2DArrayI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000402 case NVPTXISD::Suld2DArrayI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000403 case NVPTXISD::Suld2DArrayV2I8Trap:
404 case NVPTXISD::Suld2DArrayV2I16Trap:
405 case NVPTXISD::Suld2DArrayV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000406 case NVPTXISD::Suld2DArrayV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000407 case NVPTXISD::Suld2DArrayV4I8Trap:
408 case NVPTXISD::Suld2DArrayV4I16Trap:
409 case NVPTXISD::Suld2DArrayV4I32Trap:
410 case NVPTXISD::Suld3DI8Trap:
411 case NVPTXISD::Suld3DI16Trap:
412 case NVPTXISD::Suld3DI32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000413 case NVPTXISD::Suld3DI64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000414 case NVPTXISD::Suld3DV2I8Trap:
415 case NVPTXISD::Suld3DV2I16Trap:
416 case NVPTXISD::Suld3DV2I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000417 case NVPTXISD::Suld3DV2I64Trap:
Justin Holewinski30d56a72014-04-09 15:39:15 +0000418 case NVPTXISD::Suld3DV4I8Trap:
419 case NVPTXISD::Suld3DV4I16Trap:
420 case NVPTXISD::Suld3DV4I32Trap:
Justin Holewinski9a2350e2014-07-17 11:59:04 +0000421 case NVPTXISD::Suld1DI8Zero:
422 case NVPTXISD::Suld1DI16Zero:
423 case NVPTXISD::Suld1DI32Zero:
424 case NVPTXISD::Suld1DI64Zero:
425 case NVPTXISD::Suld1DV2I8Zero:
426 case NVPTXISD::Suld1DV2I16Zero:
427 case NVPTXISD::Suld1DV2I32Zero:
428 case NVPTXISD::Suld1DV2I64Zero:
429 case NVPTXISD::Suld1DV4I8Zero:
430 case NVPTXISD::Suld1DV4I16Zero:
431 case NVPTXISD::Suld1DV4I32Zero:
432 case NVPTXISD::Suld1DArrayI8Zero:
433 case NVPTXISD::Suld1DArrayI16Zero:
434 case NVPTXISD::Suld1DArrayI32Zero:
435 case NVPTXISD::Suld1DArrayI64Zero:
436 case NVPTXISD::Suld1DArrayV2I8Zero:
437 case NVPTXISD::Suld1DArrayV2I16Zero:
438 case NVPTXISD::Suld1DArrayV2I32Zero:
439 case NVPTXISD::Suld1DArrayV2I64Zero:
440 case NVPTXISD::Suld1DArrayV4I8Zero:
441 case NVPTXISD::Suld1DArrayV4I16Zero:
442 case NVPTXISD::Suld1DArrayV4I32Zero:
443 case NVPTXISD::Suld2DI8Zero:
444 case NVPTXISD::Suld2DI16Zero:
445 case NVPTXISD::Suld2DI32Zero:
446 case NVPTXISD::Suld2DI64Zero:
447 case NVPTXISD::Suld2DV2I8Zero:
448 case NVPTXISD::Suld2DV2I16Zero:
449 case NVPTXISD::Suld2DV2I32Zero:
450 case NVPTXISD::Suld2DV2I64Zero:
451 case NVPTXISD::Suld2DV4I8Zero:
452 case NVPTXISD::Suld2DV4I16Zero:
453 case NVPTXISD::Suld2DV4I32Zero:
454 case NVPTXISD::Suld2DArrayI8Zero:
455 case NVPTXISD::Suld2DArrayI16Zero:
456 case NVPTXISD::Suld2DArrayI32Zero:
457 case NVPTXISD::Suld2DArrayI64Zero:
458 case NVPTXISD::Suld2DArrayV2I8Zero:
459 case NVPTXISD::Suld2DArrayV2I16Zero:
460 case NVPTXISD::Suld2DArrayV2I32Zero:
461 case NVPTXISD::Suld2DArrayV2I64Zero:
462 case NVPTXISD::Suld2DArrayV4I8Zero:
463 case NVPTXISD::Suld2DArrayV4I16Zero:
464 case NVPTXISD::Suld2DArrayV4I32Zero:
465 case NVPTXISD::Suld3DI8Zero:
466 case NVPTXISD::Suld3DI16Zero:
467 case NVPTXISD::Suld3DI32Zero:
468 case NVPTXISD::Suld3DI64Zero:
469 case NVPTXISD::Suld3DV2I8Zero:
470 case NVPTXISD::Suld3DV2I16Zero:
471 case NVPTXISD::Suld3DV2I32Zero:
472 case NVPTXISD::Suld3DV2I64Zero:
473 case NVPTXISD::Suld3DV4I8Zero:
474 case NVPTXISD::Suld3DV4I16Zero:
475 case NVPTXISD::Suld3DV4I32Zero:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000476 if (trySurfaceIntrinsic(N))
477 return;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000478 break;
Justin Holewinskica7a4f12014-06-27 18:35:27 +0000479 case ISD::AND:
480 case ISD::SRA:
481 case ISD::SRL:
482 // Try to select BFE
Justin Bogner8d83fb62016-05-13 21:12:53 +0000483 if (tryBFE(N))
484 return;
Justin Holewinskica7a4f12014-06-27 18:35:27 +0000485 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000486 case ISD::ADDRSPACECAST:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000487 SelectAddrSpaceCast(N);
488 return;
Artem Belevich64dc9be2017-01-13 20:56:17 +0000489 case ISD::ConstantFP:
490 if (tryConstantFP16(N))
491 return;
492 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +0000493 default:
494 break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000495 }
Justin Bogner8d83fb62016-05-13 21:12:53 +0000496 SelectCode(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000497}
498
Justin Bogner8d83fb62016-05-13 21:12:53 +0000499bool NVPTXDAGToDAGISel::tryIntrinsicChain(SDNode *N) {
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000500 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
501 switch (IID) {
502 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000503 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000504 case Intrinsic::nvvm_ldg_global_f:
505 case Intrinsic::nvvm_ldg_global_i:
506 case Intrinsic::nvvm_ldg_global_p:
507 case Intrinsic::nvvm_ldu_global_f:
508 case Intrinsic::nvvm_ldu_global_i:
509 case Intrinsic::nvvm_ldu_global_p:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000510 return tryLDGLDU(N);
Justin Holewinskib926d9d2014-06-27 18:35:51 +0000511 }
512}
513
Artem Belevich64dc9be2017-01-13 20:56:17 +0000514// There's no way to specify FP16 immediates in .f16 ops, so we have to
515// load them into an .f16 register first.
516bool NVPTXDAGToDAGISel::tryConstantFP16(SDNode *N) {
517 if (N->getValueType(0) != MVT::f16)
518 return false;
519 SDValue Val = CurDAG->getTargetConstantFP(
520 cast<ConstantFPSDNode>(N)->getValueAPF(), SDLoc(N), MVT::f16);
521 SDNode *LoadConstF16 =
522 CurDAG->getMachineNode(NVPTX::LOAD_CONST_F16, SDLoc(N), MVT::f16, Val);
523 ReplaceNode(N, LoadConstF16);
524 return true;
525}
526
Artem Belevich620db1f2017-02-23 22:38:24 +0000527// Map ISD:CONDCODE value to appropriate CmpMode expected by
528// NVPTXInstPrinter::printCmpMode()
529static unsigned getPTXCmpMode(const CondCodeSDNode &CondCode, bool FTZ) {
530 using NVPTX::PTXCmpMode::CmpMode;
531 unsigned PTXCmpMode = [](ISD::CondCode CC) {
532 switch (CC) {
533 default:
534 llvm_unreachable("Unexpected condition code.");
535 case ISD::SETOEQ:
536 return CmpMode::EQ;
537 case ISD::SETOGT:
538 return CmpMode::GT;
539 case ISD::SETOGE:
540 return CmpMode::GE;
541 case ISD::SETOLT:
542 return CmpMode::LT;
543 case ISD::SETOLE:
544 return CmpMode::LE;
545 case ISD::SETONE:
546 return CmpMode::NE;
547 case ISD::SETO:
548 return CmpMode::NUM;
549 case ISD::SETUO:
550 return CmpMode::NotANumber;
551 case ISD::SETUEQ:
552 return CmpMode::EQU;
553 case ISD::SETUGT:
554 return CmpMode::GTU;
555 case ISD::SETUGE:
556 return CmpMode::GEU;
557 case ISD::SETULT:
558 return CmpMode::LTU;
559 case ISD::SETULE:
560 return CmpMode::LEU;
561 case ISD::SETUNE:
562 return CmpMode::NEU;
563 case ISD::SETEQ:
564 return CmpMode::EQ;
565 case ISD::SETGT:
566 return CmpMode::GT;
567 case ISD::SETGE:
568 return CmpMode::GE;
569 case ISD::SETLT:
570 return CmpMode::LT;
571 case ISD::SETLE:
572 return CmpMode::LE;
573 case ISD::SETNE:
574 return CmpMode::NE;
575 }
576 }(CondCode.get());
577
578 if (FTZ)
579 PTXCmpMode |= NVPTX::PTXCmpMode::FTZ_FLAG;
580
581 return PTXCmpMode;
582}
583
584bool NVPTXDAGToDAGISel::SelectSETP_F16X2(SDNode *N) {
585 unsigned PTXCmpMode =
586 getPTXCmpMode(*cast<CondCodeSDNode>(N->getOperand(2)), useF32FTZ());
587 SDLoc DL(N);
588 SDNode *SetP = CurDAG->getMachineNode(
589 NVPTX::SETP_f16x2rr, DL, MVT::i1, MVT::i1, N->getOperand(0),
590 N->getOperand(1), CurDAG->getTargetConstant(PTXCmpMode, DL, MVT::i32));
591 ReplaceNode(N, SetP);
592 return true;
593}
594
595// Find all instances of extract_vector_elt that use this v2f16 vector
596// and coalesce them into a scattering move instruction.
597bool NVPTXDAGToDAGISel::tryEXTRACT_VECTOR_ELEMENT(SDNode *N) {
598 SDValue Vector = N->getOperand(0);
599
600 // We only care about f16x2 as it's the only real vector type we
601 // need to deal with.
602 if (Vector.getSimpleValueType() != MVT::v2f16)
603 return false;
604
605 // Find and record all uses of this vector that extract element 0 or 1.
606 SmallVector<SDNode *, 4> E0, E1;
607 for (const auto &U : Vector.getNode()->uses()) {
608 if (U->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
609 continue;
610 if (U->getOperand(0) != Vector)
611 continue;
612 if (const ConstantSDNode *IdxConst =
613 dyn_cast<ConstantSDNode>(U->getOperand(1))) {
614 if (IdxConst->getZExtValue() == 0)
615 E0.push_back(U);
616 else if (IdxConst->getZExtValue() == 1)
617 E1.push_back(U);
618 else
619 llvm_unreachable("Invalid vector index.");
620 }
621 }
622
623 // There's no point scattering f16x2 if we only ever access one
624 // element of it.
625 if (E0.empty() || E1.empty())
626 return false;
627
628 unsigned Op = NVPTX::SplitF16x2;
629 // If the vector has been BITCAST'ed from i32, we can use original
630 // value directly and avoid register-to-register move.
631 SDValue Source = Vector;
632 if (Vector->getOpcode() == ISD::BITCAST) {
633 Op = NVPTX::SplitI32toF16x2;
634 Source = Vector->getOperand(0);
635 }
636 // Merge (f16 extractelt(V, 0), f16 extractelt(V,1))
637 // into f16,f16 SplitF16x2(V)
638 SDNode *ScatterOp =
639 CurDAG->getMachineNode(Op, SDLoc(N), MVT::f16, MVT::f16, Source);
640 for (auto *Node : E0)
641 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 0));
642 for (auto *Node : E1)
643 ReplaceUses(SDValue(Node, 0), SDValue(ScatterOp, 1));
644
645 return true;
646}
647
Eric Christopher9745b3a2015-01-30 01:41:01 +0000648static unsigned int getCodeAddrSpace(MemSDNode *N) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +0000649 const Value *Src = N->getMemOperand()->getValue();
Justin Holewinskib96d1392013-06-10 13:29:47 +0000650
Justin Holewinskiae556d32012-05-04 20:18:50 +0000651 if (!Src)
Justin Holewinskib96d1392013-06-10 13:29:47 +0000652 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000653
Craig Toppere3dcce92015-08-01 22:20:21 +0000654 if (auto *PT = dyn_cast<PointerType>(Src->getType())) {
Justin Holewinskiae556d32012-05-04 20:18:50 +0000655 switch (PT->getAddressSpace()) {
Justin Holewinskib96d1392013-06-10 13:29:47 +0000656 case llvm::ADDRESS_SPACE_LOCAL: return NVPTX::PTXLdStInstCode::LOCAL;
657 case llvm::ADDRESS_SPACE_GLOBAL: return NVPTX::PTXLdStInstCode::GLOBAL;
658 case llvm::ADDRESS_SPACE_SHARED: return NVPTX::PTXLdStInstCode::SHARED;
659 case llvm::ADDRESS_SPACE_GENERIC: return NVPTX::PTXLdStInstCode::GENERIC;
660 case llvm::ADDRESS_SPACE_PARAM: return NVPTX::PTXLdStInstCode::PARAM;
661 case llvm::ADDRESS_SPACE_CONST: return NVPTX::PTXLdStInstCode::CONSTANT;
662 default: break;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000663 }
664 }
Justin Holewinskib96d1392013-06-10 13:29:47 +0000665 return NVPTX::PTXLdStInstCode::GENERIC;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000666}
667
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000668static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000669 unsigned CodeAddrSpace, MachineFunction *F) {
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000670 // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
671 // space.
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000672 //
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000673 // We have two ways of identifying invariant loads: Loads may be explicitly
674 // marked as invariant, or we may infer them to be invariant.
675 //
676 // We currently infer invariance only for kernel function pointer params that
677 // are noalias (i.e. __restrict) and never written to.
678 //
679 // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
680 // not during the SelectionDAG phase).
681 //
682 // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
683 // explicitly invariant loads because these are how clang tells us to use ldg
684 // when the user uses a builtin.
685 if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000686 return false;
Justin Lebar6d6b11a2016-09-11 01:39:04 +0000687
688 if (N->isInvariant())
689 return true;
690
691 // Load wasn't explicitly invariant. Attempt to infer invariance.
692 if (!isKernelFunction(*F->getFunction()))
693 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000694
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000695 // We use GetUnderlyingObjects() here instead of
696 // GetUnderlyingObject() mainly because the former looks through phi
697 // nodes while the latter does not. We need to look through phi
698 // nodes to handle pointer induction variables.
699 SmallVector<Value *, 8> Objs;
700 GetUnderlyingObjects(const_cast<Value *>(N->getMemOperand()->getValue()),
701 Objs, F->getDataLayout());
702 for (Value *Obj : Objs) {
703 auto *A = dyn_cast<const Argument>(Obj);
704 if (!A || !A->onlyReadsMemory() || !A->hasNoAliasAttr()) return false;
705 }
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000706
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000707 return true;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000708}
709
Justin Bogner8d83fb62016-05-13 21:12:53 +0000710bool NVPTXDAGToDAGISel::tryIntrinsicNoChain(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +0000711 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
712 switch (IID) {
713 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000714 return false;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000715 case Intrinsic::nvvm_texsurf_handle_internal:
Justin Bogner8d83fb62016-05-13 21:12:53 +0000716 SelectTexSurfHandle(N);
717 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +0000718 }
719}
720
Justin Bogner8d83fb62016-05-13 21:12:53 +0000721void NVPTXDAGToDAGISel::SelectTexSurfHandle(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +0000722 // Op 0 is the intrinsic ID
723 SDValue Wrapper = N->getOperand(1);
724 SDValue GlobalVal = Wrapper.getOperand(0);
Justin Bogner8d83fb62016-05-13 21:12:53 +0000725 ReplaceNode(N, CurDAG->getMachineNode(NVPTX::texsurf_handles, SDLoc(N),
726 MVT::i64, GlobalVal));
Justin Holewinski30d56a72014-04-09 15:39:15 +0000727}
728
Justin Bogner8d83fb62016-05-13 21:12:53 +0000729void NVPTXDAGToDAGISel::SelectAddrSpaceCast(SDNode *N) {
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000730 SDValue Src = N->getOperand(0);
731 AddrSpaceCastSDNode *CastN = cast<AddrSpaceCastSDNode>(N);
732 unsigned SrcAddrSpace = CastN->getSrcAddressSpace();
733 unsigned DstAddrSpace = CastN->getDestAddressSpace();
734
735 assert(SrcAddrSpace != DstAddrSpace &&
736 "addrspacecast must be between different address spaces");
737
738 if (DstAddrSpace == ADDRESS_SPACE_GENERIC) {
739 // Specific to generic
740 unsigned Opc;
741 switch (SrcAddrSpace) {
742 default: report_fatal_error("Bad address space in addrspacecast");
743 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000744 Opc = TM.is64Bit() ? NVPTX::cvta_global_yes_64 : NVPTX::cvta_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000745 break;
746 case ADDRESS_SPACE_SHARED:
Eric Christopher02389e32015-02-19 00:08:27 +0000747 Opc = TM.is64Bit() ? NVPTX::cvta_shared_yes_64 : NVPTX::cvta_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000748 break;
749 case ADDRESS_SPACE_CONST:
Eric Christopher02389e32015-02-19 00:08:27 +0000750 Opc = TM.is64Bit() ? NVPTX::cvta_const_yes_64 : NVPTX::cvta_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000751 break;
752 case ADDRESS_SPACE_LOCAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000753 Opc = TM.is64Bit() ? NVPTX::cvta_local_yes_64 : NVPTX::cvta_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000754 break;
755 }
Justin Bogner8d83fb62016-05-13 21:12:53 +0000756 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
757 Src));
758 return;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000759 } else {
760 // Generic to specific
761 if (SrcAddrSpace != 0)
762 report_fatal_error("Cannot cast between two non-generic address spaces");
763 unsigned Opc;
764 switch (DstAddrSpace) {
765 default: report_fatal_error("Bad address space in addrspacecast");
766 case ADDRESS_SPACE_GLOBAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000767 Opc = TM.is64Bit() ? NVPTX::cvta_to_global_yes_64
768 : NVPTX::cvta_to_global_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000769 break;
770 case ADDRESS_SPACE_SHARED:
Eric Christopher02389e32015-02-19 00:08:27 +0000771 Opc = TM.is64Bit() ? NVPTX::cvta_to_shared_yes_64
772 : NVPTX::cvta_to_shared_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000773 break;
774 case ADDRESS_SPACE_CONST:
Eric Christopher02389e32015-02-19 00:08:27 +0000775 Opc =
776 TM.is64Bit() ? NVPTX::cvta_to_const_yes_64 : NVPTX::cvta_to_const_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000777 break;
778 case ADDRESS_SPACE_LOCAL:
Eric Christopher02389e32015-02-19 00:08:27 +0000779 Opc =
780 TM.is64Bit() ? NVPTX::cvta_to_local_yes_64 : NVPTX::cvta_to_local_yes;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000781 break;
Jingyue Wua2f60272015-06-04 21:28:26 +0000782 case ADDRESS_SPACE_PARAM:
783 Opc = TM.is64Bit() ? NVPTX::nvvm_ptr_gen_to_param_64
784 : NVPTX::nvvm_ptr_gen_to_param;
785 break;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000786 }
Justin Bogner8d83fb62016-05-13 21:12:53 +0000787 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getValueType(0),
788 Src));
789 return;
Justin Holewinskiba2fa6d2014-03-24 11:17:53 +0000790 }
791}
792
Artem Belevichee7dd122017-03-02 19:14:14 +0000793// Helper function template to reduce amount of boilerplate code for
794// opcode selection.
795static Optional<unsigned> pickOpcodeForVT(
796 MVT::SimpleValueType VT, unsigned Opcode_i8, unsigned Opcode_i16,
797 unsigned Opcode_i32, Optional<unsigned> Opcode_i64, unsigned Opcode_f16,
798 unsigned Opcode_f16x2, unsigned Opcode_f32, Optional<unsigned> Opcode_f64) {
799 switch (VT) {
800 case MVT::i1:
801 case MVT::i8:
802 return Opcode_i8;
803 case MVT::i16:
804 return Opcode_i16;
805 case MVT::i32:
806 return Opcode_i32;
807 case MVT::i64:
808 return Opcode_i64;
809 case MVT::f16:
810 return Opcode_f16;
811 case MVT::v2f16:
812 return Opcode_f16x2;
813 case MVT::f32:
814 return Opcode_f32;
815 case MVT::f64:
816 return Opcode_f64;
817 default:
818 return None;
819 }
820}
821
Justin Bogner8d83fb62016-05-13 21:12:53 +0000822bool NVPTXDAGToDAGISel::tryLoad(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +0000823 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000824 LoadSDNode *LD = cast<LoadSDNode>(N);
825 EVT LoadedVT = LD->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +0000826 SDNode *NVPTXLD = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000827
828 // do not support pre/post inc/dec
829 if (LD->isIndexed())
Justin Bogner8d83fb62016-05-13 21:12:53 +0000830 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000831
832 if (!LoadedVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +0000833 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000834
835 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +0000836 unsigned int codeAddrSpace = getCodeAddrSpace(LD);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000837
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000838 if (canLowerToLDG(LD, *Subtarget, codeAddrSpace, MF)) {
Justin Bogner8d83fb62016-05-13 21:12:53 +0000839 return tryLDGLDU(N);
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000840 }
841
Justin Holewinskiae556d32012-05-04 20:18:50 +0000842 // Volatile Setting
843 // - .volatile is only availalble for .global and .shared
844 bool isVolatile = LD->isVolatile();
845 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
846 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
847 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
848 isVolatile = false;
849
Justin Holewinskiae556d32012-05-04 20:18:50 +0000850 // Type Setting: fromType + fromTypeWidth
851 //
852 // Sign : ISD::SEXTLOAD
853 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
854 // type is integer
855 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
Artem Belevich620db1f2017-02-23 22:38:24 +0000856 MVT SimpleVT = LoadedVT.getSimpleVT();
Justin Holewinskiae556d32012-05-04 20:18:50 +0000857 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +0000858 // Read at least 8 bits (predicates are stored as 8-bit values)
859 unsigned fromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskiae556d32012-05-04 20:18:50 +0000860 unsigned int fromType;
Artem Belevich620db1f2017-02-23 22:38:24 +0000861
862 // Vector Setting
863 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
864 if (SimpleVT.isVector()) {
865 assert(LoadedVT == MVT::v2f16 && "Unexpected vector type");
866 // v2f16 is loaded using ld.b32
867 fromTypeWidth = 32;
868 }
869
Justin Holewinskiae556d32012-05-04 20:18:50 +0000870 if ((LD->getExtensionType() == ISD::SEXTLOAD))
871 fromType = NVPTX::PTXLdStInstCode::Signed;
872 else if (ScalarVT.isFloatingPoint())
Artem Belevich64dc9be2017-01-13 20:56:17 +0000873 // f16 uses .b16 as its storage type.
874 fromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
875 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000876 else
877 fromType = NVPTX::PTXLdStInstCode::Unsigned;
878
879 // Create the machine instruction DAG
880 SDValue Chain = N->getOperand(0);
881 SDValue N1 = N->getOperand(1);
882 SDValue Addr;
883 SDValue Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +0000884 Optional<unsigned> Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +0000885 MVT::SimpleValueType TargetVT = LD->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000886
887 if (SelectDirectAddr(N1, Addr)) {
Artem Belevichee7dd122017-03-02 19:14:14 +0000888 Opcode = pickOpcodeForVT(
889 TargetVT, NVPTX::LD_i8_avar, NVPTX::LD_i16_avar, NVPTX::LD_i32_avar,
890 NVPTX::LD_i64_avar, NVPTX::LD_f16_avar, NVPTX::LD_f16x2_avar,
891 NVPTX::LD_f32_avar, NVPTX::LD_f64_avar);
892 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +0000893 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000894 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
895 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
896 getI32Imm(fromTypeWidth, dl), Addr, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000897 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
898 MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +0000899 } else if (TM.is64Bit() ? SelectADDRsi64(N1.getNode(), N1, Base, Offset)
900 : SelectADDRsi(N1.getNode(), N1, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +0000901 Opcode = pickOpcodeForVT(TargetVT, NVPTX::LD_i8_asi, NVPTX::LD_i16_asi,
902 NVPTX::LD_i32_asi, NVPTX::LD_i64_asi,
903 NVPTX::LD_f16_asi, NVPTX::LD_f16x2_asi,
904 NVPTX::LD_f32_asi, NVPTX::LD_f64_asi);
905 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +0000906 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000907 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
908 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
909 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000910 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
911 MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +0000912 } else if (TM.is64Bit() ? SelectADDRri64(N1.getNode(), N1, Base, Offset)
913 : SelectADDRri(N1.getNode(), N1, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +0000914 if (TM.is64Bit())
915 Opcode = pickOpcodeForVT(
916 TargetVT, NVPTX::LD_i8_ari_64, NVPTX::LD_i16_ari_64,
917 NVPTX::LD_i32_ari_64, NVPTX::LD_i64_ari_64, NVPTX::LD_f16_ari_64,
918 NVPTX::LD_f16x2_ari_64, NVPTX::LD_f32_ari_64, NVPTX::LD_f64_ari_64);
919 else
920 Opcode = pickOpcodeForVT(
921 TargetVT, NVPTX::LD_i8_ari, NVPTX::LD_i16_ari, NVPTX::LD_i32_ari,
922 NVPTX::LD_i64_ari, NVPTX::LD_f16_ari, NVPTX::LD_f16x2_ari,
923 NVPTX::LD_f32_ari, NVPTX::LD_f64_ari);
924 if (!Opcode)
925 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000926 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
927 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
928 getI32Imm(fromTypeWidth, dl), Base, Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000929 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
930 MVT::Other, Ops);
Justin Holewinski0497ab12013-03-30 14:29:21 +0000931 } else {
Artem Belevichee7dd122017-03-02 19:14:14 +0000932 if (TM.is64Bit())
933 Opcode = pickOpcodeForVT(
934 TargetVT, NVPTX::LD_i8_areg_64, NVPTX::LD_i16_areg_64,
935 NVPTX::LD_i32_areg_64, NVPTX::LD_i64_areg_64, NVPTX::LD_f16_areg_64,
936 NVPTX::LD_f16x2_areg_64, NVPTX::LD_f32_areg_64,
937 NVPTX::LD_f64_areg_64);
938 else
939 Opcode = pickOpcodeForVT(
940 TargetVT, NVPTX::LD_i8_areg, NVPTX::LD_i16_areg, NVPTX::LD_i32_areg,
941 NVPTX::LD_i64_areg, NVPTX::LD_f16_areg, NVPTX::LD_f16x2_areg,
942 NVPTX::LD_f32_areg, NVPTX::LD_f64_areg);
943 if (!Opcode)
944 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +0000945 SDValue Ops[] = { getI32Imm(isVolatile, dl), getI32Imm(codeAddrSpace, dl),
946 getI32Imm(vecType, dl), getI32Imm(fromType, dl),
947 getI32Imm(fromTypeWidth, dl), N1, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +0000948 NVPTXLD = CurDAG->getMachineNode(Opcode.getValue(), dl, TargetVT,
949 MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +0000950 }
951
Justin Bogner8d83fb62016-05-13 21:12:53 +0000952 if (!NVPTXLD)
953 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000954
Justin Bogner8d83fb62016-05-13 21:12:53 +0000955 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
956 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
957 cast<MachineSDNode>(NVPTXLD)->setMemRefs(MemRefs0, MemRefs0 + 1);
958
959 ReplaceNode(N, NVPTXLD);
960 return true;
Justin Holewinskiae556d32012-05-04 20:18:50 +0000961}
962
Justin Bogner8d83fb62016-05-13 21:12:53 +0000963bool NVPTXDAGToDAGISel::tryLoadVector(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000964
965 SDValue Chain = N->getOperand(0);
966 SDValue Op1 = N->getOperand(1);
967 SDValue Addr, Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +0000968 Optional<unsigned> Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +0000969 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000970 SDNode *LD;
971 MemSDNode *MemSD = cast<MemSDNode>(N);
972 EVT LoadedVT = MemSD->getMemoryVT();
973
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000974 if (!LoadedVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +0000975 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000976
977 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +0000978 unsigned int CodeAddrSpace = getCodeAddrSpace(MemSD);
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000979
Bjarke Hammersholt Roune5cbc7d22015-08-05 23:11:57 +0000980 if (canLowerToLDG(MemSD, *Subtarget, CodeAddrSpace, MF)) {
Justin Bogner8d83fb62016-05-13 21:12:53 +0000981 return tryLDGLDU(N);
Jingyue Wu48a9bdc2015-07-20 21:28:54 +0000982 }
983
Justin Holewinskibe8dc642013-02-12 14:18:49 +0000984 // Volatile Setting
985 // - .volatile is only availalble for .global and .shared
986 bool IsVolatile = MemSD->isVolatile();
987 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
988 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
989 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
990 IsVolatile = false;
991
992 // Vector Setting
993 MVT SimpleVT = LoadedVT.getSimpleVT();
994
995 // Type Setting: fromType + fromTypeWidth
996 //
997 // Sign : ISD::SEXTLOAD
998 // Unsign : ISD::ZEXTLOAD, ISD::NON_EXTLOAD or ISD::EXTLOAD and the
999 // type is integer
1000 // Float : ISD::NON_EXTLOAD or ISD::EXTLOAD and the type is float
1001 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski994d66a2013-05-30 12:22:39 +00001002 // Read at least 8 bits (predicates are stored as 8-bit values)
1003 unsigned FromTypeWidth = std::max(8U, ScalarVT.getSizeInBits());
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001004 unsigned int FromType;
1005 // The last operand holds the original LoadSDNode::getExtensionType() value
Justin Holewinski0497ab12013-03-30 14:29:21 +00001006 unsigned ExtensionType = cast<ConstantSDNode>(
1007 N->getOperand(N->getNumOperands() - 1))->getZExtValue();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001008 if (ExtensionType == ISD::SEXTLOAD)
1009 FromType = NVPTX::PTXLdStInstCode::Signed;
1010 else if (ScalarVT.isFloatingPoint())
Artem Belevich620db1f2017-02-23 22:38:24 +00001011 FromType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1012 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001013 else
1014 FromType = NVPTX::PTXLdStInstCode::Unsigned;
1015
1016 unsigned VecType;
1017
1018 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001019 case NVPTXISD::LoadV2:
1020 VecType = NVPTX::PTXLdStInstCode::V2;
1021 break;
1022 case NVPTXISD::LoadV4:
1023 VecType = NVPTX::PTXLdStInstCode::V4;
1024 break;
1025 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001026 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001027 }
1028
1029 EVT EltVT = N->getValueType(0);
1030
Artem Belevich620db1f2017-02-23 22:38:24 +00001031 // v8f16 is a special case. PTX doesn't have ld.v8.f16
1032 // instruction. Instead, we split the vector into v2f16 chunks and
1033 // load them with ld.v4.b32.
1034 if (EltVT == MVT::v2f16) {
1035 assert(N->getOpcode() == NVPTXISD::LoadV4 && "Unexpected load opcode.");
1036 EltVT = MVT::i32;
1037 FromType = NVPTX::PTXLdStInstCode::Untyped;
1038 FromTypeWidth = 32;
1039 }
1040
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001041 if (SelectDirectAddr(Op1, Addr)) {
1042 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001043 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001044 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001045 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001046 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1047 NVPTX::LDV_i8_v2_avar, NVPTX::LDV_i16_v2_avar,
1048 NVPTX::LDV_i32_v2_avar, NVPTX::LDV_i64_v2_avar,
1049 NVPTX::LDV_f16_v2_avar, NVPTX::LDV_f16x2_v2_avar,
1050 NVPTX::LDV_f32_v2_avar, NVPTX::LDV_f64_v2_avar);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001051 break;
1052 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001053 Opcode =
1054 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_avar,
1055 NVPTX::LDV_i16_v4_avar, NVPTX::LDV_i32_v4_avar, None,
1056 NVPTX::LDV_f16_v4_avar, NVPTX::LDV_f16x2_v4_avar,
1057 NVPTX::LDV_f32_v4_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001058 break;
1059 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001060 if (!Opcode)
1061 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001062 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1063 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1064 getI32Imm(FromTypeWidth, DL), Addr, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001065 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001066 } else if (TM.is64Bit() ? SelectADDRsi64(Op1.getNode(), Op1, Base, Offset)
1067 : SelectADDRsi(Op1.getNode(), Op1, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001068 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001069 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001070 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001071 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001072 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1073 NVPTX::LDV_i8_v2_asi, NVPTX::LDV_i16_v2_asi,
1074 NVPTX::LDV_i32_v2_asi, NVPTX::LDV_i64_v2_asi,
1075 NVPTX::LDV_f16_v2_asi, NVPTX::LDV_f16x2_v2_asi,
1076 NVPTX::LDV_f32_v2_asi, NVPTX::LDV_f64_v2_asi);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001077 break;
1078 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001079 Opcode =
1080 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_asi,
1081 NVPTX::LDV_i16_v4_asi, NVPTX::LDV_i32_v4_asi, None,
1082 NVPTX::LDV_f16_v4_asi, NVPTX::LDV_f16x2_v4_asi,
1083 NVPTX::LDV_f32_v4_asi, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001084 break;
1085 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001086 if (!Opcode)
1087 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001088 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1089 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1090 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001091 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001092 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1093 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1094 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001095 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001096 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001097 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001098 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001099 Opcode = pickOpcodeForVT(
1100 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_ari_64,
1101 NVPTX::LDV_i16_v2_ari_64, NVPTX::LDV_i32_v2_ari_64,
1102 NVPTX::LDV_i64_v2_ari_64, NVPTX::LDV_f16_v2_ari_64,
1103 NVPTX::LDV_f16x2_v2_ari_64, NVPTX::LDV_f32_v2_ari_64,
1104 NVPTX::LDV_f64_v2_ari_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001105 break;
1106 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001107 Opcode = pickOpcodeForVT(
1108 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari_64,
1109 NVPTX::LDV_i16_v4_ari_64, NVPTX::LDV_i32_v4_ari_64, None,
1110 NVPTX::LDV_f16_v4_ari_64, NVPTX::LDV_f16x2_v4_ari_64,
1111 NVPTX::LDV_f32_v4_ari_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001112 break;
1113 }
1114 } else {
1115 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001116 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001117 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001118 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001119 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1120 NVPTX::LDV_i8_v2_ari, NVPTX::LDV_i16_v2_ari,
1121 NVPTX::LDV_i32_v2_ari, NVPTX::LDV_i64_v2_ari,
1122 NVPTX::LDV_f16_v2_ari, NVPTX::LDV_f16x2_v2_ari,
1123 NVPTX::LDV_f32_v2_ari, NVPTX::LDV_f64_v2_ari);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001124 break;
1125 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001126 Opcode =
1127 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_ari,
1128 NVPTX::LDV_i16_v4_ari, NVPTX::LDV_i32_v4_ari, None,
1129 NVPTX::LDV_f16_v4_ari, NVPTX::LDV_f16x2_v4_ari,
1130 NVPTX::LDV_f32_v4_ari, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001131 break;
1132 }
1133 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001134 if (!Opcode)
1135 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001136 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1137 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1138 getI32Imm(FromTypeWidth, DL), Base, Offset, Chain };
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001139
Artem Belevichee7dd122017-03-02 19:14:14 +00001140 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001141 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001142 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001143 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001144 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001145 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001146 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001147 Opcode = pickOpcodeForVT(
1148 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg_64,
1149 NVPTX::LDV_i16_v2_areg_64, NVPTX::LDV_i32_v2_areg_64,
1150 NVPTX::LDV_i64_v2_areg_64, NVPTX::LDV_f16_v2_areg_64,
1151 NVPTX::LDV_f16x2_v2_areg_64, NVPTX::LDV_f32_v2_areg_64,
1152 NVPTX::LDV_f64_v2_areg_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001153 break;
1154 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001155 Opcode = pickOpcodeForVT(
1156 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg_64,
1157 NVPTX::LDV_i16_v4_areg_64, NVPTX::LDV_i32_v4_areg_64, None,
1158 NVPTX::LDV_f16_v4_areg_64, NVPTX::LDV_f16x2_v4_areg_64,
1159 NVPTX::LDV_f32_v4_areg_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001160 break;
1161 }
1162 } else {
1163 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001164 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001165 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001166 case NVPTXISD::LoadV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001167 Opcode =
1168 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v2_areg,
1169 NVPTX::LDV_i16_v2_areg, NVPTX::LDV_i32_v2_areg,
1170 NVPTX::LDV_i64_v2_areg, NVPTX::LDV_f16_v2_areg,
1171 NVPTX::LDV_f16x2_v2_areg, NVPTX::LDV_f32_v2_areg,
1172 NVPTX::LDV_f64_v2_areg);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001173 break;
1174 case NVPTXISD::LoadV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001175 Opcode = pickOpcodeForVT(
1176 EltVT.getSimpleVT().SimpleTy, NVPTX::LDV_i8_v4_areg,
1177 NVPTX::LDV_i16_v4_areg, NVPTX::LDV_i32_v4_areg, None,
1178 NVPTX::LDV_f16_v4_areg, NVPTX::LDV_f16x2_v4_areg,
1179 NVPTX::LDV_f32_v4_areg, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001180 break;
1181 }
1182 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001183 if (!Opcode)
1184 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001185 SDValue Ops[] = { getI32Imm(IsVolatile, DL), getI32Imm(CodeAddrSpace, DL),
1186 getI32Imm(VecType, DL), getI32Imm(FromType, DL),
1187 getI32Imm(FromTypeWidth, DL), Op1, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001188 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, N->getVTList(), Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001189 }
1190
1191 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1192 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1193 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1194
Justin Bogner8d83fb62016-05-13 21:12:53 +00001195 ReplaceNode(N, LD);
1196 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001197}
1198
Justin Bogner8d83fb62016-05-13 21:12:53 +00001199bool NVPTXDAGToDAGISel::tryLDGLDU(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001200
1201 SDValue Chain = N->getOperand(0);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001202 SDValue Op1;
1203 MemSDNode *Mem;
1204 bool IsLDG = true;
1205
Justin Holewinskic7997922016-04-05 12:38:01 +00001206 // If this is an LDG intrinsic, the address is the third operand. If its an
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001207 // LDG/LDU SD node (from custom vector handling), then its the second operand
1208 if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
1209 Op1 = N->getOperand(2);
1210 Mem = cast<MemIntrinsicSDNode>(N);
1211 unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
1212 switch (IID) {
1213 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001214 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001215 case Intrinsic::nvvm_ldg_global_f:
1216 case Intrinsic::nvvm_ldg_global_i:
1217 case Intrinsic::nvvm_ldg_global_p:
1218 IsLDG = true;
1219 break;
1220 case Intrinsic::nvvm_ldu_global_f:
1221 case Intrinsic::nvvm_ldu_global_i:
1222 case Intrinsic::nvvm_ldu_global_p:
1223 IsLDG = false;
1224 break;
1225 }
1226 } else {
1227 Op1 = N->getOperand(1);
1228 Mem = cast<MemSDNode>(N);
1229 }
1230
Artem Belevichee7dd122017-03-02 19:14:14 +00001231 Optional<unsigned> Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001232 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001233 SDNode *LD;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001234 SDValue Base, Offset, Addr;
Justin Holewinskif8f70912013-06-28 17:57:59 +00001235
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001236 EVT EltVT = Mem->getMemoryVT();
Justin Holewinskic7997922016-04-05 12:38:01 +00001237 unsigned NumElts = 1;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001238 if (EltVT.isVector()) {
Justin Holewinskic7997922016-04-05 12:38:01 +00001239 NumElts = EltVT.getVectorNumElements();
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001240 EltVT = EltVT.getVectorElementType();
1241 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001242
Justin Holewinskic7997922016-04-05 12:38:01 +00001243 // Build the "promoted" result VTList for the load. If we are really loading
1244 // i8s, then the return type will be promoted to i16 since we do not expose
1245 // 8-bit registers in NVPTX.
1246 EVT NodeVT = (EltVT == MVT::i8) ? MVT::i16 : EltVT;
1247 SmallVector<EVT, 5> InstVTs;
1248 for (unsigned i = 0; i != NumElts; ++i) {
1249 InstVTs.push_back(NodeVT);
1250 }
1251 InstVTs.push_back(MVT::Other);
1252 SDVTList InstVTList = CurDAG->getVTList(InstVTs);
1253
Justin Holewinskie40e9292013-07-01 12:58:52 +00001254 if (SelectDirectAddr(Op1, Addr)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001255 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001256 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001257 return false;
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001258 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001259 if (IsLDG)
1260 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1261 NVPTX::INT_PTX_LDG_GLOBAL_i8avar,
1262 NVPTX::INT_PTX_LDG_GLOBAL_i16avar,
1263 NVPTX::INT_PTX_LDG_GLOBAL_i32avar,
1264 NVPTX::INT_PTX_LDG_GLOBAL_i64avar,
1265 NVPTX::INT_PTX_LDG_GLOBAL_f16avar,
1266 NVPTX::INT_PTX_LDG_GLOBAL_f16x2avar,
1267 NVPTX::INT_PTX_LDG_GLOBAL_f32avar,
1268 NVPTX::INT_PTX_LDG_GLOBAL_f64avar);
1269 else
1270 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1271 NVPTX::INT_PTX_LDU_GLOBAL_i8avar,
1272 NVPTX::INT_PTX_LDU_GLOBAL_i16avar,
1273 NVPTX::INT_PTX_LDU_GLOBAL_i32avar,
1274 NVPTX::INT_PTX_LDU_GLOBAL_i64avar,
1275 NVPTX::INT_PTX_LDU_GLOBAL_f16avar,
1276 NVPTX::INT_PTX_LDU_GLOBAL_f16x2avar,
1277 NVPTX::INT_PTX_LDU_GLOBAL_f32avar,
1278 NVPTX::INT_PTX_LDU_GLOBAL_f64avar);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001279 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001280 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001281 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1282 NVPTX::INT_PTX_LDG_G_v2i8_ELE_avar,
1283 NVPTX::INT_PTX_LDG_G_v2i16_ELE_avar,
1284 NVPTX::INT_PTX_LDG_G_v2i32_ELE_avar,
1285 NVPTX::INT_PTX_LDG_G_v2i64_ELE_avar,
1286 NVPTX::INT_PTX_LDG_G_v2f16_ELE_avar,
1287 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_avar,
1288 NVPTX::INT_PTX_LDG_G_v2f32_ELE_avar,
1289 NVPTX::INT_PTX_LDG_G_v2f64_ELE_avar);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001290 break;
1291 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001292 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1293 NVPTX::INT_PTX_LDU_G_v2i8_ELE_avar,
1294 NVPTX::INT_PTX_LDU_G_v2i16_ELE_avar,
1295 NVPTX::INT_PTX_LDU_G_v2i32_ELE_avar,
1296 NVPTX::INT_PTX_LDU_G_v2i64_ELE_avar,
1297 NVPTX::INT_PTX_LDU_G_v2f16_ELE_avar,
1298 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_avar,
1299 NVPTX::INT_PTX_LDU_G_v2f32_ELE_avar,
1300 NVPTX::INT_PTX_LDU_G_v2f64_ELE_avar);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001301 break;
1302 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001303 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1304 NVPTX::INT_PTX_LDG_G_v4i8_ELE_avar,
1305 NVPTX::INT_PTX_LDG_G_v4i16_ELE_avar,
1306 NVPTX::INT_PTX_LDG_G_v4i32_ELE_avar, None,
1307 NVPTX::INT_PTX_LDG_G_v4f16_ELE_avar,
1308 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_avar,
1309 NVPTX::INT_PTX_LDG_G_v4f32_ELE_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001310 break;
1311 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001312 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1313 NVPTX::INT_PTX_LDU_G_v4i8_ELE_avar,
1314 NVPTX::INT_PTX_LDU_G_v4i16_ELE_avar,
1315 NVPTX::INT_PTX_LDU_G_v4i32_ELE_avar, None,
1316 NVPTX::INT_PTX_LDU_G_v4f16_ELE_avar,
1317 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_avar,
1318 NVPTX::INT_PTX_LDU_G_v4f32_ELE_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001319 break;
1320 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001321 if (!Opcode)
1322 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001323 SDValue Ops[] = { Addr, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001324 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001325 } else if (TM.is64Bit() ? SelectADDRri64(Op1.getNode(), Op1, Base, Offset)
1326 : SelectADDRri(Op1.getNode(), Op1, Base, Offset)) {
1327 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001328 switch (N->getOpcode()) {
1329 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001330 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001331 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001332 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001333 if (IsLDG)
1334 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1335 NVPTX::INT_PTX_LDG_GLOBAL_i8ari64,
1336 NVPTX::INT_PTX_LDG_GLOBAL_i16ari64,
1337 NVPTX::INT_PTX_LDG_GLOBAL_i32ari64,
1338 NVPTX::INT_PTX_LDG_GLOBAL_i64ari64,
1339 NVPTX::INT_PTX_LDG_GLOBAL_f16ari64,
1340 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari64,
1341 NVPTX::INT_PTX_LDG_GLOBAL_f32ari64,
1342 NVPTX::INT_PTX_LDG_GLOBAL_f64ari64);
1343 else
1344 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1345 NVPTX::INT_PTX_LDU_GLOBAL_i8ari64,
1346 NVPTX::INT_PTX_LDU_GLOBAL_i16ari64,
1347 NVPTX::INT_PTX_LDU_GLOBAL_i32ari64,
1348 NVPTX::INT_PTX_LDU_GLOBAL_i64ari64,
1349 NVPTX::INT_PTX_LDU_GLOBAL_f16ari64,
1350 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari64,
1351 NVPTX::INT_PTX_LDU_GLOBAL_f32ari64,
1352 NVPTX::INT_PTX_LDU_GLOBAL_f64ari64);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001353 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001354 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001355 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001356 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1357 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari64,
1358 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari64,
1359 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari64,
1360 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari64,
1361 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari64,
1362 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari64,
1363 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari64,
1364 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari64);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001365 break;
1366 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001367 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1368 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari64,
1369 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari64,
1370 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari64,
1371 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari64,
1372 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari64,
1373 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari64,
1374 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari64,
1375 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari64);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001376 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001377 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001378 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001379 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1380 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari64,
1381 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari64,
1382 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari64, None,
1383 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari64,
1384 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari64,
1385 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari64, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001386 break;
1387 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001388 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1389 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari64,
1390 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari64,
1391 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari64, None,
1392 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari64,
1393 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari64,
1394 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari64, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001395 break;
1396 }
1397 } else {
1398 switch (N->getOpcode()) {
1399 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001400 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001401 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001402 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001403 if (IsLDG)
1404 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1405 NVPTX::INT_PTX_LDG_GLOBAL_i8ari,
1406 NVPTX::INT_PTX_LDG_GLOBAL_i16ari,
1407 NVPTX::INT_PTX_LDG_GLOBAL_i32ari,
1408 NVPTX::INT_PTX_LDG_GLOBAL_i64ari,
1409 NVPTX::INT_PTX_LDG_GLOBAL_f16ari,
1410 NVPTX::INT_PTX_LDG_GLOBAL_f16x2ari,
1411 NVPTX::INT_PTX_LDG_GLOBAL_f32ari,
1412 NVPTX::INT_PTX_LDG_GLOBAL_f64ari);
1413 else
1414 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1415 NVPTX::INT_PTX_LDU_GLOBAL_i8ari,
1416 NVPTX::INT_PTX_LDU_GLOBAL_i16ari,
1417 NVPTX::INT_PTX_LDU_GLOBAL_i32ari,
1418 NVPTX::INT_PTX_LDU_GLOBAL_i64ari,
1419 NVPTX::INT_PTX_LDU_GLOBAL_f16ari,
1420 NVPTX::INT_PTX_LDU_GLOBAL_f16x2ari,
1421 NVPTX::INT_PTX_LDU_GLOBAL_f32ari,
1422 NVPTX::INT_PTX_LDU_GLOBAL_f64ari);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001423 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001424 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001425 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001426 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1427 NVPTX::INT_PTX_LDG_G_v2i8_ELE_ari32,
1428 NVPTX::INT_PTX_LDG_G_v2i16_ELE_ari32,
1429 NVPTX::INT_PTX_LDG_G_v2i32_ELE_ari32,
1430 NVPTX::INT_PTX_LDG_G_v2i64_ELE_ari32,
1431 NVPTX::INT_PTX_LDG_G_v2f16_ELE_ari32,
1432 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_ari32,
1433 NVPTX::INT_PTX_LDG_G_v2f32_ELE_ari32,
1434 NVPTX::INT_PTX_LDG_G_v2f64_ELE_ari32);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001435 break;
1436 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001437 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1438 NVPTX::INT_PTX_LDU_G_v2i8_ELE_ari32,
1439 NVPTX::INT_PTX_LDU_G_v2i16_ELE_ari32,
1440 NVPTX::INT_PTX_LDU_G_v2i32_ELE_ari32,
1441 NVPTX::INT_PTX_LDU_G_v2i64_ELE_ari32,
1442 NVPTX::INT_PTX_LDU_G_v2f16_ELE_ari32,
1443 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_ari32,
1444 NVPTX::INT_PTX_LDU_G_v2f32_ELE_ari32,
1445 NVPTX::INT_PTX_LDU_G_v2f64_ELE_ari32);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001446 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001447 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001448 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001449 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1450 NVPTX::INT_PTX_LDG_G_v4i8_ELE_ari32,
1451 NVPTX::INT_PTX_LDG_G_v4i16_ELE_ari32,
1452 NVPTX::INT_PTX_LDG_G_v4i32_ELE_ari32, None,
1453 NVPTX::INT_PTX_LDG_G_v4f16_ELE_ari32,
1454 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_ari32,
1455 NVPTX::INT_PTX_LDG_G_v4f32_ELE_ari32, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001456 break;
1457 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001458 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1459 NVPTX::INT_PTX_LDU_G_v4i8_ELE_ari32,
1460 NVPTX::INT_PTX_LDU_G_v4i16_ELE_ari32,
1461 NVPTX::INT_PTX_LDU_G_v4i32_ELE_ari32, None,
1462 NVPTX::INT_PTX_LDU_G_v4f16_ELE_ari32,
1463 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_ari32,
1464 NVPTX::INT_PTX_LDU_G_v4f32_ELE_ari32, None);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001465 break;
1466 }
1467 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001468 if (!Opcode)
1469 return false;
1470 SDValue Ops[] = {Base, Offset, Chain};
1471 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001472 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001473 if (TM.is64Bit()) {
Justin Holewinskie40e9292013-07-01 12:58:52 +00001474 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001475 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001476 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001477 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001478 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001479 if (IsLDG)
1480 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1481 NVPTX::INT_PTX_LDG_GLOBAL_i8areg64,
1482 NVPTX::INT_PTX_LDG_GLOBAL_i16areg64,
1483 NVPTX::INT_PTX_LDG_GLOBAL_i32areg64,
1484 NVPTX::INT_PTX_LDG_GLOBAL_i64areg64,
1485 NVPTX::INT_PTX_LDG_GLOBAL_f16areg64,
1486 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg64,
1487 NVPTX::INT_PTX_LDG_GLOBAL_f32areg64,
1488 NVPTX::INT_PTX_LDG_GLOBAL_f64areg64);
1489 else
1490 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1491 NVPTX::INT_PTX_LDU_GLOBAL_i8areg64,
1492 NVPTX::INT_PTX_LDU_GLOBAL_i16areg64,
1493 NVPTX::INT_PTX_LDU_GLOBAL_i32areg64,
1494 NVPTX::INT_PTX_LDU_GLOBAL_i64areg64,
1495 NVPTX::INT_PTX_LDU_GLOBAL_f16areg64,
1496 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg64,
1497 NVPTX::INT_PTX_LDU_GLOBAL_f32areg64,
1498 NVPTX::INT_PTX_LDU_GLOBAL_f64areg64);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001499 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001500 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001501 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001502 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1503 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg64,
1504 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg64,
1505 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg64,
1506 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg64,
1507 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg64,
1508 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg64,
1509 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg64,
1510 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg64);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001511 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001512 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001513 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1514 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg64,
1515 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg64,
1516 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg64,
1517 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg64,
1518 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg64,
1519 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg64,
1520 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg64,
1521 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg64);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001522 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001523 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001524 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001525 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1526 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg64,
1527 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg64,
1528 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg64, None,
1529 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg64,
1530 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg64,
1531 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg64, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001532 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001533 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001534 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1535 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg64,
1536 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg64,
1537 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg64, None,
1538 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg64,
1539 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg64,
1540 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg64, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001541 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001542 }
Justin Holewinskie40e9292013-07-01 12:58:52 +00001543 } else {
1544 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001545 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001546 return false;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001547 case ISD::LOAD:
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001548 case ISD::INTRINSIC_W_CHAIN:
Artem Belevichee7dd122017-03-02 19:14:14 +00001549 if (IsLDG)
1550 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1551 NVPTX::INT_PTX_LDG_GLOBAL_i8areg,
1552 NVPTX::INT_PTX_LDG_GLOBAL_i16areg,
1553 NVPTX::INT_PTX_LDG_GLOBAL_i32areg,
1554 NVPTX::INT_PTX_LDG_GLOBAL_i64areg,
1555 NVPTX::INT_PTX_LDG_GLOBAL_f16areg,
1556 NVPTX::INT_PTX_LDG_GLOBAL_f16x2areg,
1557 NVPTX::INT_PTX_LDG_GLOBAL_f32areg,
1558 NVPTX::INT_PTX_LDG_GLOBAL_f64areg);
1559 else
1560 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1561 NVPTX::INT_PTX_LDU_GLOBAL_i8areg,
1562 NVPTX::INT_PTX_LDU_GLOBAL_i16areg,
1563 NVPTX::INT_PTX_LDU_GLOBAL_i32areg,
1564 NVPTX::INT_PTX_LDU_GLOBAL_i64areg,
1565 NVPTX::INT_PTX_LDU_GLOBAL_f16areg,
1566 NVPTX::INT_PTX_LDU_GLOBAL_f16x2areg,
1567 NVPTX::INT_PTX_LDU_GLOBAL_f32areg,
1568 NVPTX::INT_PTX_LDU_GLOBAL_f64areg);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001569 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001570 case NVPTXISD::LoadV2:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001571 case NVPTXISD::LDGV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001572 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1573 NVPTX::INT_PTX_LDG_G_v2i8_ELE_areg32,
1574 NVPTX::INT_PTX_LDG_G_v2i16_ELE_areg32,
1575 NVPTX::INT_PTX_LDG_G_v2i32_ELE_areg32,
1576 NVPTX::INT_PTX_LDG_G_v2i64_ELE_areg32,
1577 NVPTX::INT_PTX_LDG_G_v2f16_ELE_areg32,
1578 NVPTX::INT_PTX_LDG_G_v2f16x2_ELE_areg32,
1579 NVPTX::INT_PTX_LDG_G_v2f32_ELE_areg32,
1580 NVPTX::INT_PTX_LDG_G_v2f64_ELE_areg32);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001581 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001582 case NVPTXISD::LDUV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001583 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1584 NVPTX::INT_PTX_LDU_G_v2i8_ELE_areg32,
1585 NVPTX::INT_PTX_LDU_G_v2i16_ELE_areg32,
1586 NVPTX::INT_PTX_LDU_G_v2i32_ELE_areg32,
1587 NVPTX::INT_PTX_LDU_G_v2i64_ELE_areg32,
1588 NVPTX::INT_PTX_LDU_G_v2f16_ELE_areg32,
1589 NVPTX::INT_PTX_LDU_G_v2f16x2_ELE_areg32,
1590 NVPTX::INT_PTX_LDU_G_v2f32_ELE_areg32,
1591 NVPTX::INT_PTX_LDU_G_v2f64_ELE_areg32);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001592 break;
Jingyue Wu48a9bdc2015-07-20 21:28:54 +00001593 case NVPTXISD::LoadV4:
Justin Holewinskie40e9292013-07-01 12:58:52 +00001594 case NVPTXISD::LDGV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001595 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1596 NVPTX::INT_PTX_LDG_G_v4i8_ELE_areg32,
1597 NVPTX::INT_PTX_LDG_G_v4i16_ELE_areg32,
1598 NVPTX::INT_PTX_LDG_G_v4i32_ELE_areg32, None,
1599 NVPTX::INT_PTX_LDG_G_v4f16_ELE_areg32,
1600 NVPTX::INT_PTX_LDG_G_v4f16x2_ELE_areg32,
1601 NVPTX::INT_PTX_LDG_G_v4f32_ELE_areg32, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001602 break;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001603 case NVPTXISD::LDUV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001604 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1605 NVPTX::INT_PTX_LDU_G_v4i8_ELE_areg32,
1606 NVPTX::INT_PTX_LDU_G_v4i16_ELE_areg32,
1607 NVPTX::INT_PTX_LDU_G_v4i32_ELE_areg32, None,
1608 NVPTX::INT_PTX_LDU_G_v4f16_ELE_areg32,
1609 NVPTX::INT_PTX_LDU_G_v4f16x2_ELE_areg32,
1610 NVPTX::INT_PTX_LDU_G_v4f32_ELE_areg32, None);
Justin Holewinski0497ab12013-03-30 14:29:21 +00001611 break;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001612 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001613 }
Artem Belevichee7dd122017-03-02 19:14:14 +00001614 if (!Opcode)
1615 return false;
Justin Holewinskie40e9292013-07-01 12:58:52 +00001616 SDValue Ops[] = { Op1, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001617 LD = CurDAG->getMachineNode(Opcode.getValue(), DL, InstVTList, Ops);
Justin Holewinskie40e9292013-07-01 12:58:52 +00001618 }
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001619
1620 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
Justin Holewinskib926d9d2014-06-27 18:35:51 +00001621 MemRefs0[0] = Mem->getMemOperand();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001622 cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1);
1623
Justin Holewinskic7997922016-04-05 12:38:01 +00001624 // For automatic generation of LDG (through SelectLoad[Vector], not the
1625 // intrinsics), we may have an extending load like:
1626 //
1627 // i32,ch = load<LD1[%data1(addrspace=1)], zext from i8> t0, t7, undef:i64
1628 //
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001629 // In this case, the matching logic above will select a load for the original
1630 // memory type (in this case, i8) and our types will not match (the node needs
1631 // to return an i32 in this case). Our LDG/LDU nodes do not support the
1632 // concept of sign-/zero-extension, so emulate it here by adding an explicit
1633 // CVT instruction. Ptxas should clean up any redundancies here.
1634
Justin Holewinskic7997922016-04-05 12:38:01 +00001635 EVT OrigType = N->getValueType(0);
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001636 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(N);
Justin Holewinskic7997922016-04-05 12:38:01 +00001637
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001638 if (OrigType != EltVT && LdNode) {
1639 // We have an extending-load. The instruction we selected operates on the
1640 // smaller type, but the SDNode we are replacing has the larger type. We
1641 // need to emit a CVT to make the types match.
1642 bool IsSigned = LdNode->getExtensionType() == ISD::SEXTLOAD;
1643 unsigned CvtOpc = GetConvertOpcode(OrigType.getSimpleVT(),
1644 EltVT.getSimpleVT(), IsSigned);
Justin Holewinskic7997922016-04-05 12:38:01 +00001645
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001646 // For each output value, apply the manual sign/zero-extension and make sure
1647 // all users of the load go through that CVT.
Justin Holewinskic7997922016-04-05 12:38:01 +00001648 for (unsigned i = 0; i != NumElts; ++i) {
1649 SDValue Res(LD, i);
1650 SDValue OrigVal(N, i);
1651
1652 SDNode *CvtNode =
1653 CurDAG->getMachineNode(CvtOpc, DL, OrigType, Res,
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00001654 CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE,
1655 DL, MVT::i32));
Justin Holewinskic7997922016-04-05 12:38:01 +00001656 ReplaceUses(OrigVal, SDValue(CvtNode, 0));
1657 }
1658 }
1659
Justin Bogner8d83fb62016-05-13 21:12:53 +00001660 ReplaceNode(N, LD);
1661 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001662}
1663
Justin Bogner8d83fb62016-05-13 21:12:53 +00001664bool NVPTXDAGToDAGISel::tryStore(SDNode *N) {
Andrew Trickef9de2a2013-05-25 02:42:55 +00001665 SDLoc dl(N);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001666 StoreSDNode *ST = cast<StoreSDNode>(N);
1667 EVT StoreVT = ST->getMemoryVT();
Craig Topper062a2ba2014-04-25 05:30:21 +00001668 SDNode *NVPTXST = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001669
1670 // do not support pre/post inc/dec
1671 if (ST->isIndexed())
Justin Bogner8d83fb62016-05-13 21:12:53 +00001672 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001673
1674 if (!StoreVT.isSimple())
Justin Bogner8d83fb62016-05-13 21:12:53 +00001675 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001676
1677 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00001678 unsigned int codeAddrSpace = getCodeAddrSpace(ST);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001679
1680 // Volatile Setting
1681 // - .volatile is only availalble for .global and .shared
1682 bool isVolatile = ST->isVolatile();
1683 if (codeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1684 codeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1685 codeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1686 isVolatile = false;
1687
1688 // Vector Setting
1689 MVT SimpleVT = StoreVT.getSimpleVT();
1690 unsigned vecType = NVPTX::PTXLdStInstCode::Scalar;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001691
1692 // Type Setting: toType + toTypeWidth
1693 // - for integer type, always use 'u'
1694 //
1695 MVT ScalarVT = SimpleVT.getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00001696 unsigned toTypeWidth = ScalarVT.getSizeInBits();
Artem Belevich620db1f2017-02-23 22:38:24 +00001697 if (SimpleVT.isVector()) {
1698 assert(StoreVT == MVT::v2f16 && "Unexpected vector type");
1699 // v2f16 is stored using st.b32
1700 toTypeWidth = 32;
1701 }
1702
Justin Holewinskiae556d32012-05-04 20:18:50 +00001703 unsigned int toType;
1704 if (ScalarVT.isFloatingPoint())
Artem Belevich64dc9be2017-01-13 20:56:17 +00001705 // f16 uses .b16 as its storage type.
1706 toType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1707 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001708 else
1709 toType = NVPTX::PTXLdStInstCode::Unsigned;
1710
1711 // Create the machine instruction DAG
1712 SDValue Chain = N->getOperand(0);
1713 SDValue N1 = N->getOperand(1);
1714 SDValue N2 = N->getOperand(2);
1715 SDValue Addr;
1716 SDValue Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +00001717 Optional<unsigned> Opcode;
Craig Topperd9c27832013-08-15 02:44:19 +00001718 MVT::SimpleValueType SourceVT = N1.getNode()->getSimpleValueType(0).SimpleTy;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001719
1720 if (SelectDirectAddr(N2, Addr)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00001721 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_avar, NVPTX::ST_i16_avar,
1722 NVPTX::ST_i32_avar, NVPTX::ST_i64_avar,
1723 NVPTX::ST_f16_avar, NVPTX::ST_f16x2_avar,
1724 NVPTX::ST_f32_avar, NVPTX::ST_f64_avar);
1725 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +00001726 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001727 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1728 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1729 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Addr,
1730 Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001731 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001732 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1733 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00001734 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_asi, NVPTX::ST_i16_asi,
1735 NVPTX::ST_i32_asi, NVPTX::ST_i64_asi,
1736 NVPTX::ST_f16_asi, NVPTX::ST_f16x2_asi,
1737 NVPTX::ST_f32_asi, NVPTX::ST_f64_asi);
1738 if (!Opcode)
Justin Bogner8d83fb62016-05-13 21:12:53 +00001739 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001740 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1741 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1742 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
1743 Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001744 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Eric Christopher02389e32015-02-19 00:08:27 +00001745 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1746 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
Artem Belevichee7dd122017-03-02 19:14:14 +00001747 if (TM.is64Bit())
1748 Opcode = pickOpcodeForVT(
1749 SourceVT, NVPTX::ST_i8_ari_64, NVPTX::ST_i16_ari_64,
1750 NVPTX::ST_i32_ari_64, NVPTX::ST_i64_ari_64, NVPTX::ST_f16_ari_64,
1751 NVPTX::ST_f16x2_ari_64, NVPTX::ST_f32_ari_64, NVPTX::ST_f64_ari_64);
1752 else
1753 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_ari, NVPTX::ST_i16_ari,
1754 NVPTX::ST_i32_ari, NVPTX::ST_i64_ari,
1755 NVPTX::ST_f16_ari, NVPTX::ST_f16x2_ari,
1756 NVPTX::ST_f32_ari, NVPTX::ST_f64_ari);
1757 if (!Opcode)
1758 return false;
1759
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001760 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1761 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1762 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), Base,
1763 Offset, Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001764 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001765 } else {
Artem Belevichee7dd122017-03-02 19:14:14 +00001766 if (TM.is64Bit())
1767 Opcode =
1768 pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg_64, NVPTX::ST_i16_areg_64,
1769 NVPTX::ST_i32_areg_64, NVPTX::ST_i64_areg_64,
1770 NVPTX::ST_f16_areg_64, NVPTX::ST_f16x2_areg_64,
1771 NVPTX::ST_f32_areg_64, NVPTX::ST_f64_areg_64);
1772 else
1773 Opcode = pickOpcodeForVT(SourceVT, NVPTX::ST_i8_areg, NVPTX::ST_i16_areg,
1774 NVPTX::ST_i32_areg, NVPTX::ST_i64_areg,
1775 NVPTX::ST_f16_areg, NVPTX::ST_f16x2_areg,
1776 NVPTX::ST_f32_areg, NVPTX::ST_f64_areg);
1777 if (!Opcode)
1778 return false;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001779 SDValue Ops[] = { N1, getI32Imm(isVolatile, dl),
1780 getI32Imm(codeAddrSpace, dl), getI32Imm(vecType, dl),
1781 getI32Imm(toType, dl), getI32Imm(toTypeWidth, dl), N2,
1782 Chain };
Artem Belevichee7dd122017-03-02 19:14:14 +00001783 NVPTXST = CurDAG->getMachineNode(Opcode.getValue(), dl, MVT::Other, Ops);
Justin Holewinskiae556d32012-05-04 20:18:50 +00001784 }
1785
Justin Bogner8d83fb62016-05-13 21:12:53 +00001786 if (!NVPTXST)
1787 return false;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001788
Justin Bogner8d83fb62016-05-13 21:12:53 +00001789 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
1790 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
1791 cast<MachineSDNode>(NVPTXST)->setMemRefs(MemRefs0, MemRefs0 + 1);
1792 ReplaceNode(N, NVPTXST);
1793 return true;
Justin Holewinskiae556d32012-05-04 20:18:50 +00001794}
1795
Justin Bogner8d83fb62016-05-13 21:12:53 +00001796bool NVPTXDAGToDAGISel::tryStoreVector(SDNode *N) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001797 SDValue Chain = N->getOperand(0);
1798 SDValue Op1 = N->getOperand(1);
1799 SDValue Addr, Offset, Base;
Artem Belevichee7dd122017-03-02 19:14:14 +00001800 Optional<unsigned> Opcode;
Andrew Trickef9de2a2013-05-25 02:42:55 +00001801 SDLoc DL(N);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001802 SDNode *ST;
1803 EVT EltVT = Op1.getValueType();
1804 MemSDNode *MemSD = cast<MemSDNode>(N);
1805 EVT StoreVT = MemSD->getMemoryVT();
1806
1807 // Address Space Setting
Eric Christopher9745b3a2015-01-30 01:41:01 +00001808 unsigned CodeAddrSpace = getCodeAddrSpace(MemSD);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001809
1810 if (CodeAddrSpace == NVPTX::PTXLdStInstCode::CONSTANT) {
1811 report_fatal_error("Cannot store to pointer that points to constant "
1812 "memory space");
1813 }
1814
1815 // Volatile Setting
1816 // - .volatile is only available for .global and .shared
1817 bool IsVolatile = MemSD->isVolatile();
1818 if (CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL &&
1819 CodeAddrSpace != NVPTX::PTXLdStInstCode::SHARED &&
1820 CodeAddrSpace != NVPTX::PTXLdStInstCode::GENERIC)
1821 IsVolatile = false;
1822
1823 // Type Setting: toType + toTypeWidth
1824 // - for integer type, always use 'u'
1825 assert(StoreVT.isSimple() && "Store value is not simple");
1826 MVT ScalarVT = StoreVT.getSimpleVT().getScalarType();
Justin Holewinski0497ab12013-03-30 14:29:21 +00001827 unsigned ToTypeWidth = ScalarVT.getSizeInBits();
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001828 unsigned ToType;
1829 if (ScalarVT.isFloatingPoint())
Artem Belevich620db1f2017-02-23 22:38:24 +00001830 ToType = ScalarVT.SimpleTy == MVT::f16 ? NVPTX::PTXLdStInstCode::Untyped
1831 : NVPTX::PTXLdStInstCode::Float;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001832 else
1833 ToType = NVPTX::PTXLdStInstCode::Unsigned;
1834
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001835 SmallVector<SDValue, 12> StOps;
1836 SDValue N2;
1837 unsigned VecType;
1838
1839 switch (N->getOpcode()) {
1840 case NVPTXISD::StoreV2:
1841 VecType = NVPTX::PTXLdStInstCode::V2;
1842 StOps.push_back(N->getOperand(1));
1843 StOps.push_back(N->getOperand(2));
1844 N2 = N->getOperand(3);
1845 break;
1846 case NVPTXISD::StoreV4:
1847 VecType = NVPTX::PTXLdStInstCode::V4;
1848 StOps.push_back(N->getOperand(1));
1849 StOps.push_back(N->getOperand(2));
1850 StOps.push_back(N->getOperand(3));
1851 StOps.push_back(N->getOperand(4));
1852 N2 = N->getOperand(5);
1853 break;
Justin Holewinski0497ab12013-03-30 14:29:21 +00001854 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001855 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001856 }
1857
Artem Belevich620db1f2017-02-23 22:38:24 +00001858 // v8f16 is a special case. PTX doesn't have st.v8.f16
1859 // instruction. Instead, we split the vector into v2f16 chunks and
1860 // store them with st.v4.b32.
1861 if (EltVT == MVT::v2f16) {
1862 assert(N->getOpcode() == NVPTXISD::StoreV4 && "Unexpected load opcode.");
1863 EltVT = MVT::i32;
1864 ToType = NVPTX::PTXLdStInstCode::Untyped;
1865 ToTypeWidth = 32;
1866 }
1867
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00001868 StOps.push_back(getI32Imm(IsVolatile, DL));
1869 StOps.push_back(getI32Imm(CodeAddrSpace, DL));
1870 StOps.push_back(getI32Imm(VecType, DL));
1871 StOps.push_back(getI32Imm(ToType, DL));
1872 StOps.push_back(getI32Imm(ToTypeWidth, DL));
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001873
1874 if (SelectDirectAddr(N2, Addr)) {
1875 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001876 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001877 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001878 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001879 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1880 NVPTX::STV_i8_v2_avar, NVPTX::STV_i16_v2_avar,
1881 NVPTX::STV_i32_v2_avar, NVPTX::STV_i64_v2_avar,
1882 NVPTX::STV_f16_v2_avar, NVPTX::STV_f16x2_v2_avar,
1883 NVPTX::STV_f32_v2_avar, NVPTX::STV_f64_v2_avar);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001884 break;
1885 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001886 Opcode =
1887 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_avar,
1888 NVPTX::STV_i16_v4_avar, NVPTX::STV_i32_v4_avar, None,
1889 NVPTX::STV_f16_v4_avar, NVPTX::STV_f16x2_v4_avar,
1890 NVPTX::STV_f32_v4_avar, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001891 break;
1892 }
1893 StOps.push_back(Addr);
Eric Christopher02389e32015-02-19 00:08:27 +00001894 } else if (TM.is64Bit() ? SelectADDRsi64(N2.getNode(), N2, Base, Offset)
1895 : SelectADDRsi(N2.getNode(), N2, Base, Offset)) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001896 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001897 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001898 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001899 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001900 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1901 NVPTX::STV_i8_v2_asi, NVPTX::STV_i16_v2_asi,
1902 NVPTX::STV_i32_v2_asi, NVPTX::STV_i64_v2_asi,
1903 NVPTX::STV_f16_v2_asi, NVPTX::STV_f16x2_v2_asi,
1904 NVPTX::STV_f32_v2_asi, NVPTX::STV_f64_v2_asi);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001905 break;
1906 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001907 Opcode =
1908 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_asi,
1909 NVPTX::STV_i16_v4_asi, NVPTX::STV_i32_v4_asi, None,
1910 NVPTX::STV_f16_v4_asi, NVPTX::STV_f16x2_v4_asi,
1911 NVPTX::STV_f32_v4_asi, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001912 break;
1913 }
1914 StOps.push_back(Base);
1915 StOps.push_back(Offset);
Eric Christopher02389e32015-02-19 00:08:27 +00001916 } else if (TM.is64Bit() ? SelectADDRri64(N2.getNode(), N2, Base, Offset)
1917 : SelectADDRri(N2.getNode(), N2, Base, Offset)) {
1918 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001919 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001920 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001921 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001922 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001923 Opcode = pickOpcodeForVT(
1924 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_ari_64,
1925 NVPTX::STV_i16_v2_ari_64, NVPTX::STV_i32_v2_ari_64,
1926 NVPTX::STV_i64_v2_ari_64, NVPTX::STV_f16_v2_ari_64,
1927 NVPTX::STV_f16x2_v2_ari_64, NVPTX::STV_f32_v2_ari_64,
1928 NVPTX::STV_f64_v2_ari_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001929 break;
1930 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001931 Opcode = pickOpcodeForVT(
1932 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari_64,
1933 NVPTX::STV_i16_v4_ari_64, NVPTX::STV_i32_v4_ari_64, None,
1934 NVPTX::STV_f16_v4_ari_64, NVPTX::STV_f16x2_v4_ari_64,
1935 NVPTX::STV_f32_v4_ari_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001936 break;
1937 }
1938 } else {
1939 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001940 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001941 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001942 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001943 Opcode = pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy,
1944 NVPTX::STV_i8_v2_ari, NVPTX::STV_i16_v2_ari,
1945 NVPTX::STV_i32_v2_ari, NVPTX::STV_i64_v2_ari,
1946 NVPTX::STV_f16_v2_ari, NVPTX::STV_f16x2_v2_ari,
1947 NVPTX::STV_f32_v2_ari, NVPTX::STV_f64_v2_ari);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001948 break;
1949 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001950 Opcode =
1951 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_ari,
1952 NVPTX::STV_i16_v4_ari, NVPTX::STV_i32_v4_ari, None,
1953 NVPTX::STV_f16_v4_ari, NVPTX::STV_f16x2_v4_ari,
1954 NVPTX::STV_f32_v4_ari, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001955 break;
1956 }
1957 }
1958 StOps.push_back(Base);
1959 StOps.push_back(Offset);
1960 } else {
Eric Christopher02389e32015-02-19 00:08:27 +00001961 if (TM.is64Bit()) {
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001962 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001963 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001964 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001965 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001966 Opcode = pickOpcodeForVT(
1967 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg_64,
1968 NVPTX::STV_i16_v2_areg_64, NVPTX::STV_i32_v2_areg_64,
1969 NVPTX::STV_i64_v2_areg_64, NVPTX::STV_f16_v2_areg_64,
1970 NVPTX::STV_f16x2_v2_areg_64, NVPTX::STV_f32_v2_areg_64,
1971 NVPTX::STV_f64_v2_areg_64);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001972 break;
1973 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001974 Opcode = pickOpcodeForVT(
1975 EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg_64,
1976 NVPTX::STV_i16_v4_areg_64, NVPTX::STV_i32_v4_areg_64, None,
1977 NVPTX::STV_f16_v4_areg_64, NVPTX::STV_f16x2_v4_areg_64,
1978 NVPTX::STV_f32_v4_areg_64, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001979 break;
1980 }
1981 } else {
1982 switch (N->getOpcode()) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00001983 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00001984 return false;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001985 case NVPTXISD::StoreV2:
Artem Belevichee7dd122017-03-02 19:14:14 +00001986 Opcode =
1987 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v2_areg,
1988 NVPTX::STV_i16_v2_areg, NVPTX::STV_i32_v2_areg,
1989 NVPTX::STV_i64_v2_areg, NVPTX::STV_f16_v2_areg,
1990 NVPTX::STV_f16x2_v2_areg, NVPTX::STV_f32_v2_areg,
1991 NVPTX::STV_f64_v2_areg);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001992 break;
1993 case NVPTXISD::StoreV4:
Artem Belevichee7dd122017-03-02 19:14:14 +00001994 Opcode =
1995 pickOpcodeForVT(EltVT.getSimpleVT().SimpleTy, NVPTX::STV_i8_v4_areg,
1996 NVPTX::STV_i16_v4_areg, NVPTX::STV_i32_v4_areg, None,
1997 NVPTX::STV_f16_v4_areg, NVPTX::STV_f16x2_v4_areg,
1998 NVPTX::STV_f32_v4_areg, None);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00001999 break;
2000 }
2001 }
2002 StOps.push_back(N2);
2003 }
2004
Artem Belevichee7dd122017-03-02 19:14:14 +00002005 if (!Opcode)
2006 return false;
2007
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002008 StOps.push_back(Chain);
2009
Artem Belevichee7dd122017-03-02 19:14:14 +00002010 ST = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, StOps);
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002011
2012 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2013 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2014 cast<MachineSDNode>(ST)->setMemRefs(MemRefs0, MemRefs0 + 1);
2015
Justin Bogner8d83fb62016-05-13 21:12:53 +00002016 ReplaceNode(N, ST);
2017 return true;
Justin Holewinskibe8dc642013-02-12 14:18:49 +00002018}
2019
Justin Bogner8d83fb62016-05-13 21:12:53 +00002020bool NVPTXDAGToDAGISel::tryLoadParam(SDNode *Node) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00002021 SDValue Chain = Node->getOperand(0);
2022 SDValue Offset = Node->getOperand(2);
2023 SDValue Flag = Node->getOperand(3);
2024 SDLoc DL(Node);
2025 MemSDNode *Mem = cast<MemSDNode>(Node);
2026
2027 unsigned VecSize;
2028 switch (Node->getOpcode()) {
2029 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002030 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002031 case NVPTXISD::LoadParam:
2032 VecSize = 1;
2033 break;
2034 case NVPTXISD::LoadParamV2:
2035 VecSize = 2;
2036 break;
2037 case NVPTXISD::LoadParamV4:
2038 VecSize = 4;
2039 break;
2040 }
2041
2042 EVT EltVT = Node->getValueType(0);
2043 EVT MemVT = Mem->getMemoryVT();
2044
Artem Belevichee7dd122017-03-02 19:14:14 +00002045 Optional<unsigned> Opcode;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002046
2047 switch (VecSize) {
2048 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002049 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002050 case 1:
Artem Belevichee7dd122017-03-02 19:14:14 +00002051 Opcode = pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy,
2052 NVPTX::LoadParamMemI8, NVPTX::LoadParamMemI16,
2053 NVPTX::LoadParamMemI32, NVPTX::LoadParamMemI64,
2054 NVPTX::LoadParamMemF16, NVPTX::LoadParamMemF16x2,
2055 NVPTX::LoadParamMemF32, NVPTX::LoadParamMemF64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002056 break;
2057 case 2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002058 Opcode =
2059 pickOpcodeForVT(MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV2I8,
2060 NVPTX::LoadParamMemV2I16, NVPTX::LoadParamMemV2I32,
2061 NVPTX::LoadParamMemV2I64, NVPTX::LoadParamMemV2F16,
2062 NVPTX::LoadParamMemV2F16x2, NVPTX::LoadParamMemV2F32,
2063 NVPTX::LoadParamMemV2F64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002064 break;
2065 case 4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002066 Opcode = pickOpcodeForVT(
2067 MemVT.getSimpleVT().SimpleTy, NVPTX::LoadParamMemV4I8,
2068 NVPTX::LoadParamMemV4I16, NVPTX::LoadParamMemV4I32, None,
2069 NVPTX::LoadParamMemV4F16, NVPTX::LoadParamMemV4F16x2,
2070 NVPTX::LoadParamMemV4F32, None);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002071 break;
2072 }
Artem Belevichee7dd122017-03-02 19:14:14 +00002073 if (!Opcode)
2074 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002075
2076 SDVTList VTs;
2077 if (VecSize == 1) {
2078 VTs = CurDAG->getVTList(EltVT, MVT::Other, MVT::Glue);
2079 } else if (VecSize == 2) {
2080 VTs = CurDAG->getVTList(EltVT, EltVT, MVT::Other, MVT::Glue);
2081 } else {
2082 EVT EVTs[] = { EltVT, EltVT, EltVT, EltVT, MVT::Other, MVT::Glue };
Craig Topperabb4ac72014-04-16 06:10:51 +00002083 VTs = CurDAG->getVTList(EVTs);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002084 }
2085
2086 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2087
2088 SmallVector<SDValue, 2> Ops;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002089 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002090 Ops.push_back(Chain);
2091 Ops.push_back(Flag);
2092
Artem Belevichee7dd122017-03-02 19:14:14 +00002093 ReplaceNode(Node, CurDAG->getMachineNode(Opcode.getValue(), DL, VTs, Ops));
Justin Bogner8d83fb62016-05-13 21:12:53 +00002094 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002095}
2096
Justin Bogner8d83fb62016-05-13 21:12:53 +00002097bool NVPTXDAGToDAGISel::tryStoreRetval(SDNode *N) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00002098 SDLoc DL(N);
2099 SDValue Chain = N->getOperand(0);
2100 SDValue Offset = N->getOperand(1);
2101 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2102 MemSDNode *Mem = cast<MemSDNode>(N);
2103
2104 // How many elements do we have?
2105 unsigned NumElts = 1;
2106 switch (N->getOpcode()) {
2107 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002108 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002109 case NVPTXISD::StoreRetval:
2110 NumElts = 1;
2111 break;
2112 case NVPTXISD::StoreRetvalV2:
2113 NumElts = 2;
2114 break;
2115 case NVPTXISD::StoreRetvalV4:
2116 NumElts = 4;
2117 break;
2118 }
2119
2120 // Build vector of operands
2121 SmallVector<SDValue, 6> Ops;
2122 for (unsigned i = 0; i < NumElts; ++i)
2123 Ops.push_back(N->getOperand(i + 2));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002124 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002125 Ops.push_back(Chain);
2126
2127 // Determine target opcode
2128 // If we have an i1, use an 8-bit store. The lowering code in
2129 // NVPTXISelLowering will have already emitted an upcast.
Artem Belevichee7dd122017-03-02 19:14:14 +00002130 Optional<unsigned> Opcode = 0;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002131 switch (NumElts) {
2132 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002133 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002134 case 1:
Artem Belevichee7dd122017-03-02 19:14:14 +00002135 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2136 NVPTX::StoreRetvalI8, NVPTX::StoreRetvalI16,
2137 NVPTX::StoreRetvalI32, NVPTX::StoreRetvalI64,
2138 NVPTX::StoreRetvalF16, NVPTX::StoreRetvalF16x2,
2139 NVPTX::StoreRetvalF32, NVPTX::StoreRetvalF64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002140 break;
2141 case 2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002142 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2143 NVPTX::StoreRetvalV2I8, NVPTX::StoreRetvalV2I16,
2144 NVPTX::StoreRetvalV2I32, NVPTX::StoreRetvalV2I64,
2145 NVPTX::StoreRetvalV2F16, NVPTX::StoreRetvalV2F16x2,
2146 NVPTX::StoreRetvalV2F32, NVPTX::StoreRetvalV2F64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002147 break;
2148 case 4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002149 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2150 NVPTX::StoreRetvalV4I8, NVPTX::StoreRetvalV4I16,
2151 NVPTX::StoreRetvalV4I32, None,
2152 NVPTX::StoreRetvalV4F16, NVPTX::StoreRetvalV4F16x2,
2153 NVPTX::StoreRetvalV4F32, None);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002154 break;
2155 }
Artem Belevichee7dd122017-03-02 19:14:14 +00002156 if (!Opcode)
2157 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002158
Artem Belevichee7dd122017-03-02 19:14:14 +00002159 SDNode *Ret = CurDAG->getMachineNode(Opcode.getValue(), DL, MVT::Other, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002160 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2161 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2162 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2163
Justin Bogner8d83fb62016-05-13 21:12:53 +00002164 ReplaceNode(N, Ret);
2165 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002166}
2167
Justin Bogner8d83fb62016-05-13 21:12:53 +00002168bool NVPTXDAGToDAGISel::tryStoreParam(SDNode *N) {
Justin Holewinskif8f70912013-06-28 17:57:59 +00002169 SDLoc DL(N);
2170 SDValue Chain = N->getOperand(0);
2171 SDValue Param = N->getOperand(1);
2172 unsigned ParamVal = cast<ConstantSDNode>(Param)->getZExtValue();
2173 SDValue Offset = N->getOperand(2);
2174 unsigned OffsetVal = cast<ConstantSDNode>(Offset)->getZExtValue();
2175 MemSDNode *Mem = cast<MemSDNode>(N);
2176 SDValue Flag = N->getOperand(N->getNumOperands() - 1);
2177
2178 // How many elements do we have?
2179 unsigned NumElts = 1;
2180 switch (N->getOpcode()) {
2181 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002182 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002183 case NVPTXISD::StoreParamU32:
2184 case NVPTXISD::StoreParamS32:
2185 case NVPTXISD::StoreParam:
2186 NumElts = 1;
2187 break;
2188 case NVPTXISD::StoreParamV2:
2189 NumElts = 2;
2190 break;
2191 case NVPTXISD::StoreParamV4:
2192 NumElts = 4;
2193 break;
2194 }
2195
2196 // Build vector of operands
2197 SmallVector<SDValue, 8> Ops;
2198 for (unsigned i = 0; i < NumElts; ++i)
2199 Ops.push_back(N->getOperand(i + 3));
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002200 Ops.push_back(CurDAG->getTargetConstant(ParamVal, DL, MVT::i32));
2201 Ops.push_back(CurDAG->getTargetConstant(OffsetVal, DL, MVT::i32));
Justin Holewinskif8f70912013-06-28 17:57:59 +00002202 Ops.push_back(Chain);
2203 Ops.push_back(Flag);
2204
2205 // Determine target opcode
2206 // If we have an i1, use an 8-bit store. The lowering code in
2207 // NVPTXISelLowering will have already emitted an upcast.
Artem Belevichee7dd122017-03-02 19:14:14 +00002208 Optional<unsigned> Opcode = 0;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002209 switch (N->getOpcode()) {
2210 default:
2211 switch (NumElts) {
2212 default:
Justin Bogner8d83fb62016-05-13 21:12:53 +00002213 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002214 case 1:
Artem Belevichee7dd122017-03-02 19:14:14 +00002215 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2216 NVPTX::StoreParamI8, NVPTX::StoreParamI16,
2217 NVPTX::StoreParamI32, NVPTX::StoreParamI64,
2218 NVPTX::StoreParamF16, NVPTX::StoreParamF16x2,
2219 NVPTX::StoreParamF32, NVPTX::StoreParamF64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002220 break;
2221 case 2:
Artem Belevichee7dd122017-03-02 19:14:14 +00002222 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2223 NVPTX::StoreParamV2I8, NVPTX::StoreParamV2I16,
2224 NVPTX::StoreParamV2I32, NVPTX::StoreParamV2I64,
2225 NVPTX::StoreParamV2F16, NVPTX::StoreParamV2F16x2,
2226 NVPTX::StoreParamV2F32, NVPTX::StoreParamV2F64);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002227 break;
2228 case 4:
Artem Belevichee7dd122017-03-02 19:14:14 +00002229 Opcode = pickOpcodeForVT(Mem->getMemoryVT().getSimpleVT().SimpleTy,
2230 NVPTX::StoreParamV4I8, NVPTX::StoreParamV4I16,
2231 NVPTX::StoreParamV4I32, None,
2232 NVPTX::StoreParamV4F16, NVPTX::StoreParamV4F16x2,
2233 NVPTX::StoreParamV4F32, None);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002234 break;
2235 }
Artem Belevichee7dd122017-03-02 19:14:14 +00002236 if (!Opcode)
2237 return false;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002238 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002239 // Special case: if we have a sign-extend/zero-extend node, insert the
2240 // conversion instruction first, and use that as the value operand to
2241 // the selected StoreParam node.
2242 case NVPTXISD::StoreParamU32: {
2243 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002244 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002245 MVT::i32);
2246 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_u32_u16, DL,
2247 MVT::i32, Ops[0], CvtNone);
2248 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002249 break;
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002250 }
2251 case NVPTXISD::StoreParamS32: {
2252 Opcode = NVPTX::StoreParamI32;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00002253 SDValue CvtNone = CurDAG->getTargetConstant(NVPTX::PTXCvtMode::NONE, DL,
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002254 MVT::i32);
2255 SDNode *Cvt = CurDAG->getMachineNode(NVPTX::CVT_s32_s16, DL,
2256 MVT::i32, Ops[0], CvtNone);
2257 Ops[0] = SDValue(Cvt, 0);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002258 break;
2259 }
Justin Holewinskidc5e3b62013-06-28 17:58:04 +00002260 }
Justin Holewinskif8f70912013-06-28 17:57:59 +00002261
Justin Holewinskidff28d22013-07-01 12:59:01 +00002262 SDVTList RetVTs = CurDAG->getVTList(MVT::Other, MVT::Glue);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002263 SDNode *Ret =
Artem Belevichee7dd122017-03-02 19:14:14 +00002264 CurDAG->getMachineNode(Opcode.getValue(), DL, RetVTs, Ops);
Justin Holewinskif8f70912013-06-28 17:57:59 +00002265 MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
2266 MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand();
2267 cast<MachineSDNode>(Ret)->setMemRefs(MemRefs0, MemRefs0 + 1);
2268
Justin Bogner8d83fb62016-05-13 21:12:53 +00002269 ReplaceNode(N, Ret);
2270 return true;
Justin Holewinskif8f70912013-06-28 17:57:59 +00002271}
2272
Justin Bogner8d83fb62016-05-13 21:12:53 +00002273bool NVPTXDAGToDAGISel::tryTextureIntrinsic(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00002274 unsigned Opc = 0;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002275
2276 switch (N->getOpcode()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00002277 default: return false;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002278 case NVPTXISD::Tex1DFloatS32:
2279 Opc = NVPTX::TEX_1D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002280 break;
2281 case NVPTXISD::Tex1DFloatFloat:
2282 Opc = NVPTX::TEX_1D_F32_F32;
2283 break;
2284 case NVPTXISD::Tex1DFloatFloatLevel:
2285 Opc = NVPTX::TEX_1D_F32_F32_LEVEL;
2286 break;
2287 case NVPTXISD::Tex1DFloatFloatGrad:
2288 Opc = NVPTX::TEX_1D_F32_F32_GRAD;
2289 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002290 case NVPTXISD::Tex1DS32S32:
2291 Opc = NVPTX::TEX_1D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002292 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002293 case NVPTXISD::Tex1DS32Float:
2294 Opc = NVPTX::TEX_1D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002295 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002296 case NVPTXISD::Tex1DS32FloatLevel:
2297 Opc = NVPTX::TEX_1D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002298 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002299 case NVPTXISD::Tex1DS32FloatGrad:
2300 Opc = NVPTX::TEX_1D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002301 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002302 case NVPTXISD::Tex1DU32S32:
2303 Opc = NVPTX::TEX_1D_U32_S32;
2304 break;
2305 case NVPTXISD::Tex1DU32Float:
2306 Opc = NVPTX::TEX_1D_U32_F32;
2307 break;
2308 case NVPTXISD::Tex1DU32FloatLevel:
2309 Opc = NVPTX::TEX_1D_U32_F32_LEVEL;
2310 break;
2311 case NVPTXISD::Tex1DU32FloatGrad:
2312 Opc = NVPTX::TEX_1D_U32_F32_GRAD;
2313 break;
2314 case NVPTXISD::Tex1DArrayFloatS32:
2315 Opc = NVPTX::TEX_1D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002316 break;
2317 case NVPTXISD::Tex1DArrayFloatFloat:
2318 Opc = NVPTX::TEX_1D_ARRAY_F32_F32;
2319 break;
2320 case NVPTXISD::Tex1DArrayFloatFloatLevel:
2321 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_LEVEL;
2322 break;
2323 case NVPTXISD::Tex1DArrayFloatFloatGrad:
2324 Opc = NVPTX::TEX_1D_ARRAY_F32_F32_GRAD;
2325 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002326 case NVPTXISD::Tex1DArrayS32S32:
2327 Opc = NVPTX::TEX_1D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002328 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002329 case NVPTXISD::Tex1DArrayS32Float:
2330 Opc = NVPTX::TEX_1D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002331 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002332 case NVPTXISD::Tex1DArrayS32FloatLevel:
2333 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002334 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002335 case NVPTXISD::Tex1DArrayS32FloatGrad:
2336 Opc = NVPTX::TEX_1D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002337 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002338 case NVPTXISD::Tex1DArrayU32S32:
2339 Opc = NVPTX::TEX_1D_ARRAY_U32_S32;
2340 break;
2341 case NVPTXISD::Tex1DArrayU32Float:
2342 Opc = NVPTX::TEX_1D_ARRAY_U32_F32;
2343 break;
2344 case NVPTXISD::Tex1DArrayU32FloatLevel:
2345 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_LEVEL;
2346 break;
2347 case NVPTXISD::Tex1DArrayU32FloatGrad:
2348 Opc = NVPTX::TEX_1D_ARRAY_U32_F32_GRAD;
2349 break;
2350 case NVPTXISD::Tex2DFloatS32:
2351 Opc = NVPTX::TEX_2D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002352 break;
2353 case NVPTXISD::Tex2DFloatFloat:
2354 Opc = NVPTX::TEX_2D_F32_F32;
2355 break;
2356 case NVPTXISD::Tex2DFloatFloatLevel:
2357 Opc = NVPTX::TEX_2D_F32_F32_LEVEL;
2358 break;
2359 case NVPTXISD::Tex2DFloatFloatGrad:
2360 Opc = NVPTX::TEX_2D_F32_F32_GRAD;
2361 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002362 case NVPTXISD::Tex2DS32S32:
2363 Opc = NVPTX::TEX_2D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002364 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002365 case NVPTXISD::Tex2DS32Float:
2366 Opc = NVPTX::TEX_2D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002367 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002368 case NVPTXISD::Tex2DS32FloatLevel:
2369 Opc = NVPTX::TEX_2D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002370 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002371 case NVPTXISD::Tex2DS32FloatGrad:
2372 Opc = NVPTX::TEX_2D_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002373 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002374 case NVPTXISD::Tex2DU32S32:
2375 Opc = NVPTX::TEX_2D_U32_S32;
2376 break;
2377 case NVPTXISD::Tex2DU32Float:
2378 Opc = NVPTX::TEX_2D_U32_F32;
2379 break;
2380 case NVPTXISD::Tex2DU32FloatLevel:
2381 Opc = NVPTX::TEX_2D_U32_F32_LEVEL;
2382 break;
2383 case NVPTXISD::Tex2DU32FloatGrad:
2384 Opc = NVPTX::TEX_2D_U32_F32_GRAD;
2385 break;
2386 case NVPTXISD::Tex2DArrayFloatS32:
2387 Opc = NVPTX::TEX_2D_ARRAY_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002388 break;
2389 case NVPTXISD::Tex2DArrayFloatFloat:
2390 Opc = NVPTX::TEX_2D_ARRAY_F32_F32;
2391 break;
2392 case NVPTXISD::Tex2DArrayFloatFloatLevel:
2393 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_LEVEL;
2394 break;
2395 case NVPTXISD::Tex2DArrayFloatFloatGrad:
2396 Opc = NVPTX::TEX_2D_ARRAY_F32_F32_GRAD;
2397 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002398 case NVPTXISD::Tex2DArrayS32S32:
2399 Opc = NVPTX::TEX_2D_ARRAY_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002400 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002401 case NVPTXISD::Tex2DArrayS32Float:
2402 Opc = NVPTX::TEX_2D_ARRAY_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002403 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002404 case NVPTXISD::Tex2DArrayS32FloatLevel:
2405 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002406 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002407 case NVPTXISD::Tex2DArrayS32FloatGrad:
2408 Opc = NVPTX::TEX_2D_ARRAY_S32_F32_GRAD;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002409 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002410 case NVPTXISD::Tex2DArrayU32S32:
2411 Opc = NVPTX::TEX_2D_ARRAY_U32_S32;
2412 break;
2413 case NVPTXISD::Tex2DArrayU32Float:
2414 Opc = NVPTX::TEX_2D_ARRAY_U32_F32;
2415 break;
2416 case NVPTXISD::Tex2DArrayU32FloatLevel:
2417 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_LEVEL;
2418 break;
2419 case NVPTXISD::Tex2DArrayU32FloatGrad:
2420 Opc = NVPTX::TEX_2D_ARRAY_U32_F32_GRAD;
2421 break;
2422 case NVPTXISD::Tex3DFloatS32:
2423 Opc = NVPTX::TEX_3D_F32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002424 break;
2425 case NVPTXISD::Tex3DFloatFloat:
2426 Opc = NVPTX::TEX_3D_F32_F32;
2427 break;
2428 case NVPTXISD::Tex3DFloatFloatLevel:
2429 Opc = NVPTX::TEX_3D_F32_F32_LEVEL;
2430 break;
2431 case NVPTXISD::Tex3DFloatFloatGrad:
2432 Opc = NVPTX::TEX_3D_F32_F32_GRAD;
2433 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002434 case NVPTXISD::Tex3DS32S32:
2435 Opc = NVPTX::TEX_3D_S32_S32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002436 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002437 case NVPTXISD::Tex3DS32Float:
2438 Opc = NVPTX::TEX_3D_S32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002439 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002440 case NVPTXISD::Tex3DS32FloatLevel:
2441 Opc = NVPTX::TEX_3D_S32_F32_LEVEL;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002442 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002443 case NVPTXISD::Tex3DS32FloatGrad:
2444 Opc = NVPTX::TEX_3D_S32_F32_GRAD;
2445 break;
2446 case NVPTXISD::Tex3DU32S32:
2447 Opc = NVPTX::TEX_3D_U32_S32;
2448 break;
2449 case NVPTXISD::Tex3DU32Float:
2450 Opc = NVPTX::TEX_3D_U32_F32;
2451 break;
2452 case NVPTXISD::Tex3DU32FloatLevel:
2453 Opc = NVPTX::TEX_3D_U32_F32_LEVEL;
2454 break;
2455 case NVPTXISD::Tex3DU32FloatGrad:
2456 Opc = NVPTX::TEX_3D_U32_F32_GRAD;
2457 break;
2458 case NVPTXISD::TexCubeFloatFloat:
2459 Opc = NVPTX::TEX_CUBE_F32_F32;
2460 break;
2461 case NVPTXISD::TexCubeFloatFloatLevel:
2462 Opc = NVPTX::TEX_CUBE_F32_F32_LEVEL;
2463 break;
2464 case NVPTXISD::TexCubeS32Float:
2465 Opc = NVPTX::TEX_CUBE_S32_F32;
2466 break;
2467 case NVPTXISD::TexCubeS32FloatLevel:
2468 Opc = NVPTX::TEX_CUBE_S32_F32_LEVEL;
2469 break;
2470 case NVPTXISD::TexCubeU32Float:
2471 Opc = NVPTX::TEX_CUBE_U32_F32;
2472 break;
2473 case NVPTXISD::TexCubeU32FloatLevel:
2474 Opc = NVPTX::TEX_CUBE_U32_F32_LEVEL;
2475 break;
2476 case NVPTXISD::TexCubeArrayFloatFloat:
2477 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32;
2478 break;
2479 case NVPTXISD::TexCubeArrayFloatFloatLevel:
2480 Opc = NVPTX::TEX_CUBE_ARRAY_F32_F32_LEVEL;
2481 break;
2482 case NVPTXISD::TexCubeArrayS32Float:
2483 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32;
2484 break;
2485 case NVPTXISD::TexCubeArrayS32FloatLevel:
2486 Opc = NVPTX::TEX_CUBE_ARRAY_S32_F32_LEVEL;
2487 break;
2488 case NVPTXISD::TexCubeArrayU32Float:
2489 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32;
2490 break;
2491 case NVPTXISD::TexCubeArrayU32FloatLevel:
2492 Opc = NVPTX::TEX_CUBE_ARRAY_U32_F32_LEVEL;
2493 break;
2494 case NVPTXISD::Tld4R2DFloatFloat:
2495 Opc = NVPTX::TLD4_R_2D_F32_F32;
2496 break;
2497 case NVPTXISD::Tld4G2DFloatFloat:
2498 Opc = NVPTX::TLD4_G_2D_F32_F32;
2499 break;
2500 case NVPTXISD::Tld4B2DFloatFloat:
2501 Opc = NVPTX::TLD4_B_2D_F32_F32;
2502 break;
2503 case NVPTXISD::Tld4A2DFloatFloat:
2504 Opc = NVPTX::TLD4_A_2D_F32_F32;
2505 break;
2506 case NVPTXISD::Tld4R2DS64Float:
2507 Opc = NVPTX::TLD4_R_2D_S32_F32;
2508 break;
2509 case NVPTXISD::Tld4G2DS64Float:
2510 Opc = NVPTX::TLD4_G_2D_S32_F32;
2511 break;
2512 case NVPTXISD::Tld4B2DS64Float:
2513 Opc = NVPTX::TLD4_B_2D_S32_F32;
2514 break;
2515 case NVPTXISD::Tld4A2DS64Float:
2516 Opc = NVPTX::TLD4_A_2D_S32_F32;
2517 break;
2518 case NVPTXISD::Tld4R2DU64Float:
2519 Opc = NVPTX::TLD4_R_2D_U32_F32;
2520 break;
2521 case NVPTXISD::Tld4G2DU64Float:
2522 Opc = NVPTX::TLD4_G_2D_U32_F32;
2523 break;
2524 case NVPTXISD::Tld4B2DU64Float:
2525 Opc = NVPTX::TLD4_B_2D_U32_F32;
2526 break;
2527 case NVPTXISD::Tld4A2DU64Float:
2528 Opc = NVPTX::TLD4_A_2D_U32_F32;
2529 break;
2530 case NVPTXISD::TexUnified1DFloatS32:
2531 Opc = NVPTX::TEX_UNIFIED_1D_F32_S32;
2532 break;
2533 case NVPTXISD::TexUnified1DFloatFloat:
2534 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32;
2535 break;
2536 case NVPTXISD::TexUnified1DFloatFloatLevel:
2537 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_LEVEL;
2538 break;
2539 case NVPTXISD::TexUnified1DFloatFloatGrad:
2540 Opc = NVPTX::TEX_UNIFIED_1D_F32_F32_GRAD;
2541 break;
2542 case NVPTXISD::TexUnified1DS32S32:
2543 Opc = NVPTX::TEX_UNIFIED_1D_S32_S32;
2544 break;
2545 case NVPTXISD::TexUnified1DS32Float:
2546 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32;
2547 break;
2548 case NVPTXISD::TexUnified1DS32FloatLevel:
2549 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_LEVEL;
2550 break;
2551 case NVPTXISD::TexUnified1DS32FloatGrad:
2552 Opc = NVPTX::TEX_UNIFIED_1D_S32_F32_GRAD;
2553 break;
2554 case NVPTXISD::TexUnified1DU32S32:
2555 Opc = NVPTX::TEX_UNIFIED_1D_U32_S32;
2556 break;
2557 case NVPTXISD::TexUnified1DU32Float:
2558 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32;
2559 break;
2560 case NVPTXISD::TexUnified1DU32FloatLevel:
2561 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_LEVEL;
2562 break;
2563 case NVPTXISD::TexUnified1DU32FloatGrad:
2564 Opc = NVPTX::TEX_UNIFIED_1D_U32_F32_GRAD;
2565 break;
2566 case NVPTXISD::TexUnified1DArrayFloatS32:
2567 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_S32;
2568 break;
2569 case NVPTXISD::TexUnified1DArrayFloatFloat:
2570 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32;
2571 break;
2572 case NVPTXISD::TexUnified1DArrayFloatFloatLevel:
2573 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_LEVEL;
2574 break;
2575 case NVPTXISD::TexUnified1DArrayFloatFloatGrad:
2576 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_F32_F32_GRAD;
2577 break;
2578 case NVPTXISD::TexUnified1DArrayS32S32:
2579 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_S32;
2580 break;
2581 case NVPTXISD::TexUnified1DArrayS32Float:
2582 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32;
2583 break;
2584 case NVPTXISD::TexUnified1DArrayS32FloatLevel:
2585 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_LEVEL;
2586 break;
2587 case NVPTXISD::TexUnified1DArrayS32FloatGrad:
2588 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_S32_F32_GRAD;
2589 break;
2590 case NVPTXISD::TexUnified1DArrayU32S32:
2591 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_S32;
2592 break;
2593 case NVPTXISD::TexUnified1DArrayU32Float:
2594 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32;
2595 break;
2596 case NVPTXISD::TexUnified1DArrayU32FloatLevel:
2597 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_LEVEL;
2598 break;
2599 case NVPTXISD::TexUnified1DArrayU32FloatGrad:
2600 Opc = NVPTX::TEX_UNIFIED_1D_ARRAY_U32_F32_GRAD;
2601 break;
2602 case NVPTXISD::TexUnified2DFloatS32:
2603 Opc = NVPTX::TEX_UNIFIED_2D_F32_S32;
2604 break;
2605 case NVPTXISD::TexUnified2DFloatFloat:
2606 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32;
2607 break;
2608 case NVPTXISD::TexUnified2DFloatFloatLevel:
2609 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_LEVEL;
2610 break;
2611 case NVPTXISD::TexUnified2DFloatFloatGrad:
2612 Opc = NVPTX::TEX_UNIFIED_2D_F32_F32_GRAD;
2613 break;
2614 case NVPTXISD::TexUnified2DS32S32:
2615 Opc = NVPTX::TEX_UNIFIED_2D_S32_S32;
2616 break;
2617 case NVPTXISD::TexUnified2DS32Float:
2618 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32;
2619 break;
2620 case NVPTXISD::TexUnified2DS32FloatLevel:
2621 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_LEVEL;
2622 break;
2623 case NVPTXISD::TexUnified2DS32FloatGrad:
2624 Opc = NVPTX::TEX_UNIFIED_2D_S32_F32_GRAD;
2625 break;
2626 case NVPTXISD::TexUnified2DU32S32:
2627 Opc = NVPTX::TEX_UNIFIED_2D_U32_S32;
2628 break;
2629 case NVPTXISD::TexUnified2DU32Float:
2630 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32;
2631 break;
2632 case NVPTXISD::TexUnified2DU32FloatLevel:
2633 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_LEVEL;
2634 break;
2635 case NVPTXISD::TexUnified2DU32FloatGrad:
2636 Opc = NVPTX::TEX_UNIFIED_2D_U32_F32_GRAD;
2637 break;
2638 case NVPTXISD::TexUnified2DArrayFloatS32:
2639 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_S32;
2640 break;
2641 case NVPTXISD::TexUnified2DArrayFloatFloat:
2642 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32;
2643 break;
2644 case NVPTXISD::TexUnified2DArrayFloatFloatLevel:
2645 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_LEVEL;
2646 break;
2647 case NVPTXISD::TexUnified2DArrayFloatFloatGrad:
2648 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_F32_F32_GRAD;
2649 break;
2650 case NVPTXISD::TexUnified2DArrayS32S32:
2651 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_S32;
2652 break;
2653 case NVPTXISD::TexUnified2DArrayS32Float:
2654 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32;
2655 break;
2656 case NVPTXISD::TexUnified2DArrayS32FloatLevel:
2657 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_LEVEL;
2658 break;
2659 case NVPTXISD::TexUnified2DArrayS32FloatGrad:
2660 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_S32_F32_GRAD;
2661 break;
2662 case NVPTXISD::TexUnified2DArrayU32S32:
2663 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_S32;
2664 break;
2665 case NVPTXISD::TexUnified2DArrayU32Float:
2666 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32;
2667 break;
2668 case NVPTXISD::TexUnified2DArrayU32FloatLevel:
2669 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_LEVEL;
2670 break;
2671 case NVPTXISD::TexUnified2DArrayU32FloatGrad:
2672 Opc = NVPTX::TEX_UNIFIED_2D_ARRAY_U32_F32_GRAD;
2673 break;
2674 case NVPTXISD::TexUnified3DFloatS32:
2675 Opc = NVPTX::TEX_UNIFIED_3D_F32_S32;
2676 break;
2677 case NVPTXISD::TexUnified3DFloatFloat:
2678 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32;
2679 break;
2680 case NVPTXISD::TexUnified3DFloatFloatLevel:
2681 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_LEVEL;
2682 break;
2683 case NVPTXISD::TexUnified3DFloatFloatGrad:
2684 Opc = NVPTX::TEX_UNIFIED_3D_F32_F32_GRAD;
2685 break;
2686 case NVPTXISD::TexUnified3DS32S32:
2687 Opc = NVPTX::TEX_UNIFIED_3D_S32_S32;
2688 break;
2689 case NVPTXISD::TexUnified3DS32Float:
2690 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32;
2691 break;
2692 case NVPTXISD::TexUnified3DS32FloatLevel:
2693 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_LEVEL;
2694 break;
2695 case NVPTXISD::TexUnified3DS32FloatGrad:
2696 Opc = NVPTX::TEX_UNIFIED_3D_S32_F32_GRAD;
2697 break;
2698 case NVPTXISD::TexUnified3DU32S32:
2699 Opc = NVPTX::TEX_UNIFIED_3D_U32_S32;
2700 break;
2701 case NVPTXISD::TexUnified3DU32Float:
2702 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32;
2703 break;
2704 case NVPTXISD::TexUnified3DU32FloatLevel:
2705 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_LEVEL;
2706 break;
2707 case NVPTXISD::TexUnified3DU32FloatGrad:
2708 Opc = NVPTX::TEX_UNIFIED_3D_U32_F32_GRAD;
2709 break;
2710 case NVPTXISD::TexUnifiedCubeFloatFloat:
2711 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32;
2712 break;
2713 case NVPTXISD::TexUnifiedCubeFloatFloatLevel:
2714 Opc = NVPTX::TEX_UNIFIED_CUBE_F32_F32_LEVEL;
2715 break;
2716 case NVPTXISD::TexUnifiedCubeS32Float:
2717 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32;
2718 break;
2719 case NVPTXISD::TexUnifiedCubeS32FloatLevel:
2720 Opc = NVPTX::TEX_UNIFIED_CUBE_S32_F32_LEVEL;
2721 break;
2722 case NVPTXISD::TexUnifiedCubeU32Float:
2723 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32;
2724 break;
2725 case NVPTXISD::TexUnifiedCubeU32FloatLevel:
2726 Opc = NVPTX::TEX_UNIFIED_CUBE_U32_F32_LEVEL;
2727 break;
2728 case NVPTXISD::TexUnifiedCubeArrayFloatFloat:
2729 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32;
2730 break;
2731 case NVPTXISD::TexUnifiedCubeArrayFloatFloatLevel:
2732 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_F32_F32_LEVEL;
2733 break;
2734 case NVPTXISD::TexUnifiedCubeArrayS32Float:
2735 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32;
2736 break;
2737 case NVPTXISD::TexUnifiedCubeArrayS32FloatLevel:
2738 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_S32_F32_LEVEL;
2739 break;
2740 case NVPTXISD::TexUnifiedCubeArrayU32Float:
2741 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32;
2742 break;
2743 case NVPTXISD::TexUnifiedCubeArrayU32FloatLevel:
2744 Opc = NVPTX::TEX_UNIFIED_CUBE_ARRAY_U32_F32_LEVEL;
2745 break;
2746 case NVPTXISD::Tld4UnifiedR2DFloatFloat:
2747 Opc = NVPTX::TLD4_UNIFIED_R_2D_F32_F32;
2748 break;
2749 case NVPTXISD::Tld4UnifiedG2DFloatFloat:
2750 Opc = NVPTX::TLD4_UNIFIED_G_2D_F32_F32;
2751 break;
2752 case NVPTXISD::Tld4UnifiedB2DFloatFloat:
2753 Opc = NVPTX::TLD4_UNIFIED_B_2D_F32_F32;
2754 break;
2755 case NVPTXISD::Tld4UnifiedA2DFloatFloat:
2756 Opc = NVPTX::TLD4_UNIFIED_A_2D_F32_F32;
2757 break;
2758 case NVPTXISD::Tld4UnifiedR2DS64Float:
2759 Opc = NVPTX::TLD4_UNIFIED_R_2D_S32_F32;
2760 break;
2761 case NVPTXISD::Tld4UnifiedG2DS64Float:
2762 Opc = NVPTX::TLD4_UNIFIED_G_2D_S32_F32;
2763 break;
2764 case NVPTXISD::Tld4UnifiedB2DS64Float:
2765 Opc = NVPTX::TLD4_UNIFIED_B_2D_S32_F32;
2766 break;
2767 case NVPTXISD::Tld4UnifiedA2DS64Float:
2768 Opc = NVPTX::TLD4_UNIFIED_A_2D_S32_F32;
2769 break;
2770 case NVPTXISD::Tld4UnifiedR2DU64Float:
2771 Opc = NVPTX::TLD4_UNIFIED_R_2D_U32_F32;
2772 break;
2773 case NVPTXISD::Tld4UnifiedG2DU64Float:
2774 Opc = NVPTX::TLD4_UNIFIED_G_2D_U32_F32;
2775 break;
2776 case NVPTXISD::Tld4UnifiedB2DU64Float:
2777 Opc = NVPTX::TLD4_UNIFIED_B_2D_U32_F32;
2778 break;
2779 case NVPTXISD::Tld4UnifiedA2DU64Float:
2780 Opc = NVPTX::TLD4_UNIFIED_A_2D_U32_F32;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002781 break;
2782 }
2783
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002784 // Copy over operands
Benjamin Kramer806ae442017-08-20 17:30:32 +00002785 SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
2786 Ops.push_back(N->getOperand(0)); // Move chain to the back.
Justin Holewinski30d56a72014-04-09 15:39:15 +00002787
Justin Bogner8d83fb62016-05-13 21:12:53 +00002788 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
2789 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002790}
2791
Justin Bogner8d83fb62016-05-13 21:12:53 +00002792bool NVPTXDAGToDAGISel::trySurfaceIntrinsic(SDNode *N) {
Justin Holewinski30d56a72014-04-09 15:39:15 +00002793 unsigned Opc = 0;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002794 switch (N->getOpcode()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00002795 default: return false;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002796 case NVPTXISD::Suld1DI8Clamp:
2797 Opc = NVPTX::SULD_1D_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002798 break;
2799 case NVPTXISD::Suld1DI16Clamp:
2800 Opc = NVPTX::SULD_1D_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002801 break;
2802 case NVPTXISD::Suld1DI32Clamp:
2803 Opc = NVPTX::SULD_1D_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002804 break;
2805 case NVPTXISD::Suld1DI64Clamp:
2806 Opc = NVPTX::SULD_1D_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002807 break;
2808 case NVPTXISD::Suld1DV2I8Clamp:
2809 Opc = NVPTX::SULD_1D_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002810 break;
2811 case NVPTXISD::Suld1DV2I16Clamp:
2812 Opc = NVPTX::SULD_1D_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002813 break;
2814 case NVPTXISD::Suld1DV2I32Clamp:
2815 Opc = NVPTX::SULD_1D_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002816 break;
2817 case NVPTXISD::Suld1DV2I64Clamp:
2818 Opc = NVPTX::SULD_1D_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002819 break;
2820 case NVPTXISD::Suld1DV4I8Clamp:
2821 Opc = NVPTX::SULD_1D_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002822 break;
2823 case NVPTXISD::Suld1DV4I16Clamp:
2824 Opc = NVPTX::SULD_1D_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002825 break;
2826 case NVPTXISD::Suld1DV4I32Clamp:
2827 Opc = NVPTX::SULD_1D_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002828 break;
2829 case NVPTXISD::Suld1DArrayI8Clamp:
2830 Opc = NVPTX::SULD_1D_ARRAY_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002831 break;
2832 case NVPTXISD::Suld1DArrayI16Clamp:
2833 Opc = NVPTX::SULD_1D_ARRAY_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002834 break;
2835 case NVPTXISD::Suld1DArrayI32Clamp:
2836 Opc = NVPTX::SULD_1D_ARRAY_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002837 break;
2838 case NVPTXISD::Suld1DArrayI64Clamp:
2839 Opc = NVPTX::SULD_1D_ARRAY_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002840 break;
2841 case NVPTXISD::Suld1DArrayV2I8Clamp:
2842 Opc = NVPTX::SULD_1D_ARRAY_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002843 break;
2844 case NVPTXISD::Suld1DArrayV2I16Clamp:
2845 Opc = NVPTX::SULD_1D_ARRAY_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002846 break;
2847 case NVPTXISD::Suld1DArrayV2I32Clamp:
2848 Opc = NVPTX::SULD_1D_ARRAY_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002849 break;
2850 case NVPTXISD::Suld1DArrayV2I64Clamp:
2851 Opc = NVPTX::SULD_1D_ARRAY_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002852 break;
2853 case NVPTXISD::Suld1DArrayV4I8Clamp:
2854 Opc = NVPTX::SULD_1D_ARRAY_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002855 break;
2856 case NVPTXISD::Suld1DArrayV4I16Clamp:
2857 Opc = NVPTX::SULD_1D_ARRAY_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002858 break;
2859 case NVPTXISD::Suld1DArrayV4I32Clamp:
2860 Opc = NVPTX::SULD_1D_ARRAY_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002861 break;
2862 case NVPTXISD::Suld2DI8Clamp:
2863 Opc = NVPTX::SULD_2D_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002864 break;
2865 case NVPTXISD::Suld2DI16Clamp:
2866 Opc = NVPTX::SULD_2D_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002867 break;
2868 case NVPTXISD::Suld2DI32Clamp:
2869 Opc = NVPTX::SULD_2D_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002870 break;
2871 case NVPTXISD::Suld2DI64Clamp:
2872 Opc = NVPTX::SULD_2D_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002873 break;
2874 case NVPTXISD::Suld2DV2I8Clamp:
2875 Opc = NVPTX::SULD_2D_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002876 break;
2877 case NVPTXISD::Suld2DV2I16Clamp:
2878 Opc = NVPTX::SULD_2D_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002879 break;
2880 case NVPTXISD::Suld2DV2I32Clamp:
2881 Opc = NVPTX::SULD_2D_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002882 break;
2883 case NVPTXISD::Suld2DV2I64Clamp:
2884 Opc = NVPTX::SULD_2D_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002885 break;
2886 case NVPTXISD::Suld2DV4I8Clamp:
2887 Opc = NVPTX::SULD_2D_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002888 break;
2889 case NVPTXISD::Suld2DV4I16Clamp:
2890 Opc = NVPTX::SULD_2D_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002891 break;
2892 case NVPTXISD::Suld2DV4I32Clamp:
2893 Opc = NVPTX::SULD_2D_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002894 break;
2895 case NVPTXISD::Suld2DArrayI8Clamp:
2896 Opc = NVPTX::SULD_2D_ARRAY_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002897 break;
2898 case NVPTXISD::Suld2DArrayI16Clamp:
2899 Opc = NVPTX::SULD_2D_ARRAY_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002900 break;
2901 case NVPTXISD::Suld2DArrayI32Clamp:
2902 Opc = NVPTX::SULD_2D_ARRAY_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002903 break;
2904 case NVPTXISD::Suld2DArrayI64Clamp:
2905 Opc = NVPTX::SULD_2D_ARRAY_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002906 break;
2907 case NVPTXISD::Suld2DArrayV2I8Clamp:
2908 Opc = NVPTX::SULD_2D_ARRAY_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002909 break;
2910 case NVPTXISD::Suld2DArrayV2I16Clamp:
2911 Opc = NVPTX::SULD_2D_ARRAY_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002912 break;
2913 case NVPTXISD::Suld2DArrayV2I32Clamp:
2914 Opc = NVPTX::SULD_2D_ARRAY_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002915 break;
2916 case NVPTXISD::Suld2DArrayV2I64Clamp:
2917 Opc = NVPTX::SULD_2D_ARRAY_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002918 break;
2919 case NVPTXISD::Suld2DArrayV4I8Clamp:
2920 Opc = NVPTX::SULD_2D_ARRAY_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002921 break;
2922 case NVPTXISD::Suld2DArrayV4I16Clamp:
2923 Opc = NVPTX::SULD_2D_ARRAY_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002924 break;
2925 case NVPTXISD::Suld2DArrayV4I32Clamp:
2926 Opc = NVPTX::SULD_2D_ARRAY_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002927 break;
2928 case NVPTXISD::Suld3DI8Clamp:
2929 Opc = NVPTX::SULD_3D_I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002930 break;
2931 case NVPTXISD::Suld3DI16Clamp:
2932 Opc = NVPTX::SULD_3D_I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002933 break;
2934 case NVPTXISD::Suld3DI32Clamp:
2935 Opc = NVPTX::SULD_3D_I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002936 break;
2937 case NVPTXISD::Suld3DI64Clamp:
2938 Opc = NVPTX::SULD_3D_I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002939 break;
2940 case NVPTXISD::Suld3DV2I8Clamp:
2941 Opc = NVPTX::SULD_3D_V2I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002942 break;
2943 case NVPTXISD::Suld3DV2I16Clamp:
2944 Opc = NVPTX::SULD_3D_V2I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002945 break;
2946 case NVPTXISD::Suld3DV2I32Clamp:
2947 Opc = NVPTX::SULD_3D_V2I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002948 break;
2949 case NVPTXISD::Suld3DV2I64Clamp:
2950 Opc = NVPTX::SULD_3D_V2I64_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002951 break;
2952 case NVPTXISD::Suld3DV4I8Clamp:
2953 Opc = NVPTX::SULD_3D_V4I8_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002954 break;
2955 case NVPTXISD::Suld3DV4I16Clamp:
2956 Opc = NVPTX::SULD_3D_V4I16_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002957 break;
2958 case NVPTXISD::Suld3DV4I32Clamp:
2959 Opc = NVPTX::SULD_3D_V4I32_CLAMP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002960 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002961 case NVPTXISD::Suld1DI8Trap:
2962 Opc = NVPTX::SULD_1D_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002963 break;
2964 case NVPTXISD::Suld1DI16Trap:
2965 Opc = NVPTX::SULD_1D_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002966 break;
2967 case NVPTXISD::Suld1DI32Trap:
2968 Opc = NVPTX::SULD_1D_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002969 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002970 case NVPTXISD::Suld1DI64Trap:
2971 Opc = NVPTX::SULD_1D_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002972 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002973 case NVPTXISD::Suld1DV2I8Trap:
2974 Opc = NVPTX::SULD_1D_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002975 break;
2976 case NVPTXISD::Suld1DV2I16Trap:
2977 Opc = NVPTX::SULD_1D_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002978 break;
2979 case NVPTXISD::Suld1DV2I32Trap:
2980 Opc = NVPTX::SULD_1D_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002981 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002982 case NVPTXISD::Suld1DV2I64Trap:
2983 Opc = NVPTX::SULD_1D_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00002984 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002985 case NVPTXISD::Suld1DV4I8Trap:
2986 Opc = NVPTX::SULD_1D_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002987 break;
2988 case NVPTXISD::Suld1DV4I16Trap:
2989 Opc = NVPTX::SULD_1D_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002990 break;
2991 case NVPTXISD::Suld1DV4I32Trap:
2992 Opc = NVPTX::SULD_1D_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002993 break;
2994 case NVPTXISD::Suld1DArrayI8Trap:
2995 Opc = NVPTX::SULD_1D_ARRAY_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002996 break;
2997 case NVPTXISD::Suld1DArrayI16Trap:
2998 Opc = NVPTX::SULD_1D_ARRAY_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00002999 break;
3000 case NVPTXISD::Suld1DArrayI32Trap:
3001 Opc = NVPTX::SULD_1D_ARRAY_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003002 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003003 case NVPTXISD::Suld1DArrayI64Trap:
3004 Opc = NVPTX::SULD_1D_ARRAY_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003005 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003006 case NVPTXISD::Suld1DArrayV2I8Trap:
3007 Opc = NVPTX::SULD_1D_ARRAY_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003008 break;
3009 case NVPTXISD::Suld1DArrayV2I16Trap:
3010 Opc = NVPTX::SULD_1D_ARRAY_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003011 break;
3012 case NVPTXISD::Suld1DArrayV2I32Trap:
3013 Opc = NVPTX::SULD_1D_ARRAY_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003014 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003015 case NVPTXISD::Suld1DArrayV2I64Trap:
3016 Opc = NVPTX::SULD_1D_ARRAY_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003017 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003018 case NVPTXISD::Suld1DArrayV4I8Trap:
3019 Opc = NVPTX::SULD_1D_ARRAY_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003020 break;
3021 case NVPTXISD::Suld1DArrayV4I16Trap:
3022 Opc = NVPTX::SULD_1D_ARRAY_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003023 break;
3024 case NVPTXISD::Suld1DArrayV4I32Trap:
3025 Opc = NVPTX::SULD_1D_ARRAY_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003026 break;
3027 case NVPTXISD::Suld2DI8Trap:
3028 Opc = NVPTX::SULD_2D_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003029 break;
3030 case NVPTXISD::Suld2DI16Trap:
3031 Opc = NVPTX::SULD_2D_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003032 break;
3033 case NVPTXISD::Suld2DI32Trap:
3034 Opc = NVPTX::SULD_2D_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003035 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003036 case NVPTXISD::Suld2DI64Trap:
3037 Opc = NVPTX::SULD_2D_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003038 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003039 case NVPTXISD::Suld2DV2I8Trap:
3040 Opc = NVPTX::SULD_2D_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003041 break;
3042 case NVPTXISD::Suld2DV2I16Trap:
3043 Opc = NVPTX::SULD_2D_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003044 break;
3045 case NVPTXISD::Suld2DV2I32Trap:
3046 Opc = NVPTX::SULD_2D_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003047 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003048 case NVPTXISD::Suld2DV2I64Trap:
3049 Opc = NVPTX::SULD_2D_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003050 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003051 case NVPTXISD::Suld2DV4I8Trap:
3052 Opc = NVPTX::SULD_2D_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003053 break;
3054 case NVPTXISD::Suld2DV4I16Trap:
3055 Opc = NVPTX::SULD_2D_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003056 break;
3057 case NVPTXISD::Suld2DV4I32Trap:
3058 Opc = NVPTX::SULD_2D_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003059 break;
3060 case NVPTXISD::Suld2DArrayI8Trap:
3061 Opc = NVPTX::SULD_2D_ARRAY_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003062 break;
3063 case NVPTXISD::Suld2DArrayI16Trap:
3064 Opc = NVPTX::SULD_2D_ARRAY_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003065 break;
3066 case NVPTXISD::Suld2DArrayI32Trap:
3067 Opc = NVPTX::SULD_2D_ARRAY_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003068 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003069 case NVPTXISD::Suld2DArrayI64Trap:
3070 Opc = NVPTX::SULD_2D_ARRAY_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003071 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003072 case NVPTXISD::Suld2DArrayV2I8Trap:
3073 Opc = NVPTX::SULD_2D_ARRAY_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003074 break;
3075 case NVPTXISD::Suld2DArrayV2I16Trap:
3076 Opc = NVPTX::SULD_2D_ARRAY_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003077 break;
3078 case NVPTXISD::Suld2DArrayV2I32Trap:
3079 Opc = NVPTX::SULD_2D_ARRAY_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003080 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003081 case NVPTXISD::Suld2DArrayV2I64Trap:
3082 Opc = NVPTX::SULD_2D_ARRAY_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003083 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003084 case NVPTXISD::Suld2DArrayV4I8Trap:
3085 Opc = NVPTX::SULD_2D_ARRAY_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003086 break;
3087 case NVPTXISD::Suld2DArrayV4I16Trap:
3088 Opc = NVPTX::SULD_2D_ARRAY_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003089 break;
3090 case NVPTXISD::Suld2DArrayV4I32Trap:
3091 Opc = NVPTX::SULD_2D_ARRAY_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003092 break;
3093 case NVPTXISD::Suld3DI8Trap:
3094 Opc = NVPTX::SULD_3D_I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003095 break;
3096 case NVPTXISD::Suld3DI16Trap:
3097 Opc = NVPTX::SULD_3D_I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003098 break;
3099 case NVPTXISD::Suld3DI32Trap:
3100 Opc = NVPTX::SULD_3D_I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003101 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003102 case NVPTXISD::Suld3DI64Trap:
3103 Opc = NVPTX::SULD_3D_I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003104 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003105 case NVPTXISD::Suld3DV2I8Trap:
3106 Opc = NVPTX::SULD_3D_V2I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003107 break;
3108 case NVPTXISD::Suld3DV2I16Trap:
3109 Opc = NVPTX::SULD_3D_V2I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003110 break;
3111 case NVPTXISD::Suld3DV2I32Trap:
3112 Opc = NVPTX::SULD_3D_V2I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003113 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003114 case NVPTXISD::Suld3DV2I64Trap:
3115 Opc = NVPTX::SULD_3D_V2I64_TRAP;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003116 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003117 case NVPTXISD::Suld3DV4I8Trap:
3118 Opc = NVPTX::SULD_3D_V4I8_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003119 break;
3120 case NVPTXISD::Suld3DV4I16Trap:
3121 Opc = NVPTX::SULD_3D_V4I16_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003122 break;
3123 case NVPTXISD::Suld3DV4I32Trap:
3124 Opc = NVPTX::SULD_3D_V4I32_TRAP;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003125 break;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003126 case NVPTXISD::Suld1DI8Zero:
3127 Opc = NVPTX::SULD_1D_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003128 break;
3129 case NVPTXISD::Suld1DI16Zero:
3130 Opc = NVPTX::SULD_1D_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003131 break;
3132 case NVPTXISD::Suld1DI32Zero:
3133 Opc = NVPTX::SULD_1D_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003134 break;
3135 case NVPTXISD::Suld1DI64Zero:
3136 Opc = NVPTX::SULD_1D_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003137 break;
3138 case NVPTXISD::Suld1DV2I8Zero:
3139 Opc = NVPTX::SULD_1D_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003140 break;
3141 case NVPTXISD::Suld1DV2I16Zero:
3142 Opc = NVPTX::SULD_1D_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003143 break;
3144 case NVPTXISD::Suld1DV2I32Zero:
3145 Opc = NVPTX::SULD_1D_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003146 break;
3147 case NVPTXISD::Suld1DV2I64Zero:
3148 Opc = NVPTX::SULD_1D_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003149 break;
3150 case NVPTXISD::Suld1DV4I8Zero:
3151 Opc = NVPTX::SULD_1D_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003152 break;
3153 case NVPTXISD::Suld1DV4I16Zero:
3154 Opc = NVPTX::SULD_1D_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003155 break;
3156 case NVPTXISD::Suld1DV4I32Zero:
3157 Opc = NVPTX::SULD_1D_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003158 break;
3159 case NVPTXISD::Suld1DArrayI8Zero:
3160 Opc = NVPTX::SULD_1D_ARRAY_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003161 break;
3162 case NVPTXISD::Suld1DArrayI16Zero:
3163 Opc = NVPTX::SULD_1D_ARRAY_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003164 break;
3165 case NVPTXISD::Suld1DArrayI32Zero:
3166 Opc = NVPTX::SULD_1D_ARRAY_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003167 break;
3168 case NVPTXISD::Suld1DArrayI64Zero:
3169 Opc = NVPTX::SULD_1D_ARRAY_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003170 break;
3171 case NVPTXISD::Suld1DArrayV2I8Zero:
3172 Opc = NVPTX::SULD_1D_ARRAY_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003173 break;
3174 case NVPTXISD::Suld1DArrayV2I16Zero:
3175 Opc = NVPTX::SULD_1D_ARRAY_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003176 break;
3177 case NVPTXISD::Suld1DArrayV2I32Zero:
3178 Opc = NVPTX::SULD_1D_ARRAY_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003179 break;
3180 case NVPTXISD::Suld1DArrayV2I64Zero:
3181 Opc = NVPTX::SULD_1D_ARRAY_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003182 break;
3183 case NVPTXISD::Suld1DArrayV4I8Zero:
3184 Opc = NVPTX::SULD_1D_ARRAY_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003185 break;
3186 case NVPTXISD::Suld1DArrayV4I16Zero:
3187 Opc = NVPTX::SULD_1D_ARRAY_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003188 break;
3189 case NVPTXISD::Suld1DArrayV4I32Zero:
3190 Opc = NVPTX::SULD_1D_ARRAY_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003191 break;
3192 case NVPTXISD::Suld2DI8Zero:
3193 Opc = NVPTX::SULD_2D_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003194 break;
3195 case NVPTXISD::Suld2DI16Zero:
3196 Opc = NVPTX::SULD_2D_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003197 break;
3198 case NVPTXISD::Suld2DI32Zero:
3199 Opc = NVPTX::SULD_2D_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003200 break;
3201 case NVPTXISD::Suld2DI64Zero:
3202 Opc = NVPTX::SULD_2D_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003203 break;
3204 case NVPTXISD::Suld2DV2I8Zero:
3205 Opc = NVPTX::SULD_2D_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003206 break;
3207 case NVPTXISD::Suld2DV2I16Zero:
3208 Opc = NVPTX::SULD_2D_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003209 break;
3210 case NVPTXISD::Suld2DV2I32Zero:
3211 Opc = NVPTX::SULD_2D_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003212 break;
3213 case NVPTXISD::Suld2DV2I64Zero:
3214 Opc = NVPTX::SULD_2D_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003215 break;
3216 case NVPTXISD::Suld2DV4I8Zero:
3217 Opc = NVPTX::SULD_2D_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003218 break;
3219 case NVPTXISD::Suld2DV4I16Zero:
3220 Opc = NVPTX::SULD_2D_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003221 break;
3222 case NVPTXISD::Suld2DV4I32Zero:
3223 Opc = NVPTX::SULD_2D_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003224 break;
3225 case NVPTXISD::Suld2DArrayI8Zero:
3226 Opc = NVPTX::SULD_2D_ARRAY_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003227 break;
3228 case NVPTXISD::Suld2DArrayI16Zero:
3229 Opc = NVPTX::SULD_2D_ARRAY_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003230 break;
3231 case NVPTXISD::Suld2DArrayI32Zero:
3232 Opc = NVPTX::SULD_2D_ARRAY_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003233 break;
3234 case NVPTXISD::Suld2DArrayI64Zero:
3235 Opc = NVPTX::SULD_2D_ARRAY_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003236 break;
3237 case NVPTXISD::Suld2DArrayV2I8Zero:
3238 Opc = NVPTX::SULD_2D_ARRAY_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003239 break;
3240 case NVPTXISD::Suld2DArrayV2I16Zero:
3241 Opc = NVPTX::SULD_2D_ARRAY_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003242 break;
3243 case NVPTXISD::Suld2DArrayV2I32Zero:
3244 Opc = NVPTX::SULD_2D_ARRAY_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003245 break;
3246 case NVPTXISD::Suld2DArrayV2I64Zero:
3247 Opc = NVPTX::SULD_2D_ARRAY_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003248 break;
3249 case NVPTXISD::Suld2DArrayV4I8Zero:
3250 Opc = NVPTX::SULD_2D_ARRAY_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003251 break;
3252 case NVPTXISD::Suld2DArrayV4I16Zero:
3253 Opc = NVPTX::SULD_2D_ARRAY_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003254 break;
3255 case NVPTXISD::Suld2DArrayV4I32Zero:
3256 Opc = NVPTX::SULD_2D_ARRAY_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003257 break;
3258 case NVPTXISD::Suld3DI8Zero:
3259 Opc = NVPTX::SULD_3D_I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003260 break;
3261 case NVPTXISD::Suld3DI16Zero:
3262 Opc = NVPTX::SULD_3D_I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003263 break;
3264 case NVPTXISD::Suld3DI32Zero:
3265 Opc = NVPTX::SULD_3D_I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003266 break;
3267 case NVPTXISD::Suld3DI64Zero:
3268 Opc = NVPTX::SULD_3D_I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003269 break;
3270 case NVPTXISD::Suld3DV2I8Zero:
3271 Opc = NVPTX::SULD_3D_V2I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003272 break;
3273 case NVPTXISD::Suld3DV2I16Zero:
3274 Opc = NVPTX::SULD_3D_V2I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003275 break;
3276 case NVPTXISD::Suld3DV2I32Zero:
3277 Opc = NVPTX::SULD_3D_V2I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003278 break;
3279 case NVPTXISD::Suld3DV2I64Zero:
3280 Opc = NVPTX::SULD_3D_V2I64_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003281 break;
3282 case NVPTXISD::Suld3DV4I8Zero:
3283 Opc = NVPTX::SULD_3D_V4I8_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003284 break;
3285 case NVPTXISD::Suld3DV4I16Zero:
3286 Opc = NVPTX::SULD_3D_V4I16_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003287 break;
3288 case NVPTXISD::Suld3DV4I32Zero:
3289 Opc = NVPTX::SULD_3D_V4I32_ZERO;
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003290 break;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003291 }
Benjamin Kramer806ae442017-08-20 17:30:32 +00003292
3293 // Copy over operands
3294 SmallVector<SDValue, 8> Ops(N->op_begin() + 1, N->op_end());
3295 Ops.push_back(N->getOperand(0)); // Move chain to the back.
3296
Justin Bogner8d83fb62016-05-13 21:12:53 +00003297 ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops));
3298 return true;
Justin Holewinski30d56a72014-04-09 15:39:15 +00003299}
3300
Justin Holewinski9a2350e2014-07-17 11:59:04 +00003301
/// tryBFE - Look for instruction sequences that can be made more efficient
/// by using the 'bfe' (bit-field extract) PTX instruction.
Justin Bogner8d83fb62016-05-13 21:12:53 +00003304bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003305 SDLoc DL(N);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003306 SDValue LHS = N->getOperand(0);
3307 SDValue RHS = N->getOperand(1);
3308 SDValue Len;
3309 SDValue Start;
3310 SDValue Val;
3311 bool IsSigned = false;
3312
3313 if (N->getOpcode() == ISD::AND) {
3314 // Canonicalize the operands
3315 // We want 'and %val, %mask'
3316 if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) {
3317 std::swap(LHS, RHS);
3318 }
3319
3320 ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS);
3321 if (!Mask) {
3322 // We need a constant mask on the RHS of the AND
Justin Bogner8d83fb62016-05-13 21:12:53 +00003323 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003324 }
3325
3326 // Extract the mask bits
3327 uint64_t MaskVal = Mask->getZExtValue();
3328 if (!isMask_64(MaskVal)) {
3329 // We *could* handle shifted masks here, but doing so would require an
3330 // 'and' operation to fix up the low-order bits so we would trade
3331 // shr+and for bfe+and, which has the same throughput
Justin Bogner8d83fb62016-05-13 21:12:53 +00003332 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003333 }
3334
3335 // How many bits are in our mask?
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00003336 uint64_t NumBits = countTrailingOnes(MaskVal);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003337 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003338
3339 if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) {
3340 // We have a 'srl/and' pair, extract the effective start bit and length
3341 Val = LHS.getNode()->getOperand(0);
3342 Start = LHS.getNode()->getOperand(1);
3343 ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start);
3344 if (StartConst) {
3345 uint64_t StartVal = StartConst->getZExtValue();
3346 // How many "good" bits do we have left? "good" is defined here as bits
3347 // that exist in the original value, not shifted in.
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00003348 uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003349 if (NumBits > GoodBits) {
3350 // Do not handle the case where bits have been shifted in. In theory
3351 // we could handle this, but the cost is likely higher than just
3352 // emitting the srl/and pair.
Justin Bogner8d83fb62016-05-13 21:12:53 +00003353 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003354 }
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003355 Start = CurDAG->getTargetConstant(StartVal, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003356 } else {
3357 // Do not handle the case where the shift amount (can be zero if no srl
3358 // was found) is not constant. We could handle this case, but it would
3359 // require run-time logic that would be more expensive than just
3360 // emitting the srl/and pair.
Justin Bogner8d83fb62016-05-13 21:12:53 +00003361 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003362 }
3363 } else {
3364 // Do not handle the case where the LHS of the and is not a shift. While
3365 // it would be trivial to handle this case, it would just transform
3366 // 'and' -> 'bfe', but 'and' has higher-throughput.
Justin Bogner8d83fb62016-05-13 21:12:53 +00003367 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003368 }
3369 } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) {
3370 if (LHS->getOpcode() == ISD::AND) {
3371 ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS);
3372 if (!ShiftCnst) {
3373 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003374 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003375 }
3376
3377 uint64_t ShiftAmt = ShiftCnst->getZExtValue();
3378
3379 SDValue AndLHS = LHS->getOperand(0);
3380 SDValue AndRHS = LHS->getOperand(1);
3381
3382 // Canonicalize the AND to have the mask on the RHS
3383 if (isa<ConstantSDNode>(AndLHS)) {
3384 std::swap(AndLHS, AndRHS);
3385 }
3386
3387 ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS);
3388 if (!MaskCnst) {
3389 // Mask must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003390 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003391 }
3392
3393 uint64_t MaskVal = MaskCnst->getZExtValue();
3394 uint64_t NumZeros;
3395 uint64_t NumBits;
3396 if (isMask_64(MaskVal)) {
3397 NumZeros = 0;
3398 // The number of bits in the result bitfield will be the number of
3399 // trailing ones (the AND) minus the number of bits we shift off
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00003400 NumBits = countTrailingOnes(MaskVal) - ShiftAmt;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003401 } else if (isShiftedMask_64(MaskVal)) {
3402 NumZeros = countTrailingZeros(MaskVal);
Benjamin Kramer5f6a9072015-02-12 15:35:40 +00003403 unsigned NumOnes = countTrailingOnes(MaskVal >> NumZeros);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003404 // The number of bits in the result bitfield will be the number of
3405 // trailing zeros plus the number of set bits in the mask minus the
3406 // number of bits we shift off
3407 NumBits = NumZeros + NumOnes - ShiftAmt;
3408 } else {
3409 // This is not a mask we can handle
Justin Bogner8d83fb62016-05-13 21:12:53 +00003410 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003411 }
3412
3413 if (ShiftAmt < NumZeros) {
3414 // Handling this case would require extra logic that would make this
3415 // transformation non-profitable
Justin Bogner8d83fb62016-05-13 21:12:53 +00003416 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003417 }
3418
3419 Val = AndLHS;
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003420 Start = CurDAG->getTargetConstant(ShiftAmt, DL, MVT::i32);
3421 Len = CurDAG->getTargetConstant(NumBits, DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003422 } else if (LHS->getOpcode() == ISD::SHL) {
3423 // Here, we have a pattern like:
3424 //
3425 // (sra (shl val, NN), MM)
3426 // or
3427 // (srl (shl val, NN), MM)
3428 //
3429 // If MM >= NN, we can efficiently optimize this with bfe
3430 Val = LHS->getOperand(0);
3431
3432 SDValue ShlRHS = LHS->getOperand(1);
3433 ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS);
3434 if (!ShlCnst) {
3435 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003436 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003437 }
3438 uint64_t InnerShiftAmt = ShlCnst->getZExtValue();
3439
3440 SDValue ShrRHS = RHS;
3441 ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS);
3442 if (!ShrCnst) {
3443 // Shift amount must be constant
Justin Bogner8d83fb62016-05-13 21:12:53 +00003444 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003445 }
3446 uint64_t OuterShiftAmt = ShrCnst->getZExtValue();
3447
3448 // To avoid extra codegen and be profitable, we need Outer >= Inner
3449 if (OuterShiftAmt < InnerShiftAmt) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00003450 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003451 }
3452
3453 // If the outer shift is more than the type size, we have no bitfield to
3454 // extract (since we also check that the inner shift is <= the outer shift
3455 // then this also implies that the inner shift is < the type size)
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00003456 if (OuterShiftAmt >= Val.getValueSizeInBits()) {
Justin Bogner8d83fb62016-05-13 21:12:53 +00003457 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003458 }
3459
Sanjay Patelb1f0a0f2016-09-14 16:05:51 +00003460 Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
3461 MVT::i32);
3462 Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
3463 DL, MVT::i32);
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003464
3465 if (N->getOpcode() == ISD::SRA) {
3466 // If we have a arithmetic right shift, we need to use the signed bfe
3467 // variant
3468 IsSigned = true;
3469 }
3470 } else {
3471 // No can do...
Justin Bogner8d83fb62016-05-13 21:12:53 +00003472 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003473 }
3474 } else {
3475 // No can do...
Justin Bogner8d83fb62016-05-13 21:12:53 +00003476 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003477 }
3478
3479
3480 unsigned Opc;
3481 // For the BFE operations we form here from "and" and "srl", always use the
3482 // unsigned variants.
3483 if (Val.getValueType() == MVT::i32) {
3484 if (IsSigned) {
3485 Opc = NVPTX::BFE_S32rii;
3486 } else {
3487 Opc = NVPTX::BFE_U32rii;
3488 }
3489 } else if (Val.getValueType() == MVT::i64) {
3490 if (IsSigned) {
3491 Opc = NVPTX::BFE_S64rii;
3492 } else {
3493 Opc = NVPTX::BFE_U64rii;
3494 }
3495 } else {
3496 // We cannot handle this type
Justin Bogner8d83fb62016-05-13 21:12:53 +00003497 return false;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003498 }
3499
3500 SDValue Ops[] = {
3501 Val, Start, Len
3502 };
3503
Justin Bogner8d83fb62016-05-13 21:12:53 +00003504 ReplaceNode(N, CurDAG->getMachineNode(Opc, DL, N->getVTList(), Ops));
3505 return true;
Justin Holewinskica7a4f12014-06-27 18:35:27 +00003506}
3507
Justin Holewinskiae556d32012-05-04 20:18:50 +00003508// SelectDirectAddr - Match a direct address for DAG.
3509// A direct address could be a globaladdress or externalsymbol.
3510bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
3511 // Return true if TGA or ES.
Justin Holewinski0497ab12013-03-30 14:29:21 +00003512 if (N.getOpcode() == ISD::TargetGlobalAddress ||
3513 N.getOpcode() == ISD::TargetExternalSymbol) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003514 Address = N;
3515 return true;
3516 }
3517 if (N.getOpcode() == NVPTXISD::Wrapper) {
3518 Address = N.getOperand(0);
3519 return true;
3520 }
Artem Belevichb2e76a52016-07-20 18:39:47 +00003521 // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
3522 if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
3523 if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
3524 CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
3525 CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
3526 return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003527 }
3528 return false;
3529}
3530
3531// symbol+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00003532bool NVPTXDAGToDAGISel::SelectADDRsi_imp(
3533 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003534 if (Addr.getOpcode() == ISD::ADD) {
3535 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00003536 SDValue base = Addr.getOperand(0);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003537 if (SelectDirectAddr(base, Base)) {
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003538 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3539 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003540 return true;
3541 }
3542 }
3543 }
3544 return false;
3545}
3546
3547// symbol+offset
3548bool NVPTXDAGToDAGISel::SelectADDRsi(SDNode *OpNode, SDValue Addr,
3549 SDValue &Base, SDValue &Offset) {
3550 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i32);
3551}
3552
3553// symbol+offset
3554bool NVPTXDAGToDAGISel::SelectADDRsi64(SDNode *OpNode, SDValue Addr,
3555 SDValue &Base, SDValue &Offset) {
3556 return SelectADDRsi_imp(OpNode, Addr, Base, Offset, MVT::i64);
3557}
3558
3559// register+offset
Justin Holewinski0497ab12013-03-30 14:29:21 +00003560bool NVPTXDAGToDAGISel::SelectADDRri_imp(
3561 SDNode *OpNode, SDValue Addr, SDValue &Base, SDValue &Offset, MVT mvt) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003562 if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
3563 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003564 Offset = CurDAG->getTargetConstant(0, SDLoc(OpNode), mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003565 return true;
3566 }
3567 if (Addr.getOpcode() == ISD::TargetExternalSymbol ||
3568 Addr.getOpcode() == ISD::TargetGlobalAddress)
Justin Holewinski0497ab12013-03-30 14:29:21 +00003569 return false; // direct calls.
Justin Holewinskiae556d32012-05-04 20:18:50 +00003570
3571 if (Addr.getOpcode() == ISD::ADD) {
3572 if (SelectDirectAddr(Addr.getOperand(0), Addr)) {
3573 return false;
3574 }
3575 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
3576 if (FrameIndexSDNode *FIN =
Justin Holewinski0497ab12013-03-30 14:29:21 +00003577 dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
Justin Holewinskiae556d32012-05-04 20:18:50 +00003578 // Constant offset from frame ref.
3579 Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), mvt);
3580 else
3581 Base = Addr.getOperand(0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003582 Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(OpNode),
3583 mvt);
Justin Holewinskiae556d32012-05-04 20:18:50 +00003584 return true;
3585 }
3586 }
3587 return false;
3588}
3589
3590// register+offset
3591bool NVPTXDAGToDAGISel::SelectADDRri(SDNode *OpNode, SDValue Addr,
3592 SDValue &Base, SDValue &Offset) {
3593 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i32);
3594}
3595
3596// register+offset
3597bool NVPTXDAGToDAGISel::SelectADDRri64(SDNode *OpNode, SDValue Addr,
3598 SDValue &Base, SDValue &Offset) {
3599 return SelectADDRri_imp(OpNode, Addr, Base, Offset, MVT::i64);
3600}
3601
3602bool NVPTXDAGToDAGISel::ChkMemSDNodeAddressSpace(SDNode *N,
3603 unsigned int spN) const {
Craig Topper062a2ba2014-04-25 05:30:21 +00003604 const Value *Src = nullptr;
Justin Holewinskiae556d32012-05-04 20:18:50 +00003605 if (MemSDNode *mN = dyn_cast<MemSDNode>(N)) {
Nick Lewyckyaad475b2014-04-15 07:22:52 +00003606 if (spN == 0 && mN->getMemOperand()->getPseudoValue())
3607 return true;
3608 Src = mN->getMemOperand()->getValue();
Justin Holewinskiae556d32012-05-04 20:18:50 +00003609 }
3610 if (!Src)
3611 return false;
Craig Toppere3dcce92015-08-01 22:20:21 +00003612 if (auto *PT = dyn_cast<PointerType>(Src->getType()))
Justin Holewinskiae556d32012-05-04 20:18:50 +00003613 return (PT->getAddressSpace() == spN);
3614 return false;
3615}
3616
3617/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
3618/// inline asm expressions.
Justin Holewinski0497ab12013-03-30 14:29:21 +00003619bool NVPTXDAGToDAGISel::SelectInlineAsmMemoryOperand(
Daniel Sanders60f1db02015-03-13 12:45:09 +00003620 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
Justin Holewinskiae556d32012-05-04 20:18:50 +00003621 SDValue Op0, Op1;
Daniel Sanders60f1db02015-03-13 12:45:09 +00003622 switch (ConstraintID) {
Justin Holewinski0497ab12013-03-30 14:29:21 +00003623 default:
3624 return true;
Daniel Sanders60f1db02015-03-13 12:45:09 +00003625 case InlineAsm::Constraint_m: // memory
Justin Holewinskiae556d32012-05-04 20:18:50 +00003626 if (SelectDirectAddr(Op, Op0)) {
3627 OutOps.push_back(Op0);
Sergey Dmitrouk842a51b2015-04-28 14:05:47 +00003628 OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
Justin Holewinskiae556d32012-05-04 20:18:50 +00003629 return false;
3630 }
3631 if (SelectADDRri(Op.getNode(), Op, Op0, Op1)) {
3632 OutOps.push_back(Op0);
3633 OutOps.push_back(Op1);
3634 return false;
3635 }
3636 break;
3637 }
3638 return true;
3639}
Justin Holewinski9a6ea2c2016-05-02 18:12:02 +00003640
3641/// GetConvertOpcode - Returns the CVT_ instruction opcode that implements a
3642/// conversion from \p SrcTy to \p DestTy.
3643unsigned NVPTXDAGToDAGISel::GetConvertOpcode(MVT DestTy, MVT SrcTy,
3644 bool IsSigned) {
3645 switch (SrcTy.SimpleTy) {
3646 default:
3647 llvm_unreachable("Unhandled source type");
3648 case MVT::i8:
3649 switch (DestTy.SimpleTy) {
3650 default:
3651 llvm_unreachable("Unhandled dest type");
3652 case MVT::i16:
3653 return IsSigned ? NVPTX::CVT_s16_s8 : NVPTX::CVT_u16_u8;
3654 case MVT::i32:
3655 return IsSigned ? NVPTX::CVT_s32_s8 : NVPTX::CVT_u32_u8;
3656 case MVT::i64:
3657 return IsSigned ? NVPTX::CVT_s64_s8 : NVPTX::CVT_u64_u8;
3658 }
3659 case MVT::i16:
3660 switch (DestTy.SimpleTy) {
3661 default:
3662 llvm_unreachable("Unhandled dest type");
3663 case MVT::i8:
3664 return IsSigned ? NVPTX::CVT_s8_s16 : NVPTX::CVT_u8_u16;
3665 case MVT::i32:
3666 return IsSigned ? NVPTX::CVT_s32_s16 : NVPTX::CVT_u32_u16;
3667 case MVT::i64:
3668 return IsSigned ? NVPTX::CVT_s64_s16 : NVPTX::CVT_u64_u16;
3669 }
3670 case MVT::i32:
3671 switch (DestTy.SimpleTy) {
3672 default:
3673 llvm_unreachable("Unhandled dest type");
3674 case MVT::i8:
3675 return IsSigned ? NVPTX::CVT_s8_s32 : NVPTX::CVT_u8_u32;
3676 case MVT::i16:
3677 return IsSigned ? NVPTX::CVT_s16_s32 : NVPTX::CVT_u16_u32;
3678 case MVT::i64:
3679 return IsSigned ? NVPTX::CVT_s64_s32 : NVPTX::CVT_u64_u32;
3680 }
3681 case MVT::i64:
3682 switch (DestTy.SimpleTy) {
3683 default:
3684 llvm_unreachable("Unhandled dest type");
3685 case MVT::i8:
3686 return IsSigned ? NVPTX::CVT_s8_s64 : NVPTX::CVT_u8_u64;
3687 case MVT::i16:
3688 return IsSigned ? NVPTX::CVT_s16_s64 : NVPTX::CVT_u16_u64;
3689 case MVT::i32:
3690 return IsSigned ? NVPTX::CVT_s32_s64 : NVPTX::CVT_u32_u64;
3691 }
3692 }
3693}